1
# DP: Changes for the Linaro 5-2015.09 release.
3
LANG=C git diff 2006973fa839ccbe189a1e7408400dc96ed880b4..ac19ac6481a3f326d9f41403f5dadab548b2c8a6 \
4
| egrep -v '^(diff|index) ' \
5
| filterdiff --strip=1 --addoldprefix=a/src/ --addnewprefix=b/src/
7
--- a/src/fixincludes/mkfixinc.sh
8
+++ b/src/fixincludes/mkfixinc.sh
9
@@ -19,7 +19,8 @@ case $machine in
12
powerpcle-*-eabisim* | \
14
+ powerpcle-*-eabi* | \
16
# IF there is no include fixing,
17
# THEN create a no-op fixer and exit
18
(echo "#! /bin/sh" ; echo "exit 0" ) > ${target}
20
+++ b/src/gcc/LINARO-VERSION
23
--- a/src/gcc/Makefile.in
24
+++ b/src/gcc/Makefile.in
25
@@ -527,10 +527,6 @@ xm_include_list=@xm_include_list@
26
xm_defines=@xm_defines@
28
lang_checks_parallelized=
29
-# Upper limit to which it is useful to parallelize this lang target.
30
-# It doesn't make sense to try e.g. 128 goals for small testsuites
32
-check_gcc_parallelize=10000
33
lang_opt_files=@lang_opt_files@ $(srcdir)/c-family/c.opt $(srcdir)/common.opt
34
lang_specs_files=@lang_specs_files@
35
lang_tree_files=@lang_tree_files@
36
@@ -814,10 +810,12 @@ BASEVER := $(srcdir)/BASE-VER # 4.x.y
37
DEVPHASE := $(srcdir)/DEV-PHASE # experimental, prerelease, ""
38
DATESTAMP := $(srcdir)/DATESTAMP # YYYYMMDD or empty
39
REVISION := $(srcdir)/REVISION # [BRANCH revision XXXXXX]
40
+LINAROVER := $(srcdir)/LINARO-VERSION # M.x-YYYY.MM[-S][~dev]
42
BASEVER_c := $(shell cat $(BASEVER))
43
DEVPHASE_c := $(shell cat $(DEVPHASE))
44
DATESTAMP_c := $(shell cat $(DATESTAMP))
45
+LINAROVER_c := $(shell cat $(LINAROVER))
47
ifeq (,$(wildcard $(REVISION)))
49
@@ -844,6 +842,7 @@ DATESTAMP_s := \
50
"\"$(if $(DEVPHASE_c)$(filter-out 0,$(PATCHLEVEL_c)), $(DATESTAMP_c))\""
51
PKGVERSION_s:= "\"@PKGVERSION@\""
52
BUGURL_s := "\"@REPORT_BUGS_TO@\""
53
+LINAROVER_s := "\"$(LINAROVER_c)\""
55
PKGVERSION := @PKGVERSION@
56
BUGURL_TEXI := @REPORT_BUGS_TEXI@
57
@@ -2623,8 +2622,9 @@ PREPROCESSOR_DEFINES = \
58
-DSTANDARD_EXEC_PREFIX=\"$(libdir)/gcc/\" \
59
@TARGET_SYSTEM_ROOT_DEFINE@
61
-CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s)
62
-cppbuiltin.o: $(BASEVER)
63
+CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s) \
64
+ -DLINAROVER=$(LINAROVER_s)
65
+cppbuiltin.o: $(BASEVER) $(LINAROVER)
67
CFLAGS-cppdefault.o += $(PREPROCESSOR_DEFINES)
69
@@ -3736,7 +3736,9 @@ check_p_subdirs=$(wordlist 1,$(check_p_count),$(wordlist 1, \
71
# To parallelize some language check, add the corresponding check-$lang
72
# to lang_checks_parallelized variable and define check_$lang_parallelize
73
-# variable (see above check_gcc_parallelize description).
74
+# variable. This is the upper limit to which it is useful to parallelize the
75
+# check-$lang target. It doesn't make sense to try e.g. 128 goals for small
76
+# testsuites like objc or go.
77
$(lang_checks_parallelized): check-% : site.exp
78
-rm -rf $(TESTSUITEDIR)/$*-parallel
79
@if [ "$(filter -j, $(MFLAGS))" = "-j" ]; then \
80
--- a/src/gcc/ada/gcc-interface/Make-lang.in
81
+++ b/src/gcc/ada/gcc-interface/Make-lang.in
82
@@ -811,6 +811,7 @@ ada.mostlyclean:
83
-$(RM) ada/*$(coverageexts)
84
-$(RM) ada/sdefault.adb ada/stamp-sdefault ada/stamp-snames
86
+ -$(RM) gnatbind$(exeext) gnat1$(exeext)
90
--- a/src/gcc/c/Make-lang.in
91
+++ b/src/gcc/c/Make-lang.in
92
@@ -95,6 +95,8 @@ c.srcman:
93
# List of targets that can use the generic check- rule and its // variant.
94
lang_checks += check-gcc
95
lang_checks_parallelized += check-gcc
96
+# For description see the check_$lang_parallelize comment in gcc/Makefile.in.
97
+check_gcc_parallelize=10000
99
# 'make check' in gcc/ looks for check-c. Redirect it to check-gcc.
101
--- a/src/gcc/combine.c
102
+++ b/src/gcc/combine.c
103
@@ -1650,6 +1650,73 @@ setup_incoming_promotions (rtx_insn *first)
107
+#ifdef SHORT_IMMEDIATES_SIGN_EXTEND
108
+/* If MODE has a precision lower than PREC and SRC is a non-negative constant
109
+ that would appear negative in MODE, sign-extend SRC for use in nonzero_bits
110
+ because some machines (maybe most) will actually do the sign-extension and
111
+ this is the conservative approach.
113
+ ??? For 2.5, try to tighten up the MD files in this regard instead of this
117
+sign_extend_short_imm (rtx src, machine_mode mode, unsigned int prec)
119
+ if (GET_MODE_PRECISION (mode) < prec
120
+ && CONST_INT_P (src)
121
+ && INTVAL (src) > 0
122
+ && val_signbit_known_set_p (mode, INTVAL (src)))
123
+ src = GEN_INT (INTVAL (src) | ~GET_MODE_MASK (mode));
129
+/* Update RSP for pseudo-register X from INSN's REG_EQUAL note (if one exists)
133
+update_rsp_from_reg_equal (reg_stat_type *rsp, rtx_insn *insn, const_rtx set,
136
+ rtx reg_equal_note = insn ? find_reg_equal_equiv_note (insn) : NULL_RTX;
137
+ unsigned HOST_WIDE_INT bits = 0;
138
+ rtx reg_equal = NULL, src = SET_SRC (set);
139
+ unsigned int num = 0;
141
+ if (reg_equal_note)
142
+ reg_equal = XEXP (reg_equal_note, 0);
144
+#ifdef SHORT_IMMEDIATES_SIGN_EXTEND
145
+ src = sign_extend_short_imm (src, GET_MODE (x), BITS_PER_WORD);
147
+ reg_equal = sign_extend_short_imm (reg_equal, GET_MODE (x), BITS_PER_WORD);
150
+ /* Don't call nonzero_bits if it cannot change anything. */
151
+ if (rsp->nonzero_bits != ~(unsigned HOST_WIDE_INT) 0)
153
+ bits = nonzero_bits (src, nonzero_bits_mode);
154
+ if (reg_equal && bits)
155
+ bits &= nonzero_bits (reg_equal, nonzero_bits_mode);
156
+ rsp->nonzero_bits |= bits;
159
+ /* Don't call num_sign_bit_copies if it cannot change anything. */
160
+ if (rsp->sign_bit_copies != 1)
162
+ num = num_sign_bit_copies (SET_SRC (set), GET_MODE (x));
163
+ if (reg_equal && num != GET_MODE_PRECISION (GET_MODE (x)))
165
+ unsigned int numeq = num_sign_bit_copies (reg_equal, GET_MODE (x));
166
+ if (num == 0 || numeq > num)
169
+ if (rsp->sign_bit_copies == 0 || num < rsp->sign_bit_copies)
170
+ rsp->sign_bit_copies = num;
174
/* Called via note_stores. If X is a pseudo that is narrower than
175
HOST_BITS_PER_WIDE_INT and is being set, record what bits are known zero.
177
@@ -1665,7 +1732,6 @@ static void
178
set_nonzero_bits_and_sign_copies (rtx x, const_rtx set, void *data)
180
rtx_insn *insn = (rtx_insn *) data;
184
&& REGNO (x) >= FIRST_PSEUDO_REGISTER
185
@@ -1725,34 +1791,7 @@ set_nonzero_bits_and_sign_copies (rtx x, const_rtx set, void *data)
186
if (SET_DEST (set) == x
187
|| (paradoxical_subreg_p (SET_DEST (set))
188
&& SUBREG_REG (SET_DEST (set)) == x))
190
- rtx src = SET_SRC (set);
192
-#ifdef SHORT_IMMEDIATES_SIGN_EXTEND
193
- /* If X is narrower than a word and SRC is a non-negative
194
- constant that would appear negative in the mode of X,
195
- sign-extend it for use in reg_stat[].nonzero_bits because some
196
- machines (maybe most) will actually do the sign-extension
197
- and this is the conservative approach.
199
- ??? For 2.5, try to tighten up the MD files in this regard
200
- instead of this kludge. */
202
- if (GET_MODE_PRECISION (GET_MODE (x)) < BITS_PER_WORD
203
- && CONST_INT_P (src)
204
- && INTVAL (src) > 0
205
- && val_signbit_known_set_p (GET_MODE (x), INTVAL (src)))
206
- src = GEN_INT (INTVAL (src) | ~GET_MODE_MASK (GET_MODE (x)));
209
- /* Don't call nonzero_bits if it cannot change anything. */
210
- if (rsp->nonzero_bits != ~(unsigned HOST_WIDE_INT) 0)
211
- rsp->nonzero_bits |= nonzero_bits (src, nonzero_bits_mode);
212
- num = num_sign_bit_copies (SET_SRC (set), GET_MODE (x));
213
- if (rsp->sign_bit_copies == 0
214
- || rsp->sign_bit_copies > num)
215
- rsp->sign_bit_copies = num;
217
+ update_rsp_from_reg_equal (rsp, insn, set, x);
220
rsp->nonzero_bits = GET_MODE_MASK (GET_MODE (x));
221
@@ -1914,6 +1953,15 @@ can_combine_p (rtx_insn *insn, rtx_insn *i3, rtx_insn *pred ATTRIBUTE_UNUSED,
222
set = expand_field_assignment (set);
223
src = SET_SRC (set), dest = SET_DEST (set);
225
+ /* Do not eliminate user-specified register if it is in an
226
+ asm input because we may break the register asm usage defined
227
+ in GCC manual if allow to do so.
228
+ Be aware that this may cover more cases than we expect but this
229
+ should be harmless. */
230
+ if (REG_P (dest) && REG_USERVAR_P (dest) && HARD_REGISTER_P (dest)
231
+ && extract_asm_operands (PATTERN (i3)))
234
/* Don't eliminate a store in the stack pointer. */
235
if (dest == stack_pointer_rtx
236
/* Don't combine with an insn that sets a register to itself if it has
237
@@ -7723,9 +7771,8 @@ extract_left_shift (rtx x, int count)
238
We try, as much as possible, to re-use rtl expressions to save memory.
240
IN_CODE says what kind of expression we are processing. Normally, it is
241
- SET. In a memory address (inside a MEM, PLUS or minus, the latter two
242
- being kludges), it is MEM. When processing the arguments of a comparison
243
- or a COMPARE against zero, it is COMPARE. */
244
+ SET. In a memory address it is MEM. When processing the arguments of
245
+ a comparison or a COMPARE against zero, it is COMPARE. */
248
make_compound_operation (rtx x, enum rtx_code in_code)
249
@@ -7745,8 +7792,6 @@ make_compound_operation (rtx x, enum rtx_code in_code)
250
but once inside, go back to our default of SET. */
252
next_code = (code == MEM ? MEM
253
- : ((code == PLUS || code == MINUS)
254
- && SCALAR_INT_MODE_P (mode)) ? MEM
255
: ((code == COMPARE || COMPARISON_P (x))
256
&& XEXP (x, 1) == const0_rtx) ? COMPARE
257
: in_code == COMPARE ? SET : in_code);
258
@@ -9797,20 +9842,8 @@ reg_nonzero_bits_for_combine (const_rtx x, machine_mode mode,
261
#ifdef SHORT_IMMEDIATES_SIGN_EXTEND
262
- /* If X is narrower than MODE and TEM is a non-negative
263
- constant that would appear negative in the mode of X,
264
- sign-extend it for use in reg_nonzero_bits because some
265
- machines (maybe most) will actually do the sign-extension
266
- and this is the conservative approach.
268
- ??? For 2.5, try to tighten up the MD files in this regard
269
- instead of this kludge. */
271
- if (GET_MODE_PRECISION (GET_MODE (x)) < GET_MODE_PRECISION (mode)
272
- && CONST_INT_P (tem)
273
- && INTVAL (tem) > 0
274
- && val_signbit_known_set_p (GET_MODE (x), INTVAL (tem)))
275
- tem = GEN_INT (INTVAL (tem) | ~GET_MODE_MASK (GET_MODE (x)));
276
+ tem = sign_extend_short_imm (tem, GET_MODE (x),
277
+ GET_MODE_PRECISION (mode));
281
--- a/src/gcc/config.gcc
282
+++ b/src/gcc/config.gcc
283
@@ -575,7 +575,7 @@ case ${target} in
286
# Common C libraries.
287
-tm_defines="$tm_defines LIBC_GLIBC=1 LIBC_UCLIBC=2 LIBC_BIONIC=3"
288
+tm_defines="$tm_defines LIBC_GLIBC=1 LIBC_UCLIBC=2 LIBC_BIONIC=3 LIBC_MUSL=4"
290
# 32-bit x86 processors supported by --with-arch=. Each processor
291
# MUST be separated by exactly one space.
292
@@ -720,6 +720,9 @@ case ${target} in
294
tm_defines="$tm_defines DEFAULT_LIBC=LIBC_UCLIBC"
297
+ tm_defines="$tm_defines DEFAULT_LIBC=LIBC_MUSL"
300
tm_defines="$tm_defines DEFAULT_LIBC=LIBC_GLIBC"
302
--- a/src/gcc/config.host
303
+++ b/src/gcc/config.host
304
@@ -99,6 +99,14 @@ case ${host} in
311
+ host_extra_gcc_objs="driver-aarch64.o"
312
+ host_xmake_file="${host_xmake_file} aarch64/x-aarch64"
316
arm*-*-freebsd* | arm*-*-linux*)
319
--- a/src/gcc/config/aarch64/aarch64-cores.def
320
+++ b/src/gcc/config/aarch64/aarch64-cores.def
323
Before using #include to read this file, define a macro:
325
- AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH, FLAGS, COSTS)
326
+ AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH, FLAGS, COSTS, IMP, PART)
328
The CORE_NAME is the name of the core, represented as a string constant.
329
The CORE_IDENT is the name of the core, represented as an identifier.
331
ARCH is the architecture revision implemented by the chip.
332
FLAGS are the bitwise-or of the traits that apply to that core.
333
This need not include flags implied by the architecture.
334
- COSTS is the name of the rtx_costs routine to use. */
335
+ COSTS is the name of the rtx_costs routine to use.
336
+ IMP is the implementer ID of the CPU vendor. On a GNU/Linux system it can
337
+ be found in /proc/cpuinfo.
338
+ PART is the part number of the CPU. On a GNU/Linux system it can be found
339
+ in /proc/cpuinfo. For big.LITTLE systems this should have the form at of
340
+ "<big core part number>.<LITTLE core part number>". */
342
/* V8 Architecture Processors. */
344
-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53)
345
-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57)
346
-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57)
347
-AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57)
348
-AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx)
349
-AARCH64_CORE("xgene1", xgene1, xgene1, 8, AARCH64_FL_FOR_ARCH8, xgene1)
350
+AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03")
351
+AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07")
352
+AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd08")
353
+AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x53", "0x001")
354
+AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1")
355
+AARCH64_CORE("xgene1", xgene1, xgene1, 8, AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000")
357
/* V8 big.LITTLE implementations. */
359
-AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57)
360
-AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57)
361
+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03")
362
+AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd08.0xd03")
363
--- a/src/gcc/config/aarch64/aarch64-cost-tables.h
364
+++ b/src/gcc/config/aarch64/aarch64-cost-tables.h
365
@@ -83,7 +83,9 @@ const struct cpu_cost_table thunderx_extra_costs =
366
0, /* N/A: Stm_regs_per_insn_subsequent. */
369
- COSTS_N_INSNS (1) /* Store_unaligned. */
370
+ COSTS_N_INSNS (1), /* Store_unaligned. */
371
+ COSTS_N_INSNS (1), /* Loadv. */
372
+ COSTS_N_INSNS (1) /* Storev. */
376
--- a/src/gcc/config/aarch64/aarch64-elf.h
377
+++ b/src/gcc/config/aarch64/aarch64-elf.h
379
#undef DRIVER_SELF_SPECS
380
#define DRIVER_SELF_SPECS \
381
" %{!mbig-endian:%{!mlittle-endian:" ENDIAN_SPEC "}}" \
382
- " %{!mabi=*:" ABI_SPEC "}"
383
+ " %{!mabi=*:" ABI_SPEC "}" \
384
+ MCPU_MTUNE_NATIVE_SPECS
386
#ifdef HAVE_AS_MABI_OPTION
387
#define ASM_MABI_SPEC "%{mabi=*:-mabi=%*}"
388
--- a/src/gcc/config/aarch64/aarch64-linux.h
389
+++ b/src/gcc/config/aarch64/aarch64-linux.h
392
#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}%{mabi=ilp32:_ilp32}.so.1"
394
+#undef MUSL_DYNAMIC_LINKER
395
+#define MUSL_DYNAMIC_LINKER "/lib/ld-musl-aarch64%{mbig-endian:_be}%{mabi=ilp32:_ilp32}.so.1"
398
#define ASAN_CC1_SPEC "%{%:sanitize(address):-funwind-tables}"
400
--- a/src/gcc/config/aarch64/aarch64-option-extensions.def
401
+++ b/src/gcc/config/aarch64/aarch64-option-extensions.def
404
Before using #include to read this file, define a macro:
406
- AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF)
407
+ AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING)
409
EXT_NAME is the name of the extension, represented as a string constant.
410
FLAGS_ON are the bitwise-or of the features that the extension adds.
411
- FLAGS_OFF are the bitwise-or of the features that the extension removes. */
412
+ FLAGS_OFF are the bitwise-or of the features that the extension removes.
413
+ FEAT_STRING is a string containing the entries in the 'Features' field of
414
+ /proc/cpuinfo on a GNU/Linux system that correspond to this architecture
415
+ extension being available. Sometimes multiple entries are needed to enable
416
+ the extension (for example, the 'crypto' extension depends on four
417
+ entries: aes, pmull, sha1, sha2 being present). In that case this field
418
+ should contain a whitespace-separated list of the strings in 'Features'
419
+ that are required. Their order is not important. */
421
/* V8 Architecture Extensions.
422
This list currently contains example extensions for CPUs that implement
423
AArch64, and therefore serves as a template for adding more CPUs in the
426
-AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO)
427
-AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO)
428
-AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO)
429
-AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC)
430
+AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO, "fp")
431
+AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO, "asimd")
432
+AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO, "aes pmull sha1 sha2")
433
+AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC, "crc32")
434
--- a/src/gcc/config/aarch64/aarch64-opts.h
435
+++ b/src/gcc/config/aarch64/aarch64-opts.h
437
/* The various cores that implement AArch64. */
438
enum aarch64_processor
440
-#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS) \
441
+#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
443
#include "aarch64-cores.def"
445
--- a/src/gcc/config/aarch64/aarch64-protos.h
446
+++ b/src/gcc/config/aarch64/aarch64-protos.h
447
@@ -162,12 +162,20 @@ struct cpu_vector_cost
448
const int cond_not_taken_branch_cost; /* Cost of not taken branch. */
452
+struct cpu_branch_cost
454
+ const int predictable; /* Predictable branch or optimizing for size. */
455
+ const int unpredictable; /* Unpredictable branch or optimizing for speed. */
460
const struct cpu_cost_table *const insn_extra_cost;
461
const struct cpu_addrcost_table *const addr_cost;
462
const struct cpu_regmove_cost *const regmove_cost;
463
const struct cpu_vector_cost *const vec_costs;
464
+ const struct cpu_branch_cost *const branch_costs;
465
const int memmov_cost;
466
const int issue_rate;
467
const unsigned int fuseable_ops;
468
@@ -177,11 +185,14 @@ struct tune_params
469
const int int_reassoc_width;
470
const int fp_reassoc_width;
471
const int vec_reassoc_width;
472
+ const int min_div_recip_mul_sf;
473
+ const int min_div_recip_mul_df;
476
HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
477
int aarch64_get_condition_code (rtx);
478
bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
479
+int aarch64_branch_cost (bool, bool);
480
enum aarch64_symbol_type
481
aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context);
482
bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
483
@@ -264,12 +275,6 @@ void init_aarch64_simd_builtins (void);
485
void aarch64_simd_emit_reg_reg_move (rtx *, enum machine_mode, unsigned int);
487
-/* Emit code to place a AdvSIMD pair result in memory locations (with equal
489
-void aarch64_simd_emit_pair_result_insn (machine_mode,
490
- rtx (*intfn) (rtx, rtx, rtx), rtx,
493
/* Expand builtins for SIMD intrinsics. */
494
rtx aarch64_simd_expand_builtin (int, tree, rtx);
496
--- a/src/gcc/config/aarch64/aarch64-simd.md
497
+++ b/src/gcc/config/aarch64/aarch64-simd.md
498
@@ -2057,13 +2057,13 @@
501
(define_expand "aarch64_vcond_internal<mode><mode>"
502
- [(set (match_operand:VDQ_I 0 "register_operand")
503
- (if_then_else:VDQ_I
504
+ [(set (match_operand:VSDQ_I_DI 0 "register_operand")
505
+ (if_then_else:VSDQ_I_DI
506
(match_operator 3 "comparison_operator"
507
- [(match_operand:VDQ_I 4 "register_operand")
508
- (match_operand:VDQ_I 5 "nonmemory_operand")])
509
- (match_operand:VDQ_I 1 "nonmemory_operand")
510
- (match_operand:VDQ_I 2 "nonmemory_operand")))]
511
+ [(match_operand:VSDQ_I_DI 4 "register_operand")
512
+ (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
513
+ (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
514
+ (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
517
rtx op1 = operands[1];
518
@@ -2365,13 +2365,13 @@
521
(define_expand "vcond<mode><mode>"
522
- [(set (match_operand:VALL 0 "register_operand")
524
+ [(set (match_operand:VALLDI 0 "register_operand")
525
+ (if_then_else:VALLDI
526
(match_operator 3 "comparison_operator"
527
- [(match_operand:VALL 4 "register_operand")
528
- (match_operand:VALL 5 "nonmemory_operand")])
529
- (match_operand:VALL 1 "nonmemory_operand")
530
- (match_operand:VALL 2 "nonmemory_operand")))]
531
+ [(match_operand:VALLDI 4 "register_operand")
532
+ (match_operand:VALLDI 5 "nonmemory_operand")])
533
+ (match_operand:VALLDI 1 "nonmemory_operand")
534
+ (match_operand:VALLDI 2 "nonmemory_operand")))]
537
emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
538
@@ -2398,13 +2398,13 @@
541
(define_expand "vcondu<mode><mode>"
542
- [(set (match_operand:VDQ_I 0 "register_operand")
543
- (if_then_else:VDQ_I
544
+ [(set (match_operand:VSDQ_I_DI 0 "register_operand")
545
+ (if_then_else:VSDQ_I_DI
546
(match_operator 3 "comparison_operator"
547
- [(match_operand:VDQ_I 4 "register_operand")
548
- (match_operand:VDQ_I 5 "nonmemory_operand")])
549
- (match_operand:VDQ_I 1 "nonmemory_operand")
550
- (match_operand:VDQ_I 2 "nonmemory_operand")))]
551
+ [(match_operand:VSDQ_I_DI 4 "register_operand")
552
+ (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
553
+ (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
554
+ (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
557
emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
558
@@ -3955,6 +3955,7 @@
559
[(set_attr "type" "neon_store2_2reg<q>")]
562
+;; RTL uses GCC vector extension indices, so flip only for assembly.
563
(define_insn "vec_store_lanesoi_lane<mode>"
564
[(set (match_operand:<V_TWO_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
565
(unspec:<V_TWO_ELEM> [(match_operand:OI 1 "register_operand" "w")
566
@@ -3962,7 +3963,10 @@
567
(match_operand:SI 2 "immediate_operand" "i")]
570
- "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"
572
+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
573
+ return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
575
[(set_attr "type" "neon_store3_one_lane<q>")]
578
@@ -4046,6 +4050,7 @@
579
[(set_attr "type" "neon_store3_3reg<q>")]
582
+;; RTL uses GCC vector extension indices, so flip only for assembly.
583
(define_insn "vec_store_lanesci_lane<mode>"
584
[(set (match_operand:<V_THREE_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
585
(unspec:<V_THREE_ELEM> [(match_operand:CI 1 "register_operand" "w")
586
@@ -4053,7 +4058,10 @@
587
(match_operand:SI 2 "immediate_operand" "i")]
590
- "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0"
592
+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
593
+ return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
595
[(set_attr "type" "neon_store3_one_lane<q>")]
598
@@ -4137,6 +4145,7 @@
599
[(set_attr "type" "neon_store4_4reg<q>")]
602
+;; RTL uses GCC vector extension indices, so flip only for assembly.
603
(define_insn "vec_store_lanesxi_lane<mode>"
604
[(set (match_operand:<V_FOUR_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
605
(unspec:<V_FOUR_ELEM> [(match_operand:XI 1 "register_operand" "w")
606
@@ -4144,7 +4153,10 @@
607
(match_operand:SI 2 "immediate_operand" "i")]
610
- "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0"
612
+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
613
+ return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
615
[(set_attr "type" "neon_store4_one_lane<q>")]
618
--- a/src/gcc/config/aarch64/aarch64.c
619
+++ b/src/gcc/config/aarch64/aarch64.c
620
@@ -339,12 +339,20 @@ static const struct cpu_vector_cost xgene1_vector_cost =
621
#define AARCH64_FUSE_ADRP_LDR (1 << 3)
622
#define AARCH64_FUSE_CMP_BRANCH (1 << 4)
624
+/* Generic costs for branch instructions. */
625
+static const struct cpu_branch_cost generic_branch_cost =
627
+ 2, /* Predictable. */
628
+ 2 /* Unpredictable. */
631
static const struct tune_params generic_tunings =
633
&cortexa57_extra_costs,
634
&generic_addrcost_table,
635
&generic_regmove_cost,
636
&generic_vector_cost,
637
+ &generic_branch_cost,
640
AARCH64_FUSE_NOTHING, /* fuseable_ops */
641
@@ -353,7 +361,9 @@ static const struct tune_params generic_tunings =
643
2, /* int_reassoc_width. */
644
4, /* fp_reassoc_width. */
645
- 1 /* vec_reassoc_width. */
646
+ 1, /* vec_reassoc_width. */
647
+ 2, /* min_div_recip_mul_sf. */
648
+ 2 /* min_div_recip_mul_df. */
651
static const struct tune_params cortexa53_tunings =
652
@@ -362,6 +372,7 @@ static const struct tune_params cortexa53_tunings =
653
&generic_addrcost_table,
654
&cortexa53_regmove_cost,
655
&generic_vector_cost,
656
+ &generic_branch_cost,
659
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
660
@@ -371,7 +382,9 @@ static const struct tune_params cortexa53_tunings =
662
2, /* int_reassoc_width. */
663
4, /* fp_reassoc_width. */
664
- 1 /* vec_reassoc_width. */
665
+ 1, /* vec_reassoc_width. */
666
+ 2, /* min_div_recip_mul_sf. */
667
+ 2 /* min_div_recip_mul_df. */
670
static const struct tune_params cortexa57_tunings =
671
@@ -380,6 +393,7 @@ static const struct tune_params cortexa57_tunings =
672
&cortexa57_addrcost_table,
673
&cortexa57_regmove_cost,
674
&cortexa57_vector_cost,
675
+ &generic_branch_cost,
678
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
679
@@ -389,7 +403,9 @@ static const struct tune_params cortexa57_tunings =
681
2, /* int_reassoc_width. */
682
4, /* fp_reassoc_width. */
683
- 1 /* vec_reassoc_width. */
684
+ 1, /* vec_reassoc_width. */
685
+ 2, /* min_div_recip_mul_sf. */
686
+ 2 /* min_div_recip_mul_df. */
689
static const struct tune_params thunderx_tunings =
690
@@ -398,6 +414,7 @@ static const struct tune_params thunderx_tunings =
691
&generic_addrcost_table,
692
&thunderx_regmove_cost,
693
&generic_vector_cost,
694
+ &generic_branch_cost,
697
AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops */
698
@@ -406,7 +423,9 @@ static const struct tune_params thunderx_tunings =
700
2, /* int_reassoc_width. */
701
4, /* fp_reassoc_width. */
702
- 1 /* vec_reassoc_width. */
703
+ 1, /* vec_reassoc_width. */
704
+ 2, /* min_div_recip_mul_sf. */
705
+ 2 /* min_div_recip_mul_df. */
708
static const struct tune_params xgene1_tunings =
709
@@ -415,6 +434,7 @@ static const struct tune_params xgene1_tunings =
710
&xgene1_addrcost_table,
711
&xgene1_regmove_cost,
713
+ &generic_branch_cost,
716
AARCH64_FUSE_NOTHING, /* fuseable_ops */
717
@@ -423,7 +443,9 @@ static const struct tune_params xgene1_tunings =
718
16, /* loop_align. */
719
2, /* int_reassoc_width. */
720
4, /* fp_reassoc_width. */
721
- 1 /* vec_reassoc_width. */
722
+ 1, /* vec_reassoc_width. */
723
+ 2, /* min_div_recip_mul_sf. */
724
+ 2 /* min_div_recip_mul_df. */
727
/* A processor implementing AArch64. */
728
@@ -440,7 +462,7 @@ struct processor
729
/* Processor cores implementing AArch64. */
730
static const struct processor all_cores[] =
732
-#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \
733
+#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
734
{NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
735
#include "aarch64-cores.def"
737
@@ -477,7 +499,7 @@ struct aarch64_option_extension
738
/* ISA extensions in AArch64. */
739
static const struct aarch64_option_extension all_extensions[] =
741
-#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
742
+#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \
743
{NAME, FLAGS_ON, FLAGS_OFF},
744
#include "aarch64-option-extensions.def"
745
#undef AARCH64_OPT_EXTENSION
746
@@ -512,9 +534,11 @@ static const char * const aarch64_condition_codes[] =
750
-aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
751
+aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
754
+ if (GET_MODE_UNIT_SIZE (mode) == 4)
755
+ return aarch64_tune_params->min_div_recip_mul_sf;
756
+ return aarch64_tune_params->min_div_recip_mul_df;
760
@@ -4901,8 +4925,9 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
764
- aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
765
- (GET_MODE_SIZE (mode) + 7) / 8;
766
+ aarch64_vector_mode_p (mode)
767
+ ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
768
+ : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
772
@@ -5157,9 +5182,18 @@ aarch64_strip_extend (rtx x)
776
+/* Return true iff CODE is a shift supported in combination
777
+ with arithmetic instructions. */
780
+aarch64_shift_p (enum rtx_code code)
782
+ return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
785
/* Helper function for rtx cost calculation. Calculate the cost of
786
- a MULT, which may be part of a multiply-accumulate rtx. Return
787
- the calculated cost of the expression, recursing manually in to
788
+ a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
789
+ Return the calculated cost of the expression, recursing manually in to
790
operands where needed. */
793
@@ -5169,7 +5203,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
794
const struct cpu_cost_table *extra_cost
795
= aarch64_tune_params->insn_extra_cost;
797
- bool maybe_fma = (outer == PLUS || outer == MINUS);
798
+ bool compound_p = (outer == PLUS || outer == MINUS);
799
machine_mode mode = GET_MODE (x);
801
gcc_checking_assert (code == MULT);
802
@@ -5184,24 +5218,50 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
803
if (GET_MODE_CLASS (mode) == MODE_INT)
805
/* The multiply will be canonicalized as a shift, cost it as such. */
806
- if (CONST_INT_P (op1)
807
- && exact_log2 (INTVAL (op1)) > 0)
808
+ if (aarch64_shift_p (GET_CODE (x))
809
+ || (CONST_INT_P (op1)
810
+ && exact_log2 (INTVAL (op1)) > 0))
812
+ bool is_extend = GET_CODE (op0) == ZERO_EXTEND
813
+ || GET_CODE (op0) == SIGN_EXTEND;
817
- /* ADD (shifted register). */
818
- cost += extra_cost->alu.arith_shift;
822
+ /* ARITH + shift-by-register. */
823
+ cost += extra_cost->alu.arith_shift_reg;
824
+ else if (is_extend)
825
+ /* ARITH + extended register. We don't have a cost field
826
+ for ARITH+EXTEND+SHIFT, so use extend_arith here. */
827
+ cost += extra_cost->alu.extend_arith;
829
+ /* ARITH + shift-by-immediate. */
830
+ cost += extra_cost->alu.arith_shift;
833
/* LSL (immediate). */
834
- cost += extra_cost->alu.shift;
835
+ cost += extra_cost->alu.shift;
838
+ /* Strip extends as we will have costed them in the case above. */
840
+ op0 = aarch64_strip_extend (op0);
842
cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
847
+ /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
848
+ compound and let the below cases handle it. After all, MNEG is a
849
+ special-case alias of MSUB. */
850
+ if (GET_CODE (op0) == NEG)
852
+ op0 = XEXP (op0, 0);
856
/* Integer multiplies or FMAs have zero/sign extending variants. */
857
if ((GET_CODE (op0) == ZERO_EXTEND
858
&& GET_CODE (op1) == ZERO_EXTEND)
859
@@ -5213,8 +5273,8 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
864
- /* MADD/SMADDL/UMADDL. */
866
+ /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
867
cost += extra_cost->mult[0].extend_add;
869
/* MUL/SMULL/UMULL. */
870
@@ -5224,15 +5284,15 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
874
- /* This is either an integer multiply or an FMA. In both cases
875
+ /* This is either an integer multiply or a MADD. In both cases
876
we want to recurse and cost the operands. */
877
cost += rtx_cost (op0, MULT, 0, speed)
878
+ rtx_cost (op1, MULT, 1, speed);
886
cost += extra_cost->mult[mode == DImode].add;
889
@@ -5250,7 +5310,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
890
which case FNMUL is different than FMUL with operand negation. */
891
bool neg0 = GET_CODE (op0) == NEG;
892
bool neg1 = GET_CODE (op1) == NEG;
893
- if (maybe_fma || !flag_rounding_math || (neg0 && neg1))
894
+ if (compound_p || !flag_rounding_math || (neg0 && neg1))
898
@@ -5258,7 +5318,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
904
/* FMADD/FNMADD/FNMSUB/FMSUB. */
905
cost += extra_cost->fp[mode == DFmode].fma;
907
@@ -5367,6 +5427,23 @@ aarch64_address_cost (rtx x,
911
+/* Return the cost of a branch. If SPEED_P is true then the compiler is
912
+ optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
916
+aarch64_branch_cost (bool speed_p, bool predictable_p)
918
+ /* When optimizing for speed, use the cost of unpredictable branches. */
919
+ const struct cpu_branch_cost *branch_costs =
920
+ aarch64_tune_params->branch_costs;
922
+ if (!speed_p || predictable_p)
923
+ return branch_costs->predictable;
925
+ return branch_costs->unpredictable;
928
/* Return true if the RTX X in mode MODE is a zero or sign extract
929
usable in an ADD or SUB (extended register) instruction. */
931
@@ -5415,6 +5492,51 @@ aarch64_frint_unspec_p (unsigned int u)
935
+/* Return true iff X is an rtx that will match an extr instruction
936
+ i.e. as described in the *extr<mode>5_insn family of patterns.
937
+ OP0 and OP1 will be set to the operands of the shifts involved
938
+ on success and will be NULL_RTX otherwise. */
941
+aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
944
+ machine_mode mode = GET_MODE (x);
946
+ *res_op0 = NULL_RTX;
947
+ *res_op1 = NULL_RTX;
949
+ if (GET_CODE (x) != IOR)
955
+ if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
956
+ || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
958
+ /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
959
+ if (GET_CODE (op1) == ASHIFT)
960
+ std::swap (op0, op1);
962
+ if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
965
+ unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
966
+ unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
968
+ if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
969
+ && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
971
+ *res_op0 = XEXP (op0, 0);
972
+ *res_op1 = XEXP (op1, 0);
980
/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
981
storing it in *COST. Result is true if the total cost of the operation
982
has now been calculated. */
983
@@ -5505,16 +5627,6 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
984
above this default. */
985
*cost = COSTS_N_INSNS (1);
987
- /* TODO: The cost infrastructure currently does not handle
988
- vector operations. Assume that all vector operations
989
- are equally expensive. */
990
- if (VECTOR_MODE_P (mode))
993
- *cost += extra_cost->vect.alu;
1000
@@ -5529,7 +5641,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
1003
rtx address = XEXP (op0, 0);
1004
- if (GET_MODE_CLASS (mode) == MODE_INT)
1005
+ if (VECTOR_MODE_P (mode))
1006
+ *cost += extra_cost->ldst.storev;
1007
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
1008
*cost += extra_cost->ldst.store;
1009
else if (mode == SFmode)
1010
*cost += extra_cost->ldst.storef;
1011
@@ -5550,15 +5664,22 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
1015
+ /* The cost is one per vector-register copied. */
1016
+ if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
1018
+ int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
1019
+ / GET_MODE_SIZE (V4SImode);
1020
+ *cost = COSTS_N_INSNS (n_minus_1 + 1);
1022
/* const0_rtx is in general free, but we will use an
1023
instruction to set a register to 0. */
1024
- if (REG_P (op1) || op1 == const0_rtx)
1026
- /* The cost is 1 per register copied. */
1027
- int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
1028
+ else if (REG_P (op1) || op1 == const0_rtx)
1030
+ /* The cost is 1 per register copied. */
1031
+ int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
1033
- *cost = COSTS_N_INSNS (n_minus_1 + 1);
1035
+ *cost = COSTS_N_INSNS (n_minus_1 + 1);
1038
/* Cost is just the cost of the RHS of the set. */
1039
*cost += rtx_cost (op1, SET, 1, speed);
1040
@@ -5656,7 +5777,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
1041
approximation for the additional cost of the addressing
1043
rtx address = XEXP (x, 0);
1044
- if (GET_MODE_CLASS (mode) == MODE_INT)
1045
+ if (VECTOR_MODE_P (mode))
1046
+ *cost += extra_cost->ldst.loadv;
1047
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
1048
*cost += extra_cost->ldst.load;
1049
else if (mode == SFmode)
1050
*cost += extra_cost->ldst.loadf;
1051
@@ -5673,6 +5796,16 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
1055
+ if (VECTOR_MODE_P (mode))
1060
+ *cost += extra_cost->vect.alu;
1065
if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
1067
if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
1068
@@ -5717,7 +5850,12 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
1072
- *cost += extra_cost->alu.clz;
1074
+ if (VECTOR_MODE_P (mode))
1075
+ *cost += extra_cost->vect.alu;
1077
+ *cost += extra_cost->alu.clz;
1082
@@ -5796,12 +5934,27 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
1084
if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
1086
+ *cost += rtx_cost (op0, COMPARE, 0, speed);
1087
/* FCMP supports constant 0.0 for no extra cost. */
1093
+ if (VECTOR_MODE_P (mode))
1095
+ /* Vector compare. */
1097
+ *cost += extra_cost->vect.alu;
1099
+ if (aarch64_float_const_zero_rtx_p (op1))
1101
+ /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
1110
@@ -5810,6 +5963,8 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
1114
+ *cost += rtx_cost (op0, MINUS, 0, speed);
1116
/* Detect valid immediates. */
1117
if ((GET_MODE_CLASS (mode) == MODE_INT
1118
|| (GET_MODE_CLASS (mode) == MODE_CC
1119
@@ -5817,20 +5972,17 @@ cost_minus:
1120
&& CONST_INT_P (op1)
1121
&& aarch64_uimm12_shift (INTVAL (op1)))
1123
- *cost += rtx_cost (op0, MINUS, 0, speed);
1126
/* SUB(S) (immediate). */
1127
*cost += extra_cost->alu.arith;
1132
/* Look for SUB (extended register). */
1133
if (aarch64_rtx_arith_op_extract_p (op1, mode))
1136
- *cost += extra_cost->alu.arith_shift;
1137
+ *cost += extra_cost->alu.extend_arith;
1139
*cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
1140
(enum rtx_code) GET_CODE (op1),
1141
@@ -5842,13 +5994,12 @@ cost_minus:
1143
/* Cost this as an FMA-alike operation. */
1144
if ((GET_CODE (new_op1) == MULT
1145
- || GET_CODE (new_op1) == ASHIFT)
1146
+ || aarch64_shift_p (GET_CODE (new_op1)))
1149
*cost += aarch64_rtx_mult_cost (new_op1, MULT,
1150
(enum rtx_code) code,
1152
- *cost += rtx_cost (op0, MINUS, 0, speed);
1156
@@ -5856,12 +6007,21 @@ cost_minus:
1160
- if (GET_MODE_CLASS (mode) == MODE_INT)
1162
- *cost += extra_cost->alu.arith;
1163
+ if (VECTOR_MODE_P (mode))
1166
+ *cost += extra_cost->vect.alu;
1168
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
1171
+ *cost += extra_cost->alu.arith;
1173
else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1175
- *cost += extra_cost->fp[mode == DFmode].addsub;
1178
+ *cost += extra_cost->fp[mode == DFmode].addsub;
1183
@@ -5895,11 +6055,13 @@ cost_plus:
1187
+ *cost += rtx_cost (op1, PLUS, 1, speed);
1189
/* Look for ADD (extended register). */
1190
if (aarch64_rtx_arith_op_extract_p (op0, mode))
1193
- *cost += extra_cost->alu.arith_shift;
1194
+ *cost += extra_cost->alu.extend_arith;
1196
*cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
1197
(enum rtx_code) GET_CODE (op0),
1198
@@ -5912,25 +6074,32 @@ cost_plus:
1199
new_op0 = aarch64_strip_extend (op0);
1201
if (GET_CODE (new_op0) == MULT
1202
- || GET_CODE (new_op0) == ASHIFT)
1203
+ || aarch64_shift_p (GET_CODE (new_op0)))
1205
*cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
1207
- *cost += rtx_cost (op1, PLUS, 1, speed);
1211
- *cost += (rtx_cost (new_op0, PLUS, 0, speed)
1212
- + rtx_cost (op1, PLUS, 1, speed));
1213
+ *cost += rtx_cost (new_op0, PLUS, 0, speed);
1217
- if (GET_MODE_CLASS (mode) == MODE_INT)
1219
- *cost += extra_cost->alu.arith;
1220
+ if (VECTOR_MODE_P (mode))
1223
+ *cost += extra_cost->vect.alu;
1225
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
1228
+ *cost += extra_cost->alu.arith;
1230
else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1232
- *cost += extra_cost->fp[mode == DFmode].addsub;
1235
+ *cost += extra_cost->fp[mode == DFmode].addsub;
1240
@@ -5939,8 +6108,12 @@ cost_plus:
1241
*cost = COSTS_N_INSNS (1);
1244
- *cost += extra_cost->alu.rev;
1247
+ if (VECTOR_MODE_P (mode))
1248
+ *cost += extra_cost->vect.alu;
1250
+ *cost += extra_cost->alu.rev;
1255
@@ -5948,8 +6121,22 @@ cost_plus:
1257
*cost = COSTS_N_INSNS (1);
1261
+ if (VECTOR_MODE_P (mode))
1262
+ *cost += extra_cost->vect.alu;
1264
+ *cost += extra_cost->alu.rev;
1269
+ if (aarch64_extr_rtx_p (x, &op0, &op1))
1271
+ *cost += rtx_cost (op0, IOR, 0, speed)
1272
+ + rtx_cost (op1, IOR, 1, speed);
1274
- *cost += extra_cost->alu.rev;
1275
+ *cost += extra_cost->alu.shift;
1279
@@ -5960,6 +6147,13 @@ cost_plus:
1283
+ if (VECTOR_MODE_P (mode))
1286
+ *cost += extra_cost->vect.alu;
1291
&& GET_CODE (op0) == MULT
1292
&& CONST_INT_P (XEXP (op0, 1))
1293
@@ -6025,13 +6219,52 @@ cost_plus:
1298
+ op0 = aarch64_strip_shift (x);
1300
+ if (VECTOR_MODE_P (mode))
1303
+ *cost += extra_cost->vect.alu;
1307
+ /* MVN-shifted-reg. */
1310
+ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
1313
+ *cost += extra_cost->alu.log_shift;
1317
+ /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
1318
+ Handle the second form here taking care that 'a' in the above can
1320
+ else if (GET_CODE (op0) == XOR)
1322
+ rtx newop0 = XEXP (op0, 0);
1323
+ rtx newop1 = XEXP (op0, 1);
1324
+ rtx op0_stripped = aarch64_strip_shift (newop0);
1326
+ *cost += rtx_cost (newop1, (enum rtx_code) code, 1, speed)
1327
+ + rtx_cost (op0_stripped, XOR, 0, speed);
1331
+ if (op0_stripped != newop0)
1332
+ *cost += extra_cost->alu.log_shift;
1334
+ *cost += extra_cost->alu.logical;
1341
*cost += extra_cost->alu.logical;
1343
- /* The logical instruction could have the shifted register form,
1344
- but the cost is the same if the shift is processed as a separate
1345
- instruction, so we don't bother with it here. */
1349
@@ -6067,10 +6300,19 @@ cost_plus:
1355
- *cost += extra_cost->alu.extend;
1358
+ if (VECTOR_MODE_P (mode))
1361
+ *cost += extra_cost->vect.alu;
1366
+ *cost += extra_cost->alu.extend;
1372
@@ -6090,7 +6332,12 @@ cost_plus:
1376
- *cost += extra_cost->alu.extend;
1378
+ if (VECTOR_MODE_P (mode))
1379
+ *cost += extra_cost->vect.alu;
1381
+ *cost += extra_cost->alu.extend;
1386
@@ -6099,10 +6346,20 @@ cost_plus:
1388
if (CONST_INT_P (op1))
1390
- /* LSL (immediate), UBMF, UBFIZ and friends. These are all
1393
- *cost += extra_cost->alu.shift;
1395
+ if (VECTOR_MODE_P (mode))
1397
+ /* Vector shift (immediate). */
1398
+ *cost += extra_cost->vect.alu;
1402
+ /* LSL (immediate), UBMF, UBFIZ and friends. These are all
1404
+ *cost += extra_cost->alu.shift;
1408
/* We can incorporate zero/sign extend for free. */
1409
if (GET_CODE (op0) == ZERO_EXTEND
1410
@@ -6114,10 +6371,19 @@ cost_plus:
1416
- *cost += extra_cost->alu.shift_reg;
1419
+ if (VECTOR_MODE_P (mode))
1421
+ /* Vector shift (register). */
1422
+ *cost += extra_cost->vect.alu;
1427
+ *cost += extra_cost->alu.shift_reg;
1430
return false; /* All arguments need to be in registers. */
1433
@@ -6132,7 +6398,12 @@ cost_plus:
1435
/* ASR (immediate) and friends. */
1437
- *cost += extra_cost->alu.shift;
1439
+ if (VECTOR_MODE_P (mode))
1440
+ *cost += extra_cost->vect.alu;
1442
+ *cost += extra_cost->alu.shift;
1445
*cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
1447
@@ -6142,8 +6413,12 @@ cost_plus:
1449
/* ASR (register) and friends. */
1451
- *cost += extra_cost->alu.shift_reg;
1454
+ if (VECTOR_MODE_P (mode))
1455
+ *cost += extra_cost->vect.alu;
1457
+ *cost += extra_cost->alu.shift_reg;
1459
return false; /* All arguments need to be in registers. */
1462
@@ -6191,7 +6466,12 @@ cost_plus:
1466
- *cost += extra_cost->alu.bfx;
1468
+ if (VECTOR_MODE_P (mode))
1469
+ *cost += extra_cost->vect.alu;
1471
+ *cost += extra_cost->alu.bfx;
1474
/* We can trust that the immediates used will be correct (there
1475
are no by-register forms), so we need only cost op0. */
1476
@@ -6208,7 +6488,9 @@ cost_plus:
1480
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
1481
+ if (VECTOR_MODE_P (mode))
1482
+ *cost += extra_cost->vect.alu;
1483
+ else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
1484
*cost += (extra_cost->mult[GET_MODE (x) == DImode].add
1485
+ extra_cost->mult[GET_MODE (x) == DImode].idiv);
1486
else if (GET_MODE (x) == DFmode)
1487
@@ -6225,7 +6507,9 @@ cost_plus:
1491
- if (GET_MODE_CLASS (mode) == MODE_INT)
1492
+ if (VECTOR_MODE_P (mode))
1493
+ *cost += extra_cost->vect.alu;
1494
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
1495
/* There is no integer SQRT, so only DIV and UDIV can get
1497
*cost += extra_cost->mult[mode == DImode].idiv;
1498
@@ -6257,7 +6541,12 @@ cost_plus:
1502
- *cost += extra_cost->fp[mode == DFmode].fma;
1504
+ if (VECTOR_MODE_P (mode))
1505
+ *cost += extra_cost->vect.alu;
1507
+ *cost += extra_cost->fp[mode == DFmode].fma;
1510
/* FMSUB, FNMADD, and FNMSUB are free. */
1511
if (GET_CODE (op0) == NEG)
1512
@@ -6295,14 +6584,36 @@ cost_plus:
1513
*cost += rtx_cost (op2, FMA, 2, speed);
1517
+ case UNSIGNED_FLOAT:
1519
+ *cost += extra_cost->fp[mode == DFmode].fromint;
1524
- *cost += extra_cost->fp[mode == DFmode].widen;
1526
+ if (VECTOR_MODE_P (mode))
1528
+ /*Vector truncate. */
1529
+ *cost += extra_cost->vect.alu;
1532
+ *cost += extra_cost->fp[mode == DFmode].widen;
1536
case FLOAT_TRUNCATE:
1538
- *cost += extra_cost->fp[mode == DFmode].narrow;
1540
+ if (VECTOR_MODE_P (mode))
1542
+ /*Vector conversion. */
1543
+ *cost += extra_cost->vect.alu;
1546
+ *cost += extra_cost->fp[mode == DFmode].narrow;
1551
@@ -6323,15 +6634,37 @@ cost_plus:
1555
- *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
1558
+ if (VECTOR_MODE_P (mode))
1559
+ *cost += extra_cost->vect.alu;
1561
+ *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
1563
*cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
1567
- if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1568
+ if (VECTOR_MODE_P (mode))
1570
- /* FABS and FNEG are analogous. */
1571
+ /* ABS (vector). */
1573
+ *cost += extra_cost->vect.alu;
1575
+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1577
+ op0 = XEXP (x, 0);
1579
+ /* FABD, which is analogous to FADD. */
1580
+ if (GET_CODE (op0) == MINUS)
1582
+ *cost += rtx_cost (XEXP (op0, 0), MINUS, 0, speed);
1583
+ + rtx_cost (XEXP (op0, 1), MINUS, 1, speed);
1585
+ *cost += extra_cost->fp[mode == DFmode].addsub;
1589
+ /* Simple FABS is analogous to FNEG. */
1591
*cost += extra_cost->fp[mode == DFmode].neg;
1593
@@ -6350,10 +6683,15 @@ cost_plus:
1597
- /* FMAXNM/FMINNM/FMAX/FMIN.
1598
- TODO: This may not be accurate for all implementations, but
1599
- we do not model this in the cost tables. */
1600
- *cost += extra_cost->fp[mode == DFmode].addsub;
1601
+ if (VECTOR_MODE_P (mode))
1602
+ *cost += extra_cost->vect.alu;
1605
+ /* FMAXNM/FMINNM/FMAX/FMIN.
1606
+ TODO: This may not be accurate for all implementations, but
1607
+ we do not model this in the cost tables. */
1608
+ *cost += extra_cost->fp[mode == DFmode].addsub;
1613
@@ -7830,6 +8168,26 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
1617
+/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
1618
+ type as described in AAPCS64 \S 4.1.2.
1620
+ See the comment above aarch64_composite_type_p for the notes on MODE. */
1623
+aarch64_short_vector_p (const_tree type,
1624
+ machine_mode mode)
1626
+ HOST_WIDE_INT size = -1;
1628
+ if (type && TREE_CODE (type) == VECTOR_TYPE)
1629
+ size = int_size_in_bytes (type);
1630
+ else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1631
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1632
+ size = GET_MODE_SIZE (mode);
1634
+ return (size == 8 || size == 16);
1637
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
1638
type as described in AAPCS64 \S 4.3. This includes aggregate, union and
1639
array types. The C99 floating-point complex types are also considered
1640
@@ -7851,6 +8209,9 @@ static bool
1641
aarch64_composite_type_p (const_tree type,
1644
+ if (aarch64_short_vector_p (type, mode))
1647
if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
1650
@@ -7862,27 +8223,6 @@ aarch64_composite_type_p (const_tree type,
1654
-/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
1655
- type as described in AAPCS64 \S 4.1.2.
1657
- See the comment above aarch64_composite_type_p for the notes on MODE. */
1660
-aarch64_short_vector_p (const_tree type,
1661
- machine_mode mode)
1663
- HOST_WIDE_INT size = -1;
1665
- if (type && TREE_CODE (type) == VECTOR_TYPE)
1666
- size = int_size_in_bytes (type);
1667
- else if (!aarch64_composite_type_p (type, mode)
1668
- && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1669
- || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
1670
- size = GET_MODE_SIZE (mode);
1672
- return (size == 8 || size == 16) ? true : false;
1675
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
1676
shall be passed or returned in simd/fp register(s) (providing these
1677
parameter passing registers are available).
1678
@@ -8581,24 +8921,6 @@ aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
1682
-/* Emit code to place a AdvSIMD pair result in memory locations (with equal
1685
-aarch64_simd_emit_pair_result_insn (machine_mode mode,
1686
- rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
1689
- rtx mem = gen_rtx_MEM (mode, destaddr);
1690
- rtx tmp1 = gen_reg_rtx (mode);
1691
- rtx tmp2 = gen_reg_rtx (mode);
1693
- emit_insn (intfn (tmp1, op1, tmp2));
1695
- emit_move_insn (mem, tmp1);
1696
- mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
1697
- emit_move_insn (mem, tmp2);
1700
/* Return TRUE if OP is a valid vector addressing mode. */
1702
aarch64_simd_mem_operand_p (rtx op)
1703
@@ -8781,22 +9103,19 @@ aarch64_expand_vector_init (rtx target, rtx vals)
1704
machine_mode mode = GET_MODE (target);
1705
machine_mode inner_mode = GET_MODE_INNER (mode);
1706
int n_elts = GET_MODE_NUNITS (mode);
1707
- int n_var = 0, one_var = -1;
1709
+ rtx any_const = NULL_RTX;
1710
bool all_same = true;
1714
- x = XVECEXP (vals, 0, 0);
1715
- if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
1716
- n_var = 1, one_var = 0;
1718
- for (i = 1; i < n_elts; ++i)
1719
+ for (int i = 0; i < n_elts; ++i)
1721
- x = XVECEXP (vals, 0, i);
1722
+ rtx x = XVECEXP (vals, 0, i);
1723
if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
1724
- ++n_var, one_var = i;
1729
- if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
1730
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
1734
@@ -8813,36 +9132,60 @@ aarch64_expand_vector_init (rtx target, rtx vals)
1735
/* Splat a single non-constant element if we can. */
1738
- x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
1739
+ rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
1740
aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
1744
- /* One field is non-constant. Load constant then overwrite varying
1745
- field. This is more efficient than using the stack. */
1747
+ /* Half the fields (or less) are non-constant. Load constant then overwrite
1748
+ varying fields. Hope that this is more efficient than using the stack. */
1749
+ if (n_var <= n_elts/2)
1751
rtx copy = copy_rtx (vals);
1752
- rtx index = GEN_INT (one_var);
1753
- enum insn_code icode;
1755
- /* Load constant part of vector, substitute neighboring value for
1756
- varying element. */
1757
- XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
1758
+ /* Load constant part of vector. We really don't care what goes into the
1759
+ parts we will overwrite, but we're more likely to be able to load the
1760
+ constant efficiently if it has fewer, larger, repeating parts
1761
+ (see aarch64_simd_valid_immediate). */
1762
+ for (int i = 0; i < n_elts; i++)
1764
+ rtx x = XVECEXP (vals, 0, i);
1765
+ if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
1767
+ rtx subst = any_const;
1768
+ for (int bit = n_elts / 2; bit > 0; bit /= 2)
1770
+ /* Look in the copied vector, as more elements are const. */
1771
+ rtx test = XVECEXP (copy, 0, i ^ bit);
1772
+ if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
1778
+ XVECEXP (copy, 0, i) = subst;
1780
aarch64_expand_vector_init (target, copy);
1782
- /* Insert variable. */
1783
- x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
1784
- icode = optab_handler (vec_set_optab, mode);
1785
+ /* Insert variables. */
1786
+ enum insn_code icode = optab_handler (vec_set_optab, mode);
1787
gcc_assert (icode != CODE_FOR_nothing);
1788
- emit_insn (GEN_FCN (icode) (target, x, index));
1790
+ for (int i = 0; i < n_elts; i++)
1792
+ rtx x = XVECEXP (vals, 0, i);
1793
+ if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
1795
+ x = copy_to_mode_reg (inner_mode, x);
1796
+ emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
1801
/* Construct the vector in memory one field at a time
1802
and load the whole vector. */
1803
- mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
1804
- for (i = 0; i < n_elts; i++)
1805
+ rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
1806
+ for (int i = 0; i < n_elts; i++)
1807
emit_move_insn (adjust_address_nv (mem, inner_mode,
1808
i * GET_MODE_SIZE (inner_mode)),
1809
XVECEXP (vals, 0, i));
1810
--- a/src/gcc/config/aarch64/aarch64.h
1811
+++ b/src/gcc/config/aarch64/aarch64.h
1812
@@ -506,7 +506,7 @@ enum reg_class
1816
-#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS) \
1817
+#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
1818
TARGET_CPU_##INTERNAL_IDENT,
1819
#include "aarch64-cores.def"
1821
@@ -823,7 +823,8 @@ do { \
1822
#define TRAMPOLINE_SECTION text_section
1824
/* To start with. */
1825
-#define BRANCH_COST(SPEED_P, PREDICTABLE_P) 2
1826
+#define BRANCH_COST(SPEED_P, PREDICTABLE_P) \
1827
+ (aarch64_branch_cost (SPEED_P, PREDICTABLE_P))
1830
/* Assembly output. */
1831
@@ -929,11 +930,24 @@ extern const char *aarch64_rewrite_mcpu (int argc, const char **argv);
1832
#define BIG_LITTLE_CPU_SPEC_FUNCTIONS \
1833
{ "rewrite_mcpu", aarch64_rewrite_mcpu },
1835
+#if defined(__aarch64__)
1836
+extern const char *host_detect_local_cpu (int argc, const char **argv);
1837
+# define EXTRA_SPEC_FUNCTIONS \
1838
+ { "local_cpu_detect", host_detect_local_cpu }, \
1839
+ BIG_LITTLE_CPU_SPEC_FUNCTIONS
1841
+# define MCPU_MTUNE_NATIVE_SPECS \
1842
+ " %{march=native:%<march=native %:local_cpu_detect(arch)}" \
1843
+ " %{mcpu=native:%<mcpu=native %:local_cpu_detect(cpu)}" \
1844
+ " %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}"
1846
+# define MCPU_MTUNE_NATIVE_SPECS ""
1847
+# define EXTRA_SPEC_FUNCTIONS BIG_LITTLE_CPU_SPEC_FUNCTIONS
1850
#define ASM_CPU_SPEC \
1853
-#define EXTRA_SPEC_FUNCTIONS BIG_LITTLE_CPU_SPEC_FUNCTIONS
1855
#define EXTRA_SPECS \
1856
{ "asm_cpu_spec", ASM_CPU_SPEC }
1858
--- a/src/gcc/config/aarch64/aarch64.md
1859
+++ b/src/gcc/config/aarch64/aarch64.md
1860
@@ -1414,18 +1414,28 @@
1862
if (! aarch64_plus_operand (operands[2], VOIDmode))
1864
- rtx subtarget = ((optimize && can_create_pseudo_p ())
1865
- ? gen_reg_rtx (<MODE>mode) : operands[0]);
1866
HOST_WIDE_INT imm = INTVAL (operands[2]);
1869
- imm = -(-imm & ~0xfff);
1870
+ if (aarch64_move_imm (imm, <MODE>mode) && can_create_pseudo_p ())
1872
+ rtx tmp = gen_reg_rtx (<MODE>mode);
1873
+ emit_move_insn (tmp, operands[2]);
1874
+ operands[2] = tmp;
1879
- emit_insn (gen_add<mode>3 (subtarget, operands[1], GEN_INT (imm)));
1880
- operands[1] = subtarget;
1881
- operands[2] = GEN_INT (INTVAL (operands[2]) - imm);
1883
+ rtx subtarget = ((optimize && can_create_pseudo_p ())
1884
+ ? gen_reg_rtx (<MODE>mode) : operands[0]);
1887
+ imm = -(-imm & ~0xfff);
1891
+ emit_insn (gen_add<mode>3 (subtarget, operands[1], GEN_INT (imm)));
1892
+ operands[1] = subtarget;
1893
+ operands[2] = GEN_INT (INTVAL (operands[2]) - imm);
1898
@@ -1529,6 +1539,38 @@
1899
[(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
1902
+(define_insn "*adds_shift_imm_<mode>"
1903
+ [(set (reg:CC_NZ CC_REGNUM)
1905
+ (plus:GPI (ASHIFT:GPI
1906
+ (match_operand:GPI 1 "register_operand" "r")
1907
+ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))
1908
+ (match_operand:GPI 3 "register_operand" "r"))
1910
+ (set (match_operand:GPI 0 "register_operand" "=r")
1911
+ (plus:GPI (ASHIFT:GPI (match_dup 1) (match_dup 2))
1914
+ "adds\\t%<w>0, %<w>3, %<w>1, <shift> %2"
1915
+ [(set_attr "type" "alus_shift_imm")]
1918
+(define_insn "*subs_shift_imm_<mode>"
1919
+ [(set (reg:CC_NZ CC_REGNUM)
1921
+ (minus:GPI (match_operand:GPI 1 "register_operand" "r")
1923
+ (match_operand:GPI 2 "register_operand" "r")
1924
+ (match_operand:QI 3 "aarch64_shift_imm_<mode>" "n")))
1926
+ (set (match_operand:GPI 0 "register_operand" "=r")
1927
+ (minus:GPI (match_dup 1)
1928
+ (ASHIFT:GPI (match_dup 2) (match_dup 3))))]
1930
+ "subs\\t%<w>0, %<w>1, %<w>2, <shift> %3"
1931
+ [(set_attr "type" "alus_shift_imm")]
1934
(define_insn "*adds_mul_imm_<mode>"
1935
[(set (reg:CC_NZ CC_REGNUM)
1937
@@ -1589,6 +1631,42 @@
1938
[(set_attr "type" "alus_ext")]
1941
+(define_insn "*adds_<optab><ALLX:mode>_shift_<GPI:mode>"
1942
+ [(set (reg:CC_NZ CC_REGNUM)
1944
+ (plus:GPI (ashift:GPI
1946
+ (match_operand:ALLX 1 "register_operand" "r"))
1947
+ (match_operand 2 "aarch64_imm3" "Ui3"))
1948
+ (match_operand:GPI 3 "register_operand" "r"))
1950
+ (set (match_operand:GPI 0 "register_operand" "=rk")
1951
+ (plus:GPI (ashift:GPI (ANY_EXTEND:GPI (match_dup 1))
1955
+ "adds\\t%<GPI:w>0, %<GPI:w>3, %<GPI:w>1, <su>xt<ALLX:size> %2"
1956
+ [(set_attr "type" "alus_ext")]
1959
+(define_insn "*subs_<optab><ALLX:mode>_shift_<GPI:mode>"
1960
+ [(set (reg:CC_NZ CC_REGNUM)
1962
+ (minus:GPI (match_operand:GPI 1 "register_operand" "r")
1965
+ (match_operand:ALLX 2 "register_operand" "r"))
1966
+ (match_operand 3 "aarch64_imm3" "Ui3")))
1968
+ (set (match_operand:GPI 0 "register_operand" "=rk")
1969
+ (minus:GPI (match_dup 1)
1970
+ (ashift:GPI (ANY_EXTEND:GPI (match_dup 2))
1973
+ "subs\\t%<GPI:w>0, %<GPI:w>1, %<GPI:w>2, <su>xt<ALLX:size> %3"
1974
+ [(set_attr "type" "alus_ext")]
1977
(define_insn "*adds_<optab><mode>_multp2"
1978
[(set (reg:CC_NZ CC_REGNUM)
1980
@@ -1884,6 +1962,38 @@
1981
[(set_attr "type" "adc_reg")]
1984
+(define_insn "*add_uxt<mode>_shift2"
1985
+ [(set (match_operand:GPI 0 "register_operand" "=rk")
1986
+ (plus:GPI (and:GPI
1987
+ (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
1988
+ (match_operand 2 "aarch64_imm3" "Ui3"))
1989
+ (match_operand 3 "const_int_operand" "n"))
1990
+ (match_operand:GPI 4 "register_operand" "r")))]
1991
+ "aarch64_uxt_size (INTVAL (operands[2]), INTVAL (operands[3])) != 0"
1993
+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL(operands[2]),
1994
+ INTVAL (operands[3])));
1995
+ return \"add\t%<w>0, %<w>4, %<w>1, uxt%e3 %2\";"
1996
+ [(set_attr "type" "alu_ext")]
1999
+;; zero_extend version of above
2000
+(define_insn "*add_uxtsi_shift2_uxtw"
2001
+ [(set (match_operand:DI 0 "register_operand" "=rk")
2004
+ (ashift:SI (match_operand:SI 1 "register_operand" "r")
2005
+ (match_operand 2 "aarch64_imm3" "Ui3"))
2006
+ (match_operand 3 "const_int_operand" "n"))
2007
+ (match_operand:SI 4 "register_operand" "r"))))]
2008
+ "aarch64_uxt_size (INTVAL (operands[2]), INTVAL (operands[3])) != 0"
2010
+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]),
2011
+ INTVAL (operands[3])));
2012
+ return \"add\t%w0, %w4, %w1, uxt%e3 %2\";"
2013
+ [(set_attr "type" "alu_ext")]
2016
(define_insn "*add_uxt<mode>_multp2"
2017
[(set (match_operand:GPI 0 "register_operand" "=rk")
2019
@@ -2140,6 +2250,38 @@
2020
[(set_attr "type" "adc_reg")]
2023
+(define_insn "*sub_uxt<mode>_shift2"
2024
+ [(set (match_operand:GPI 0 "register_operand" "=rk")
2025
+ (minus:GPI (match_operand:GPI 4 "register_operand" "rk")
2027
+ (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
2028
+ (match_operand 2 "aarch64_imm3" "Ui3"))
2029
+ (match_operand 3 "const_int_operand" "n"))))]
2030
+ "aarch64_uxt_size (INTVAL (operands[2]),INTVAL (operands[3])) != 0"
2032
+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]),
2033
+ INTVAL (operands[3])));
2034
+ return \"sub\t%<w>0, %<w>4, %<w>1, uxt%e3 %2\";"
2035
+ [(set_attr "type" "alu_ext")]
2038
+;; zero_extend version of above
2039
+(define_insn "*sub_uxtsi_shift2_uxtw"
2040
+ [(set (match_operand:DI 0 "register_operand" "=rk")
2042
+ (minus:SI (match_operand:SI 4 "register_operand" "rk")
2044
+ (ashift:SI (match_operand:SI 1 "register_operand" "r")
2045
+ (match_operand 2 "aarch64_imm3" "Ui3"))
2046
+ (match_operand 3 "const_int_operand" "n")))))]
2047
+ "aarch64_uxt_size (INTVAL (operands[2]),INTVAL (operands[3])) != 0"
2049
+ operands[3] = GEN_INT (aarch64_uxt_size (INTVAL (operands[2]),
2050
+ INTVAL (operands[3])));
2051
+ return \"sub\t%w0, %w4, %w1, uxt%e3 %2\";"
2052
+ [(set_attr "type" "alu_ext")]
2055
(define_insn "*sub_uxt<mode>_multp2"
2056
[(set (match_operand:GPI 0 "register_operand" "=rk")
2057
(minus:GPI (match_operand:GPI 4 "register_operand" "rk")
2058
@@ -3058,6 +3200,26 @@
2059
(set_attr "simd" "*,yes")]
2062
+(define_insn "*<NLOGICAL:optab>_one_cmplsidi3_ze"
2063
+ [(set (match_operand:DI 0 "register_operand" "=r")
2065
+ (NLOGICAL:SI (not:SI (match_operand:SI 1 "register_operand" "r"))
2066
+ (match_operand:SI 2 "register_operand" "r"))))]
2068
+ "<NLOGICAL:nlogical>\\t%w0, %w2, %w1"
2069
+ [(set_attr "type" "logic_reg")]
2072
+(define_insn "*xor_one_cmplsidi3_ze"
2073
+ [(set (match_operand:DI 0 "register_operand" "=r")
2075
+ (not:SI (xor:SI (match_operand:SI 1 "register_operand" "r")
2076
+ (match_operand:SI 2 "register_operand" "r")))))]
2078
+ "eon\\t%w0, %w1, %w2"
2079
+ [(set_attr "type" "logic_reg")]
2082
;; (xor (not a) b) is simplify_rtx-ed down to (not (xor a b)).
2083
;; eon does not operate on SIMD registers so the vector variant must be split.
2084
(define_insn_and_split "*xor_one_cmpl<mode>3"
2085
@@ -3131,6 +3293,32 @@
2086
[(set_attr "type" "logics_shift_imm")]
2089
+(define_insn "*eor_one_cmpl_<SHIFT:optab><mode>3_alt"
2090
+ [(set (match_operand:GPI 0 "register_operand" "=r")
2093
+ (match_operand:GPI 1 "register_operand" "r")
2094
+ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))
2095
+ (match_operand:GPI 3 "register_operand" "r"))))]
2097
+ "eon\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2"
2098
+ [(set_attr "type" "logic_shift_imm")]
2101
+;; Zero-extend version of the above.
2102
+(define_insn "*eor_one_cmpl_<SHIFT:optab>sidi3_alt_ze"
2103
+ [(set (match_operand:DI 0 "register_operand" "=r")
2107
+ (match_operand:SI 1 "register_operand" "r")
2108
+ (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
2109
+ (match_operand:SI 3 "register_operand" "r")))))]
2111
+ "eon\\t%w0, %w3, %w1, <SHIFT:shift> %2"
2112
+ [(set_attr "type" "logic_shift_imm")]
2115
(define_insn "*and_one_cmpl_<SHIFT:optab><mode>3_compare0"
2116
[(set (reg:CC_NZ CC_REGNUM)
2118
@@ -3551,6 +3739,21 @@
2119
[(set_attr "type" "shift_imm")]
2122
+;; There are no canonicalisation rules for ashift and lshiftrt inside an ior
2123
+;; so we have to match both orderings.
2124
+(define_insn "*extr<mode>5_insn_alt"
2125
+ [(set (match_operand:GPI 0 "register_operand" "=r")
2126
+ (ior:GPI (lshiftrt:GPI (match_operand:GPI 2 "register_operand" "r")
2127
+ (match_operand 4 "const_int_operand" "n"))
2128
+ (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
2129
+ (match_operand 3 "const_int_operand" "n"))))]
2130
+ "UINTVAL (operands[3]) < GET_MODE_BITSIZE (<MODE>mode)
2131
+ && (UINTVAL (operands[3]) + UINTVAL (operands[4])
2132
+ == GET_MODE_BITSIZE (<MODE>mode))"
2133
+ "extr\\t%<w>0, %<w>1, %<w>2, %4"
2134
+ [(set_attr "type" "shift_imm")]
2137
;; zero_extend version of the above
2138
(define_insn "*extrsi5_insn_uxtw"
2139
[(set (match_operand:DI 0 "register_operand" "=r")
2140
@@ -3565,6 +3768,19 @@
2141
[(set_attr "type" "shift_imm")]
2144
+(define_insn "*extrsi5_insn_uxtw_alt"
2145
+ [(set (match_operand:DI 0 "register_operand" "=r")
2147
+ (ior:SI (lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
2148
+ (match_operand 4 "const_int_operand" "n"))
2149
+ (ashift:SI (match_operand:SI 1 "register_operand" "r")
2150
+ (match_operand 3 "const_int_operand" "n")))))]
2151
+ "UINTVAL (operands[3]) < 32 &&
2152
+ (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)"
2153
+ "extr\\t%w0, %w1, %w2, %4"
2154
+ [(set_attr "type" "shift_imm")]
2157
(define_insn "*ror<mode>3_insn"
2158
[(set (match_operand:GPI 0 "register_operand" "=r")
2159
(rotate:GPI (match_operand:GPI 1 "register_operand" "r")
2160
--- a/src/gcc/config/aarch64/arm_neon.h
2161
+++ b/src/gcc/config/aarch64/arm_neon.h
2162
@@ -5665,8 +5665,6 @@ vaddlvq_u32 (uint32x4_t a)
2164
/* vcvt_high_f32_f16 not supported */
2166
-static float32x2_t vdup_n_f32 (float32_t);
2168
#define vcvt_n_f32_s32(a, b) \
2171
@@ -9824,272 +9822,6 @@ vrsqrtss_f32 (float32_t a, float32_t b)
2175
-#define vst1_lane_f32(a, b, c) \
2178
- float32x2_t b_ = (b); \
2179
- float32_t * a_ = (a); \
2180
- __asm__ ("st1 {%1.s}[%2],[%0]" \
2182
- : "r"(a_), "w"(b_), "i"(c) \
2186
-#define vst1_lane_f64(a, b, c) \
2189
- float64x1_t b_ = (b); \
2190
- float64_t * a_ = (a); \
2191
- __asm__ ("st1 {%1.d}[%2],[%0]" \
2193
- : "r"(a_), "w"(b_), "i"(c) \
2197
-#define vst1_lane_p8(a, b, c) \
2200
- poly8x8_t b_ = (b); \
2201
- poly8_t * a_ = (a); \
2202
- __asm__ ("st1 {%1.b}[%2],[%0]" \
2204
- : "r"(a_), "w"(b_), "i"(c) \
2208
-#define vst1_lane_p16(a, b, c) \
2211
- poly16x4_t b_ = (b); \
2212
- poly16_t * a_ = (a); \
2213
- __asm__ ("st1 {%1.h}[%2],[%0]" \
2215
- : "r"(a_), "w"(b_), "i"(c) \
2219
-#define vst1_lane_s8(a, b, c) \
2222
- int8x8_t b_ = (b); \
2223
- int8_t * a_ = (a); \
2224
- __asm__ ("st1 {%1.b}[%2],[%0]" \
2226
- : "r"(a_), "w"(b_), "i"(c) \
2230
-#define vst1_lane_s16(a, b, c) \
2233
- int16x4_t b_ = (b); \
2234
- int16_t * a_ = (a); \
2235
- __asm__ ("st1 {%1.h}[%2],[%0]" \
2237
- : "r"(a_), "w"(b_), "i"(c) \
2241
-#define vst1_lane_s32(a, b, c) \
2244
- int32x2_t b_ = (b); \
2245
- int32_t * a_ = (a); \
2246
- __asm__ ("st1 {%1.s}[%2],[%0]" \
2248
- : "r"(a_), "w"(b_), "i"(c) \
2252
-#define vst1_lane_s64(a, b, c) \
2255
- int64x1_t b_ = (b); \
2256
- int64_t * a_ = (a); \
2257
- __asm__ ("st1 {%1.d}[%2],[%0]" \
2259
- : "r"(a_), "w"(b_), "i"(c) \
2263
-#define vst1_lane_u8(a, b, c) \
2266
- uint8x8_t b_ = (b); \
2267
- uint8_t * a_ = (a); \
2268
- __asm__ ("st1 {%1.b}[%2],[%0]" \
2270
- : "r"(a_), "w"(b_), "i"(c) \
2274
-#define vst1_lane_u16(a, b, c) \
2277
- uint16x4_t b_ = (b); \
2278
- uint16_t * a_ = (a); \
2279
- __asm__ ("st1 {%1.h}[%2],[%0]" \
2281
- : "r"(a_), "w"(b_), "i"(c) \
2285
-#define vst1_lane_u32(a, b, c) \
2288
- uint32x2_t b_ = (b); \
2289
- uint32_t * a_ = (a); \
2290
- __asm__ ("st1 {%1.s}[%2],[%0]" \
2292
- : "r"(a_), "w"(b_), "i"(c) \
2296
-#define vst1_lane_u64(a, b, c) \
2299
- uint64x1_t b_ = (b); \
2300
- uint64_t * a_ = (a); \
2301
- __asm__ ("st1 {%1.d}[%2],[%0]" \
2303
- : "r"(a_), "w"(b_), "i"(c) \
2308
-#define vst1q_lane_f32(a, b, c) \
2311
- float32x4_t b_ = (b); \
2312
- float32_t * a_ = (a); \
2313
- __asm__ ("st1 {%1.s}[%2],[%0]" \
2315
- : "r"(a_), "w"(b_), "i"(c) \
2319
-#define vst1q_lane_f64(a, b, c) \
2322
- float64x2_t b_ = (b); \
2323
- float64_t * a_ = (a); \
2324
- __asm__ ("st1 {%1.d}[%2],[%0]" \
2326
- : "r"(a_), "w"(b_), "i"(c) \
2330
-#define vst1q_lane_p8(a, b, c) \
2333
- poly8x16_t b_ = (b); \
2334
- poly8_t * a_ = (a); \
2335
- __asm__ ("st1 {%1.b}[%2],[%0]" \
2337
- : "r"(a_), "w"(b_), "i"(c) \
2341
-#define vst1q_lane_p16(a, b, c) \
2344
- poly16x8_t b_ = (b); \
2345
- poly16_t * a_ = (a); \
2346
- __asm__ ("st1 {%1.h}[%2],[%0]" \
2348
- : "r"(a_), "w"(b_), "i"(c) \
2352
-#define vst1q_lane_s8(a, b, c) \
2355
- int8x16_t b_ = (b); \
2356
- int8_t * a_ = (a); \
2357
- __asm__ ("st1 {%1.b}[%2],[%0]" \
2359
- : "r"(a_), "w"(b_), "i"(c) \
2363
-#define vst1q_lane_s16(a, b, c) \
2366
- int16x8_t b_ = (b); \
2367
- int16_t * a_ = (a); \
2368
- __asm__ ("st1 {%1.h}[%2],[%0]" \
2370
- : "r"(a_), "w"(b_), "i"(c) \
2374
-#define vst1q_lane_s32(a, b, c) \
2377
- int32x4_t b_ = (b); \
2378
- int32_t * a_ = (a); \
2379
- __asm__ ("st1 {%1.s}[%2],[%0]" \
2381
- : "r"(a_), "w"(b_), "i"(c) \
2385
-#define vst1q_lane_s64(a, b, c) \
2388
- int64x2_t b_ = (b); \
2389
- int64_t * a_ = (a); \
2390
- __asm__ ("st1 {%1.d}[%2],[%0]" \
2392
- : "r"(a_), "w"(b_), "i"(c) \
2396
-#define vst1q_lane_u8(a, b, c) \
2399
- uint8x16_t b_ = (b); \
2400
- uint8_t * a_ = (a); \
2401
- __asm__ ("st1 {%1.b}[%2],[%0]" \
2403
- : "r"(a_), "w"(b_), "i"(c) \
2407
-#define vst1q_lane_u16(a, b, c) \
2410
- uint16x8_t b_ = (b); \
2411
- uint16_t * a_ = (a); \
2412
- __asm__ ("st1 {%1.h}[%2],[%0]" \
2414
- : "r"(a_), "w"(b_), "i"(c) \
2418
-#define vst1q_lane_u32(a, b, c) \
2421
- uint32x4_t b_ = (b); \
2422
- uint32_t * a_ = (a); \
2423
- __asm__ ("st1 {%1.s}[%2],[%0]" \
2425
- : "r"(a_), "w"(b_), "i"(c) \
2429
-#define vst1q_lane_u64(a, b, c) \
2432
- uint64x2_t b_ = (b); \
2433
- uint64_t * a_ = (a); \
2434
- __asm__ ("st1 {%1.d}[%2],[%0]" \
2436
- : "r"(a_), "w"(b_), "i"(c) \
2441
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2442
vtst_p8 (poly8x8_t a, poly8x8_t b)
2444
@@ -11668,25 +11400,25 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
2448
-static __inline uint8x16_t
2449
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2450
vaeseq_u8 (uint8x16_t data, uint8x16_t key)
2452
return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
2455
-static __inline uint8x16_t
2456
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2457
vaesdq_u8 (uint8x16_t data, uint8x16_t key)
2459
return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
2462
-static __inline uint8x16_t
2463
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2464
vaesmcq_u8 (uint8x16_t data)
2466
return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
2469
-static __inline uint8x16_t
2470
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2471
vaesimcq_u8 (uint8x16_t data)
2473
return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
2474
@@ -11887,7 +11619,7 @@ vceq_s32 (int32x2_t __a, int32x2_t __b)
2475
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2476
vceq_s64 (int64x1_t __a, int64x1_t __b)
2478
- return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
2479
+ return (uint64x1_t) (__a == __b);
2482
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2483
@@ -11911,7 +11643,7 @@ vceq_u32 (uint32x2_t __a, uint32x2_t __b)
2484
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2485
vceq_u64 (uint64x1_t __a, uint64x1_t __b)
2487
- return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
2488
+ return (__a == __b);
2491
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2492
@@ -12047,7 +11779,7 @@ vceqz_s32 (int32x2_t __a)
2493
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2494
vceqz_s64 (int64x1_t __a)
2496
- return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
2497
+ return (uint64x1_t) (__a == __AARCH64_INT64_C (0));
2500
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2501
@@ -12071,7 +11803,7 @@ vceqz_u32 (uint32x2_t __a)
2502
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2503
vceqz_u64 (uint64x1_t __a)
2505
- return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
2506
+ return (__a == __AARCH64_UINT64_C (0));
2509
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2510
@@ -12201,7 +11933,7 @@ vcge_s32 (int32x2_t __a, int32x2_t __b)
2511
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2512
vcge_s64 (int64x1_t __a, int64x1_t __b)
2514
- return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
2515
+ return (uint64x1_t) (__a >= __b);
2518
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2519
@@ -12225,7 +11957,7 @@ vcge_u32 (uint32x2_t __a, uint32x2_t __b)
2520
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2521
vcge_u64 (uint64x1_t __a, uint64x1_t __b)
2523
- return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
2524
+ return (__a >= __b);
2527
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2528
@@ -12349,7 +12081,7 @@ vcgez_s32 (int32x2_t __a)
2529
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2530
vcgez_s64 (int64x1_t __a)
2532
- return (uint64x1_t) {__a[0] >= 0ll ? -1ll : 0ll};
2533
+ return (uint64x1_t) (__a >= __AARCH64_INT64_C (0));
2536
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2537
@@ -12443,7 +12175,7 @@ vcgt_s32 (int32x2_t __a, int32x2_t __b)
2538
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2539
vcgt_s64 (int64x1_t __a, int64x1_t __b)
2541
- return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
2542
+ return (uint64x1_t) (__a > __b);
2545
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2546
@@ -12467,7 +12199,7 @@ vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
2547
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2548
vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
2550
- return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
2551
+ return (__a > __b);
2554
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2555
@@ -12591,7 +12323,7 @@ vcgtz_s32 (int32x2_t __a)
2556
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2557
vcgtz_s64 (int64x1_t __a)
2559
- return (uint64x1_t) {__a[0] > 0ll ? -1ll : 0ll};
2560
+ return (uint64x1_t) (__a > __AARCH64_INT64_C (0));
2563
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2564
@@ -12685,7 +12417,7 @@ vcle_s32 (int32x2_t __a, int32x2_t __b)
2565
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2566
vcle_s64 (int64x1_t __a, int64x1_t __b)
2568
- return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
2569
+ return (uint64x1_t) (__a <= __b);
2572
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2573
@@ -12709,7 +12441,7 @@ vcle_u32 (uint32x2_t __a, uint32x2_t __b)
2574
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2575
vcle_u64 (uint64x1_t __a, uint64x1_t __b)
2577
- return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
2578
+ return (__a <= __b);
2581
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2582
@@ -12833,7 +12565,7 @@ vclez_s32 (int32x2_t __a)
2583
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2584
vclez_s64 (int64x1_t __a)
2586
- return (uint64x1_t) {__a[0] <= 0ll ? -1ll : 0ll};
2587
+ return (uint64x1_t) (__a <= __AARCH64_INT64_C (0));
2590
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2591
@@ -12927,7 +12659,7 @@ vclt_s32 (int32x2_t __a, int32x2_t __b)
2592
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2593
vclt_s64 (int64x1_t __a, int64x1_t __b)
2595
- return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
2596
+ return (uint64x1_t) (__a < __b);
2599
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2600
@@ -12951,7 +12683,7 @@ vclt_u32 (uint32x2_t __a, uint32x2_t __b)
2601
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2602
vclt_u64 (uint64x1_t __a, uint64x1_t __b)
2604
- return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
2605
+ return (__a < __b);
2608
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2609
@@ -13075,7 +12807,7 @@ vcltz_s32 (int32x2_t __a)
2610
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2611
vcltz_s64 (int64x1_t __a)
2613
- return (uint64x1_t) {__a[0] < 0ll ? -1ll : 0ll};
2614
+ return (uint64x1_t) (__a < __AARCH64_INT64_C (0));
2617
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2618
@@ -21321,72 +21053,74 @@ vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
2622
-static __inline uint32x4_t
2623
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2624
vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
2626
return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
2628
-static __inline uint32x4_t
2630
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2631
vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
2633
return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
2635
-static __inline uint32x4_t
2637
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2638
vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
2640
return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
2643
-static __inline uint32_t
2644
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2645
vsha1h_u32 (uint32_t hash_e)
2647
return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
2650
-static __inline uint32x4_t
2651
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2652
vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
2654
return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
2657
-static __inline uint32x4_t
2658
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2659
vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
2661
return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
2664
-static __inline uint32x4_t
2665
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2666
vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
2668
return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
2671
-static __inline uint32x4_t
2672
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2673
vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
2675
return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
2678
-static __inline uint32x4_t
2679
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2680
vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
2682
return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
2685
-static __inline uint32x4_t
2686
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2687
vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
2689
return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
2692
-static __inline poly128_t
2693
+__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
2694
vmull_p64 (poly64_t a, poly64_t b)
2697
__builtin_aarch64_crypto_pmulldi_ppp (a, b);
2700
-static __inline poly128_t
2701
+__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
2702
vmull_high_p64 (poly64x2_t a, poly64x2_t b)
2704
return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
2705
@@ -22302,6 +22036,8 @@ vst1_u64 (uint64_t *a, uint64x1_t b)
2711
__extension__ static __inline void __attribute__ ((__always_inline__))
2712
vst1q_f32 (float32_t *a, float32x4_t b)
2714
@@ -22314,8 +22050,6 @@ vst1q_f64 (float64_t *a, float64x2_t b)
2715
__builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
2720
__extension__ static __inline void __attribute__ ((__always_inline__))
2721
vst1q_p8 (poly8_t *a, poly8x16_t b)
2723
@@ -22382,6 +22116,154 @@ vst1q_u64 (uint64_t *a, uint64x2_t b)
2729
+__extension__ static __inline void __attribute__ ((__always_inline__))
2730
+vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane)
2732
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2735
+__extension__ static __inline void __attribute__ ((__always_inline__))
2736
+vst1_lane_f64 (float64_t *__a, float64x1_t __b, const int __lane)
2738
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2741
+__extension__ static __inline void __attribute__ ((__always_inline__))
2742
+vst1_lane_p8 (poly8_t *__a, poly8x8_t __b, const int __lane)
2744
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2747
+__extension__ static __inline void __attribute__ ((__always_inline__))
2748
+vst1_lane_p16 (poly16_t *__a, poly16x4_t __b, const int __lane)
2750
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2753
+__extension__ static __inline void __attribute__ ((__always_inline__))
2754
+vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane)
2756
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2759
+__extension__ static __inline void __attribute__ ((__always_inline__))
2760
+vst1_lane_s16 (int16_t *__a, int16x4_t __b, const int __lane)
2762
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2765
+__extension__ static __inline void __attribute__ ((__always_inline__))
2766
+vst1_lane_s32 (int32_t *__a, int32x2_t __b, const int __lane)
2768
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2771
+__extension__ static __inline void __attribute__ ((__always_inline__))
2772
+vst1_lane_s64 (int64_t *__a, int64x1_t __b, const int __lane)
2774
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2777
+__extension__ static __inline void __attribute__ ((__always_inline__))
2778
+vst1_lane_u8 (uint8_t *__a, uint8x8_t __b, const int __lane)
2780
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2783
+__extension__ static __inline void __attribute__ ((__always_inline__))
2784
+vst1_lane_u16 (uint16_t *__a, uint16x4_t __b, const int __lane)
2786
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2789
+__extension__ static __inline void __attribute__ ((__always_inline__))
2790
+vst1_lane_u32 (uint32_t *__a, uint32x2_t __b, const int __lane)
2792
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2795
+__extension__ static __inline void __attribute__ ((__always_inline__))
2796
+vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane)
2798
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2803
+__extension__ static __inline void __attribute__ ((__always_inline__))
2804
+vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane)
2806
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2809
+__extension__ static __inline void __attribute__ ((__always_inline__))
2810
+vst1q_lane_f64 (float64_t *__a, float64x2_t __b, const int __lane)
2812
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2815
+__extension__ static __inline void __attribute__ ((__always_inline__))
2816
+vst1q_lane_p8 (poly8_t *__a, poly8x16_t __b, const int __lane)
2818
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2821
+__extension__ static __inline void __attribute__ ((__always_inline__))
2822
+vst1q_lane_p16 (poly16_t *__a, poly16x8_t __b, const int __lane)
2824
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2827
+__extension__ static __inline void __attribute__ ((__always_inline__))
2828
+vst1q_lane_s8 (int8_t *__a, int8x16_t __b, const int __lane)
2830
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2833
+__extension__ static __inline void __attribute__ ((__always_inline__))
2834
+vst1q_lane_s16 (int16_t *__a, int16x8_t __b, const int __lane)
2836
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2839
+__extension__ static __inline void __attribute__ ((__always_inline__))
2840
+vst1q_lane_s32 (int32_t *__a, int32x4_t __b, const int __lane)
2842
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2845
+__extension__ static __inline void __attribute__ ((__always_inline__))
2846
+vst1q_lane_s64 (int64_t *__a, int64x2_t __b, const int __lane)
2848
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2851
+__extension__ static __inline void __attribute__ ((__always_inline__))
2852
+vst1q_lane_u8 (uint8_t *__a, uint8x16_t __b, const int __lane)
2854
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2857
+__extension__ static __inline void __attribute__ ((__always_inline__))
2858
+vst1q_lane_u16 (uint16_t *__a, uint16x8_t __b, const int __lane)
2860
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2863
+__extension__ static __inline void __attribute__ ((__always_inline__))
2864
+vst1q_lane_u32 (uint32_t *__a, uint32x4_t __b, const int __lane)
2866
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2869
+__extension__ static __inline void __attribute__ ((__always_inline__))
2870
+vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane)
2872
+ *__a = __aarch64_vget_lane_any (__b, __lane);
2877
__extension__ static __inline void
2878
@@ -23887,7 +23769,7 @@ vtst_s32 (int32x2_t __a, int32x2_t __b)
2879
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2880
vtst_s64 (int64x1_t __a, int64x1_t __b)
2882
- return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
2883
+ return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0));
2886
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2887
@@ -23911,7 +23793,7 @@ vtst_u32 (uint32x2_t __a, uint32x2_t __b)
2888
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2889
vtst_u64 (uint64x1_t __a, uint64x1_t __b)
2891
- return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
2892
+ return ((__a & __b) != __AARCH64_UINT64_C (0));
2895
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2897
+++ b/src/gcc/config/aarch64/driver-aarch64.c
2899
+/* Native CPU detection for aarch64.
2900
+ Copyright (C) 2015 Free Software Foundation, Inc.
2902
+ This file is part of GCC.
2904
+ GCC is free software; you can redistribute it and/or modify
2905
+ it under the terms of the GNU General Public License as published by
2906
+ the Free Software Foundation; either version 3, or (at your option)
2907
+ any later version.
2909
+ GCC is distributed in the hope that it will be useful,
2910
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
2911
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2912
+ GNU General Public License for more details.
2914
+ You should have received a copy of the GNU General Public License
2915
+ along with GCC; see the file COPYING3. If not see
2916
+ <http://www.gnu.org/licenses/>. */
2918
+#include "config.h"
2919
+#include "system.h"
2921
+struct arch_extension
2924
+ const char *feat_string;
2927
+#define AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \
2928
+ { EXT_NAME, FEATURE_STRING },
2929
+static struct arch_extension ext_to_feat_string[] =
2931
+#include "aarch64-option-extensions.def"
2933
+#undef AARCH64_OPT_EXTENSION
2936
+struct aarch64_core_data
2940
+ const char* implementer_id;
2941
+ const char* part_no;
2944
+#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
2945
+ { CORE_NAME, #ARCH, IMP, PART },
2947
+static struct aarch64_core_data cpu_data [] =
2949
+#include "aarch64-cores.def"
2950
+ { NULL, NULL, NULL, NULL }
2953
+#undef AARCH64_CORE
2955
+struct aarch64_arch
2961
+#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
2964
+static struct aarch64_arch aarch64_arches [] =
2966
+#include "aarch64-arches.def"
2970
+#undef AARCH64_ARCH
2972
+/* Return the full architecture name string corresponding to the
2976
+get_arch_name_from_id (const char* id)
2978
+ unsigned int i = 0;
2980
+ for (i = 0; aarch64_arches[i].id != NULL; i++)
2982
+ if (strcmp (id, aarch64_arches[i].id) == 0)
2983
+ return aarch64_arches[i].name;
2990
+/* Check wether the string CORE contains the same CPU part numbers
2991
+ as BL_STRING. For example CORE="{0xd03, 0xd07}" and BL_STRING="0xd07.0xd03"
2992
+ should return true. */
2995
+valid_bL_string_p (const char** core, const char* bL_string)
2997
+ return strstr (bL_string, core[0]) != NULL
2998
+ && strstr (bL_string, core[1]) != NULL;
3001
+/* Return true iff ARR contains STR in one of its two elements. */
3004
+contains_string_p (const char** arr, const char* str)
3008
+ if (arr[0] != NULL)
3010
+ res = strstr (arr[0], str) != NULL;
3014
+ if (arr[1] != NULL)
3015
+ return strstr (arr[1], str) != NULL;
3021
+/* This will be called by the spec parser in gcc.c when it sees
3022
+ a %:local_cpu_detect(args) construct. Currently it will be called
3023
+ with either "arch", "cpu" or "tune" as argument depending on if
3024
+ -march=native, -mcpu=native or -mtune=native is to be substituted.
3026
+ It returns a string containing new command line parameters to be
3027
+ put at the place of the above two options, depending on what CPU
3028
+ this is executed. E.g. "-march=armv8-a" on a Cortex-A57 for
3029
+ -march=native. If the routine can't detect a known processor,
3030
+ the -march or -mtune option is discarded.
3032
+ For -mtune and -mcpu arguments it attempts to detect the CPU or
3033
+ a big.LITTLE system.
3034
+ ARGC and ARGV are set depending on the actual arguments given
3038
+host_detect_local_cpu (int argc, const char **argv)
3040
+ const char *arch_id = NULL;
3041
+ const char *res = NULL;
3042
+ static const int num_exts = ARRAY_SIZE (ext_to_feat_string);
3045
+ bool arch = false;
3046
+ bool tune = false;
3048
+ unsigned int i = 0;
3049
+ unsigned int core_idx = 0;
3050
+ const char* imps[2] = { NULL, NULL };
3051
+ const char* cores[2] = { NULL, NULL };
3052
+ unsigned int n_cores = 0;
3053
+ unsigned int n_imps = 0;
3054
+ bool processed_exts = false;
3055
+ const char *ext_string = "";
3057
+ gcc_assert (argc);
3062
+ /* Are we processing -march, mtune or mcpu? */
3063
+ arch = strcmp (argv[0], "arch") == 0;
3065
+ tune = strcmp (argv[0], "tune") == 0;
3067
+ if (!arch && !tune)
3068
+ cpu = strcmp (argv[0], "cpu") == 0;
3070
+ if (!arch && !tune && !cpu)
3073
+ f = fopen ("/proc/cpuinfo", "r");
3078
+ /* Look through /proc/cpuinfo to determine the implementer
3079
+ and then the part number that identifies a particular core. */
3080
+ while (fgets (buf, sizeof (buf), f) != NULL)
3082
+ if (strstr (buf, "implementer") != NULL)
3084
+ for (i = 0; cpu_data[i].name != NULL; i++)
3085
+ if (strstr (buf, cpu_data[i].implementer_id) != NULL
3086
+ && !contains_string_p (imps, cpu_data[i].implementer_id))
3091
+ imps[n_imps++] = cpu_data[i].implementer_id;
3098
+ if (strstr (buf, "part") != NULL)
3100
+ for (i = 0; cpu_data[i].name != NULL; i++)
3101
+ if (strstr (buf, cpu_data[i].part_no) != NULL
3102
+ && !contains_string_p (cores, cpu_data[i].part_no))
3107
+ cores[n_cores++] = cpu_data[i].part_no;
3109
+ arch_id = cpu_data[i].arch;
3114
+ if (!tune && !processed_exts && strstr (buf, "Features") != NULL)
3116
+ for (i = 0; i < num_exts; i++)
3118
+ bool enabled = true;
3120
+ char *feat_string = concat (ext_to_feat_string[i].feat_string, NULL);
3122
+ p = strtok (feat_string, " ");
3126
+ if (strstr (buf, p) == NULL)
3131
+ p = strtok (NULL, " ");
3133
+ ext_string = concat (ext_string, "+", enabled ? "" : "no",
3134
+ ext_to_feat_string[i].ext, NULL);
3136
+ processed_exts = true;
3143
+ /* Weird cpuinfo format that we don't know how to handle. */
3144
+ if (n_cores == 0 || n_cores > 2 || n_imps != 1)
3147
+ if (arch && !arch_id)
3152
+ const char* arch_name = get_arch_name_from_id (arch_id);
3154
+ /* We got some arch indentifier that's not in aarch64-arches.def? */
3158
+ res = concat ("-march=", arch_name, NULL);
3160
+ /* We have big.LITTLE. */
3161
+ else if (n_cores == 2)
3163
+ for (i = 0; cpu_data[i].name != NULL; i++)
3165
+ if (strchr (cpu_data[i].part_no, '.') != NULL
3166
+ && strncmp (cpu_data[i].implementer_id, imps[0], strlen (imps[0]) - 1) == 0
3167
+ && valid_bL_string_p (cores, cpu_data[i].part_no))
3169
+ res = concat ("-m", cpu ? "cpu" : "tune", "=", cpu_data[i].name, NULL);
3176
+ /* The simple, non-big.LITTLE case. */
3179
+ if (strncmp (cpu_data[core_idx].implementer_id, imps[0],
3180
+ strlen (imps[0]) - 1) != 0)
3183
+ res = concat ("-m", cpu ? "cpu" : "tune", "=",
3184
+ cpu_data[core_idx].name, NULL);
3190
+ res = concat (res, ext_string, NULL);
3196
+ /* If detection fails we ignore the option.
3197
+ Clean up and return empty string. */
3207
+++ b/src/gcc/config/aarch64/x-aarch64
3209
+driver-aarch64.o: $(srcdir)/config/aarch64/driver-aarch64.c \
3210
+ $(CONFIG_H) $(SYSTEM_H)
3211
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
3212
--- a/src/gcc/config/alpha/linux.h
3213
+++ b/src/gcc/config/alpha/linux.h
3214
@@ -61,10 +61,14 @@ along with GCC; see the file COPYING3. If not see
3215
#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC)
3216
#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC)
3217
#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC)
3219
+#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL)
3221
#define OPTION_GLIBC (linux_libc == LIBC_GLIBC)
3222
#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC)
3223
#define OPTION_BIONIC (linux_libc == LIBC_BIONIC)
3225
+#define OPTION_MUSL (linux_libc == LIBC_MUSL)
3228
/* Determine what functions are present at the runtime;
3229
--- a/src/gcc/config/arm/aarch-common-protos.h
3230
+++ b/src/gcc/config/arm/aarch-common-protos.h
3231
@@ -102,6 +102,8 @@ struct mem_cost_table
3232
const int storef; /* SFmode. */
3233
const int stored; /* DFmode. */
3234
const int store_unaligned; /* Extra for unaligned stores. */
3235
+ const int loadv; /* Vector load. */
3236
+ const int storev; /* Vector store. */
3239
struct fp_cost_table
3240
--- a/src/gcc/config/arm/aarch-cost-tables.h
3241
+++ b/src/gcc/config/arm/aarch-cost-tables.h
3242
@@ -81,7 +81,9 @@ const struct cpu_cost_table generic_extra_costs =
3243
1, /* stm_regs_per_insn_subsequent. */
3244
COSTS_N_INSNS (2), /* storef. */
3245
COSTS_N_INSNS (3), /* stored. */
3246
- COSTS_N_INSNS (1) /* store_unaligned. */
3247
+ COSTS_N_INSNS (1), /* store_unaligned. */
3248
+ COSTS_N_INSNS (1), /* loadv. */
3249
+ COSTS_N_INSNS (1) /* storev. */
3253
@@ -130,12 +132,12 @@ const struct cpu_cost_table cortexa53_extra_costs =
3256
COSTS_N_INSNS (1), /* shift. */
3257
- COSTS_N_INSNS (2), /* shift_reg. */
3258
+ 0, /* shift_reg. */
3259
COSTS_N_INSNS (1), /* arith_shift. */
3260
- COSTS_N_INSNS (2), /* arith_shift_reg. */
3261
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
3262
COSTS_N_INSNS (1), /* log_shift. */
3263
- COSTS_N_INSNS (2), /* log_shift_reg. */
3265
+ COSTS_N_INSNS (1), /* log_shift_reg. */
3266
+ COSTS_N_INSNS (1), /* extend. */
3267
COSTS_N_INSNS (1), /* extend_arith. */
3268
COSTS_N_INSNS (1), /* bfi. */
3269
COSTS_N_INSNS (1), /* bfx. */
3270
@@ -182,7 +184,9 @@ const struct cpu_cost_table cortexa53_extra_costs =
3271
2, /* stm_regs_per_insn_subsequent. */
3274
- COSTS_N_INSNS (1) /* store_unaligned. */
3275
+ COSTS_N_INSNS (1), /* store_unaligned. */
3276
+ COSTS_N_INSNS (1), /* loadv. */
3277
+ COSTS_N_INSNS (1) /* storev. */
3281
@@ -283,7 +287,9 @@ const struct cpu_cost_table cortexa57_extra_costs =
3282
2, /* stm_regs_per_insn_subsequent. */
3285
- COSTS_N_INSNS (1) /* store_unaligned. */
3286
+ COSTS_N_INSNS (1), /* store_unaligned. */
3287
+ COSTS_N_INSNS (1), /* loadv. */
3288
+ COSTS_N_INSNS (1) /* storev. */
3292
@@ -385,6 +391,8 @@ const struct cpu_cost_table xgene1_extra_costs =
3295
0, /* store_unaligned. */
3296
+ COSTS_N_INSNS (1), /* loadv. */
3297
+ COSTS_N_INSNS (1) /* storev. */
3301
--- a/src/gcc/config/arm/arm-cores.def
3302
+++ b/src/gcc/config/arm/arm-cores.def
3303
@@ -158,7 +158,7 @@ ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex
3304
ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, FL_LDSCHED | FL_NO_VOLATILE_CE, cortex_m7)
3305
ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m)
3306
ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m)
3307
-ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, 9e)
3308
+ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, marvell_pj4)
3310
/* V7 big.LITTLE implementations */
3311
ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
3312
--- a/src/gcc/config/arm/arm-protos.h
3313
+++ b/src/gcc/config/arm/arm-protos.h
3314
@@ -66,10 +66,6 @@ extern rtx legitimize_tls_address (rtx, rtx);
3315
extern bool arm_legitimate_address_p (machine_mode, rtx, bool);
3316
extern int arm_legitimate_address_outer_p (machine_mode, rtx, RTX_CODE, int);
3317
extern int thumb_legitimate_offset_p (machine_mode, HOST_WIDE_INT);
3318
-extern bool arm_legitimize_reload_address (rtx *, machine_mode, int, int,
3320
-extern rtx thumb_legitimize_reload_address (rtx *, machine_mode, int, int,
3322
extern int thumb1_legitimate_address_p (machine_mode, rtx, int);
3323
extern bool ldm_stm_operation_p (rtx, bool, machine_mode mode,
3325
@@ -257,13 +253,6 @@ struct cpu_vec_costs {
3327
struct cpu_cost_table;
3329
-enum arm_sched_autopref
3331
- ARM_SCHED_AUTOPREF_OFF,
3332
- ARM_SCHED_AUTOPREF_RANK,
3333
- ARM_SCHED_AUTOPREF_FULL
3336
/* Dump function ARM_PRINT_TUNE_INFO should be updated whenever this
3337
structure is modified. */
3339
@@ -272,39 +261,57 @@ struct tune_params
3340
bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
3341
const struct cpu_cost_table *insn_extra_cost;
3342
bool (*sched_adjust_cost) (rtx_insn *, rtx, rtx_insn *, int *);
3343
+ int (*branch_cost) (bool, bool);
3344
+ /* Vectorizer costs. */
3345
+ const struct cpu_vec_costs* vec_costs;
3347
/* Maximum number of instructions to conditionalise. */
3348
int max_insns_skipped;
3349
- int num_prefetch_slots;
3350
- int l1_cache_size;
3351
- int l1_cache_line_size;
3352
- bool prefer_constant_pool;
3353
- int (*branch_cost) (bool, bool);
3354
+ /* Maximum number of instructions to inline calls to memset. */
3355
+ int max_insns_inline_memset;
3356
+ /* Issue rate of the processor. */
3357
+ unsigned int issue_rate;
3358
+ /* Explicit prefetch data. */
3362
+ int l1_cache_size;
3363
+ int l1_cache_line_size;
3365
+ enum {PREF_CONST_POOL_FALSE, PREF_CONST_POOL_TRUE}
3366
+ prefer_constant_pool: 1;
3367
/* Prefer STRD/LDRD instructions over PUSH/POP/LDM/STM. */
3368
- bool prefer_ldrd_strd;
3369
+ enum {PREF_LDRD_FALSE, PREF_LDRD_TRUE} prefer_ldrd_strd: 1;
3370
/* The preference for non short cirtcuit operation when optimizing for
3371
performance. The first element covers Thumb state and the second one
3372
is for ARM state. */
3373
- bool logical_op_non_short_circuit[2];
3374
- /* Vectorizer costs. */
3375
- const struct cpu_vec_costs* vec_costs;
3376
- /* Prefer Neon for 64-bit bitops. */
3377
- bool prefer_neon_for_64bits;
3378
+ enum log_op_non_sc {LOG_OP_NON_SC_FALSE, LOG_OP_NON_SC_TRUE};
3379
+ log_op_non_sc logical_op_non_short_circuit_thumb: 1;
3380
+ log_op_non_sc logical_op_non_short_circuit_arm: 1;
3381
/* Prefer 32-bit encoding instead of flag-setting 16-bit encoding. */
3382
- bool disparage_flag_setting_t16_encodings;
3383
- /* Prefer 32-bit encoding instead of 16-bit encoding where subset of flags
3385
- bool disparage_partial_flag_setting_t16_encodings;
3386
+ enum {DISPARAGE_FLAGS_NEITHER, DISPARAGE_FLAGS_PARTIAL, DISPARAGE_FLAGS_ALL}
3387
+ disparage_flag_setting_t16_encodings: 2;
3388
+ enum {PREF_NEON_64_FALSE, PREF_NEON_64_TRUE} prefer_neon_for_64bits: 1;
3389
/* Prefer to inline string operations like memset by using Neon. */
3390
- bool string_ops_prefer_neon;
3391
- /* Maximum number of instructions to inline calls to memset. */
3392
- int max_insns_inline_memset;
3393
- /* Bitfield encoding the fuseable pairs of instructions. */
3394
- unsigned int fuseable_ops;
3395
+ enum {PREF_NEON_STRINGOPS_FALSE, PREF_NEON_STRINGOPS_TRUE}
3396
+ string_ops_prefer_neon: 1;
3397
+ /* Bitfield encoding the fuseable pairs of instructions. Use FUSE_OPS
3398
+ in an initializer if multiple fusion operations are supported on a
3403
+ FUSE_MOVW_MOVT = 1 << 0
3404
+ } fuseable_ops: 1;
3405
/* Depth of scheduling queue to check for L2 autoprefetcher. */
3406
- enum arm_sched_autopref sched_autopref;
3407
+ enum {SCHED_AUTOPREF_OFF, SCHED_AUTOPREF_RANK, SCHED_AUTOPREF_FULL}
3408
+ sched_autopref: 2;
3411
+/* Smash multiple fusion operations into a type that can be used for an
3413
+#define FUSE_OPS(x) ((tune_params::fuse_ops) (x))
3415
extern const struct tune_params *current_tune;
3416
extern int vfp3_const_double_for_fract_bits (rtx);
3417
/* return power of two from operand, otherwise 0. */
3418
--- a/src/gcc/config/arm/arm.c
3419
+++ b/src/gcc/config/arm/arm.c
3420
@@ -940,11 +940,13 @@ struct processors
3424
-#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
3425
-#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
3429
+#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
3430
+#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
3437
/* arm generic vectorizer costs. */
3439
@@ -1027,7 +1029,9 @@ const struct cpu_cost_table cortexa9_extra_costs =
3440
2, /* stm_regs_per_insn_subsequent. */
3441
COSTS_N_INSNS (1), /* storef. */
3442
COSTS_N_INSNS (1), /* stored. */
3443
- COSTS_N_INSNS (1) /* store_unaligned. */
3444
+ COSTS_N_INSNS (1), /* store_unaligned. */
3445
+ COSTS_N_INSNS (1), /* loadv. */
3446
+ COSTS_N_INSNS (1) /* storev. */
3450
@@ -1128,7 +1132,9 @@ const struct cpu_cost_table cortexa8_extra_costs =
3451
2, /* stm_regs_per_insn_subsequent. */
3452
COSTS_N_INSNS (1), /* storef. */
3453
COSTS_N_INSNS (1), /* stored. */
3454
- COSTS_N_INSNS (1) /* store_unaligned. */
3455
+ COSTS_N_INSNS (1), /* store_unaligned. */
3456
+ COSTS_N_INSNS (1), /* loadv. */
3457
+ COSTS_N_INSNS (1) /* storev. */
3461
@@ -1230,7 +1236,9 @@ const struct cpu_cost_table cortexa5_extra_costs =
3462
2, /* stm_regs_per_insn_subsequent. */
3463
COSTS_N_INSNS (2), /* storef. */
3464
COSTS_N_INSNS (2), /* stored. */
3465
- COSTS_N_INSNS (1) /* store_unaligned. */
3466
+ COSTS_N_INSNS (1), /* store_unaligned. */
3467
+ COSTS_N_INSNS (1), /* loadv. */
3468
+ COSTS_N_INSNS (1) /* storev. */
3472
@@ -1333,7 +1341,9 @@ const struct cpu_cost_table cortexa7_extra_costs =
3473
2, /* stm_regs_per_insn_subsequent. */
3474
COSTS_N_INSNS (2), /* storef. */
3475
COSTS_N_INSNS (2), /* stored. */
3476
- COSTS_N_INSNS (1) /* store_unaligned. */
3477
+ COSTS_N_INSNS (1), /* store_unaligned. */
3478
+ COSTS_N_INSNS (1), /* loadv. */
3479
+ COSTS_N_INSNS (1) /* storev. */
3483
@@ -1434,7 +1444,9 @@ const struct cpu_cost_table cortexa12_extra_costs =
3484
2, /* stm_regs_per_insn_subsequent. */
3485
COSTS_N_INSNS (2), /* storef. */
3486
COSTS_N_INSNS (2), /* stored. */
3487
- 0 /* store_unaligned. */
3488
+ 0, /* store_unaligned. */
3489
+ COSTS_N_INSNS (1), /* loadv. */
3490
+ COSTS_N_INSNS (1) /* storev. */
3494
@@ -1535,7 +1547,9 @@ const struct cpu_cost_table cortexa15_extra_costs =
3495
2, /* stm_regs_per_insn_subsequent. */
3498
- 0 /* store_unaligned. */
3499
+ 0, /* store_unaligned. */
3500
+ COSTS_N_INSNS (1), /* loadv. */
3501
+ COSTS_N_INSNS (1) /* storev. */
3505
@@ -1636,7 +1650,9 @@ const struct cpu_cost_table v7m_extra_costs =
3506
1, /* stm_regs_per_insn_subsequent. */
3507
COSTS_N_INSNS (2), /* storef. */
3508
COSTS_N_INSNS (3), /* stored. */
3509
- COSTS_N_INSNS (1) /* store_unaligned. */
3510
+ COSTS_N_INSNS (1), /* store_unaligned. */
3511
+ COSTS_N_INSNS (1), /* loadv. */
3512
+ COSTS_N_INSNS (1) /* storev. */
3516
@@ -1678,49 +1694,50 @@ const struct cpu_cost_table v7m_extra_costs =
3520
-#define ARM_FUSE_NOTHING (0)
3521
-#define ARM_FUSE_MOVW_MOVT (1 << 0)
3523
const struct tune_params arm_slowmul_tune =
3525
arm_slowmul_rtx_costs,
3527
- NULL, /* Sched adj cost. */
3528
+ NULL, /* Insn extra costs. */
3529
+ NULL, /* Sched adj cost. */
3530
+ arm_default_branch_cost,
3531
+ &arm_default_vec_cost,
3532
3, /* Constant limit. */
3533
5, /* Max cond insns. */
3534
+ 8, /* Memset max inline. */
3535
+ 1, /* Issue rate. */
3536
ARM_PREFETCH_NOT_BENEFICIAL,
3537
- true, /* Prefer constant pool. */
3538
- arm_default_branch_cost,
3539
- false, /* Prefer LDRD/STRD. */
3540
- {true, true}, /* Prefer non short circuit. */
3541
- &arm_default_vec_cost, /* Vectorizer costs. */
3542
- false, /* Prefer Neon for 64-bits bitops. */
3543
- false, false, /* Prefer 32-bit encodings. */
3544
- false, /* Prefer Neon for stringops. */
3545
- 8, /* Maximum insns to inline memset. */
3546
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3547
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3548
+ tune_params::PREF_CONST_POOL_TRUE,
3549
+ tune_params::PREF_LDRD_FALSE,
3550
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3551
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3552
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3553
+ tune_params::PREF_NEON_64_FALSE,
3554
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3555
+ tune_params::FUSE_NOTHING,
3556
+ tune_params::SCHED_AUTOPREF_OFF
3559
const struct tune_params arm_fastmul_tune =
3561
arm_fastmul_rtx_costs,
3563
- NULL, /* Sched adj cost. */
3564
+ NULL, /* Insn extra costs. */
3565
+ NULL, /* Sched adj cost. */
3566
+ arm_default_branch_cost,
3567
+ &arm_default_vec_cost,
3568
1, /* Constant limit. */
3569
5, /* Max cond insns. */
3570
+ 8, /* Memset max inline. */
3571
+ 1, /* Issue rate. */
3572
ARM_PREFETCH_NOT_BENEFICIAL,
3573
- true, /* Prefer constant pool. */
3574
- arm_default_branch_cost,
3575
- false, /* Prefer LDRD/STRD. */
3576
- {true, true}, /* Prefer non short circuit. */
3577
- &arm_default_vec_cost, /* Vectorizer costs. */
3578
- false, /* Prefer Neon for 64-bits bitops. */
3579
- false, false, /* Prefer 32-bit encodings. */
3580
- false, /* Prefer Neon for stringops. */
3581
- 8, /* Maximum insns to inline memset. */
3582
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3583
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3584
+ tune_params::PREF_CONST_POOL_TRUE,
3585
+ tune_params::PREF_LDRD_FALSE,
3586
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3587
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3588
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3589
+ tune_params::PREF_NEON_64_FALSE,
3590
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3591
+ tune_params::FUSE_NOTHING,
3592
+ tune_params::SCHED_AUTOPREF_OFF
3595
/* StrongARM has early execution of branches, so a sequence that is worth
3596
@@ -1729,233 +1746,279 @@ const struct tune_params arm_fastmul_tune =
3597
const struct tune_params arm_strongarm_tune =
3599
arm_fastmul_rtx_costs,
3601
- NULL, /* Sched adj cost. */
3602
+ NULL, /* Insn extra costs. */
3603
+ NULL, /* Sched adj cost. */
3604
+ arm_default_branch_cost,
3605
+ &arm_default_vec_cost,
3606
1, /* Constant limit. */
3607
3, /* Max cond insns. */
3608
+ 8, /* Memset max inline. */
3609
+ 1, /* Issue rate. */
3610
ARM_PREFETCH_NOT_BENEFICIAL,
3611
- true, /* Prefer constant pool. */
3612
- arm_default_branch_cost,
3613
- false, /* Prefer LDRD/STRD. */
3614
- {true, true}, /* Prefer non short circuit. */
3615
- &arm_default_vec_cost, /* Vectorizer costs. */
3616
- false, /* Prefer Neon for 64-bits bitops. */
3617
- false, false, /* Prefer 32-bit encodings. */
3618
- false, /* Prefer Neon for stringops. */
3619
- 8, /* Maximum insns to inline memset. */
3620
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3621
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3622
+ tune_params::PREF_CONST_POOL_TRUE,
3623
+ tune_params::PREF_LDRD_FALSE,
3624
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3625
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3626
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3627
+ tune_params::PREF_NEON_64_FALSE,
3628
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3629
+ tune_params::FUSE_NOTHING,
3630
+ tune_params::SCHED_AUTOPREF_OFF
3633
const struct tune_params arm_xscale_tune =
3635
arm_xscale_rtx_costs,
3637
+ NULL, /* Insn extra costs. */
3638
xscale_sched_adjust_cost,
3639
+ arm_default_branch_cost,
3640
+ &arm_default_vec_cost,
3641
2, /* Constant limit. */
3642
3, /* Max cond insns. */
3643
+ 8, /* Memset max inline. */
3644
+ 1, /* Issue rate. */
3645
ARM_PREFETCH_NOT_BENEFICIAL,
3646
- true, /* Prefer constant pool. */
3647
- arm_default_branch_cost,
3648
- false, /* Prefer LDRD/STRD. */
3649
- {true, true}, /* Prefer non short circuit. */
3650
- &arm_default_vec_cost, /* Vectorizer costs. */
3651
- false, /* Prefer Neon for 64-bits bitops. */
3652
- false, false, /* Prefer 32-bit encodings. */
3653
- false, /* Prefer Neon for stringops. */
3654
- 8, /* Maximum insns to inline memset. */
3655
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3656
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3657
+ tune_params::PREF_CONST_POOL_TRUE,
3658
+ tune_params::PREF_LDRD_FALSE,
3659
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3660
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3661
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3662
+ tune_params::PREF_NEON_64_FALSE,
3663
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3664
+ tune_params::FUSE_NOTHING,
3665
+ tune_params::SCHED_AUTOPREF_OFF
3668
const struct tune_params arm_9e_tune =
3672
- NULL, /* Sched adj cost. */
3673
+ NULL, /* Insn extra costs. */
3674
+ NULL, /* Sched adj cost. */
3675
+ arm_default_branch_cost,
3676
+ &arm_default_vec_cost,
3677
1, /* Constant limit. */
3678
5, /* Max cond insns. */
3679
+ 8, /* Memset max inline. */
3680
+ 1, /* Issue rate. */
3681
ARM_PREFETCH_NOT_BENEFICIAL,
3682
- true, /* Prefer constant pool. */
3683
+ tune_params::PREF_CONST_POOL_TRUE,
3684
+ tune_params::PREF_LDRD_FALSE,
3685
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3686
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3687
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3688
+ tune_params::PREF_NEON_64_FALSE,
3689
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3690
+ tune_params::FUSE_NOTHING,
3691
+ tune_params::SCHED_AUTOPREF_OFF
3694
+const struct tune_params arm_marvell_pj4_tune =
3697
+ NULL, /* Insn extra costs. */
3698
+ NULL, /* Sched adj cost. */
3699
arm_default_branch_cost,
3700
- false, /* Prefer LDRD/STRD. */
3701
- {true, true}, /* Prefer non short circuit. */
3702
- &arm_default_vec_cost, /* Vectorizer costs. */
3703
- false, /* Prefer Neon for 64-bits bitops. */
3704
- false, false, /* Prefer 32-bit encodings. */
3705
- false, /* Prefer Neon for stringops. */
3706
- 8, /* Maximum insns to inline memset. */
3707
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3708
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3709
+ &arm_default_vec_cost,
3710
+ 1, /* Constant limit. */
3711
+ 5, /* Max cond insns. */
3712
+ 8, /* Memset max inline. */
3713
+ 2, /* Issue rate. */
3714
+ ARM_PREFETCH_NOT_BENEFICIAL,
3715
+ tune_params::PREF_CONST_POOL_TRUE,
3716
+ tune_params::PREF_LDRD_FALSE,
3717
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3718
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3719
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3720
+ tune_params::PREF_NEON_64_FALSE,
3721
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3722
+ tune_params::FUSE_NOTHING,
3723
+ tune_params::SCHED_AUTOPREF_OFF
3726
const struct tune_params arm_v6t2_tune =
3730
- NULL, /* Sched adj cost. */
3731
+ NULL, /* Insn extra costs. */
3732
+ NULL, /* Sched adj cost. */
3733
+ arm_default_branch_cost,
3734
+ &arm_default_vec_cost,
3735
1, /* Constant limit. */
3736
5, /* Max cond insns. */
3737
+ 8, /* Memset max inline. */
3738
+ 1, /* Issue rate. */
3739
ARM_PREFETCH_NOT_BENEFICIAL,
3740
- false, /* Prefer constant pool. */
3741
- arm_default_branch_cost,
3742
- false, /* Prefer LDRD/STRD. */
3743
- {true, true}, /* Prefer non short circuit. */
3744
- &arm_default_vec_cost, /* Vectorizer costs. */
3745
- false, /* Prefer Neon for 64-bits bitops. */
3746
- false, false, /* Prefer 32-bit encodings. */
3747
- false, /* Prefer Neon for stringops. */
3748
- 8, /* Maximum insns to inline memset. */
3749
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3750
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3751
+ tune_params::PREF_CONST_POOL_FALSE,
3752
+ tune_params::PREF_LDRD_FALSE,
3753
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3754
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3755
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3756
+ tune_params::PREF_NEON_64_FALSE,
3757
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3758
+ tune_params::FUSE_NOTHING,
3759
+ tune_params::SCHED_AUTOPREF_OFF
3763
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
3764
const struct tune_params arm_cortex_tune =
3767
&generic_extra_costs,
3768
- NULL, /* Sched adj cost. */
3769
+ NULL, /* Sched adj cost. */
3770
+ arm_default_branch_cost,
3771
+ &arm_default_vec_cost,
3772
1, /* Constant limit. */
3773
5, /* Max cond insns. */
3774
+ 8, /* Memset max inline. */
3775
+ 2, /* Issue rate. */
3776
ARM_PREFETCH_NOT_BENEFICIAL,
3777
- false, /* Prefer constant pool. */
3778
- arm_default_branch_cost,
3779
- false, /* Prefer LDRD/STRD. */
3780
- {true, true}, /* Prefer non short circuit. */
3781
- &arm_default_vec_cost, /* Vectorizer costs. */
3782
- false, /* Prefer Neon for 64-bits bitops. */
3783
- false, false, /* Prefer 32-bit encodings. */
3784
- false, /* Prefer Neon for stringops. */
3785
- 8, /* Maximum insns to inline memset. */
3786
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3787
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3788
+ tune_params::PREF_CONST_POOL_FALSE,
3789
+ tune_params::PREF_LDRD_FALSE,
3790
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3791
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3792
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3793
+ tune_params::PREF_NEON_64_FALSE,
3794
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
3795
+ tune_params::FUSE_NOTHING,
3796
+ tune_params::SCHED_AUTOPREF_OFF
3799
const struct tune_params arm_cortex_a8_tune =
3802
&cortexa8_extra_costs,
3803
- NULL, /* Sched adj cost. */
3804
+ NULL, /* Sched adj cost. */
3805
+ arm_default_branch_cost,
3806
+ &arm_default_vec_cost,
3807
1, /* Constant limit. */
3808
5, /* Max cond insns. */
3809
+ 8, /* Memset max inline. */
3810
+ 2, /* Issue rate. */
3811
ARM_PREFETCH_NOT_BENEFICIAL,
3812
- false, /* Prefer constant pool. */
3813
- arm_default_branch_cost,
3814
- false, /* Prefer LDRD/STRD. */
3815
- {true, true}, /* Prefer non short circuit. */
3816
- &arm_default_vec_cost, /* Vectorizer costs. */
3817
- false, /* Prefer Neon for 64-bits bitops. */
3818
- false, false, /* Prefer 32-bit encodings. */
3819
- true, /* Prefer Neon for stringops. */
3820
- 8, /* Maximum insns to inline memset. */
3821
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3822
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3823
+ tune_params::PREF_CONST_POOL_FALSE,
3824
+ tune_params::PREF_LDRD_FALSE,
3825
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3826
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3827
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3828
+ tune_params::PREF_NEON_64_FALSE,
3829
+ tune_params::PREF_NEON_STRINGOPS_TRUE,
3830
+ tune_params::FUSE_NOTHING,
3831
+ tune_params::SCHED_AUTOPREF_OFF
3834
const struct tune_params arm_cortex_a7_tune =
3837
&cortexa7_extra_costs,
3839
+ NULL, /* Sched adj cost. */
3840
+ arm_default_branch_cost,
3841
+ &arm_default_vec_cost,
3842
1, /* Constant limit. */
3843
5, /* Max cond insns. */
3844
+ 8, /* Memset max inline. */
3845
+ 2, /* Issue rate. */
3846
ARM_PREFETCH_NOT_BENEFICIAL,
3847
- false, /* Prefer constant pool. */
3848
- arm_default_branch_cost,
3849
- false, /* Prefer LDRD/STRD. */
3850
- {true, true}, /* Prefer non short circuit. */
3851
- &arm_default_vec_cost, /* Vectorizer costs. */
3852
- false, /* Prefer Neon for 64-bits bitops. */
3853
- false, false, /* Prefer 32-bit encodings. */
3854
- true, /* Prefer Neon for stringops. */
3855
- 8, /* Maximum insns to inline memset. */
3856
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3857
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3858
+ tune_params::PREF_CONST_POOL_FALSE,
3859
+ tune_params::PREF_LDRD_FALSE,
3860
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3861
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3862
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3863
+ tune_params::PREF_NEON_64_FALSE,
3864
+ tune_params::PREF_NEON_STRINGOPS_TRUE,
3865
+ tune_params::FUSE_NOTHING,
3866
+ tune_params::SCHED_AUTOPREF_OFF
3869
const struct tune_params arm_cortex_a15_tune =
3872
&cortexa15_extra_costs,
3873
- NULL, /* Sched adj cost. */
3874
+ NULL, /* Sched adj cost. */
3875
+ arm_default_branch_cost,
3876
+ &arm_default_vec_cost,
3877
1, /* Constant limit. */
3878
2, /* Max cond insns. */
3879
+ 8, /* Memset max inline. */
3880
+ 3, /* Issue rate. */
3881
ARM_PREFETCH_NOT_BENEFICIAL,
3882
- false, /* Prefer constant pool. */
3883
- arm_default_branch_cost,
3884
- true, /* Prefer LDRD/STRD. */
3885
- {true, true}, /* Prefer non short circuit. */
3886
- &arm_default_vec_cost, /* Vectorizer costs. */
3887
- false, /* Prefer Neon for 64-bits bitops. */
3888
- true, true, /* Prefer 32-bit encodings. */
3889
- true, /* Prefer Neon for stringops. */
3890
- 8, /* Maximum insns to inline memset. */
3891
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3892
- ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */
3893
+ tune_params::PREF_CONST_POOL_FALSE,
3894
+ tune_params::PREF_LDRD_TRUE,
3895
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3896
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3897
+ tune_params::DISPARAGE_FLAGS_ALL,
3898
+ tune_params::PREF_NEON_64_FALSE,
3899
+ tune_params::PREF_NEON_STRINGOPS_TRUE,
3900
+ tune_params::FUSE_NOTHING,
3901
+ tune_params::SCHED_AUTOPREF_FULL
3904
const struct tune_params arm_cortex_a53_tune =
3907
&cortexa53_extra_costs,
3908
- NULL, /* Scheduler cost adjustment. */
3909
+ NULL, /* Sched adj cost. */
3910
+ arm_default_branch_cost,
3911
+ &arm_default_vec_cost,
3912
1, /* Constant limit. */
3913
5, /* Max cond insns. */
3914
+ 8, /* Memset max inline. */
3915
+ 2, /* Issue rate. */
3916
ARM_PREFETCH_NOT_BENEFICIAL,
3917
- false, /* Prefer constant pool. */
3918
- arm_default_branch_cost,
3919
- false, /* Prefer LDRD/STRD. */
3920
- {true, true}, /* Prefer non short circuit. */
3921
- &arm_default_vec_cost, /* Vectorizer costs. */
3922
- false, /* Prefer Neon for 64-bits bitops. */
3923
- false, false, /* Prefer 32-bit encodings. */
3924
- true, /* Prefer Neon for stringops. */
3925
- 8, /* Maximum insns to inline memset. */
3926
- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
3927
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3928
+ tune_params::PREF_CONST_POOL_FALSE,
3929
+ tune_params::PREF_LDRD_FALSE,
3930
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3931
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3932
+ tune_params::DISPARAGE_FLAGS_NEITHER,
3933
+ tune_params::PREF_NEON_64_FALSE,
3934
+ tune_params::PREF_NEON_STRINGOPS_TRUE,
3935
+ FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
3936
+ tune_params::SCHED_AUTOPREF_OFF
3939
const struct tune_params arm_cortex_a57_tune =
3942
&cortexa57_extra_costs,
3943
- NULL, /* Scheduler cost adjustment. */
3944
- 1, /* Constant limit. */
3945
- 2, /* Max cond insns. */
3946
- ARM_PREFETCH_NOT_BENEFICIAL,
3947
- false, /* Prefer constant pool. */
3948
+ NULL, /* Sched adj cost. */
3949
arm_default_branch_cost,
3950
- true, /* Prefer LDRD/STRD. */
3951
- {true, true}, /* Prefer non short circuit. */
3952
- &arm_default_vec_cost, /* Vectorizer costs. */
3953
- false, /* Prefer Neon for 64-bits bitops. */
3954
- true, true, /* Prefer 32-bit encodings. */
3955
- true, /* Prefer Neon for stringops. */
3956
- 8, /* Maximum insns to inline memset. */
3957
- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
3958
- ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */
3959
+ &arm_default_vec_cost,
3960
+ 1, /* Constant limit. */
3961
+ 2, /* Max cond insns. */
3962
+ 8, /* Memset max inline. */
3963
+ 3, /* Issue rate. */
3964
+ ARM_PREFETCH_NOT_BENEFICIAL,
3965
+ tune_params::PREF_CONST_POOL_FALSE,
3966
+ tune_params::PREF_LDRD_TRUE,
3967
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
3968
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
3969
+ tune_params::DISPARAGE_FLAGS_ALL,
3970
+ tune_params::PREF_NEON_64_FALSE,
3971
+ tune_params::PREF_NEON_STRINGOPS_TRUE,
3972
+ FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
3973
+ tune_params::SCHED_AUTOPREF_FULL
3976
const struct tune_params arm_xgene1_tune =
3979
&xgene1_extra_costs,
3980
- NULL, /* Scheduler cost adjustment. */
3981
- 1, /* Constant limit. */
3982
- 2, /* Max cond insns. */
3983
- ARM_PREFETCH_NOT_BENEFICIAL,
3984
- false, /* Prefer constant pool. */
3985
+ NULL, /* Sched adj cost. */
3986
arm_default_branch_cost,
3987
- true, /* Prefer LDRD/STRD. */
3988
- {true, true}, /* Prefer non short circuit. */
3989
- &arm_default_vec_cost, /* Vectorizer costs. */
3990
- false, /* Prefer Neon for 64-bits bitops. */
3991
- true, true, /* Prefer 32-bit encodings. */
3992
- false, /* Prefer Neon for stringops. */
3993
- 32, /* Maximum insns to inline memset. */
3994
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
3995
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
3996
+ &arm_default_vec_cost,
3997
+ 1, /* Constant limit. */
3998
+ 2, /* Max cond insns. */
3999
+ 32, /* Memset max inline. */
4000
+ 4, /* Issue rate. */
4001
+ ARM_PREFETCH_NOT_BENEFICIAL,
4002
+ tune_params::PREF_CONST_POOL_FALSE,
4003
+ tune_params::PREF_LDRD_TRUE,
4004
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
4005
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
4006
+ tune_params::DISPARAGE_FLAGS_ALL,
4007
+ tune_params::PREF_NEON_64_FALSE,
4008
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
4009
+ tune_params::FUSE_NOTHING,
4010
+ tune_params::SCHED_AUTOPREF_OFF
4013
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
4014
@@ -1965,21 +2028,23 @@ const struct tune_params arm_cortex_a5_tune =
4017
&cortexa5_extra_costs,
4018
- NULL, /* Sched adj cost. */
4019
+ NULL, /* Sched adj cost. */
4020
+ arm_cortex_a5_branch_cost,
4021
+ &arm_default_vec_cost,
4022
1, /* Constant limit. */
4023
1, /* Max cond insns. */
4024
+ 8, /* Memset max inline. */
4025
+ 2, /* Issue rate. */
4026
ARM_PREFETCH_NOT_BENEFICIAL,
4027
- false, /* Prefer constant pool. */
4028
- arm_cortex_a5_branch_cost,
4029
- false, /* Prefer LDRD/STRD. */
4030
- {false, false}, /* Prefer non short circuit. */
4031
- &arm_default_vec_cost, /* Vectorizer costs. */
4032
- false, /* Prefer Neon for 64-bits bitops. */
4033
- false, false, /* Prefer 32-bit encodings. */
4034
- true, /* Prefer Neon for stringops. */
4035
- 8, /* Maximum insns to inline memset. */
4036
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
4037
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
4038
+ tune_params::PREF_CONST_POOL_FALSE,
4039
+ tune_params::PREF_LDRD_FALSE,
4040
+ tune_params::LOG_OP_NON_SC_FALSE, /* Thumb. */
4041
+ tune_params::LOG_OP_NON_SC_FALSE, /* ARM. */
4042
+ tune_params::DISPARAGE_FLAGS_NEITHER,
4043
+ tune_params::PREF_NEON_64_FALSE,
4044
+ tune_params::PREF_NEON_STRINGOPS_TRUE,
4045
+ tune_params::FUSE_NOTHING,
4046
+ tune_params::SCHED_AUTOPREF_OFF
4049
const struct tune_params arm_cortex_a9_tune =
4050
@@ -1987,41 +2052,45 @@ const struct tune_params arm_cortex_a9_tune =
4052
&cortexa9_extra_costs,
4053
cortex_a9_sched_adjust_cost,
4054
+ arm_default_branch_cost,
4055
+ &arm_default_vec_cost,
4056
1, /* Constant limit. */
4057
5, /* Max cond insns. */
4058
+ 8, /* Memset max inline. */
4059
+ 2, /* Issue rate. */
4060
ARM_PREFETCH_BENEFICIAL(4,32,32),
4061
- false, /* Prefer constant pool. */
4062
- arm_default_branch_cost,
4063
- false, /* Prefer LDRD/STRD. */
4064
- {true, true}, /* Prefer non short circuit. */
4065
- &arm_default_vec_cost, /* Vectorizer costs. */
4066
- false, /* Prefer Neon for 64-bits bitops. */
4067
- false, false, /* Prefer 32-bit encodings. */
4068
- false, /* Prefer Neon for stringops. */
4069
- 8, /* Maximum insns to inline memset. */
4070
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
4071
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
4072
+ tune_params::PREF_CONST_POOL_FALSE,
4073
+ tune_params::PREF_LDRD_FALSE,
4074
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
4075
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
4076
+ tune_params::DISPARAGE_FLAGS_NEITHER,
4077
+ tune_params::PREF_NEON_64_FALSE,
4078
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
4079
+ tune_params::FUSE_NOTHING,
4080
+ tune_params::SCHED_AUTOPREF_OFF
4083
const struct tune_params arm_cortex_a12_tune =
4086
&cortexa12_extra_costs,
4087
- NULL, /* Sched adj cost. */
4088
+ NULL, /* Sched adj cost. */
4089
+ arm_default_branch_cost,
4090
+ &arm_default_vec_cost, /* Vectorizer costs. */
4091
1, /* Constant limit. */
4092
2, /* Max cond insns. */
4093
+ 8, /* Memset max inline. */
4094
+ 2, /* Issue rate. */
4095
ARM_PREFETCH_NOT_BENEFICIAL,
4096
- false, /* Prefer constant pool. */
4097
- arm_default_branch_cost,
4098
- true, /* Prefer LDRD/STRD. */
4099
- {true, true}, /* Prefer non short circuit. */
4100
- &arm_default_vec_cost, /* Vectorizer costs. */
4101
- false, /* Prefer Neon for 64-bits bitops. */
4102
- true, true, /* Prefer 32-bit encodings. */
4103
- true, /* Prefer Neon for stringops. */
4104
- 8, /* Maximum insns to inline memset. */
4105
- ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
4106
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
4107
+ tune_params::PREF_CONST_POOL_FALSE,
4108
+ tune_params::PREF_LDRD_TRUE,
4109
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
4110
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
4111
+ tune_params::DISPARAGE_FLAGS_ALL,
4112
+ tune_params::PREF_NEON_64_FALSE,
4113
+ tune_params::PREF_NEON_STRINGOPS_TRUE,
4114
+ FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
4115
+ tune_params::SCHED_AUTOPREF_OFF
4118
/* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
4119
@@ -2035,21 +2104,23 @@ const struct tune_params arm_v7m_tune =
4123
- NULL, /* Sched adj cost. */
4124
+ NULL, /* Sched adj cost. */
4125
+ arm_cortex_m_branch_cost,
4126
+ &arm_default_vec_cost,
4127
1, /* Constant limit. */
4128
2, /* Max cond insns. */
4129
+ 8, /* Memset max inline. */
4130
+ 1, /* Issue rate. */
4131
ARM_PREFETCH_NOT_BENEFICIAL,
4132
- true, /* Prefer constant pool. */
4133
- arm_cortex_m_branch_cost,
4134
- false, /* Prefer LDRD/STRD. */
4135
- {false, false}, /* Prefer non short circuit. */
4136
- &arm_default_vec_cost, /* Vectorizer costs. */
4137
- false, /* Prefer Neon for 64-bits bitops. */
4138
- false, false, /* Prefer 32-bit encodings. */
4139
- false, /* Prefer Neon for stringops. */
4140
- 8, /* Maximum insns to inline memset. */
4141
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
4142
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
4143
+ tune_params::PREF_CONST_POOL_TRUE,
4144
+ tune_params::PREF_LDRD_FALSE,
4145
+ tune_params::LOG_OP_NON_SC_FALSE, /* Thumb. */
4146
+ tune_params::LOG_OP_NON_SC_FALSE, /* ARM. */
4147
+ tune_params::DISPARAGE_FLAGS_NEITHER,
4148
+ tune_params::PREF_NEON_64_FALSE,
4149
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
4150
+ tune_params::FUSE_NOTHING,
4151
+ tune_params::SCHED_AUTOPREF_OFF
4154
/* Cortex-M7 tuning. */
4155
@@ -2058,21 +2129,23 @@ const struct tune_params arm_cortex_m7_tune =
4159
- NULL, /* Sched adj cost. */
4160
+ NULL, /* Sched adj cost. */
4161
+ arm_cortex_m7_branch_cost,
4162
+ &arm_default_vec_cost,
4163
0, /* Constant limit. */
4164
1, /* Max cond insns. */
4165
+ 8, /* Memset max inline. */
4166
+ 2, /* Issue rate. */
4167
ARM_PREFETCH_NOT_BENEFICIAL,
4168
- true, /* Prefer constant pool. */
4169
- arm_cortex_m7_branch_cost,
4170
- false, /* Prefer LDRD/STRD. */
4171
- {true, true}, /* Prefer non short circuit. */
4172
- &arm_default_vec_cost, /* Vectorizer costs. */
4173
- false, /* Prefer Neon for 64-bits bitops. */
4174
- false, false, /* Prefer 32-bit encodings. */
4175
- false, /* Prefer Neon for stringops. */
4176
- 8, /* Maximum insns to inline memset. */
4177
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
4178
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
4179
+ tune_params::PREF_CONST_POOL_TRUE,
4180
+ tune_params::PREF_LDRD_FALSE,
4181
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
4182
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
4183
+ tune_params::DISPARAGE_FLAGS_NEITHER,
4184
+ tune_params::PREF_NEON_64_FALSE,
4185
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
4186
+ tune_params::FUSE_NOTHING,
4187
+ tune_params::SCHED_AUTOPREF_OFF
4190
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
4191
@@ -2080,43 +2153,47 @@ const struct tune_params arm_cortex_m7_tune =
4192
const struct tune_params arm_v6m_tune =
4196
- NULL, /* Sched adj cost. */
4197
+ NULL, /* Insn extra costs. */
4198
+ NULL, /* Sched adj cost. */
4199
+ arm_default_branch_cost,
4200
+ &arm_default_vec_cost, /* Vectorizer costs. */
4201
1, /* Constant limit. */
4202
5, /* Max cond insns. */
4203
+ 8, /* Memset max inline. */
4204
+ 1, /* Issue rate. */
4205
ARM_PREFETCH_NOT_BENEFICIAL,
4206
- false, /* Prefer constant pool. */
4207
- arm_default_branch_cost,
4208
- false, /* Prefer LDRD/STRD. */
4209
- {false, false}, /* Prefer non short circuit. */
4210
- &arm_default_vec_cost, /* Vectorizer costs. */
4211
- false, /* Prefer Neon for 64-bits bitops. */
4212
- false, false, /* Prefer 32-bit encodings. */
4213
- false, /* Prefer Neon for stringops. */
4214
- 8, /* Maximum insns to inline memset. */
4215
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
4216
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
4217
+ tune_params::PREF_CONST_POOL_FALSE,
4218
+ tune_params::PREF_LDRD_FALSE,
4219
+ tune_params::LOG_OP_NON_SC_FALSE, /* Thumb. */
4220
+ tune_params::LOG_OP_NON_SC_FALSE, /* ARM. */
4221
+ tune_params::DISPARAGE_FLAGS_NEITHER,
4222
+ tune_params::PREF_NEON_64_FALSE,
4223
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
4224
+ tune_params::FUSE_NOTHING,
4225
+ tune_params::SCHED_AUTOPREF_OFF
4228
const struct tune_params arm_fa726te_tune =
4232
+ NULL, /* Insn extra costs. */
4233
fa726te_sched_adjust_cost,
4234
+ arm_default_branch_cost,
4235
+ &arm_default_vec_cost,
4236
1, /* Constant limit. */
4237
5, /* Max cond insns. */
4238
+ 8, /* Memset max inline. */
4239
+ 2, /* Issue rate. */
4240
ARM_PREFETCH_NOT_BENEFICIAL,
4241
- true, /* Prefer constant pool. */
4242
- arm_default_branch_cost,
4243
- false, /* Prefer LDRD/STRD. */
4244
- {true, true}, /* Prefer non short circuit. */
4245
- &arm_default_vec_cost, /* Vectorizer costs. */
4246
- false, /* Prefer Neon for 64-bits bitops. */
4247
- false, false, /* Prefer 32-bit encodings. */
4248
- false, /* Prefer Neon for stringops. */
4249
- 8, /* Maximum insns to inline memset. */
4250
- ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
4251
- ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
4252
+ tune_params::PREF_CONST_POOL_TRUE,
4253
+ tune_params::PREF_LDRD_FALSE,
4254
+ tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
4255
+ tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
4256
+ tune_params::DISPARAGE_FLAGS_NEITHER,
4257
+ tune_params::PREF_NEON_64_FALSE,
4258
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
4259
+ tune_params::FUSE_NOTHING,
4260
+ tune_params::SCHED_AUTOPREF_OFF
4264
@@ -3140,31 +3217,33 @@ arm_option_override (void)
4265
&& abi_version_at_least(2))
4266
flag_strict_volatile_bitfields = 1;
4268
- /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
4269
- it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
4270
+ /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
4271
+ have deemed it beneficial (signified by setting
4272
+ prefetch.num_slots to 1 or more). */
4273
if (flag_prefetch_loop_arrays < 0
4276
- && current_tune->num_prefetch_slots > 0)
4277
+ && current_tune->prefetch.num_slots > 0)
4278
flag_prefetch_loop_arrays = 1;
4280
- /* Set up parameters to be used in prefetching algorithm. Do not override the
4281
- defaults unless we are tuning for a core we have researched values for. */
4282
- if (current_tune->num_prefetch_slots > 0)
4283
+ /* Set up parameters to be used in prefetching algorithm. Do not
4284
+ override the defaults unless we are tuning for a core we have
4285
+ researched values for. */
4286
+ if (current_tune->prefetch.num_slots > 0)
4287
maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4288
- current_tune->num_prefetch_slots,
4289
- global_options.x_param_values,
4290
- global_options_set.x_param_values);
4291
- if (current_tune->l1_cache_line_size >= 0)
4292
+ current_tune->prefetch.num_slots,
4293
+ global_options.x_param_values,
4294
+ global_options_set.x_param_values);
4295
+ if (current_tune->prefetch.l1_cache_line_size >= 0)
4296
maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4297
- current_tune->l1_cache_line_size,
4298
- global_options.x_param_values,
4299
- global_options_set.x_param_values);
4300
- if (current_tune->l1_cache_size >= 0)
4301
+ current_tune->prefetch.l1_cache_line_size,
4302
+ global_options.x_param_values,
4303
+ global_options_set.x_param_values);
4304
+ if (current_tune->prefetch.l1_cache_size >= 0)
4305
maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4306
- current_tune->l1_cache_size,
4307
- global_options.x_param_values,
4308
- global_options_set.x_param_values);
4309
+ current_tune->prefetch.l1_cache_size,
4310
+ global_options.x_param_values,
4311
+ global_options_set.x_param_values);
4313
/* Use Neon to perform 64-bits operations rather than core
4315
@@ -3174,24 +3253,35 @@ arm_option_override (void)
4317
/* Use the alternative scheduling-pressure algorithm by default. */
4318
maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
4319
- global_options.x_param_values,
4320
- global_options_set.x_param_values);
4321
+ global_options.x_param_values,
4322
+ global_options_set.x_param_values);
4324
/* Look through ready list and all of queue for instructions
4325
relevant for L2 auto-prefetcher. */
4326
int param_sched_autopref_queue_depth;
4327
- if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF)
4328
- param_sched_autopref_queue_depth = -1;
4329
- else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK)
4330
- param_sched_autopref_queue_depth = 0;
4331
- else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL)
4332
- param_sched_autopref_queue_depth = max_insn_queue_index + 1;
4334
- gcc_unreachable ();
4336
+ switch (current_tune->sched_autopref)
4338
+ case tune_params::SCHED_AUTOPREF_OFF:
4339
+ param_sched_autopref_queue_depth = -1;
4342
+ case tune_params::SCHED_AUTOPREF_RANK:
4343
+ param_sched_autopref_queue_depth = 0;
4346
+ case tune_params::SCHED_AUTOPREF_FULL:
4347
+ param_sched_autopref_queue_depth = max_insn_queue_index + 1;
4351
+ gcc_unreachable ();
4354
maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
4355
param_sched_autopref_queue_depth,
4356
- global_options.x_param_values,
4357
- global_options_set.x_param_values);
4358
+ global_options.x_param_values,
4359
+ global_options_set.x_param_values);
4361
/* Disable shrink-wrap when optimizing function for size, since it tends to
4362
generate additional returns. */
4363
@@ -7946,236 +8036,6 @@ thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
4368
-arm_legitimize_reload_address (rtx *p,
4369
- machine_mode mode,
4370
- int opnum, int type,
4371
- int ind_levels ATTRIBUTE_UNUSED)
4373
- /* We must recognize output that we have already generated ourselves. */
4374
- if (GET_CODE (*p) == PLUS
4375
- && GET_CODE (XEXP (*p, 0)) == PLUS
4376
- && REG_P (XEXP (XEXP (*p, 0), 0))
4377
- && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
4378
- && CONST_INT_P (XEXP (*p, 1)))
4380
- push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
4381
- MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
4382
- VOIDmode, 0, 0, opnum, (enum reload_type) type);
4386
- if (GET_CODE (*p) == PLUS
4387
- && REG_P (XEXP (*p, 0))
4388
- && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
4389
- /* If the base register is equivalent to a constant, let the generic
4390
- code handle it. Otherwise we will run into problems if a future
4391
- reload pass decides to rematerialize the constant. */
4392
- && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
4393
- && CONST_INT_P (XEXP (*p, 1)))
4395
- HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
4396
- HOST_WIDE_INT low, high;
4398
- /* Detect coprocessor load/stores. */
4399
- bool coproc_p = ((TARGET_HARD_FLOAT
4401
- && (mode == SFmode || mode == DFmode))
4402
- || (TARGET_REALLY_IWMMXT
4403
- && VALID_IWMMXT_REG_MODE (mode))
4405
- && (VALID_NEON_DREG_MODE (mode)
4406
- || VALID_NEON_QREG_MODE (mode))));
4408
- /* For some conditions, bail out when lower two bits are unaligned. */
4409
- if ((val & 0x3) != 0
4410
- /* Coprocessor load/store indexes are 8-bits + '00' appended. */
4412
- /* For DI, and DF under soft-float: */
4413
- || ((mode == DImode || mode == DFmode)
4414
- /* Without ldrd, we use stm/ldm, which does not
4415
- fair well with unaligned bits. */
4417
- /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
4418
- || TARGET_THUMB2))))
4421
- /* When breaking down a [reg+index] reload address into [(reg+high)+low],
4422
- of which the (reg+high) gets turned into a reload add insn,
4423
- we try to decompose the index into high/low values that can often
4424
- also lead to better reload CSE.
4426
- ldr r0, [r2, #4100] // Offset too large
4427
- ldr r1, [r2, #4104] // Offset too large
4429
- is best reloaded as:
4435
- which post-reload CSE can simplify in most cases to eliminate the
4436
- second add instruction:
4441
- The idea here is that we want to split out the bits of the constant
4442
- as a mask, rather than as subtracting the maximum offset that the
4443
- respective type of load/store used can handle.
4445
- When encountering negative offsets, we can still utilize it even if
4446
- the overall offset is positive; sometimes this may lead to an immediate
4447
- that can be constructed with fewer instructions.
4449
- ldr r0, [r2, #0x3FFFFC]
4451
- This is best reloaded as:
4452
- add t1, r2, #0x400000
4455
- The trick for spotting this for a load insn with N bits of offset
4456
- (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
4457
- negative offset that is going to make bit N and all the bits below
4458
- it become zero in the remainder part.
4460
- The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
4461
- to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
4462
- used in most cases of ARM load/store instructions. */
4464
-#define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
4465
- (((VAL) & ((1 << (N)) - 1)) \
4466
- ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
4471
- low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
4473
- /* NEON quad-word load/stores are made of two double-word accesses,
4474
- so the valid index range is reduced by 8. Treat as 9-bit range if
4476
- if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
4477
- low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
4479
- else if (GET_MODE_SIZE (mode) == 8)
4482
- low = (TARGET_THUMB2
4483
- ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
4484
- : SIGN_MAG_LOW_ADDR_BITS (val, 8));
4486
- /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
4487
- to access doublewords. The supported load/store offsets are
4488
- -8, -4, and 4, which we try to produce here. */
4489
- low = ((val & 0xf) ^ 0x8) - 0x8;
4491
- else if (GET_MODE_SIZE (mode) < 8)
4493
- /* NEON element load/stores do not have an offset. */
4494
- if (TARGET_NEON_FP16 && mode == HFmode)
4497
- if (TARGET_THUMB2)
4499
- /* Thumb-2 has an asymmetrical index range of (-256,4096).
4500
- Try the wider 12-bit range first, and re-try if the result
4501
- is out of range. */
4502
- low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
4504
- low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
4508
- if (mode == HImode || mode == HFmode)
4511
- low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
4514
- /* The storehi/movhi_bytes fallbacks can use only
4515
- [-4094,+4094] of the full ldrb/strb index range. */
4516
- low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
4517
- if (low == 4095 || low == -4095)
4522
- low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
4528
- high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
4529
- ^ (unsigned HOST_WIDE_INT) 0x80000000)
4530
- - (unsigned HOST_WIDE_INT) 0x80000000);
4531
- /* Check for overflow or zero */
4532
- if (low == 0 || high == 0 || (high + low != val))
4535
- /* Reload the high part into a base reg; leave the low part
4537
- Note that replacing this gen_rtx_PLUS with plus_constant is
4538
- wrong in this case because we rely on the
4539
- (plus (plus reg c1) c2) structure being preserved so that
4540
- XEXP (*p, 0) in push_reload below uses the correct term. */
4541
- *p = gen_rtx_PLUS (GET_MODE (*p),
4542
- gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
4545
- push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
4546
- MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
4547
- VOIDmode, 0, 0, opnum, (enum reload_type) type);
4555
-thumb_legitimize_reload_address (rtx *x_p,
4556
- machine_mode mode,
4557
- int opnum, int type,
4558
- int ind_levels ATTRIBUTE_UNUSED)
4562
- if (GET_CODE (x) == PLUS
4563
- && GET_MODE_SIZE (mode) < 4
4564
- && REG_P (XEXP (x, 0))
4565
- && XEXP (x, 0) == stack_pointer_rtx
4566
- && CONST_INT_P (XEXP (x, 1))
4567
- && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4572
- push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4573
- Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
4577
- /* If both registers are hi-regs, then it's better to reload the
4578
- entire expression rather than each register individually. That
4579
- only requires one reload register rather than two. */
4580
- if (GET_CODE (x) == PLUS
4581
- && REG_P (XEXP (x, 0))
4582
- && REG_P (XEXP (x, 1))
4583
- && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4584
- && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4589
- push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4590
- Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
4597
/* Return TRUE if X contains any TLS symbol references. */
4600
@@ -9399,7 +9259,8 @@ static bool
4601
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
4603
const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
4604
- gcc_assert (GET_CODE (x) == UNSPEC);
4605
+ rtx_code code = GET_CODE (x);
4606
+ gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
4608
switch (XINT (x, 1))
4610
@@ -9445,7 +9306,7 @@ arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
4611
*cost = COSTS_N_INSNS (2);
4618
/* Cost of a libcall. We assume one insn per argument, an amount for the
4619
@@ -11008,6 +10869,7 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
4620
*cost = LIBCALL_COST (1);
4623
+ case UNSPEC_VOLATILE:
4625
return arm_unspec_cost (x, outer_code, speed_p, cost);
4627
@@ -17287,14 +17149,16 @@ thumb2_reorg (void)
4629
FOR_EACH_BB_FN (bb, cfun)
4631
- if (current_tune->disparage_flag_setting_t16_encodings
4632
+ if ((current_tune->disparage_flag_setting_t16_encodings
4633
+ == tune_params::DISPARAGE_FLAGS_ALL)
4634
&& optimize_bb_for_speed_p (bb))
4638
Convert_Action action = SKIP;
4639
Convert_Action action_for_partial_flag_setting
4640
- = (current_tune->disparage_partial_flag_setting_t16_encodings
4641
+ = ((current_tune->disparage_flag_setting_t16_encodings
4642
+ != tune_params::DISPARAGE_FLAGS_NEITHER)
4643
&& optimize_bb_for_speed_p (bb))
4646
@@ -25660,12 +25524,12 @@ arm_print_tune_info (void)
4647
current_tune->constant_limit);
4648
asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
4649
current_tune->max_insns_skipped);
4650
- asm_fprintf (asm_out_file, "\t\t@num_prefetch_slots:\t%d\n",
4651
- current_tune->num_prefetch_slots);
4652
- asm_fprintf (asm_out_file, "\t\t@l1_cache_size:\t%d\n",
4653
- current_tune->l1_cache_size);
4654
- asm_fprintf (asm_out_file, "\t\t@l1_cache_line_size:\t%d\n",
4655
- current_tune->l1_cache_line_size);
4656
+ asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
4657
+ current_tune->prefetch.num_slots);
4658
+ asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
4659
+ current_tune->prefetch.l1_cache_size);
4660
+ asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
4661
+ current_tune->prefetch.l1_cache_line_size);
4662
asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
4663
(int) current_tune->prefer_constant_pool);
4664
asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
4665
@@ -25681,17 +25545,13 @@ arm_print_tune_info (void)
4666
asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
4667
(int) current_tune->prefer_ldrd_strd);
4668
asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
4669
- (int) current_tune->logical_op_non_short_circuit[0],
4670
- (int) current_tune->logical_op_non_short_circuit[1]);
4671
+ (int) current_tune->logical_op_non_short_circuit_thumb,
4672
+ (int) current_tune->logical_op_non_short_circuit_arm);
4673
asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
4674
(int) current_tune->prefer_neon_for_64bits);
4675
asm_fprintf (asm_out_file,
4676
"\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
4677
(int) current_tune->disparage_flag_setting_t16_encodings);
4678
- asm_fprintf (asm_out_file,
4679
- "\t\t@disparage_partial_flag_setting_t16_encodings:\t%d\n",
4680
- (int) current_tune
4681
- ->disparage_partial_flag_setting_t16_encodings);
4682
asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
4683
(int) current_tune->string_ops_prefer_neon);
4684
asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
4685
@@ -27213,40 +27073,12 @@ thumb2_output_casesi (rtx *operands)
4689
-/* Most ARM cores are single issue, but some newer ones can dual issue.
4690
- The scheduler descriptions rely on this being correct. */
4691
+/* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
4692
+ per-core tuning structs. */
4694
arm_issue_rate (void)
4725
+ return current_tune->issue_rate;
4728
/* Return how many instructions should scheduler lookahead to choose the
4729
@@ -29411,7 +29243,7 @@ arm_gen_setmem (rtx *operands)
4731
arm_macro_fusion_p (void)
4733
- return current_tune->fuseable_ops != ARM_FUSE_NOTHING;
4734
+ return current_tune->fuseable_ops != tune_params::FUSE_NOTHING;
4738
@@ -29432,44 +29264,44 @@ aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
4739
if (!arm_macro_fusion_p ())
4742
- if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT)
4743
+ if (current_tune->fuseable_ops & tune_params::FUSE_MOVW_MOVT)
4745
/* We are trying to fuse
4746
- movw imm / movt imm
4747
- instructions as a group that gets scheduled together. */
4748
+ movw imm / movt imm
4749
+ instructions as a group that gets scheduled together. */
4751
set_dest = SET_DEST (curr_set);
4753
if (GET_MODE (set_dest) != SImode)
4757
/* We are trying to match:
4758
- prev (movw) == (set (reg r0) (const_int imm16))
4759
- curr (movt) == (set (zero_extract (reg r0)
4762
- (const_int imm16_1))
4764
- prev (movw) == (set (reg r1)
4765
- (high (symbol_ref ("SYM"))))
4766
- curr (movt) == (set (reg r0)
4768
- (symbol_ref ("SYM")))) */
4769
+ prev (movw) == (set (reg r0) (const_int imm16))
4770
+ curr (movt) == (set (zero_extract (reg r0)
4773
+ (const_int imm16_1))
4775
+ prev (movw) == (set (reg r1)
4776
+ (high (symbol_ref ("SYM"))))
4777
+ curr (movt) == (set (reg r0)
4779
+ (symbol_ref ("SYM")))) */
4780
if (GET_CODE (set_dest) == ZERO_EXTRACT)
4782
- if (CONST_INT_P (SET_SRC (curr_set))
4783
- && CONST_INT_P (SET_SRC (prev_set))
4784
- && REG_P (XEXP (set_dest, 0))
4785
- && REG_P (SET_DEST (prev_set))
4786
- && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
4790
+ if (CONST_INT_P (SET_SRC (curr_set))
4791
+ && CONST_INT_P (SET_SRC (prev_set))
4792
+ && REG_P (XEXP (set_dest, 0))
4793
+ && REG_P (SET_DEST (prev_set))
4794
+ && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
4797
else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
4798
- && REG_P (SET_DEST (curr_set))
4799
- && REG_P (SET_DEST (prev_set))
4800
- && GET_CODE (SET_SRC (prev_set)) == HIGH
4801
- && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
4803
+ && REG_P (SET_DEST (curr_set))
4804
+ && REG_P (SET_DEST (prev_set))
4805
+ && GET_CODE (SET_SRC (prev_set)) == HIGH
4806
+ && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
4811
--- a/src/gcc/config/arm/arm.h
4812
+++ b/src/gcc/config/arm/arm.h
4813
@@ -1360,46 +1360,6 @@ enum reg_class
4814
? GENERAL_REGS : NO_REGS) \
4815
: THUMB_SECONDARY_INPUT_RELOAD_CLASS (CLASS, MODE, X)))
4817
-/* Try a machine-dependent way of reloading an illegitimate address
4818
- operand. If we find one, push the reload and jump to WIN. This
4819
- macro is used in only one place: `find_reloads_address' in reload.c.
4821
- For the ARM, we wish to handle large displacements off a base
4822
- register by splitting the addend across a MOV and the mem insn.
4823
- This can cut the number of reloads needed. */
4824
-#define ARM_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND, WIN) \
4827
- if (arm_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND)) \
4832
-/* XXX If an HImode FP+large_offset address is converted to an HImode
4833
- SP+large_offset address, then reload won't know how to fix it. It sees
4834
- only that SP isn't valid for HImode, and so reloads the SP into an index
4835
- register, but the resulting address is still invalid because the offset
4836
- is too big. We fix it here instead by reloading the entire address. */
4837
-/* We could probably achieve better results by defining PROMOTE_MODE to help
4838
- cope with the variances between the Thumb's signed and unsigned byte and
4839
- halfword load instructions. */
4840
-/* ??? This should be safe for thumb2, but we may be able to do better. */
4841
-#define THUMB_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \
4843
- rtx new_x = thumb_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND_L); \
4851
-#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) \
4853
- ARM_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN); \
4855
- THUMB_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN)
4857
/* Return the maximum number of consecutive registers
4858
needed to represent mode MODE in a register of class CLASS.
4859
ARM regs are UNITS_PER_WORD bits.
4860
@@ -2096,10 +2056,11 @@ enum arm_auto_incmodes
4861
(current_tune->branch_cost (speed_p, predictable_p))
4863
/* False if short circuit operation is preferred. */
4864
-#define LOGICAL_OP_NON_SHORT_CIRCUIT \
4865
- ((optimize_size) \
4866
- ? (TARGET_THUMB ? false : true) \
4867
- : (current_tune->logical_op_non_short_circuit[TARGET_ARM]))
4868
+#define LOGICAL_OP_NON_SHORT_CIRCUIT \
4869
+ ((optimize_size) \
4870
+ ? (TARGET_THUMB ? false : true) \
4871
+ : TARGET_THUMB ? static_cast<bool> (current_tune->logical_op_non_short_circuit_thumb) \
4872
+ : static_cast<bool> (current_tune->logical_op_non_short_circuit_arm))
4875
/* Position Independent Code. */
4876
--- a/src/gcc/config/arm/arm.md
4877
+++ b/src/gcc/config/arm/arm.md
4878
@@ -1177,9 +1177,9 @@
4880
; ??? Check Thumb-2 split length
4881
(define_insn_and_split "*arm_subsi3_insn"
4882
- [(set (match_operand:SI 0 "s_register_operand" "=l,l ,l ,l ,r ,r,r,rk,r")
4883
- (minus:SI (match_operand:SI 1 "reg_or_int_operand" "l ,0 ,l ,Pz,rI,r,r,k ,?n")
4884
- (match_operand:SI 2 "reg_or_int_operand" "l ,Py,Pd,l ,r ,I,r,r ,r")))]
4885
+ [(set (match_operand:SI 0 "s_register_operand" "=l,l ,l ,l ,r,r,r,rk,r")
4886
+ (minus:SI (match_operand:SI 1 "reg_or_int_operand" "l ,0 ,l ,Pz,I,r,r,k ,?n")
4887
+ (match_operand:SI 2 "reg_or_int_operand" "l ,Py,Pd,l ,r,I,r,r ,r")))]
4891
@@ -2768,6 +2768,55 @@
4892
(const_string "logic_shift_reg")))]
4895
+;; Shifted bics pattern used to set up CC status register and not reusing
4896
+;; bics output. Pattern restricts Thumb2 shift operand as bics for Thumb2
4897
+;; does not support shift by register.
4898
+(define_insn "andsi_not_shiftsi_si_scc_no_reuse"
4899
+ [(set (reg:CC_NOOV CC_REGNUM)
4901
+ (and:SI (not:SI (match_operator:SI 0 "shift_operator"
4902
+ [(match_operand:SI 1 "s_register_operand" "r")
4903
+ (match_operand:SI 2 "arm_rhs_operand" "rM")]))
4904
+ (match_operand:SI 3 "s_register_operand" "r"))
4906
+ (clobber (match_scratch:SI 4 "=r"))]
4907
+ "TARGET_ARM || (TARGET_THUMB2 && CONST_INT_P (operands[2]))"
4908
+ "bic%.%?\\t%4, %3, %1%S0"
4909
+ [(set_attr "predicable" "yes")
4910
+ (set_attr "predicable_short_it" "no")
4911
+ (set_attr "conds" "set")
4912
+ (set_attr "shift" "1")
4913
+ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
4914
+ (const_string "logic_shift_imm")
4915
+ (const_string "logic_shift_reg")))]
4918
+;; Same as andsi_not_shiftsi_si_scc_no_reuse, but the bics result is also
4919
+;; getting reused later.
4920
+(define_insn "andsi_not_shiftsi_si_scc"
4921
+ [(parallel [(set (reg:CC_NOOV CC_REGNUM)
4923
+ (and:SI (not:SI (match_operator:SI 0 "shift_operator"
4924
+ [(match_operand:SI 1 "s_register_operand" "r")
4925
+ (match_operand:SI 2 "arm_rhs_operand" "rM")]))
4926
+ (match_operand:SI 3 "s_register_operand" "r"))
4928
+ (set (match_operand:SI 4 "s_register_operand" "=r")
4929
+ (and:SI (not:SI (match_op_dup 0
4932
+ (match_dup 3)))])]
4933
+ "TARGET_ARM || (TARGET_THUMB2 && CONST_INT_P (operands[2]))"
4934
+ "bic%.%?\\t%4, %3, %1%S0"
4935
+ [(set_attr "predicable" "yes")
4936
+ (set_attr "predicable_short_it" "no")
4937
+ (set_attr "conds" "set")
4938
+ (set_attr "shift" "1")
4939
+ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
4940
+ (const_string "logic_shift_imm")
4941
+ (const_string "logic_shift_reg")))]
4944
(define_insn "*andsi_notsi_si_compare0"
4945
[(set (reg:CC_NOOV CC_REGNUM)
4947
@@ -5076,7 +5125,7 @@
4950
[(set (match_operand:SI 0 "s_register_operand" "")
4951
- (ior_xor:SI (and:SI (ashift:SI
4952
+ (IOR_XOR:SI (and:SI (ashift:SI
4953
(match_operand:SI 1 "s_register_operand" "")
4954
(match_operand:SI 2 "const_int_operand" ""))
4955
(match_operand:SI 3 "const_int_operand" ""))
4956
@@ -5088,7 +5137,7 @@
4957
== (GET_MODE_MASK (GET_MODE (operands[5]))
4958
& (GET_MODE_MASK (GET_MODE (operands[5]))
4959
<< (INTVAL (operands[2])))))"
4960
- [(set (match_dup 0) (ior_xor:SI (ashift:SI (match_dup 1) (match_dup 2))
4961
+ [(set (match_dup 0) (IOR_XOR:SI (ashift:SI (match_dup 1) (match_dup 2))
4963
(set (match_dup 0) (zero_extend:SI (match_dup 5)))]
4964
"operands[5] = gen_lowpart (GET_MODE (operands[5]), operands[0]);"
4965
@@ -5667,7 +5716,7 @@
4966
[(set_attr "predicable" "yes")
4967
(set_attr "predicable_short_it" "no")
4968
(set_attr "length" "4")
4969
- (set_attr "type" "mov_imm")]
4970
+ (set_attr "type" "alu_sreg")]
4973
(define_insn "*arm_movsi_insn"
4974
@@ -6712,7 +6761,7 @@
4976
/* Support only fixed point registers. */
4977
if (!CONST_INT_P (operands[2])
4978
- || INTVAL (operands[2]) > 14
4979
+ || INTVAL (operands[2]) > MAX_LDM_STM_OPS
4980
|| INTVAL (operands[2]) < 2
4981
|| !MEM_P (operands[1])
4982
|| !REG_P (operands[0])
4983
@@ -6737,7 +6786,7 @@
4985
/* Support only fixed point registers. */
4986
if (!CONST_INT_P (operands[2])
4987
- || INTVAL (operands[2]) > 14
4988
+ || INTVAL (operands[2]) > MAX_LDM_STM_OPS
4989
|| INTVAL (operands[2]) < 2
4990
|| !REG_P (operands[1])
4991
|| !MEM_P (operands[0])
4992
@@ -6922,7 +6971,7 @@
4993
[(set_attr "conds" "set")
4994
(set_attr "shift" "1")
4995
(set_attr "arch" "32,a,a")
4996
- (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")])
4997
+ (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")])
4999
(define_insn "*cmpsi_shiftsi_swp"
5000
[(set (reg:CC_SWP CC_REGNUM)
5001
@@ -6935,7 +6984,7 @@
5002
[(set_attr "conds" "set")
5003
(set_attr "shift" "1")
5004
(set_attr "arch" "32,a,a")
5005
- (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")])
5006
+ (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")])
5008
(define_insn "*arm_cmpsi_negshiftsi_si"
5009
[(set (reg:CC_Z CC_REGNUM)
5010
@@ -7528,10 +7577,10 @@
5011
(const_string "mov_imm")
5012
(const_string "mov_reg"))
5013
(const_string "mvn_imm")
5014
- (const_string "mov_reg")
5015
- (const_string "mov_reg")
5016
- (const_string "mov_reg")
5017
- (const_string "mov_reg")])]
5018
+ (const_string "multiple")
5019
+ (const_string "multiple")
5020
+ (const_string "multiple")
5021
+ (const_string "multiple")])]
5024
(define_insn "*movsfcc_soft_insn"
5025
@@ -7884,7 +7933,7 @@
5028
(define_expand "<return_str>return"
5031
"(TARGET_ARM || (TARGET_THUMB2
5032
&& ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL
5033
&& !IS_STACKALIGN (arm_current_func_type ())))
5034
@@ -7922,7 +7971,7 @@
5036
(if_then_else (match_operator 0 "arm_comparison_operator"
5037
[(match_operand 1 "cc_register" "") (const_int 0)])
5041
"TARGET_ARM <return_cond_true>"
5043
@@ -7945,7 +7994,7 @@
5044
(if_then_else (match_operator 0 "arm_comparison_operator"
5045
[(match_operand 1 "cc_register" "") (const_int 0)])
5049
"TARGET_ARM <return_cond_true>"
5052
@@ -8279,7 +8328,7 @@
5054
(define_insn "*<arith_shift_insn>_multsi"
5055
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
5058
(mult:SI (match_operand:SI 2 "s_register_operand" "r,r")
5059
(match_operand:SI 3 "power_of_two_operand" ""))
5060
(match_operand:SI 1 "s_register_operand" "rk,<t2_binop0>")))]
5061
@@ -8293,7 +8342,7 @@
5063
(define_insn "*<arith_shift_insn>_shiftsi"
5064
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
5067
(match_operator:SI 2 "shift_nomul_operator"
5068
[(match_operand:SI 3 "s_register_operand" "r,r,r")
5069
(match_operand:SI 4 "shift_amount_operand" "M,M,r")])
5070
@@ -8689,7 +8738,14 @@
5073
[(set_attr "conds" "use")
5074
- (set_attr "type" "mov_reg,mov_reg,multiple")
5075
+ (set_attr_alternative "type"
5076
+ [(if_then_else (match_operand 2 "const_int_operand" "")
5077
+ (const_string "mov_imm")
5078
+ (const_string "mov_reg"))
5079
+ (if_then_else (match_operand 1 "const_int_operand" "")
5080
+ (const_string "mov_imm")
5081
+ (const_string "mov_reg"))
5082
+ (const_string "multiple")])
5083
(set_attr "length" "4,4,8")]
5086
@@ -9485,8 +9541,8 @@
5087
(const_string "alu_imm" )
5088
(const_string "alu_sreg"))
5089
(const_string "alu_imm")
5090
- (const_string "alu_sreg")
5091
- (const_string "alu_sreg")])]
5092
+ (const_string "multiple")
5093
+ (const_string "multiple")])]
5096
(define_insn "*ifcompare_move_plus"
5097
@@ -9523,7 +9579,13 @@
5098
sub%D4\\t%0, %2, #%n3\;mov%d4\\t%0, %1"
5099
[(set_attr "conds" "use")
5100
(set_attr "length" "4,4,8,8")
5101
- (set_attr "type" "alu_sreg,alu_imm,multiple,multiple")]
5102
+ (set_attr_alternative "type"
5103
+ [(if_then_else (match_operand 3 "const_int_operand" "")
5104
+ (const_string "alu_imm" )
5105
+ (const_string "alu_sreg"))
5106
+ (const_string "alu_imm")
5107
+ (const_string "multiple")
5108
+ (const_string "multiple")])]
5111
(define_insn "*ifcompare_arith_arith"
5112
@@ -9618,7 +9680,11 @@
5113
%I5%d4\\t%0, %2, %3\;mov%D4\\t%0, %1"
5114
[(set_attr "conds" "use")
5115
(set_attr "length" "4,8")
5116
- (set_attr "type" "alu_shift_reg,multiple")]
5117
+ (set_attr_alternative "type"
5118
+ [(if_then_else (match_operand 3 "const_int_operand" "")
5119
+ (const_string "alu_shift_imm" )
5120
+ (const_string "alu_shift_reg"))
5121
+ (const_string "multiple")])]
5124
(define_insn "*ifcompare_move_arith"
5125
@@ -9679,7 +9745,11 @@
5126
%I5%D4\\t%0, %2, %3\;mov%d4\\t%0, %1"
5127
[(set_attr "conds" "use")
5128
(set_attr "length" "4,8")
5129
- (set_attr "type" "alu_shift_reg,multiple")]
5130
+ (set_attr_alternative "type"
5131
+ [(if_then_else (match_operand 3 "const_int_operand" "")
5132
+ (const_string "alu_shift_imm" )
5133
+ (const_string "alu_shift_reg"))
5134
+ (const_string "multiple")])]
5137
(define_insn "*ifcompare_move_not"
5138
@@ -9786,7 +9856,12 @@
5139
[(set_attr "conds" "use")
5140
(set_attr "shift" "2")
5141
(set_attr "length" "4,8,8")
5142
- (set_attr "type" "mov_shift_reg,multiple,multiple")]
5143
+ (set_attr_alternative "type"
5144
+ [(if_then_else (match_operand 3 "const_int_operand" "")
5145
+ (const_string "mov_shift" )
5146
+ (const_string "mov_shift_reg"))
5147
+ (const_string "multiple")
5148
+ (const_string "multiple")])]
5151
(define_insn "*ifcompare_move_shift"
5152
@@ -9824,7 +9899,12 @@
5153
[(set_attr "conds" "use")
5154
(set_attr "shift" "2")
5155
(set_attr "length" "4,8,8")
5156
- (set_attr "type" "mov_shift_reg,multiple,multiple")]
5157
+ (set_attr_alternative "type"
5158
+ [(if_then_else (match_operand 3 "const_int_operand" "")
5159
+ (const_string "mov_shift" )
5160
+ (const_string "mov_shift_reg"))
5161
+ (const_string "multiple")
5162
+ (const_string "multiple")])]
5165
(define_insn "*ifcompare_shift_shift"
5166
@@ -10905,7 +10985,7 @@
5167
[(set_attr "predicable" "yes")
5168
(set_attr "predicable_short_it" "no")
5169
(set_attr "length" "4")
5170
- (set_attr "type" "mov_imm")]
5171
+ (set_attr "type" "alu_sreg")]
5174
(define_insn "*arm_rev"
5175
--- a/src/gcc/config/arm/iterators.md
5176
+++ b/src/gcc/config/arm/iterators.md
5177
@@ -181,39 +181,53 @@
5178
;; compare a second time.
5179
(define_code_iterator LTUGEU [ltu geu])
5181
+;; The signed gt, ge comparisons
5182
+(define_code_iterator GTGE [gt ge])
5184
+;; The unsigned gt, ge comparisons
5185
+(define_code_iterator GTUGEU [gtu geu])
5187
+;; Comparisons for vc<cmp>
5188
+(define_code_iterator COMPARISONS [eq gt ge le lt])
5191
-(define_code_iterator ior_xor [ior xor])
5192
+(define_code_iterator IOR_XOR [ior xor])
5194
;; Operations on two halves of a quadword vector.
5195
-(define_code_iterator vqh_ops [plus smin smax umin umax])
5196
+(define_code_iterator VQH_OPS [plus smin smax umin umax])
5198
;; Operations on two halves of a quadword vector,
5199
;; without unsigned variants (for use with *SFmode pattern).
5200
-(define_code_iterator vqhs_ops [plus smin smax])
5201
+(define_code_iterator VQHS_OPS [plus smin smax])
5203
;; A list of widening operators
5204
(define_code_iterator SE [sign_extend zero_extend])
5207
-(define_code_iterator rshifts [ashiftrt lshiftrt])
5208
+(define_code_iterator RSHIFTS [ashiftrt lshiftrt])
5210
;; Iterator for integer conversions
5211
(define_code_iterator FIXUORS [fix unsigned_fix])
5213
;; Binary operators whose second operand can be shifted.
5214
-(define_code_iterator shiftable_ops [plus minus ior xor and])
5215
+(define_code_iterator SHIFTABLE_OPS [plus minus ior xor and])
5217
-;; plus and minus are the only shiftable_ops for which Thumb2 allows
5218
+;; plus and minus are the only SHIFTABLE_OPS for which Thumb2 allows
5219
;; a stack pointer opoerand. The minus operation is a candidate for an rsub
5220
;; and hence only plus is supported.
5221
(define_code_attr t2_binop0
5222
[(plus "rk") (minus "r") (ior "r") (xor "r") (and "r")])
5224
-;; The instruction to use when a shiftable_ops has a shift operation as
5225
+;; The instruction to use when a SHIFTABLE_OPS has a shift operation as
5226
;; its first operand.
5227
(define_code_attr arith_shift_insn
5228
[(plus "add") (minus "rsb") (ior "orr") (xor "eor") (and "and")])
5230
+(define_code_attr cmp_op [(eq "eq") (gt "gt") (ge "ge") (lt "lt") (le "le")
5231
+ (gtu "gt") (geu "ge")])
5233
+(define_code_attr cmp_type [(eq "i") (gt "s") (ge "s") (lt "s") (le "s")])
5235
;;----------------------------------------------------------------------------
5237
;;----------------------------------------------------------------------------
5238
@@ -221,6 +235,10 @@
5239
(define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM
5240
UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA])
5242
+(define_int_iterator NEON_VCMP [UNSPEC_VCEQ UNSPEC_VCGT UNSPEC_VCGE UNSPEC_VCLT UNSPEC_VCLE])
5244
+(define_int_iterator NEON_VACMP [UNSPEC_VCAGE UNSPEC_VCAGT])
5246
(define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA])
5248
(define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM
5249
@@ -677,6 +695,11 @@
5253
+(define_int_attr cmp_op_unsp [(UNSPEC_VCEQ "eq") (UNSPEC_VCGT "gt")
5254
+ (UNSPEC_VCGE "ge") (UNSPEC_VCLE "le")
5255
+ (UNSPEC_VCLT "lt") (UNSPEC_VCAGE "ge")
5256
+ (UNSPEC_VCAGT "gt")])
5258
(define_int_attr r [
5259
(UNSPEC_VRHADD_S "r") (UNSPEC_VRHADD_U "r")
5260
(UNSPEC_VHADD_S "") (UNSPEC_VHADD_U "")
5262
(UNSPEC_SHA256H2 "V4SI") (UNSPEC_SHA256SU1 "V4SI")])
5264
;; Both kinds of return insn.
5265
-(define_code_iterator returns [return simple_return])
5266
+(define_code_iterator RETURNS [return simple_return])
5267
(define_code_attr return_str [(return "") (simple_return "simple_")])
5268
(define_code_attr return_simple_p [(return "false") (simple_return "true")])
5269
(define_code_attr return_cond_false [(return " && USE_RETURN_INSN (FALSE)")
5270
--- a/src/gcc/config/arm/iwmmxt.md
5271
+++ b/src/gcc/config/arm/iwmmxt.md
5275
(define_insn "*iwmmxt_arm_movdi"
5276
- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,yr,y,yrUy,*w, r,*w,*w, *Uv")
5277
- (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,yr,y,yrUy,y, r,*w,*w,*Uvi,*w"))]
5278
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,r, y,Uy,*w, r,*w,*w, *Uv")
5279
+ (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,r,y,Uy,y, r,*w,*w,*Uvi,*w"))]
5280
"TARGET_REALLY_IWMMXT
5281
&& ( register_operand (operands[0], DImode)
5282
|| register_operand (operands[1], DImode))"
5283
--- a/src/gcc/config/arm/linux-eabi.h
5284
+++ b/src/gcc/config/arm/linux-eabi.h
5286
%{mfloat-abi=soft*:" GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "} \
5287
%{!mfloat-abi=*:" GLIBC_DYNAMIC_LINKER_DEFAULT "}"
5289
+/* For ARM musl currently supports four dynamic linkers:
5290
+ - ld-musl-arm.so.1 - for the EABI-derived soft-float ABI
5291
+ - ld-musl-armhf.so.1 - for the EABI-derived hard-float ABI
5292
+ - ld-musl-armeb.so.1 - for the EABI-derived soft-float ABI, EB
5293
+ - ld-musl-armebhf.so.1 - for the EABI-derived hard-float ABI, EB
5294
+ musl does not support the legacy OABI mode.
5295
+ All the dynamic linkers live in /lib.
5296
+ We default to soft-float, EL. */
5297
+#undef MUSL_DYNAMIC_LINKER
5298
+#if TARGET_BIG_ENDIAN_DEFAULT
5299
+#define MUSL_DYNAMIC_LINKER_E "%{mlittle-endian:;:eb}"
5301
+#define MUSL_DYNAMIC_LINKER_E "%{mbig-endian:eb}"
5303
+#define MUSL_DYNAMIC_LINKER \
5304
+ "/lib/ld-musl-arm" MUSL_DYNAMIC_LINKER_E "%{mfloat-abi=hard:hf}.so.1"
5306
/* At this point, bpabi.h will have clobbered LINK_SPEC. We want to
5307
use the GNU/Linux version, not the generic BPABI version. */
5312
#define ENDFILE_SPEC \
5313
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} " \
5314
LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC)
5316
/* Use the default LIBGCC_SPEC, not the version in linux-elf.h, as we
5317
--- a/src/gcc/config/arm/neon.md
5318
+++ b/src/gcc/config/arm/neon.md
5319
@@ -1114,7 +1114,7 @@
5321
(define_insn_and_split "<shift>di3_neon"
5322
[(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
5323
- (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
5324
+ (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
5325
(match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
5326
(clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
5327
(clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
5328
@@ -1194,71 +1194,6 @@
5329
[(set_attr "type" "neon_add_widen")]
5332
-;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
5333
-;; shift-count granularity. That's good enough for the middle-end's current
5336
-;; Note that it's not safe to perform such an operation in big-endian mode,
5337
-;; due to element-ordering issues.
5339
-(define_expand "vec_shr_<mode>"
5340
- [(match_operand:VDQ 0 "s_register_operand" "")
5341
- (match_operand:VDQ 1 "s_register_operand" "")
5342
- (match_operand:SI 2 "const_multiple_of_8_operand" "")]
5343
- "TARGET_NEON && !BYTES_BIG_ENDIAN"
5346
- HOST_WIDE_INT num_bits = INTVAL (operands[2]);
5347
- const int width = GET_MODE_BITSIZE (<MODE>mode);
5348
- const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
5349
- rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
5350
- (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
5352
- if (num_bits == width)
5354
- emit_move_insn (operands[0], operands[1]);
5358
- zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
5359
- operands[0] = gen_lowpart (bvecmode, operands[0]);
5360
- operands[1] = gen_lowpart (bvecmode, operands[1]);
5362
- emit_insn (gen_ext (operands[0], operands[1], zero_reg,
5363
- GEN_INT (num_bits / BITS_PER_UNIT)));
5367
-(define_expand "vec_shl_<mode>"
5368
- [(match_operand:VDQ 0 "s_register_operand" "")
5369
- (match_operand:VDQ 1 "s_register_operand" "")
5370
- (match_operand:SI 2 "const_multiple_of_8_operand" "")]
5371
- "TARGET_NEON && !BYTES_BIG_ENDIAN"
5374
- HOST_WIDE_INT num_bits = INTVAL (operands[2]);
5375
- const int width = GET_MODE_BITSIZE (<MODE>mode);
5376
- const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
5377
- rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
5378
- (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
5380
- if (num_bits == 0)
5382
- emit_move_insn (operands[0], CONST0_RTX (<MODE>mode));
5386
- num_bits = width - num_bits;
5388
- zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
5389
- operands[0] = gen_lowpart (bvecmode, operands[0]);
5390
- operands[1] = gen_lowpart (bvecmode, operands[1]);
5392
- emit_insn (gen_ext (operands[0], zero_reg, operands[1],
5393
- GEN_INT (num_bits / BITS_PER_UNIT)));
5397
;; Helpers for quad-word reduction operations
5399
; Add (or smin, smax...) the low N/2 elements of the N-element vector
5400
@@ -1267,7 +1202,7 @@
5402
(define_insn "quad_halves_<code>v4si"
5403
[(set (match_operand:V2SI 0 "s_register_operand" "=w")
5406
(vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
5407
(parallel [(const_int 0) (const_int 1)]))
5408
(vec_select:V2SI (match_dup 1)
5409
@@ -1280,7 +1215,7 @@
5411
(define_insn "quad_halves_<code>v4sf"
5412
[(set (match_operand:V2SF 0 "s_register_operand" "=w")
5415
(vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
5416
(parallel [(const_int 0) (const_int 1)]))
5417
(vec_select:V2SF (match_dup 1)
5418
@@ -1293,7 +1228,7 @@
5420
(define_insn "quad_halves_<code>v8hi"
5421
[(set (match_operand:V4HI 0 "s_register_operand" "+w")
5424
(vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
5425
(parallel [(const_int 0) (const_int 1)
5426
(const_int 2) (const_int 3)]))
5427
@@ -1308,7 +1243,7 @@
5429
(define_insn "quad_halves_<code>v16qi"
5430
[(set (match_operand:V8QI 0 "s_register_operand" "+w")
5433
(vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
5434
(parallel [(const_int 0) (const_int 1)
5435
(const_int 2) (const_int 3)
5436
@@ -2200,134 +2135,140 @@
5437
[(set_attr "type" "neon_sub_halve_narrow_q")]
5440
-(define_insn "neon_vceq<mode>"
5441
- [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
5442
- (unspec:<V_cmp_result>
5443
- [(match_operand:VDQW 1 "s_register_operand" "w,w")
5444
- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")]
5446
+;; These may expand to an UNSPEC pattern when a floating point mode is used
5447
+;; without unsafe math optimizations.
5448
+(define_expand "neon_vc<cmp_op><mode>"
5449
+ [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
5450
+ (neg:<V_cmp_result>
5451
+ (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
5452
+ (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
5455
- vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
5456
- vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, #0"
5457
- [(set (attr "type")
5458
- (if_then_else (match_test "<Is_float_mode>")
5459
- (const_string "neon_fp_compare_s<q>")
5460
- (if_then_else (match_operand 2 "zero_operand")
5461
- (const_string "neon_compare_zero<q>")
5462
- (const_string "neon_compare<q>"))))]
5464
+ /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
5466
+ if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
5467
+ && !flag_unsafe_math_optimizations)
5469
+ /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
5470
+ we define gen_neon_vceq<mode>_insn_unspec only for float modes
5471
+ whereas this expander iterates over the integer modes as well,
5472
+ but we will never expand to UNSPECs for the integer comparisons. */
5473
+ switch (<MODE>mode)
5476
+ emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
5481
+ emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
5486
+ gcc_unreachable ();
5490
+ emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
5497
-(define_insn "neon_vcge<mode>"
5498
+(define_insn "neon_vc<cmp_op><mode>_insn"
5499
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
5500
- (unspec:<V_cmp_result>
5501
- [(match_operand:VDQW 1 "s_register_operand" "w,w")
5502
- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")]
5506
- vcge.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
5507
- vcge.<V_s_elem>\t%<V_reg>0, %<V_reg>1, #0"
5508
+ (neg:<V_cmp_result>
5509
+ (COMPARISONS:<V_cmp_result>
5510
+ (match_operand:VDQW 1 "s_register_operand" "w,w")
5511
+ (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
5512
+ "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
5513
+ && !flag_unsafe_math_optimizations)"
5515
+ char pattern[100];
5516
+ sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
5517
+ " %%<V_reg>1, %s",
5518
+ GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
5519
+ ? "f" : "<cmp_type>",
5520
+ which_alternative == 0
5521
+ ? "%<V_reg>2" : "#0");
5522
+ output_asm_insn (pattern, operands);
5526
- (if_then_else (match_test "<Is_float_mode>")
5527
- (const_string "neon_fp_compare_s<q>")
5528
- (if_then_else (match_operand 2 "zero_operand")
5529
+ (if_then_else (match_operand 2 "zero_operand")
5530
(const_string "neon_compare_zero<q>")
5531
- (const_string "neon_compare<q>"))))]
5534
-(define_insn "neon_vcgeu<mode>"
5535
- [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
5536
- (unspec:<V_cmp_result>
5537
- [(match_operand:VDQIW 1 "s_register_operand" "w")
5538
- (match_operand:VDQIW 2 "s_register_operand" "w")]
5541
- "vcge.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5542
- [(set_attr "type" "neon_compare<q>")]
5543
+ (const_string "neon_compare<q>")))]
5546
-(define_insn "neon_vcgt<mode>"
5547
+(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
5548
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
5549
(unspec:<V_cmp_result>
5550
- [(match_operand:VDQW 1 "s_register_operand" "w,w")
5551
- (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")]
5553
+ [(match_operand:VCVTF 1 "s_register_operand" "w,w")
5554
+ (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
5558
- vcgt.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
5559
- vcgt.<V_s_elem>\t%<V_reg>0, %<V_reg>1, #0"
5560
- [(set (attr "type")
5561
- (if_then_else (match_test "<Is_float_mode>")
5562
- (const_string "neon_fp_compare_s<q>")
5563
- (if_then_else (match_operand 2 "zero_operand")
5564
- (const_string "neon_compare_zero<q>")
5565
- (const_string "neon_compare<q>"))))]
5567
+ char pattern[100];
5568
+ sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
5569
+ " %%<V_reg>1, %s",
5570
+ which_alternative == 0
5571
+ ? "%<V_reg>2" : "#0");
5572
+ output_asm_insn (pattern, operands);
5575
+ [(set_attr "type" "neon_fp_compare_s<q>")]
5578
-(define_insn "neon_vcgtu<mode>"
5579
+(define_insn "neon_vc<cmp_op>u<mode>"
5580
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
5581
- (unspec:<V_cmp_result>
5582
- [(match_operand:VDQIW 1 "s_register_operand" "w")
5583
- (match_operand:VDQIW 2 "s_register_operand" "w")]
5585
+ (neg:<V_cmp_result>
5586
+ (GTUGEU:<V_cmp_result>
5587
+ (match_operand:VDQIW 1 "s_register_operand" "w")
5588
+ (match_operand:VDQIW 2 "s_register_operand" "w"))))]
5590
- "vcgt.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5591
+ "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5592
[(set_attr "type" "neon_compare<q>")]
5595
-;; VCLE and VCLT only support comparisons with immediate zero (register
5596
-;; variants are VCGE and VCGT with operands reversed).
5598
-(define_insn "neon_vcle<mode>"
5599
- [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
5600
- (unspec:<V_cmp_result>
5601
- [(match_operand:VDQW 1 "s_register_operand" "w")
5602
- (match_operand:VDQW 2 "zero_operand" "Dz")]
5605
- "vcle.<V_s_elem>\t%<V_reg>0, %<V_reg>1, #0"
5606
- [(set (attr "type")
5607
- (if_then_else (match_test "<Is_float_mode>")
5608
- (const_string "neon_fp_compare_s<q>")
5609
- (if_then_else (match_operand 2 "zero_operand")
5610
- (const_string "neon_compare_zero<q>")
5611
- (const_string "neon_compare<q>"))))]
5614
-(define_insn "neon_vclt<mode>"
5615
- [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
5616
- (unspec:<V_cmp_result>
5617
- [(match_operand:VDQW 1 "s_register_operand" "w")
5618
- (match_operand:VDQW 2 "zero_operand" "Dz")]
5620
+(define_expand "neon_vca<cmp_op><mode>"
5621
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
5622
+ (neg:<V_cmp_result>
5623
+ (GTGE:<V_cmp_result>
5624
+ (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
5625
+ (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
5627
- "vclt.<V_s_elem>\t%<V_reg>0, %<V_reg>1, #0"
5628
- [(set (attr "type")
5629
- (if_then_else (match_test "<Is_float_mode>")
5630
- (const_string "neon_fp_compare_s<q>")
5631
- (if_then_else (match_operand 2 "zero_operand")
5632
- (const_string "neon_compare_zero<q>")
5633
- (const_string "neon_compare<q>"))))]
5635
+ if (flag_unsafe_math_optimizations)
5636
+ emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
5639
+ emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
5646
-(define_insn "neon_vcage<mode>"
5647
+(define_insn "neon_vca<cmp_op><mode>_insn"
5648
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
5649
- (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
5650
- (match_operand:VCVTF 2 "s_register_operand" "w")]
5653
- "vacge.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5654
+ (neg:<V_cmp_result>
5655
+ (GTGE:<V_cmp_result>
5656
+ (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
5657
+ (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
5658
+ "TARGET_NEON && flag_unsafe_math_optimizations"
5659
+ "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5660
[(set_attr "type" "neon_fp_compare_s<q>")]
5663
-(define_insn "neon_vcagt<mode>"
5664
+(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
5665
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
5666
(unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
5667
(match_operand:VCVTF 2 "s_register_operand" "w")]
5671
- "vacgt.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5672
+ "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5673
[(set_attr "type" "neon_fp_compare_s<q>")]
5676
--- a/src/gcc/config/arm/thumb2.md
5677
+++ b/src/gcc/config/arm/thumb2.md
5682
- [(set_attr "type" "mov_reg,alu_imm,alu_imm,alu_imm,mov_imm,load1,load1,store1,store1")
5683
+ [(set_attr "type" "mov_reg,mov_imm,mov_imm,mvn_imm,mov_imm,load1,load1,store1,store1")
5684
(set_attr "length" "2,4,2,4,4,4,4,4,4")
5685
(set_attr "predicable" "yes")
5686
(set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no")
5687
@@ -486,12 +486,12 @@
5690
(define_insn_and_split "*thumb2_movsicc_insn"
5691
- [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r")
5692
+ [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r,r")
5694
(match_operator 3 "arm_comparison_operator"
5695
[(match_operand 4 "cc_register" "") (const_int 0)])
5696
- (match_operand:SI 1 "arm_not_operand" "0 ,lPy,0 ,0,rI,K,rI,rI,K ,K,r")
5697
- (match_operand:SI 2 "arm_not_operand" "lPy,0 ,rI,K,0 ,0,rI,K ,rI,K,r")))]
5698
+ (match_operand:SI 1 "arm_not_operand" "0 ,lPy,0 ,0,rI,K,I ,r,rI,K ,K,r")
5699
+ (match_operand:SI 2 "arm_not_operand" "lPy,0 ,rI,K,0 ,0,rI,I,K ,rI,K,r")))]
5702
it\\t%D3\;mov%D3\\t%0, %2
5703
@@ -504,12 +504,14 @@
5709
; alt 6: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
5710
- ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
5711
- ; alt 8: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
5712
- ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2
5713
- ; alt 10: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
5714
+ ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
5715
+ ; alt 8: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
5716
+ ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
5717
+ ; alt 10: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2
5718
+ ; alt 11: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
5719
"&& reload_completed"
5722
@@ -540,10 +542,30 @@
5726
- [(set_attr "length" "4,4,6,6,6,6,10,10,10,10,6")
5727
- (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,yes")
5728
+ [(set_attr "length" "4,4,6,6,6,6,10,8,10,10,10,6")
5729
+ (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,no,yes")
5730
(set_attr "conds" "use")
5731
- (set_attr "type" "multiple")]
5732
+ (set_attr_alternative "type"
5733
+ [(if_then_else (match_operand 2 "const_int_operand" "")
5734
+ (const_string "mov_imm")
5735
+ (const_string "mov_reg"))
5736
+ (if_then_else (match_operand 1 "const_int_operand" "")
5737
+ (const_string "mov_imm")
5738
+ (const_string "mov_reg"))
5739
+ (if_then_else (match_operand 2 "const_int_operand" "")
5740
+ (const_string "mov_imm")
5741
+ (const_string "mov_reg"))
5742
+ (const_string "mvn_imm")
5743
+ (if_then_else (match_operand 1 "const_int_operand" "")
5744
+ (const_string "mov_imm")
5745
+ (const_string "mov_reg"))
5746
+ (const_string "mvn_imm")
5747
+ (const_string "multiple")
5748
+ (const_string "multiple")
5749
+ (const_string "multiple")
5750
+ (const_string "multiple")
5751
+ (const_string "multiple")
5752
+ (const_string "multiple")])]
5755
(define_insn "*thumb2_movsfcc_soft_insn"
5756
@@ -1182,7 +1204,11 @@
5758
[(set_attr "predicable" "yes")
5759
(set_attr "length" "2")
5760
- (set_attr "type" "alu_sreg")]
5761
+ (set_attr_alternative "type"
5762
+ [(if_then_else (match_operand 2 "const_int_operand" "")
5763
+ (const_string "alu_imm")
5764
+ (const_string "alu_sreg"))
5765
+ (const_string "alu_imm")])]
5768
(define_insn "*thumb2_subsi_short"
5769
@@ -1247,14 +1273,21 @@
5771
[(set_attr "conds" "set")
5772
(set_attr "length" "2,2,4")
5773
- (set_attr "type" "alu_sreg")]
5774
+ (set_attr_alternative "type"
5775
+ [(if_then_else (match_operand 2 "const_int_operand" "")
5776
+ (const_string "alus_imm")
5777
+ (const_string "alus_sreg"))
5778
+ (const_string "alus_imm")
5779
+ (if_then_else (match_operand 2 "const_int_operand" "")
5780
+ (const_string "alus_imm")
5781
+ (const_string "alus_sreg"))])]
5784
(define_insn "*thumb2_addsi3_compare0_scratch"
5785
[(set (reg:CC_NOOV CC_REGNUM)
5787
- (plus:SI (match_operand:SI 0 "s_register_operand" "l,l, r,r")
5788
- (match_operand:SI 1 "arm_add_operand" "Pv,l,IL,r"))
5789
+ (plus:SI (match_operand:SI 0 "s_register_operand" "l, r")
5790
+ (match_operand:SI 1 "arm_add_operand" "lPv,rIL"))
5794
@@ -1271,8 +1304,10 @@
5795
return \"cmn\\t%0, %1\";
5797
[(set_attr "conds" "set")
5798
- (set_attr "length" "2,2,4,4")
5799
- (set_attr "type" "alus_imm,alus_sreg,alus_imm,alus_sreg")]
5800
+ (set_attr "length" "2,4")
5801
+ (set (attr "type") (if_then_else (match_operand 1 "const_int_operand" "")
5802
+ (const_string "alus_imm")
5803
+ (const_string "alus_sreg")))]
5806
(define_insn "*thumb2_mulsi_short"
5807
--- a/src/gcc/config/arm/unknown-elf.h
5808
+++ b/src/gcc/config/arm/unknown-elf.h
5810
#define UNKNOWN_ELF_STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s"
5812
#undef STARTFILE_SPEC
5813
-#define STARTFILE_SPEC UNKNOWN_ELF_STARTFILE_SPEC
5814
+#define STARTFILE_SPEC \
5815
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} " \
5816
+ UNKNOWN_ELF_STARTFILE_SPEC
5818
#define UNKNOWN_ELF_ENDFILE_SPEC "crtend%O%s crtn%O%s"
5822
ASM_OUTPUT_ALIGN (FILE, floor_log2 (ALIGN / BITS_PER_UNIT)); \
5823
ASM_OUTPUT_LABEL (FILE, NAME); \
5824
- fprintf (FILE, "\t.space\t%d\n", SIZE ? (int)(SIZE) : 1); \
5825
+ fprintf (FILE, "\t.space\t%d\n", SIZE ? (int) SIZE : 1); \
5826
+ fprintf (FILE, "\t.size\t%s, %d\n", \
5827
+ NAME, SIZE ? (int) SIZE : 1); \
5831
--- a/src/gcc/config/glibc-stdint.h
5832
+++ b/src/gcc/config/glibc-stdint.h
5833
@@ -22,6 +22,12 @@ a copy of the GCC Runtime Library Exception along with this program;
5834
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
5835
<http://www.gnu.org/licenses/>. */
5837
+/* Systems using musl libc should use this header and make sure
5838
+ OPTION_MUSL is defined correctly before using the TYPE macros. */
5839
+#ifndef OPTION_MUSL
5840
+#define OPTION_MUSL 0
5843
#define SIG_ATOMIC_TYPE "int"
5845
#define INT8_TYPE "signed char"
5846
@@ -43,12 +49,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
5847
#define UINT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
5849
#define INT_FAST8_TYPE "signed char"
5850
-#define INT_FAST16_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int")
5851
-#define INT_FAST32_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int")
5852
+#define INT_FAST16_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long int" : "int")
5853
+#define INT_FAST32_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long int" : "int")
5854
#define INT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
5855
#define UINT_FAST8_TYPE "unsigned char"
5856
-#define UINT_FAST16_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int")
5857
-#define UINT_FAST32_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int")
5858
+#define UINT_FAST16_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long unsigned int" : "unsigned int")
5859
+#define UINT_FAST32_TYPE (LONG_TYPE_SIZE == 64 && !OPTION_MUSL ? "long unsigned int" : "unsigned int")
5860
#define UINT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
5862
#define INTPTR_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int")
5863
--- a/src/gcc/config/linux.h
5864
+++ b/src/gcc/config/linux.h
5865
@@ -32,10 +32,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
5866
#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC)
5867
#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC)
5868
#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC)
5870
+#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL)
5872
#define OPTION_GLIBC (linux_libc == LIBC_GLIBC)
5873
#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC)
5874
#define OPTION_BIONIC (linux_libc == LIBC_BIONIC)
5876
+#define OPTION_MUSL (linux_libc == LIBC_MUSL)
5879
#define GNU_USER_TARGET_OS_CPP_BUILTINS() \
5880
@@ -50,21 +54,25 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
5883
/* Determine which dynamic linker to use depending on whether GLIBC or
5884
- uClibc or Bionic is the default C library and whether
5885
- -muclibc or -mglibc or -mbionic has been passed to change the default. */
5886
+ uClibc or Bionic or musl is the default C library and whether
5887
+ -muclibc or -mglibc or -mbionic or -mmusl has been passed to change
5890
-#define CHOOSE_DYNAMIC_LINKER1(LIBC1, LIBC2, LIBC3, LD1, LD2, LD3) \
5891
- "%{" LIBC2 ":" LD2 ";:%{" LIBC3 ":" LD3 ";:" LD1 "}}"
5892
+#define CHOOSE_DYNAMIC_LINKER1(LIBC1, LIBC2, LIBC3, LIBC4, LD1, LD2, LD3, LD4) \
5893
+ "%{" LIBC2 ":" LD2 ";:%{" LIBC3 ":" LD3 ";:%{" LIBC4 ":" LD4 ";:" LD1 "}}}"
5895
#if DEFAULT_LIBC == LIBC_GLIBC
5896
-#define CHOOSE_DYNAMIC_LINKER(G, U, B) \
5897
- CHOOSE_DYNAMIC_LINKER1 ("mglibc", "muclibc", "mbionic", G, U, B)
5898
+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \
5899
+ CHOOSE_DYNAMIC_LINKER1 ("mglibc", "muclibc", "mbionic", "mmusl", G, U, B, M)
5900
#elif DEFAULT_LIBC == LIBC_UCLIBC
5901
-#define CHOOSE_DYNAMIC_LINKER(G, U, B) \
5902
- CHOOSE_DYNAMIC_LINKER1 ("muclibc", "mglibc", "mbionic", U, G, B)
5903
+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \
5904
+ CHOOSE_DYNAMIC_LINKER1 ("muclibc", "mglibc", "mbionic", "mmusl", U, G, B, M)
5905
#elif DEFAULT_LIBC == LIBC_BIONIC
5906
-#define CHOOSE_DYNAMIC_LINKER(G, U, B) \
5907
- CHOOSE_DYNAMIC_LINKER1 ("mbionic", "mglibc", "muclibc", B, G, U)
5908
+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \
5909
+ CHOOSE_DYNAMIC_LINKER1 ("mbionic", "mglibc", "muclibc", "mmusl", B, G, U, M)
5910
+#elif DEFAULT_LIBC == LIBC_MUSL
5911
+#define CHOOSE_DYNAMIC_LINKER(G, U, B, M) \
5912
+ CHOOSE_DYNAMIC_LINKER1 ("mmusl", "mglibc", "muclibc", "mbionic", M, G, U, B)
5914
#error "Unsupported DEFAULT_LIBC"
5915
#endif /* DEFAULT_LIBC */
5916
@@ -81,24 +89,100 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
5917
#define BIONIC_DYNAMIC_LINKER32 "/system/bin/linker"
5918
#define BIONIC_DYNAMIC_LINKER64 "/system/bin/linker64"
5919
#define BIONIC_DYNAMIC_LINKERX32 "/system/bin/linkerx32"
5920
+/* Should be redefined for each target that supports musl. */
5921
+#define MUSL_DYNAMIC_LINKER "/dev/null"
5922
+#define MUSL_DYNAMIC_LINKER32 "/dev/null"
5923
+#define MUSL_DYNAMIC_LINKER64 "/dev/null"
5924
+#define MUSL_DYNAMIC_LINKERX32 "/dev/null"
5926
#define GNU_USER_DYNAMIC_LINKER \
5927
CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER, \
5928
- BIONIC_DYNAMIC_LINKER)
5929
+ BIONIC_DYNAMIC_LINKER, MUSL_DYNAMIC_LINKER)
5930
#define GNU_USER_DYNAMIC_LINKER32 \
5931
CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER32, UCLIBC_DYNAMIC_LINKER32, \
5932
- BIONIC_DYNAMIC_LINKER32)
5933
+ BIONIC_DYNAMIC_LINKER32, MUSL_DYNAMIC_LINKER32)
5934
#define GNU_USER_DYNAMIC_LINKER64 \
5935
CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER64, UCLIBC_DYNAMIC_LINKER64, \
5936
- BIONIC_DYNAMIC_LINKER64)
5937
+ BIONIC_DYNAMIC_LINKER64, MUSL_DYNAMIC_LINKER64)
5938
#define GNU_USER_DYNAMIC_LINKERX32 \
5939
CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERX32, UCLIBC_DYNAMIC_LINKERX32, \
5940
- BIONIC_DYNAMIC_LINKERX32)
5941
+ BIONIC_DYNAMIC_LINKERX32, MUSL_DYNAMIC_LINKERX32)
5943
/* Whether we have Bionic libc runtime */
5944
#undef TARGET_HAS_BIONIC
5945
#define TARGET_HAS_BIONIC (OPTION_BIONIC)
5947
+/* musl avoids problematic includes by rearranging the include directories.
5948
+ * Unfortunately, this is mostly duplicated from cppdefault.c */
5949
+#if DEFAULT_LIBC == LIBC_MUSL
5950
+#define INCLUDE_DEFAULTS_MUSL_GPP \
5951
+ { GPLUSPLUS_INCLUDE_DIR, "G++", 1, 1, \
5952
+ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 }, \
5953
+ { GPLUSPLUS_TOOL_INCLUDE_DIR, "G++", 1, 1, \
5954
+ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 1 }, \
5955
+ { GPLUSPLUS_BACKWARD_INCLUDE_DIR, "G++", 1, 1, \
5956
+ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 },
5958
+#ifdef LOCAL_INCLUDE_DIR
5959
+#define INCLUDE_DEFAULTS_MUSL_LOCAL \
5960
+ { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 2 }, \
5961
+ { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 0 },
5963
+#define INCLUDE_DEFAULTS_MUSL_LOCAL
5966
+#ifdef PREFIX_INCLUDE_DIR
5967
+#define INCLUDE_DEFAULTS_MUSL_PREFIX \
5968
+ { PREFIX_INCLUDE_DIR, 0, 0, 1, 0, 0},
5970
+#define INCLUDE_DEFAULTS_MUSL_PREFIX
5973
+#ifdef CROSS_INCLUDE_DIR
5974
+#define INCLUDE_DEFAULTS_MUSL_CROSS \
5975
+ { CROSS_INCLUDE_DIR, "GCC", 0, 0, 0, 0},
5977
+#define INCLUDE_DEFAULTS_MUSL_CROSS
5980
+#ifdef TOOL_INCLUDE_DIR
5981
+#define INCLUDE_DEFAULTS_MUSL_TOOL \
5982
+ { TOOL_INCLUDE_DIR, "BINUTILS", 0, 1, 0, 0},
5984
+#define INCLUDE_DEFAULTS_MUSL_TOOL
5987
+#ifdef NATIVE_SYSTEM_HEADER_DIR
5988
+#define INCLUDE_DEFAULTS_MUSL_NATIVE \
5989
+ { NATIVE_SYSTEM_HEADER_DIR, 0, 0, 0, 1, 2 }, \
5990
+ { NATIVE_SYSTEM_HEADER_DIR, 0, 0, 0, 1, 0 },
5992
+#define INCLUDE_DEFAULTS_MUSL_NATIVE
5995
+#if defined (CROSS_DIRECTORY_STRUCTURE) && !defined (TARGET_SYSTEM_ROOT)
5996
+# undef INCLUDE_DEFAULTS_MUSL_LOCAL
5997
+# define INCLUDE_DEFAULTS_MUSL_LOCAL
5998
+# undef INCLUDE_DEFAULTS_MUSL_NATIVE
5999
+# define INCLUDE_DEFAULTS_MUSL_NATIVE
6001
+# undef INCLUDE_DEFAULTS_MUSL_CROSS
6002
+# define INCLUDE_DEFAULTS_MUSL_CROSS
6005
+#undef INCLUDE_DEFAULTS
6006
+#define INCLUDE_DEFAULTS \
6008
+ INCLUDE_DEFAULTS_MUSL_GPP \
6009
+ INCLUDE_DEFAULTS_MUSL_PREFIX \
6010
+ INCLUDE_DEFAULTS_MUSL_CROSS \
6011
+ INCLUDE_DEFAULTS_MUSL_TOOL \
6012
+ INCLUDE_DEFAULTS_MUSL_NATIVE \
6013
+ { GCC_INCLUDE_DIR, "GCC", 0, 1, 0, 0 }, \
6014
+ { 0, 0, 0, 0, 0, 0 } \
6018
#if (DEFAULT_LIBC == LIBC_UCLIBC) && defined (SINGLE_LIBC) /* uClinux */
6019
/* This is a *uclinux* target. We don't define below macros to normal linux
6020
versions, because doing so would require *uclinux* targets to include
6021
--- a/src/gcc/config/linux.opt
6022
+++ b/src/gcc/config/linux.opt
6023
@@ -28,5 +28,9 @@ Target Report RejectNegative Var(linux_libc,LIBC_GLIBC) Negative(muclibc)
6027
-Target Report RejectNegative Var(linux_libc,LIBC_UCLIBC) Negative(mbionic)
6028
+Target Report RejectNegative Var(linux_libc,LIBC_UCLIBC) Negative(mmusl)
6029
Use uClibc C library
6032
+Target Report RejectNegative Var(linux_libc,LIBC_MUSL) Negative(mbionic)
6034
--- a/src/gcc/config/mips/linux.h
6035
+++ b/src/gcc/config/mips/linux.h
6036
@@ -37,7 +37,13 @@ along with GCC; see the file COPYING3. If not see
6037
#define UCLIBC_DYNAMIC_LINKERN32 \
6038
"%{mnan=2008:/lib32/ld-uClibc-mipsn8.so.0;:/lib32/ld-uClibc.so.0}"
6040
+#undef MUSL_DYNAMIC_LINKER32
6041
+#define MUSL_DYNAMIC_LINKER32 "/lib/ld-musl-mips%{EL:el}%{msoft-float:-sf}.so.1"
6042
+#undef MUSL_DYNAMIC_LINKER64
6043
+#define MUSL_DYNAMIC_LINKER64 "/lib/ld-musl-mips64%{EL:el}%{msoft-float:-sf}.so.1"
6044
+#define MUSL_DYNAMIC_LINKERN32 "/lib/ld-musl-mipsn32%{EL:el}%{msoft-float:-sf}.so.1"
6046
#define BIONIC_DYNAMIC_LINKERN32 "/system/bin/linker32"
6047
#define GNU_USER_DYNAMIC_LINKERN32 \
6048
CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERN32, UCLIBC_DYNAMIC_LINKERN32, \
6049
- BIONIC_DYNAMIC_LINKERN32)
6050
+ BIONIC_DYNAMIC_LINKERN32, MUSL_DYNAMIC_LINKERN32)
6051
--- a/src/gcc/config/rs6000/linux.h
6052
+++ b/src/gcc/config/rs6000/linux.h
6054
#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC)
6055
#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC)
6056
#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC)
6058
+#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL)
6060
#define OPTION_GLIBC (linux_libc == LIBC_GLIBC)
6061
#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC)
6062
#define OPTION_BIONIC (linux_libc == LIBC_BIONIC)
6064
+#define OPTION_MUSL (linux_libc == LIBC_MUSL)
6067
/* Determine what functions are present at the runtime;
6068
--- a/src/gcc/config/rs6000/linux64.h
6069
+++ b/src/gcc/config/rs6000/linux64.h
6070
@@ -299,10 +299,14 @@ extern int dot_symbols;
6071
#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC)
6072
#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC)
6073
#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC)
6075
+#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL)
6077
#define OPTION_GLIBC (linux_libc == LIBC_GLIBC)
6078
#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC)
6079
#define OPTION_BIONIC (linux_libc == LIBC_BIONIC)
6081
+#define OPTION_MUSL (linux_libc == LIBC_MUSL)
6084
/* Determine what functions are present at the runtime;
6085
--- a/src/gcc/configure
6086
+++ b/src/gcc/configure
6087
@@ -1699,7 +1699,8 @@ Optional Packages:
6088
use sysroot as the system root during the build
6089
--with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR
6090
--with-specs=SPECS add SPECS to driver command-line processing
6091
- --with-pkgversion=PKG Use PKG in the version string in place of "GCC"
6092
+ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro
6093
+ GCC `cat $srcdir/LINARO-VERSION`"
6094
--with-bugurl=URL Direct users to URL to report a bug
6095
--with-multilib-list select multilibs (AArch64, SH and x86-64 only)
6096
--with-gnu-ld assume the C compiler uses GNU ld default=no
6097
@@ -7362,7 +7363,7 @@ if test "${with_pkgversion+set}" = set; then :
6098
*) PKGVERSION="($withval) " ;;
6101
- PKGVERSION="(GCC) "
6102
+ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) "
6106
@@ -18162,7 +18163,7 @@ else
6107
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
6108
lt_status=$lt_dlunknown
6109
cat > conftest.$ac_ext <<_LT_EOF
6110
-#line 18165 "configure"
6111
+#line 18166 "configure"
6112
#include "confdefs.h"
6115
@@ -18268,7 +18269,7 @@ else
6116
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
6117
lt_status=$lt_dlunknown
6118
cat > conftest.$ac_ext <<_LT_EOF
6119
-#line 18271 "configure"
6120
+#line 18272 "configure"
6121
#include "confdefs.h"
6124
@@ -27742,6 +27743,9 @@ if test "${gcc_cv_libc_provides_ssp+set}" = set; then :
6126
gcc_cv_libc_provides_ssp=no
6129
+ # All versions of musl provide stack protector
6130
+ gcc_cv_libc_provides_ssp=yes;;
6131
*-*-linux* | *-*-kfreebsd*-gnu | *-*-knetbsd*-gnu)
6132
# glibc 2.4 and later provides __stack_chk_fail and
6133
# either __stack_chk_guard, or TLS access to stack guard canary.
6134
@@ -27774,6 +27778,7 @@ fi
6135
# <http://gcc.gnu.org/ml/gcc/2008-10/msg00130.html>) and for now
6136
# simply assert that glibc does provide this, which is true for all
6137
# realistically usable GNU/Hurd configurations.
6138
+ # All supported versions of musl provide it as well
6139
gcc_cv_libc_provides_ssp=yes;;
6140
*-*-darwin* | *-*-freebsd*)
6141
ac_fn_c_check_func "$LINENO" "__stack_chk_fail" "ac_cv_func___stack_chk_fail"
6142
@@ -27870,6 +27875,9 @@ case "$target" in
6143
gcc_cv_target_dl_iterate_phdr=no
6147
+ gcc_cv_target_dl_iterate_phdr=yes
6151
if test x$gcc_cv_target_dl_iterate_phdr = xyes; then
6152
--- a/src/gcc/configure.ac
6153
+++ b/src/gcc/configure.ac
6154
@@ -862,7 +862,7 @@ AC_ARG_WITH(specs,
6156
AC_SUBST(CONFIGURE_SPECS)
6158
-ACX_PKGVERSION([GCC])
6159
+ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`])
6160
ACX_BUGURL([http://gcc.gnu.org/bugs.html])
6162
# Sanity check enable_languages in case someone does not run the toplevel
6163
@@ -5229,6 +5229,9 @@ AC_CACHE_CHECK(__stack_chk_fail in target C library,
6164
gcc_cv_libc_provides_ssp,
6165
[gcc_cv_libc_provides_ssp=no
6168
+ # All versions of musl provide stack protector
6169
+ gcc_cv_libc_provides_ssp=yes;;
6170
*-*-linux* | *-*-kfreebsd*-gnu | *-*-knetbsd*-gnu)
6171
# glibc 2.4 and later provides __stack_chk_fail and
6172
# either __stack_chk_guard, or TLS access to stack guard canary.
6173
@@ -5255,6 +5258,7 @@ AC_CACHE_CHECK(__stack_chk_fail in target C library,
6174
# <http://gcc.gnu.org/ml/gcc/2008-10/msg00130.html>) and for now
6175
# simply assert that glibc does provide this, which is true for all
6176
# realistically usable GNU/Hurd configurations.
6177
+ # All supported versions of musl provide it as well
6178
gcc_cv_libc_provides_ssp=yes;;
6179
*-*-darwin* | *-*-freebsd*)
6180
AC_CHECK_FUNC(__stack_chk_fail,[gcc_cv_libc_provides_ssp=yes],
6181
@@ -5328,6 +5332,9 @@ case "$target" in
6182
gcc_cv_target_dl_iterate_phdr=no
6186
+ gcc_cv_target_dl_iterate_phdr=yes
6189
GCC_TARGET_TEMPLATE([TARGET_DL_ITERATE_PHDR])
6190
if test x$gcc_cv_target_dl_iterate_phdr = xyes; then
6191
--- a/src/gcc/cp/Make-lang.in
6192
+++ b/src/gcc/cp/Make-lang.in
6193
@@ -155,7 +155,7 @@ check-c++-subtargets : check-g++-subtargets
6194
# List of targets that can use the generic check- rule and its // variant.
6195
lang_checks += check-g++
6196
lang_checks_parallelized += check-g++
6197
-# For description see comment above check_gcc_parallelize in gcc/Makefile.in.
6198
+# For description see the check_$lang_parallelize comment in gcc/Makefile.in.
6199
check_g++_parallelize = 10000
6202
@@ -221,6 +221,7 @@ c++.mostlyclean:
6204
-rm -f cp/*$(objext)
6205
-rm -f cp/*$(coverageexts)
6206
+ -rm -f xg++$(exeext) g++-cross$(exeext) cc1plus$(exeext)
6209
-rm -f cp/config.status cp/Makefile
6210
--- a/src/gcc/cppbuiltin.c
6211
+++ b/src/gcc/cppbuiltin.c
6212
@@ -62,18 +62,41 @@ parse_basever (int *major, int *minor, int *patchlevel)
6213
*patchlevel = s_patchlevel;
6216
+/* Parse a LINAROVER version string of the format "M.m-year.month[-spin][~dev]"
6217
+ to create Linaro release number YYYYMM and spin version. */
6219
+parse_linarover (int *release, int *spin)
6221
+ static int s_year = -1, s_month, s_spin;
6224
+ if (sscanf (LINAROVER, "%*[^-]-%d.%d-%d", &s_year, &s_month, &s_spin) != 3)
6226
+ sscanf (LINAROVER, "%*[^-]-%d.%d", &s_year, &s_month);
6231
+ *release = s_year * 100 + s_month;
6237
/* Define __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ and __VERSION__. */
6239
define__GNUC__ (cpp_reader *pfile)
6241
- int major, minor, patchlevel;
6242
+ int major, minor, patchlevel, linaro_release, linaro_spin;
6244
parse_basever (&major, &minor, &patchlevel);
6245
+ parse_linarover (&linaro_release, &linaro_spin);
6246
cpp_define_formatted (pfile, "__GNUC__=%d", major);
6247
cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor);
6248
cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel);
6249
cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string);
6250
+ cpp_define_formatted (pfile, "__LINARO_RELEASE__=%d", linaro_release);
6251
+ cpp_define_formatted (pfile, "__LINARO_SPIN__=%d", linaro_spin);
6252
cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED);
6253
cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST);
6254
cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE);
6255
--- a/src/gcc/cprop.c
6256
+++ b/src/gcc/cprop.c
6257
@@ -285,6 +285,15 @@ cprop_constant_p (const_rtx x)
6258
return CONSTANT_P (x) && (GET_CODE (x) != CONST || shared_const_p (x));
6261
+/* Determine whether the rtx X should be treated as a register that can
6262
+ be propagated. Any pseudo-register is fine. */
6265
+cprop_reg_p (const_rtx x)
6267
+ return REG_P (x) && !HARD_REGISTER_P (x);
6270
/* Scan SET present in INSN and add an entry to the hash TABLE.
6271
IMPLICIT is true if it's an implicit set, false otherwise. */
6273
@@ -295,8 +304,7 @@ hash_scan_set (rtx set, rtx_insn *insn, struct hash_table_d *table,
6274
rtx src = SET_SRC (set);
6275
rtx dest = SET_DEST (set);
6278
- && ! HARD_REGISTER_P (dest)
6279
+ if (cprop_reg_p (dest)
6280
&& reg_available_p (dest, insn)
6281
&& can_copy_p (GET_MODE (dest)))
6283
@@ -321,9 +329,8 @@ hash_scan_set (rtx set, rtx_insn *insn, struct hash_table_d *table,
6284
src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
6286
/* Record sets for constant/copy propagation. */
6288
+ if ((cprop_reg_p (src)
6290
- && ! HARD_REGISTER_P (src)
6291
&& reg_available_p (src, insn))
6292
|| cprop_constant_p (src))
6293
insert_set_in_table (dest, src, insn, table, implicit);
6294
@@ -821,15 +828,15 @@ try_replace_reg (rtx from, rtx to, rtx_insn *insn)
6298
-/* Find a set of REGNOs that are available on entry to INSN's block. Return
6299
- NULL no such set is found. */
6300
+/* Find a set of REGNOs that are available on entry to INSN's block. If found,
6301
+ SET_RET[0] will be assigned a set with a register source and SET_RET[1] a
6302
+ set with a constant source. If not found the corresponding entry is set to
6305
-static struct cprop_expr *
6306
-find_avail_set (int regno, rtx_insn *insn)
6308
+find_avail_set (int regno, rtx_insn *insn, struct cprop_expr *set_ret[2])
6310
- /* SET1 contains the last set found that can be returned to the caller for
6311
- use in a substitution. */
6312
- struct cprop_expr *set1 = 0;
6313
+ set_ret[0] = set_ret[1] = NULL;
6315
/* Loops are not possible here. To get a loop we would need two sets
6316
available at the start of the block containing INSN. i.e. we would
6317
@@ -869,8 +876,10 @@ find_avail_set (int regno, rtx_insn *insn)
6318
If the source operand changed, we may still use it for the next
6319
iteration of this loop, but we may not use it for substitutions. */
6321
- if (cprop_constant_p (src) || reg_not_set_p (src, insn))
6323
+ if (cprop_constant_p (src))
6325
+ else if (reg_not_set_p (src, insn))
6328
/* If the source of the set is anything except a register, then
6329
we have reached the end of the copy chain. */
6330
@@ -881,10 +890,6 @@ find_avail_set (int regno, rtx_insn *insn)
6331
and see if we have an available copy into SRC. */
6332
regno = REGNO (src);
6335
- /* SET1 holds the last set that was available and anticipatable at
6340
/* Subroutine of cprop_insn that tries to propagate constants into
6341
@@ -1050,40 +1055,40 @@ cprop_insn (rtx_insn *insn)
6342
int changed = 0, changed_this_round;
6346
- changed_this_round = 0;
6347
- reg_use_count = 0;
6348
- note_uses (&PATTERN (insn), find_used_regs, NULL);
6350
- /* We may win even when propagating constants into notes. */
6351
- note = find_reg_equal_equiv_note (insn);
6353
- find_used_regs (&XEXP (note, 0), NULL);
6355
- for (i = 0; i < reg_use_count; i++)
6358
- rtx reg_used = reg_use_table[i];
6359
- unsigned int regno = REGNO (reg_used);
6361
- struct cprop_expr *set;
6362
+ changed_this_round = 0;
6363
+ reg_use_count = 0;
6364
+ note_uses (&PATTERN (insn), find_used_regs, NULL);
6366
- /* If the register has already been set in this block, there's
6367
- nothing we can do. */
6368
- if (! reg_not_set_p (reg_used, insn))
6370
+ /* We may win even when propagating constants into notes. */
6371
+ note = find_reg_equal_equiv_note (insn);
6373
+ find_used_regs (&XEXP (note, 0), NULL);
6375
- /* Find an assignment that sets reg_used and is available
6376
- at the start of the block. */
6377
- set = find_avail_set (regno, insn);
6380
+ for (i = 0; i < reg_use_count; i++)
6382
+ rtx reg_used = reg_use_table[i];
6383
+ unsigned int regno = REGNO (reg_used);
6384
+ rtx src_cst = NULL, src_reg = NULL;
6385
+ struct cprop_expr *set[2];
6388
+ /* If the register has already been set in this block, there's
6389
+ nothing we can do. */
6390
+ if (! reg_not_set_p (reg_used, insn))
6393
- /* Constant propagation. */
6394
- if (cprop_constant_p (src))
6396
- if (constprop_register (reg_used, src, insn))
6397
+ /* Find an assignment that sets reg_used and is available
6398
+ at the start of the block. */
6399
+ find_avail_set (regno, insn, set);
6401
+ src_reg = set[0]->src;
6403
+ src_cst = set[1]->src;
6405
+ /* Constant propagation. */
6406
+ if (src_cst && cprop_constant_p (src_cst)
6407
+ && constprop_register (reg_used, src_cst, insn))
6409
changed_this_round = changed = 1;
6410
global_const_prop_count++;
6411
@@ -1093,18 +1098,16 @@ retry:
6412
"GLOBAL CONST-PROP: Replacing reg %d in ", regno);
6413
fprintf (dump_file, "insn %d with constant ",
6415
- print_rtl (dump_file, src);
6416
+ print_rtl (dump_file, src_cst);
6417
fprintf (dump_file, "\n");
6419
if (insn->deleted ())
6423
- else if (REG_P (src)
6424
- && REGNO (src) >= FIRST_PSEUDO_REGISTER
6425
- && REGNO (src) != regno)
6427
- if (try_replace_reg (reg_used, src, insn))
6428
+ /* Copy propagation. */
6429
+ else if (src_reg && cprop_reg_p (src_reg)
6430
+ && REGNO (src_reg) != regno
6431
+ && try_replace_reg (reg_used, src_reg, insn))
6433
changed_this_round = changed = 1;
6434
global_copy_prop_count++;
6435
@@ -1113,7 +1116,7 @@ retry:
6437
"GLOBAL COPY-PROP: Replacing reg %d in insn %d",
6438
regno, INSN_UID (insn));
6439
- fprintf (dump_file, " with reg %d\n", REGNO (src));
6440
+ fprintf (dump_file, " with reg %d\n", REGNO (src_reg));
6443
/* The original insn setting reg_used may or may not now be
6444
@@ -1123,12 +1126,10 @@ retry:
6445
and made things worse. */
6449
- /* If try_replace_reg simplified the insn, the regs found
6450
- by find_used_regs may not be valid anymore. Start over. */
6451
- if (changed_this_round)
6454
+ /* If try_replace_reg simplified the insn, the regs found by find_used_regs
6455
+ may not be valid anymore. Start over. */
6456
+ while (changed_this_round);
6458
if (changed && DEBUG_INSN_P (insn))
6460
@@ -1191,7 +1192,7 @@ do_local_cprop (rtx x, rtx_insn *insn)
6461
/* Rule out USE instructions and ASM statements as we don't want to
6462
change the hard registers mentioned. */
6464
- && (REGNO (x) >= FIRST_PSEUDO_REGISTER
6465
+ && (cprop_reg_p (x)
6466
|| (GET_CODE (PATTERN (insn)) != USE
6467
&& asm_noperands (PATTERN (insn)) < 0)))
6469
@@ -1207,7 +1208,7 @@ do_local_cprop (rtx x, rtx_insn *insn)
6471
if (cprop_constant_p (this_rtx))
6473
- if (REG_P (this_rtx) && REGNO (this_rtx) >= FIRST_PSEUDO_REGISTER
6474
+ if (cprop_reg_p (this_rtx)
6475
/* Don't copy propagate if it has attached REG_EQUIV note.
6476
At this point this only function parameters should have
6477
REG_EQUIV notes and if the argument slot is used somewhere
6478
@@ -1328,9 +1329,8 @@ implicit_set_cond_p (const_rtx cond)
6479
if (GET_CODE (cond) != EQ && GET_CODE (cond) != NE)
6482
- /* The first operand of COND must be a pseudo-reg. */
6483
- if (! REG_P (XEXP (cond, 0))
6484
- || HARD_REGISTER_P (XEXP (cond, 0)))
6485
+ /* The first operand of COND must be a register we can propagate. */
6486
+ if (!cprop_reg_p (XEXP (cond, 0)))
6489
/* The second operand of COND must be a suitable constant. */
6490
--- a/src/gcc/df-core.c
6491
+++ b/src/gcc/df-core.c
6492
@@ -642,7 +642,6 @@ void
6493
df_finish_pass (bool verify ATTRIBUTE_UNUSED)
6498
#ifdef ENABLE_DF_CHECKING
6500
@@ -658,21 +657,15 @@ df_finish_pass (bool verify ATTRIBUTE_UNUSED)
6501
saved_flags = df->changeable_flags;
6504
- for (i = 0; i < df->num_problems_defined; i++)
6505
+ /* We iterate over problems by index as each problem removed will
6506
+ lead to problems_in_order to be reordered. */
6507
+ for (i = 0; i < DF_LAST_PROBLEM_PLUS1; i++)
6509
- struct dataflow *dflow = df->problems_in_order[i];
6510
- struct df_problem *problem = dflow->problem;
6511
+ struct dataflow *dflow = df->problems_by_index[i];
6513
- if (dflow->optional_p)
6515
- gcc_assert (problem->remove_problem_fun);
6516
- (problem->remove_problem_fun) ();
6517
- df->problems_in_order[i] = NULL;
6518
- df->problems_by_index[problem->id] = NULL;
6521
+ if (dflow && dflow->optional_p)
6522
+ df_remove_problem (dflow);
6524
- df->num_problems_defined -= removed;
6526
/* Clear all of the flags. */
6527
df->changeable_flags = 0;
6528
--- a/src/gcc/fortran/Make-lang.in
6529
+++ b/src/gcc/fortran/Make-lang.in
6530
@@ -167,7 +167,7 @@ check-f95-subtargets : check-gfortran-subtargets
6531
check-fortran-subtargets : check-gfortran-subtargets
6532
lang_checks += check-gfortran
6533
lang_checks_parallelized += check-gfortran
6534
-# For description see comment above check_gcc_parallelize in gcc/Makefile.in.
6535
+# For description see the check_$lang_parallelize comment in gcc/Makefile.in.
6536
check_gfortran_parallelize = 10000
6538
# GFORTRAN documentation.
6539
@@ -275,7 +275,7 @@ fortran.uninstall:
6540
# We just have to delete files specific to us.
6542
fortran.mostlyclean:
6543
- -rm -f f951$(exeext)
6544
+ -rm -f gfortran$(exeext) gfortran-cross$(exeext) f951$(exeext)
6548
--- a/src/gcc/genpreds.c
6549
+++ b/src/gcc/genpreds.c
6550
@@ -640,12 +640,14 @@ struct constraint_data
6551
const char *regclass; /* for register constraints */
6552
rtx exp; /* for other constraints */
6553
unsigned int lineno; /* line of definition */
6554
- unsigned int is_register : 1;
6555
- unsigned int is_const_int : 1;
6556
- unsigned int is_const_dbl : 1;
6557
- unsigned int is_extra : 1;
6558
- unsigned int is_memory : 1;
6559
- unsigned int is_address : 1;
6560
+ unsigned int is_register : 1;
6561
+ unsigned int is_const_int : 1;
6562
+ unsigned int is_const_dbl : 1;
6563
+ unsigned int is_extra : 1;
6564
+ unsigned int is_memory : 1;
6565
+ unsigned int is_address : 1;
6566
+ unsigned int maybe_allows_reg : 1;
6567
+ unsigned int maybe_allows_mem : 1;
6570
/* Overview of all constraints beginning with a given letter. */
6571
@@ -691,6 +693,9 @@ static unsigned int satisfied_start;
6572
static unsigned int const_int_start, const_int_end;
6573
static unsigned int memory_start, memory_end;
6574
static unsigned int address_start, address_end;
6575
+static unsigned int maybe_allows_none_start, maybe_allows_none_end;
6576
+static unsigned int maybe_allows_reg_start, maybe_allows_reg_end;
6577
+static unsigned int maybe_allows_mem_start, maybe_allows_mem_end;
6579
/* Convert NAME, which contains angle brackets and/or underscores, to
6580
a string that can be used as part of a C identifier. The string
6581
@@ -711,6 +716,34 @@ mangle (const char *name)
6582
return XOBFINISH (rtl_obstack, const char *);
6585
+/* Return a bitmask, bit 1 if EXP maybe allows a REG/SUBREG, 2 if EXP
6586
+ maybe allows a MEM. Bits should be clear only when we are sure it
6587
+ will not allow a REG/SUBREG or a MEM. */
6589
+compute_maybe_allows (rtx exp)
6591
+ switch (GET_CODE (exp))
6593
+ case IF_THEN_ELSE:
6594
+ /* Conservative answer is like IOR, of the THEN and ELSE branches. */
6595
+ return compute_maybe_allows (XEXP (exp, 1))
6596
+ | compute_maybe_allows (XEXP (exp, 2));
6598
+ return compute_maybe_allows (XEXP (exp, 0))
6599
+ & compute_maybe_allows (XEXP (exp, 1));
6601
+ return compute_maybe_allows (XEXP (exp, 0))
6602
+ | compute_maybe_allows (XEXP (exp, 1));
6604
+ if (*XSTR (exp, 1) == '\0')
6605
+ return (strstr (XSTR (exp, 0), "reg") != NULL ? 1 : 0)
6606
+ | (strstr (XSTR (exp, 0), "mem") != NULL ? 2 : 0);
6613
/* Add one constraint, of any sort, to the tables. NAME is its name;
6614
REGCLASS is the register class, if any; EXP is the expression to
6615
test, if any; IS_MEMORY and IS_ADDRESS indicate memory and address
6616
@@ -866,6 +899,11 @@ add_constraint (const char *name, const char *regclass,
6617
c->is_extra = !(regclass || is_const_int || is_const_dbl);
6618
c->is_memory = is_memory;
6619
c->is_address = is_address;
6620
+ int maybe_allows = 3;
6622
+ maybe_allows = compute_maybe_allows (exp);
6623
+ c->maybe_allows_reg = (maybe_allows & 1) != 0;
6624
+ c->maybe_allows_mem = (maybe_allows & 2) != 0;
6626
c->next_this_letter = *slot;
6628
@@ -940,8 +978,30 @@ choose_enum_order (void)
6629
enum_order[next++] = c;
6632
+ maybe_allows_none_start = next;
6633
+ FOR_ALL_CONSTRAINTS (c)
6634
+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address
6635
+ && !c->maybe_allows_reg && !c->maybe_allows_mem)
6636
+ enum_order[next++] = c;
6637
+ maybe_allows_none_end = next;
6639
+ maybe_allows_reg_start = next;
6640
+ FOR_ALL_CONSTRAINTS (c)
6641
+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address
6642
+ && c->maybe_allows_reg && !c->maybe_allows_mem)
6643
+ enum_order[next++] = c;
6644
+ maybe_allows_reg_end = next;
6646
+ maybe_allows_mem_start = next;
6647
+ FOR_ALL_CONSTRAINTS (c)
6648
+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address
6649
+ && !c->maybe_allows_reg && c->maybe_allows_mem)
6650
+ enum_order[next++] = c;
6651
+ maybe_allows_mem_end = next;
6653
FOR_ALL_CONSTRAINTS (c)
6654
- if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address)
6655
+ if (!c->is_register && !c->is_const_int && !c->is_memory && !c->is_address
6656
+ && c->maybe_allows_reg && c->maybe_allows_mem)
6657
enum_order[next++] = c;
6658
gcc_assert (next == num_constraints);
6660
@@ -1229,6 +1289,41 @@ write_range_function (const char *name, unsigned int start, unsigned int end)
6664
+/* Write a definition for insn_extra_constraint_allows_reg_mem function. */
6666
+write_allows_reg_mem_function (void)
6668
+ printf ("static inline void\n"
6669
+ "insn_extra_constraint_allows_reg_mem (enum constraint_num c,\n"
6670
+ "\t\t\t\t bool *allows_reg, bool *allows_mem)\n"
6672
+ if (maybe_allows_none_start != maybe_allows_none_end)
6673
+ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n"
6675
+ enum_order[maybe_allows_none_start]->c_name,
6676
+ enum_order[maybe_allows_none_end - 1]->c_name);
6677
+ if (maybe_allows_reg_start != maybe_allows_reg_end)
6678
+ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n"
6680
+ " *allows_reg = true;\n"
6683
+ enum_order[maybe_allows_reg_start]->c_name,
6684
+ enum_order[maybe_allows_reg_end - 1]->c_name);
6685
+ if (maybe_allows_mem_start != maybe_allows_mem_end)
6686
+ printf (" if (c >= CONSTRAINT_%s && c <= CONSTRAINT_%s)\n"
6688
+ " *allows_mem = true;\n"
6691
+ enum_order[maybe_allows_mem_start]->c_name,
6692
+ enum_order[maybe_allows_mem_end - 1]->c_name);
6693
+ printf (" (void) c;\n"
6694
+ " *allows_reg = true;\n"
6695
+ " *allows_mem = true;\n"
6699
/* VEC is a list of key/value pairs, with the keys being lower bounds
6700
of a range. Output a decision tree that handles the keys covered by
6701
[VEC[START], VEC[END]), returning FALLBACK for keys lower then VEC[START]'s.
6702
@@ -1326,6 +1421,7 @@ write_tm_preds_h (void)
6703
memory_start, memory_end);
6704
write_range_function ("insn_extra_address_constraint",
6705
address_start, address_end);
6706
+ write_allows_reg_mem_function ();
6708
if (constraint_max_namelen > 1)
6710
--- a/src/gcc/go/Make-lang.in
6711
+++ b/src/gcc/go/Make-lang.in
6712
@@ -197,6 +197,7 @@ go.uninstall:
6714
-rm -f go/*$(objext)
6715
-rm -f go/*$(coverageexts)
6716
+ -rm -f gccgo$(exeext) gccgo-cross$(exeext) go1$(exeext)
6719
go.maintainer-clean:
6720
--- a/src/gcc/ira-costs.c
6721
+++ b/src/gcc/ira-costs.c
6722
@@ -1380,8 +1380,6 @@ record_operand_costs (rtx_insn *insn, enum reg_class *pref)
6723
rtx dest = SET_DEST (set);
6724
rtx src = SET_SRC (set);
6726
- dest = SET_DEST (set);
6727
- src = SET_SRC (set);
6728
if (GET_CODE (dest) == SUBREG
6729
&& (GET_MODE_SIZE (GET_MODE (dest))
6730
== GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest)))))
6731
--- a/src/gcc/jit/Make-lang.in
6732
+++ b/src/gcc/jit/Make-lang.in
6733
@@ -285,6 +285,10 @@ jit.uninstall:
6734
# We just have to delete files specific to us.
6737
+ -rm -f $(LIBGCCJIT_FILENAME) $(LIBGCCJIT_SYMLINK)
6738
+ -rm -f $(LIBGCCJIT_LINKER_NAME_SYMLINK) $(FULL_DRIVER_NAME)
6739
+ -rm -f $(LIBGCCJIT_SONAME)
6740
+ -rm -f $(jit_OBJS)
6744
--- a/src/gcc/loop-invariant.c
6745
+++ b/src/gcc/loop-invariant.c
6746
@@ -740,8 +740,11 @@ create_new_invariant (struct def *def, rtx_insn *insn, bitmap depends_on,
6747
enough to not regress 410.bwaves either (by still moving reg+reg
6749
See http://gcc.gnu.org/ml/gcc-patches/2009-10/msg01210.html . */
6750
- inv->cheap_address = address_cost (SET_SRC (set), word_mode,
6751
- ADDR_SPACE_GENERIC, speed) < 3;
6752
+ if (SCALAR_INT_MODE_P (GET_MODE (SET_DEST (set))))
6753
+ inv->cheap_address = address_cost (SET_SRC (set), word_mode,
6754
+ ADDR_SPACE_GENERIC, speed) < 3;
6756
+ inv->cheap_address = false;
6760
@@ -1174,6 +1177,7 @@ get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed,
6763
if (!inv->cheap_address
6764
+ || inv->def->n_uses == 0
6765
|| inv->def->n_addr_uses < inv->def->n_uses)
6766
(*comp_cost) += inv->cost * inv->eqno;
6768
@@ -1512,6 +1516,79 @@ replace_uses (struct invariant *inv, rtx reg, bool in_group)
6772
+/* Whether invariant INV setting REG can be moved out of LOOP, at the end of
6773
+ the block preceding its header. */
6776
+can_move_invariant_reg (struct loop *loop, struct invariant *inv, rtx reg)
6779
+ unsigned int dest_regno, defs_in_loop_count = 0;
6780
+ rtx_insn *insn = inv->insn;
6781
+ basic_block bb = BLOCK_FOR_INSN (inv->insn);
6783
+ /* We ignore hard register and memory access for cost and complexity reasons.
6784
+ Hard register are few at this stage and expensive to consider as they
6785
+ require building a separate data flow. Memory access would require using
6786
+ df_simulate_* and can_move_insns_across functions and is more complex. */
6787
+ if (!REG_P (reg) || HARD_REGISTER_P (reg))
6790
+ /* Check whether the set is always executed. We could omit this condition if
6791
+ we know that the register is unused outside of the loop, but it does not
6792
+ seem worth finding out. */
6793
+ if (!inv->always_executed)
6796
+ /* Check that all uses that would be dominated by def are already dominated
6798
+ dest_regno = REGNO (reg);
6799
+ for (use = DF_REG_USE_CHAIN (dest_regno); use; use = DF_REF_NEXT_REG (use))
6801
+ rtx_insn *use_insn;
6802
+ basic_block use_bb;
6804
+ use_insn = DF_REF_INSN (use);
6805
+ use_bb = BLOCK_FOR_INSN (use_insn);
6807
+ /* Ignore instruction considered for moving. */
6808
+ if (use_insn == insn)
6811
+ /* Don't consider uses outside loop. */
6812
+ if (!flow_bb_inside_loop_p (loop, use_bb))
6815
+ /* Don't move if a use is not dominated by def in insn. */
6816
+ if (use_bb == bb && DF_INSN_LUID (insn) >= DF_INSN_LUID (use_insn))
6818
+ if (!dominated_by_p (CDI_DOMINATORS, use_bb, bb))
6822
+ /* Check for other defs. Any other def in the loop might reach a use
6823
+ currently reached by the def in insn. */
6824
+ for (def = DF_REG_DEF_CHAIN (dest_regno); def; def = DF_REF_NEXT_REG (def))
6826
+ basic_block def_bb = DF_REF_BB (def);
6828
+ /* Defs in exit block cannot reach a use they weren't already. */
6829
+ if (single_succ_p (def_bb))
6831
+ basic_block def_bb_succ;
6833
+ def_bb_succ = single_succ (def_bb);
6834
+ if (!flow_bb_inside_loop_p (loop, def_bb_succ))
6838
+ if (++defs_in_loop_count > 1)
6845
/* Move invariant INVNO out of the LOOP. Returns true if this succeeds, false
6848
@@ -1545,11 +1622,8 @@ move_invariant_reg (struct loop *loop, unsigned invno)
6852
- /* Move the set out of the loop. If the set is always executed (we could
6853
- omit this condition if we know that the register is unused outside of
6854
- the loop, but it does not seem worth finding out) and it has no uses
6855
- that would not be dominated by it, we may just move it (TODO).
6856
- Otherwise we need to create a temporary register. */
6857
+ /* If possible, just move the set out of the loop. Otherwise, we
6858
+ need to create a temporary register. */
6859
set = single_set (inv->insn);
6860
reg = dest = SET_DEST (set);
6861
if (GET_CODE (reg) == SUBREG)
6862
@@ -1557,19 +1631,25 @@ move_invariant_reg (struct loop *loop, unsigned invno)
6864
regno = REGNO (reg);
6866
- reg = gen_reg_rtx_and_attrs (dest);
6867
+ if (!can_move_invariant_reg (loop, inv, reg))
6869
+ reg = gen_reg_rtx_and_attrs (dest);
6871
- /* Try replacing the destination by a new pseudoregister. */
6872
- validate_change (inv->insn, &SET_DEST (set), reg, true);
6873
+ /* Try replacing the destination by a new pseudoregister. */
6874
+ validate_change (inv->insn, &SET_DEST (set), reg, true);
6876
- /* As well as all the dominated uses. */
6877
- replace_uses (inv, reg, true);
6878
+ /* As well as all the dominated uses. */
6879
+ replace_uses (inv, reg, true);
6881
- /* And validate all the changes. */
6882
- if (!apply_change_group ())
6884
+ /* And validate all the changes. */
6885
+ if (!apply_change_group ())
6888
- emit_insn_after (gen_move_insn (dest, reg), inv->insn);
6889
+ emit_insn_after (gen_move_insn (dest, reg), inv->insn);
6891
+ else if (dump_file)
6892
+ fprintf (dump_file, "Invariant %d moved without introducing a new "
6893
+ "temporary register\n", invno);
6894
reorder_insns (inv->insn, inv->insn, BB_END (preheader));
6896
/* If there is a REG_EQUAL note on the insn we just moved, and the
6897
--- a/src/gcc/lra-constraints.c
6898
+++ b/src/gcc/lra-constraints.c
6899
@@ -1656,8 +1656,7 @@ prohibited_class_reg_set_mode_p (enum reg_class rclass,
6903
- // ??? Is this assert right
6904
- // lra_assert (hard_reg_set_subset_p (set, reg_class_contents[rclass]));
6905
+ lra_assert (hard_reg_set_subset_p (reg_class_contents[rclass], set));
6906
COPY_HARD_REG_SET (temp, set);
6907
AND_COMPL_HARD_REG_SET (temp, lra_no_alloc_regs);
6908
return (hard_reg_set_subset_p
6909
--- a/src/gcc/objc/Make-lang.in
6910
+++ b/src/gcc/objc/Make-lang.in
6911
@@ -114,6 +114,7 @@ objc.uninstall:
6913
-rm -f objc/*$(objext) objc/xforward objc/fflags
6914
-rm -f objc/*$(coverageexts)
6915
+ -rm -f cc1obj$(exeext)
6916
objc.clean: objc.mostlyclean
6917
-rm -rf objc-headers
6919
--- a/src/gcc/objcp/Make-lang.in
6920
+++ b/src/gcc/objcp/Make-lang.in
6921
@@ -142,6 +142,7 @@ obj-c++.uninstall:
6922
obj-c++.mostlyclean:
6923
-rm -f objcp/*$(objext)
6924
-rm -f objcp/*$(coverageexts)
6925
+ -rm -f cc1objplus$(exeext)
6926
obj-c++.clean: obj-c++.mostlyclean
6928
-rm -f objcp/config.status objcp/Makefile
6929
--- a/src/gcc/optabs.c
6930
+++ b/src/gcc/optabs.c
6931
@@ -6544,18 +6544,28 @@ vector_compare_rtx (enum tree_code tcode, tree t_op0, tree t_op1,
6933
struct expand_operand ops[2];
6934
rtx rtx_op0, rtx_op1;
6935
+ machine_mode m0, m1;
6936
enum rtx_code rcode = get_rtx_code (tcode, unsignedp);
6938
gcc_assert (TREE_CODE_CLASS (tcode) == tcc_comparison);
6940
- /* Expand operands. */
6941
+ /* Expand operands. For vector types with scalar modes, e.g. where int64x1_t
6942
+ has mode DImode, this can produce a constant RTX of mode VOIDmode; in such
6943
+ cases, use the original mode. */
6944
rtx_op0 = expand_expr (t_op0, NULL_RTX, TYPE_MODE (TREE_TYPE (t_op0)),
6946
+ m0 = GET_MODE (rtx_op0);
6947
+ if (m0 == VOIDmode)
6948
+ m0 = TYPE_MODE (TREE_TYPE (t_op0));
6950
rtx_op1 = expand_expr (t_op1, NULL_RTX, TYPE_MODE (TREE_TYPE (t_op1)),
6952
+ m1 = GET_MODE (rtx_op1);
6953
+ if (m1 == VOIDmode)
6954
+ m1 = TYPE_MODE (TREE_TYPE (t_op1));
6956
- create_input_operand (&ops[0], rtx_op0, GET_MODE (rtx_op0));
6957
- create_input_operand (&ops[1], rtx_op1, GET_MODE (rtx_op1));
6958
+ create_input_operand (&ops[0], rtx_op0, m0);
6959
+ create_input_operand (&ops[1], rtx_op1, m1);
6960
if (!maybe_legitimize_operands (icode, 4, 2, ops))
6962
return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value);
6963
--- a/src/gcc/params.def
6964
+++ b/src/gcc/params.def
6965
@@ -262,6 +262,14 @@ DEFPARAM(PARAM_MAX_HOIST_DEPTH,
6966
"Maximum depth of search in the dominator tree for expressions to hoist",
6970
+/* When synthesizing expnonentiation by a real constant operations using square
6971
+ roots, this controls how deep sqrt chains we are willing to generate. */
6972
+DEFPARAM(PARAM_MAX_POW_SQRT_DEPTH,
6973
+ "max-pow-sqrt-depth",
6974
+ "Maximum depth of sqrt chains to use when synthesizing exponentiation by a real constant",
6977
/* This parameter limits the number of insns in a loop that will be unrolled,
6978
and by how much the loop is unrolled.
6980
--- a/src/gcc/rtlanal.c
6981
+++ b/src/gcc/rtlanal.c
6982
@@ -104,7 +104,10 @@ generic_subrtx_iterator <T>::add_single_to_queue (array_type &array,
6985
gcc_checking_assert (i == LOCAL_ELEMS);
6986
- vec_safe_grow (array.heap, i + 1);
6987
+ /* A previous iteration might also have moved from the stack to the
6988
+ heap, in which case the heap array will already be big enough. */
6989
+ if (vec_safe_length (array.heap) <= i)
6990
+ vec_safe_grow (array.heap, i + 1);
6991
base = array.heap->address ();
6992
memcpy (base, array.stack, sizeof (array.stack));
6993
base[LOCAL_ELEMS] = x;
6994
--- a/src/gcc/simplify-rtx.c
6995
+++ b/src/gcc/simplify-rtx.c
6996
@@ -1171,7 +1171,7 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op)
6997
= (float_truncate:SF foo:DF).
6999
(float_truncate:DF (float_extend:XF foo:SF))
7000
- = (float_extend:SF foo:DF). */
7001
+ = (float_extend:DF foo:SF). */
7002
if ((GET_CODE (op) == FLOAT_TRUNCATE
7003
&& flag_unsafe_math_optimizations)
7004
|| GET_CODE (op) == FLOAT_EXTEND)
7005
@@ -1183,14 +1183,14 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op)
7006
XEXP (op, 0), mode);
7008
/* (float_truncate (float x)) is (float x) */
7009
- if (GET_CODE (op) == FLOAT
7010
+ if ((GET_CODE (op) == FLOAT || GET_CODE (op) == UNSIGNED_FLOAT)
7011
&& (flag_unsafe_math_optimizations
7012
|| (SCALAR_FLOAT_MODE_P (GET_MODE (op))
7013
&& ((unsigned)significand_size (GET_MODE (op))
7014
>= (GET_MODE_PRECISION (GET_MODE (XEXP (op, 0)))
7015
- num_sign_bit_copies (XEXP (op, 0),
7016
GET_MODE (XEXP (op, 0))))))))
7017
- return simplify_gen_unary (FLOAT, mode,
7018
+ return simplify_gen_unary (GET_CODE (op), mode,
7020
GET_MODE (XEXP (op, 0)));
7022
@@ -1221,7 +1221,7 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op)
7023
rounding can't happen.
7025
if (GET_CODE (op) == FLOAT_EXTEND
7026
- || (GET_CODE (op) == FLOAT
7027
+ || ((GET_CODE (op) == FLOAT || GET_CODE (op) == UNSIGNED_FLOAT)
7028
&& SCALAR_FLOAT_MODE_P (GET_MODE (op))
7029
&& ((unsigned)significand_size (GET_MODE (op))
7030
>= (GET_MODE_PRECISION (GET_MODE (XEXP (op, 0)))
7031
--- a/src/gcc/stmt.c
7032
+++ b/src/gcc/stmt.c
7033
@@ -342,13 +342,7 @@ parse_output_constraint (const char **constraint_p, int operand_num,
7034
else if (insn_extra_memory_constraint (cn))
7038
- /* Otherwise we can't assume anything about the nature of
7039
- the constraint except that it isn't purely registers.
7040
- Treat it like "g" and hope for the best. */
7041
- *allows_reg = true;
7042
- *allows_mem = true;
7044
+ insn_extra_constraint_allows_reg_mem (cn, allows_reg, allows_mem);
7048
@@ -465,13 +459,7 @@ parse_input_constraint (const char **constraint_p, int input_num,
7049
else if (insn_extra_memory_constraint (cn))
7053
- /* Otherwise we can't assume anything about the nature of
7054
- the constraint except that it isn't purely registers.
7055
- Treat it like "g" and hope for the best. */
7056
- *allows_reg = true;
7057
- *allows_mem = true;
7059
+ insn_extra_constraint_allows_reg_mem (cn, allows_reg, allows_mem);
7063
--- a/src/gcc/target.def
7064
+++ b/src/gcc/target.def
7065
@@ -1975,7 +1975,7 @@ merging.",
7068
"If defined, this target hook points to an array of @samp{struct\n\
7069
-attribute_spec} (defined in @file{tree.h}) specifying the machine\n\
7070
+attribute_spec} (defined in @file{tree-core.h}) specifying the machine\n\
7071
specific attributes for this target and some of the restrictions on the\n\
7072
entities to which these attributes are applied and the arguments they\n\
7075
+++ b/src/gcc/testsuite/gcc.c-torture/execute/pr65648.c
7077
+/* PR target/65648 */
7079
+int a = 0, *b = 0, c = 0;
7082
+static long long f = 0;
7084
+unsigned char j = 0;
7086
+__attribute__((noinline, noclone)) void
7087
+foo (int x, int *y)
7089
+ asm volatile ("" : : "r" (x), "r" (y) : "memory");
7092
+__attribute__((noinline, noclone)) void
7093
+bar (const char *x, long long y)
7095
+ asm volatile ("" : : "r" (x), "r" (&y) : "memory");
7097
+ __builtin_abort ();
7105
+ j = (!a) - (c <= e);
7112
+++ b/src/gcc/testsuite/gcc.dg/loop-8.c
7114
+/* { dg-do compile } */
7115
+/* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */
7122
+ for (i = 0; i < 100; i++)
7133
+/* Load of 42 is moved out of the loop, introducing a new pseudo register. */
7134
+/* { dg-final { scan-rtl-dump-times "Decided" 1 "loop2_invariant" } } */
7135
+/* { dg-final { scan-rtl-dump-not "without introducing a new temporary register" "loop2_invariant" } } */
7136
+/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */
7139
+++ b/src/gcc/testsuite/gcc.dg/loop-9.c
7141
+/* { dg-do compile } */
7142
+/* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */
7148
+ for (i = 0; i < 100; i++)
7152
+/* Load of x is moved out of the loop. */
7153
+/* { dg-final { scan-rtl-dump "Decided" "loop2_invariant" } } */
7154
+/* { dg-final { scan-rtl-dump "without introducing a new temporary register" "loop2_invariant" } } */
7155
+/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */
7158
+++ b/src/gcc/testsuite/gcc.dg/loop-invariant.c
7160
+/* { dg-do compile { target x86_64-*-* } } */
7161
+/* { dg-options "-O2 -fdump-rtl-loop2_invariant" } */
7162
+/* NOTE: The target list above could be extended to other targets that have
7163
+ conditional moves, but don't have zero registers. */
7173
+ enum test_type type;
7178
+ struct type_node *referring;
7183
+ struct test_node *next;
7186
+int iterate (struct test_node *, unsigned, struct test_ref **);
7189
+loop_invar (struct test_node *node)
7191
+ struct test_ref *ref;
7193
+ for (unsigned i = 0; iterate (node, i, &ref); i++)
7194
+ if (loop_invar ((ref->referring && ref->referring->type == TYPE0)
7195
+ ? ((struct test_node *) (ref->referring)) : 0))
7201
+/* { dg-final { scan-rtl-dump "Decided to move invariant" "loop2_invariant" } } */
7202
+/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */
7204
+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-1.c
7206
+/* { dg-do run } */
7207
+/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=5" } */
7209
+#define EXPN (-6 * (0.5*0.5*0.5*0.5))
7211
+#include "pow-sqrt.x"
7213
+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-2.c
7215
+/* { dg-do run } */
7216
+/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=5" } */
7218
+#define EXPN (-5.875)
7219
+#include "pow-sqrt.x"
7221
+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt-3.c
7223
+/* { dg-do run } */
7224
+/* { dg-options "-O2 -ffast-math --param max-pow-sqrt-depth=3" } */
7226
+#define EXPN (1.25)
7227
+#include "pow-sqrt.x"
7229
+++ b/src/gcc/testsuite/gcc.dg/pow-sqrt.x
7232
+extern void abort (void);
7235
+__attribute__((noinline)) double
7236
+real_pow (double x, double pow_exp)
7238
+ return __builtin_pow (x, pow_exp);
7241
+#define EPS (0.000000000000000000001)
7243
+#define SYNTH_POW(X, Y) __builtin_pow (X, Y)
7244
+volatile double arg;
7249
+ double i_arg = 0.1;
7251
+ for (arg = i_arg; arg < 100.0; arg += 1.0)
7253
+ double synth_res = SYNTH_POW (arg, EXPN);
7254
+ double real_res = real_pow (arg, EXPN);
7256
+ if (__builtin_abs (SYNTH_POW (arg, EXPN) - real_pow (arg, EXPN)) > EPS)
7262
+++ b/src/gcc/testsuite/gcc.dg/torture/pr66076.c
7264
+/* { dg-do compile } */
7265
+/* { dg-options "" } */
7266
+/* { dg-options "-mno-prefer-avx128 -march=bdver4" { target i?86-*-* x86_64-*-* } } */
7269
+f0a (char *result, char *arg1, char *arg4, char temp_6)
7272
+ for (idx = 0; idx < 416; idx += 1)
7273
+ result[idx] = (arg1[idx] + arg4[idx]) * temp_6;
7276
+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c
7278
+/* { dg-do compile } */
7279
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
7281
+void foo (double *p)
7284
+ for (i = -20000; i < 200000; i+= 40)
7329
+/* We should groups address type IV uses. */
7330
+/* { dg-final { scan-tree-dump-not "\\nuse 2\\n" "ivopts" } } */
7331
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
7332
--- a/src/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c
7333
+++ b/src/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c
7336
vf2_t vf2 = (vf2_t){ 17.f, 18.f };
7337
vi4_t vi4 = (vi4_t){ 0xdeadbabe, 0xbabecafe, 0xcafebeef, 0xbeefdead };
7338
+vlf1_t vlf1 = (vlf1_t) { 17.0 };
7340
union int128_t qword;
7342
int *int_ptr = (int *)0xabcdef0123456789ULL;
7343
@@ -41,4 +43,5 @@ FUNC_VAL_CHECK (11, long double, 98765432123456789.987654321L, Q0, flat)
7344
FUNC_VAL_CHECK (12, vf2_t, vf2, D0, f32in64)
7345
FUNC_VAL_CHECK (13, vi4_t, vi4, Q0, i32in128)
7346
FUNC_VAL_CHECK (14, int *, int_ptr, X0, flat)
7347
+FUNC_VAL_CHECK (15, vlf1_t, vlf1, Q0, flat)
7349
--- a/src/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h
7350
+++ b/src/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h
7351
@@ -10,6 +10,9 @@ typedef float vf4_t __attribute__((vector_size (16)));
7352
/* 128-bit vector of 4 ints. */
7353
typedef int vi4_t __attribute__((vector_size (16)));
7355
+/* 128-bit vector of 1 quad precision float. */
7356
+typedef long double vlf1_t __attribute__((vector_size (16)));
7358
/* signed quad-word (in an union for the convenience of initialization). */
7361
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
7362
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
7363
@@ -27,14 +27,26 @@ load_lib gcc-dg.exp
7366
load_lib c-torture.exp
7367
-load_lib target-supports.exp
7368
-load_lib torture-options.exp
7372
-if {[istarget arm*-*-*]
7373
- && ![check_effective_target_arm_neon_ok]} then {
7375
+# The default action for a test is 'run'. Save current default.
7376
+global dg-do-what-default
7377
+set save-dg-do-what-default ${dg-do-what-default}
7379
+# For ARM, make sure that we have a target compatible with NEON, and do
7380
+# not attempt to run execution tests if the hardware doesn't support it.
7381
+if {[istarget arm*-*-*]} then {
7382
+ if {![check_effective_target_arm_neon_ok]} then {
7385
+ if {![is-effective-target arm_neon_hw]} then {
7386
+ set dg-do-what-default compile
7388
+ set dg-do-what-default run
7391
+ set dg-do-what-default run
7395
@@ -44,22 +56,10 @@ set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS
7396
set additional_flags [add_options_for_arm_neon ""]
7399
-foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
7400
- # If we're only testing specific files and this isn't one of them, skip it.
7401
- if ![runtest_file_p $runtests $src] then {
7405
- # runtest_file_p is already run above, and the code below can run
7406
- # runtest_file_p again, make sure everything for this test is
7407
- # performed if the above runtest_file_p decided this runtest
7408
- # instance should execute the test
7409
- gcc_parallel_test_enable 0
7410
- c-torture-execute $src $additional_flags
7411
- gcc-dg-runtest $src "" $additional_flags
7412
- gcc_parallel_test_enable 1
7414
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
7415
+ "" ${additional_flags}
7418
+set dg-do-what-default ${save-dg-do-what-default}
7422
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqmovn.c
7424
+#include <arm_neon.h>
7425
+#include "arm-neon-ref.h"
7426
+#include "compute-ref-data.h"
7428
+/* Expected values of cumulative_saturation flag. */
7429
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
7430
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
7431
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
7432
+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0;
7433
+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0;
7434
+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0;
7436
+/* Expected results. */
7437
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x12, 0x12, 0x12, 0x12,
7438
+ 0x12, 0x12, 0x12, 0x12 };
7439
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x1278, 0x1278, 0x1278, 0x1278 };
7440
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x12345678, 0x12345678 };
7441
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x82, 0x82, 0x82, 0x82,
7442
+ 0x82, 0x82, 0x82, 0x82 };
7443
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8765, 0x8765, 0x8765, 0x8765 };
7444
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x87654321, 0x87654321 };
7446
+/* Expected values of cumulative_saturation flag when saturation occurs. */
7447
+int VECT_VAR(expected_cumulative_sat1,int,8,8) = 1;
7448
+int VECT_VAR(expected_cumulative_sat1,int,16,4) = 1;
7449
+int VECT_VAR(expected_cumulative_sat1,int,32,2) = 1;
7450
+int VECT_VAR(expected_cumulative_sat1,uint,8,8) = 1;
7451
+int VECT_VAR(expected_cumulative_sat1,uint,16,4) = 1;
7452
+int VECT_VAR(expected_cumulative_sat1,uint,32,2) = 1;
7454
+/* Expected results when saturation occurs. */
7455
+VECT_VAR_DECL(expected1,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
7456
+ 0x7f, 0x7f, 0x7f, 0x7f };
7457
+VECT_VAR_DECL(expected1,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7458
+VECT_VAR_DECL(expected1,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
7459
+VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
7460
+ 0xff, 0xff, 0xff, 0xff };
7461
+VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
7462
+VECT_VAR_DECL(expected1,uint,32,2) [] = { 0xffffffff, 0xffffffff };
7464
+#define INSN_NAME vqmovn
7465
+#define TEST_MSG "VQMOVN"
7467
+#define FNNAME1(NAME) void exec_ ## NAME (void)
7468
+#define FNNAME(NAME) FNNAME1(NAME)
7472
+ /* Basic test: y=OP(x), then store the result. */
7473
+#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \
7474
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
7475
+ VECT_VAR(vector_res, T1, W, N) = \
7476
+ INSN##_##T2##W2(VECT_VAR(vector, T1, W2, N)); \
7477
+ vst1##_##T2##W(VECT_VAR(result, T1, W, N), \
7478
+ VECT_VAR(vector_res, T1, W, N)); \
7479
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
7481
+#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \
7482
+ TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT)
7484
+ /* No need for 64 bits variants. */
7485
+ DECL_VARIABLE(vector, int, 16, 8);
7486
+ DECL_VARIABLE(vector, int, 32, 4);
7487
+ DECL_VARIABLE(vector, int, 64, 2);
7488
+ DECL_VARIABLE(vector, uint, 16, 8);
7489
+ DECL_VARIABLE(vector, uint, 32, 4);
7490
+ DECL_VARIABLE(vector, uint, 64, 2);
7492
+ DECL_VARIABLE(vector_res, int, 8, 8);
7493
+ DECL_VARIABLE(vector_res, int, 16, 4);
7494
+ DECL_VARIABLE(vector_res, int, 32, 2);
7495
+ DECL_VARIABLE(vector_res, uint, 8, 8);
7496
+ DECL_VARIABLE(vector_res, uint, 16, 4);
7497
+ DECL_VARIABLE(vector_res, uint, 32, 2);
7501
+ /* Fill input vector with arbitrary values. */
7502
+ VDUP(vector, q, int, s, 16, 8, 0x12);
7503
+ VDUP(vector, q, int, s, 32, 4, 0x1278);
7504
+ VDUP(vector, q, int, s, 64, 2, 0x12345678);
7505
+ VDUP(vector, q, uint, u, 16, 8, 0x82);
7506
+ VDUP(vector, q, uint, u, 32, 4, 0x8765);
7507
+ VDUP(vector, q, uint, u, 64, 2, 0x87654321);
7509
+ /* Apply a unary operator named INSN_NAME. */
7511
+ TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8, expected_cumulative_sat, CMT);
7512
+ TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4, expected_cumulative_sat, CMT);
7513
+ TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2, expected_cumulative_sat, CMT);
7514
+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat, CMT);
7515
+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat, CMT);
7516
+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat, CMT);
7518
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT);
7519
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
7520
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
7521
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
7522
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
7523
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
7526
+ /* Fill input vector with arbitrary values which cause cumulative
7528
+ VDUP(vector, q, int, s, 16, 8, 0x1234);
7529
+ VDUP(vector, q, int, s, 32, 4, 0x12345678);
7530
+ VDUP(vector, q, int, s, 64, 2, 0x1234567890ABLL);
7531
+ VDUP(vector, q, uint, u, 16, 8, 0x8234);
7532
+ VDUP(vector, q, uint, u, 32, 4, 0x87654321);
7533
+ VDUP(vector, q, uint, u, 64, 2, 0x8765432187654321ULL);
7535
+ /* Apply a unary operator named INSN_NAME. */
7537
+#define CMT " (with saturation)"
7538
+ TEST_UNARY_OP(INSN_NAME, int, s, 8, 16, 8, expected_cumulative_sat1, CMT);
7539
+ TEST_UNARY_OP(INSN_NAME, int, s, 16, 32, 4, expected_cumulative_sat1, CMT);
7540
+ TEST_UNARY_OP(INSN_NAME, int, s, 32, 64, 2, expected_cumulative_sat1, CMT);
7541
+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat1, CMT);
7542
+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat1, CMT);
7543
+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat1, CMT);
7545
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected1, CMT);
7546
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected1, CMT);
7547
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected1, CMT);
7548
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected1, CMT);
7549
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected1, CMT);
7550
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected1, CMT);
7559
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqmovun.c
7561
+#include <arm_neon.h>
7562
+#include "arm-neon-ref.h"
7563
+#include "compute-ref-data.h"
7565
+/* Expected values of cumulative_saturation flag. */
7566
+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0;
7567
+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0;
7568
+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0;
7570
+/* Expected results. */
7571
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x34, 0x34, 0x34, 0x34,
7572
+ 0x34, 0x34, 0x34, 0x34 };
7573
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x5678, 0x5678, 0x5678, 0x5678 };
7574
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x12345678, 0x12345678 };
7576
+/* Expected values of cumulative_saturation flag with negative input. */
7577
+int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 1;
7578
+int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 1;
7579
+int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 1;
7581
+/* Expected results with negative input. */
7582
+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
7583
+ 0x0, 0x0, 0x0, 0x0 };
7584
+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
7585
+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 };
7587
+#define INSN_NAME vqmovun
7588
+#define TEST_MSG "VQMOVUN"
7590
+#define FNNAME1(NAME) void exec_ ## NAME (void)
7591
+#define FNNAME(NAME) FNNAME1(NAME)
7595
+ /* Basic test: y=OP(x), then store the result. */
7596
+#define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \
7597
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
7598
+ VECT_VAR(vector_res, T1, W, N) = \
7599
+ INSN##_s##W2(VECT_VAR(vector, int, W2, N)); \
7600
+ vst1##_##T2##W(VECT_VAR(result, T1, W, N), \
7601
+ VECT_VAR(vector_res, T1, W, N)); \
7602
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
7604
+#define TEST_UNARY_OP(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \
7605
+ TEST_UNARY_OP1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT)
7607
+ DECL_VARIABLE(vector, int, 16, 8);
7608
+ DECL_VARIABLE(vector, int, 32, 4);
7609
+ DECL_VARIABLE(vector, int, 64, 2);
7611
+ DECL_VARIABLE(vector_res, uint, 8, 8);
7612
+ DECL_VARIABLE(vector_res, uint, 16, 4);
7613
+ DECL_VARIABLE(vector_res, uint, 32, 2);
7617
+ /* Fill input vector with arbitrary values. */
7618
+ VDUP(vector, q, int, s, 16, 8, 0x34);
7619
+ VDUP(vector, q, int, s, 32, 4, 0x5678);
7620
+ VDUP(vector, q, int, s, 64, 2, 0x12345678);
7622
+ /* Apply a unary operator named INSN_NAME. */
7624
+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat, CMT);
7625
+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat, CMT);
7626
+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat, CMT);
7628
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
7629
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
7630
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
7632
+ /* Fill input vector with negative values. */
7633
+ VDUP(vector, q, int, s, 16, 8, 0x8234);
7634
+ VDUP(vector, q, int, s, 32, 4, 0x87654321);
7635
+ VDUP(vector, q, int, s, 64, 2, 0x8765432187654321LL);
7637
+ /* Apply a unary operator named INSN_NAME. */
7639
+#define CMT " (negative input)"
7640
+ TEST_UNARY_OP(INSN_NAME, uint, u, 8, 16, 8, expected_cumulative_sat_neg, CMT);
7641
+ TEST_UNARY_OP(INSN_NAME, uint, u, 16, 32, 4, expected_cumulative_sat_neg, CMT);
7642
+ TEST_UNARY_OP(INSN_NAME, uint, u, 32, 64, 2, expected_cumulative_sat_neg, CMT);
7644
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT);
7645
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT);
7646
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT);
7655
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh.c
7657
+#include <arm_neon.h>
7658
+#include "arm-neon-ref.h"
7659
+#include "compute-ref-data.h"
7661
+/* Expected values of cumulative_saturation flag. */
7662
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
7663
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
7664
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
7665
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
7667
+/* Expected results. */
7668
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff5, 0xfff6, 0xfff7, 0xfff7 };
7669
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 };
7670
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
7671
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
7673
+/* Expected values of cumulative_saturation flag when multiplication
7675
+int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1;
7676
+int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1;
7677
+int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1;
7678
+int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1;
7680
+/* Expected results when multiplication saturates. */
7681
+VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7682
+VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
7683
+VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
7684
+ 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7685
+VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
7686
+ 0x7fffffff, 0x7fffffff };
7688
+/* Expected values of cumulative_saturation flag when rounding
7689
+ should not cause saturation. */
7690
+int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0;
7691
+int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0;
7692
+int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0;
7693
+int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0;
7695
+/* Expected results when rounding should not cause saturation. */
7696
+VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7697
+VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
7698
+VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
7699
+ 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7700
+VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
7701
+ 0x7fffffff, 0x7fffffff };
7703
+#define INSN vqrdmulh
7704
+#define TEST_MSG "VQRDMULH"
7706
+#define FNNAME1(NAME) void exec_ ## NAME (void)
7707
+#define FNNAME(NAME) FNNAME1(NAME)
7711
+ /* vector_res = vqrdmulh(vector,vector2), then store the result. */
7712
+#define TEST_VQRDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
7713
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
7714
+ VECT_VAR(vector_res, T1, W, N) = \
7715
+ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \
7716
+ VECT_VAR(vector2, T1, W, N)); \
7717
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \
7718
+ VECT_VAR(vector_res, T1, W, N)); \
7719
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
7721
+ /* Two auxliary macros are necessary to expand INSN */
7722
+#define TEST_VQRDMULH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
7723
+ TEST_VQRDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
7725
+#define TEST_VQRDMULH(Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
7726
+ TEST_VQRDMULH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
7729
+ DECL_VARIABLE(vector, int, 16, 4);
7730
+ DECL_VARIABLE(vector, int, 32, 2);
7731
+ DECL_VARIABLE(vector, int, 16, 8);
7732
+ DECL_VARIABLE(vector, int, 32, 4);
7734
+ DECL_VARIABLE(vector_res, int, 16, 4);
7735
+ DECL_VARIABLE(vector_res, int, 32, 2);
7736
+ DECL_VARIABLE(vector_res, int, 16, 8);
7737
+ DECL_VARIABLE(vector_res, int, 32, 4);
7739
+ DECL_VARIABLE(vector2, int, 16, 4);
7740
+ DECL_VARIABLE(vector2, int, 32, 2);
7741
+ DECL_VARIABLE(vector2, int, 16, 8);
7742
+ DECL_VARIABLE(vector2, int, 32, 4);
7746
+ VLOAD(vector, buffer, , int, s, 16, 4);
7747
+ VLOAD(vector, buffer, , int, s, 32, 2);
7748
+ VLOAD(vector, buffer, q, int, s, 16, 8);
7749
+ VLOAD(vector, buffer, q, int, s, 32, 4);
7751
+ /* Initialize vector2. */
7752
+ VDUP(vector2, , int, s, 16, 4, 0x5555);
7753
+ VDUP(vector2, , int, s, 32, 2, 0xBB);
7754
+ VDUP(vector2, q, int, s, 16, 8, 0x33);
7755
+ VDUP(vector2, q, int, s, 32, 4, 0x22);
7758
+ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat, CMT);
7759
+ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat, CMT);
7760
+ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat, CMT);
7761
+ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat, CMT);
7763
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
7764
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
7765
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
7766
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
7768
+ /* Now use input values such that the multiplication causes
7770
+#define TEST_MSG_MUL " (check mul cumulative saturation)"
7771
+ VDUP(vector, , int, s, 16, 4, 0x8000);
7772
+ VDUP(vector, , int, s, 32, 2, 0x80000000);
7773
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
7774
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
7775
+ VDUP(vector2, , int, s, 16, 4, 0x8000);
7776
+ VDUP(vector2, , int, s, 32, 2, 0x80000000);
7777
+ VDUP(vector2, q, int, s, 16, 8, 0x8000);
7778
+ VDUP(vector2, q, int, s, 32, 4, 0x80000000);
7780
+ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat_mul, TEST_MSG_MUL);
7781
+ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat_mul, TEST_MSG_MUL);
7782
+ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat_mul, TEST_MSG_MUL);
7783
+ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat_mul, TEST_MSG_MUL);
7785
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL);
7786
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL);
7787
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL);
7788
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL);
7790
+ /* Use input values where rounding produces a result equal to the
7791
+ saturation value, but does not set the saturation flag. */
7792
+#define TEST_MSG_ROUND " (check rounding)"
7793
+ VDUP(vector, , int, s, 16, 4, 0x8000);
7794
+ VDUP(vector, , int, s, 32, 2, 0x80000000);
7795
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
7796
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
7797
+ VDUP(vector2, , int, s, 16, 4, 0x8001);
7798
+ VDUP(vector2, , int, s, 32, 2, 0x80000001);
7799
+ VDUP(vector2, q, int, s, 16, 8, 0x8001);
7800
+ VDUP(vector2, q, int, s, 32, 4, 0x80000001);
7802
+ TEST_VQRDMULH(, int, s, 16, 4, expected_cumulative_sat_round, TEST_MSG_ROUND);
7803
+ TEST_VQRDMULH(, int, s, 32, 2, expected_cumulative_sat_round, TEST_MSG_ROUND);
7804
+ TEST_VQRDMULH(q, int, s, 16, 8, expected_cumulative_sat_round, TEST_MSG_ROUND);
7805
+ TEST_VQRDMULH(q, int, s, 32, 4, expected_cumulative_sat_round, TEST_MSG_ROUND);
7807
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND);
7808
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND);
7809
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND);
7810
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND);
7819
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh_lane.c
7821
+#include <arm_neon.h>
7822
+#include "arm-neon-ref.h"
7823
+#include "compute-ref-data.h"
7825
+/* Expected values of cumulative_saturation flag. */
7826
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
7827
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
7828
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
7829
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
7831
+/* Expected results. */
7832
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
7833
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 };
7834
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
7835
+ 0x0, 0x0, 0x0, 0x0 };
7836
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
7838
+/* Expected values of cumulative_saturation flag when multiplication
7840
+int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1;
7841
+int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1;
7842
+int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1;
7843
+int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1;
7845
+/* Expected results when multiplication saturates. */
7846
+VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7847
+VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
7848
+VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
7849
+ 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7850
+VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
7851
+ 0x7fffffff, 0x7fffffff };
7853
+/* Expected values of cumulative_saturation flag when rounding
7854
+ should not cause saturation. */
7855
+int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0;
7856
+int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0;
7857
+int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0;
7858
+int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0;
7860
+/* Expected results when rounding should not cause saturation. */
7861
+VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7862
+VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
7863
+VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
7864
+ 0x7fff, 0x7fff, 0x7fff, 0x7fff };
7865
+VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
7866
+ 0x7fffffff, 0x7fffffff };
7868
+#define INSN vqrdmulh
7869
+#define TEST_MSG "VQRDMULH_LANE"
7871
+#define FNNAME1(NAME) void exec_ ## NAME ## _lane (void)
7872
+#define FNNAME(NAME) FNNAME1(NAME)
7876
+ /* vector_res = vqrdmulh_lane(vector,vector2,lane), then store the result. */
7877
+#define TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \
7878
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
7879
+ VECT_VAR(vector_res, T1, W, N) = \
7880
+ INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \
7881
+ VECT_VAR(vector2, T1, W, N2), \
7883
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \
7884
+ VECT_VAR(vector_res, T1, W, N)); \
7885
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
7887
+ /* Two auxliary macros are necessary to expand INSN */
7888
+#define TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \
7889
+ TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT)
7891
+#define TEST_VQRDMULH_LANE(Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \
7892
+ TEST_VQRDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT)
7895
+ DECL_VARIABLE(vector, int, 16, 4);
7896
+ DECL_VARIABLE(vector, int, 32, 2);
7897
+ DECL_VARIABLE(vector, int, 16, 8);
7898
+ DECL_VARIABLE(vector, int, 32, 4);
7900
+ DECL_VARIABLE(vector_res, int, 16, 4);
7901
+ DECL_VARIABLE(vector_res, int, 32, 2);
7902
+ DECL_VARIABLE(vector_res, int, 16, 8);
7903
+ DECL_VARIABLE(vector_res, int, 32, 4);
7905
+ /* vector2: vqrdmulh_lane and vqrdmulhq_lane have a 2nd argument with
7906
+ the same number of elements, so we need only one variable of each
7908
+ DECL_VARIABLE(vector2, int, 16, 4);
7909
+ DECL_VARIABLE(vector2, int, 32, 2);
7913
+ VLOAD(vector, buffer, , int, s, 16, 4);
7914
+ VLOAD(vector, buffer, , int, s, 32, 2);
7916
+ VLOAD(vector, buffer, q, int, s, 16, 8);
7917
+ VLOAD(vector, buffer, q, int, s, 32, 4);
7919
+ /* Initialize vector2. */
7920
+ VDUP(vector2, , int, s, 16, 4, 0x55);
7921
+ VDUP(vector2, , int, s, 32, 2, 0xBB);
7923
+ /* Choose lane arbitrarily. */
7925
+ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat, CMT);
7926
+ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat, CMT);
7927
+ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat, CMT);
7928
+ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat, CMT);
7930
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
7931
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
7932
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
7933
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
7935
+ /* Now use input values such that the multiplication causes
7937
+#define TEST_MSG_MUL " (check mul cumulative saturation)"
7938
+ VDUP(vector, , int, s, 16, 4, 0x8000);
7939
+ VDUP(vector, , int, s, 32, 2, 0x80000000);
7940
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
7941
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
7942
+ VDUP(vector2, , int, s, 16, 4, 0x8000);
7943
+ VDUP(vector2, , int, s, 32, 2, 0x80000000);
7945
+ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat_mul,
7947
+ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat_mul,
7949
+ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat_mul,
7951
+ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat_mul,
7954
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL);
7955
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL);
7956
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL);
7957
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL);
7959
+ VDUP(vector, , int, s, 16, 4, 0x8000);
7960
+ VDUP(vector, , int, s, 32, 2, 0x80000000);
7961
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
7962
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
7963
+ VDUP(vector2, , int, s, 16, 4, 0x8001);
7964
+ VDUP(vector2, , int, s, 32, 2, 0x80000001);
7966
+ /* Use input values where rounding produces a result equal to the
7967
+ saturation value, but does not set the saturation flag. */
7968
+#define TEST_MSG_ROUND " (check rounding)"
7969
+ TEST_VQRDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat_round,
7971
+ TEST_VQRDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat_round,
7973
+ TEST_VQRDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat_round,
7975
+ TEST_VQRDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat_round,
7978
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND);
7979
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND);
7980
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND);
7981
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND);
7986
+ exec_vqrdmulh_lane ();
7991
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmulh_n.c
7993
+#include <arm_neon.h>
7994
+#include "arm-neon-ref.h"
7995
+#include "compute-ref-data.h"
7997
+/* Expected values of cumulative_saturation flag. */
7998
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
7999
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
8000
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
8001
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
8003
+/* Expected results. */
8004
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffd };
8005
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffe, 0xfffffffe };
8006
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x6, 0x6, 0x6, 0x5,
8007
+ 0x5, 0x4, 0x4, 0x4 };
8008
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xfffffffe,
8009
+ 0xfffffffe, 0xfffffffe };
8011
+/* Expected values of cumulative_saturation flag when multiplication
8013
+int VECT_VAR(expected_cumulative_sat_mul,int,16,4) = 1;
8014
+int VECT_VAR(expected_cumulative_sat_mul,int,32,2) = 1;
8015
+int VECT_VAR(expected_cumulative_sat_mul,int,16,8) = 1;
8016
+int VECT_VAR(expected_cumulative_sat_mul,int,32,4) = 1;
8018
+/* Expected results when multiplication saturates. */
8019
+VECT_VAR_DECL(expected_mul,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
8020
+VECT_VAR_DECL(expected_mul,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
8021
+VECT_VAR_DECL(expected_mul,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
8022
+ 0x7fff, 0x7fff, 0x7fff, 0x7fff };
8023
+VECT_VAR_DECL(expected_mul,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
8024
+ 0x7fffffff, 0x7fffffff };
8026
+/* Expected values of cumulative_saturation flag when rounding
8027
+ should not cause saturation. */
8028
+int VECT_VAR(expected_cumulative_sat_round,int,16,4) = 0;
8029
+int VECT_VAR(expected_cumulative_sat_round,int,32,2) = 0;
8030
+int VECT_VAR(expected_cumulative_sat_round,int,16,8) = 0;
8031
+int VECT_VAR(expected_cumulative_sat_round,int,32,4) = 0;
8033
+/* Expected results when rounding should not cause saturation. */
8034
+VECT_VAR_DECL(expected_round,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
8035
+VECT_VAR_DECL(expected_round,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
8036
+VECT_VAR_DECL(expected_round,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
8037
+ 0x7fff, 0x7fff, 0x7fff, 0x7fff };
8038
+VECT_VAR_DECL(expected_round,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
8039
+ 0x7fffffff, 0x7fffffff };
8041
+#define INSN vqrdmulh
8042
+#define TEST_MSG "VQRDMULH_N"
8044
+#define FNNAME1(NAME) void exec_ ## NAME ## _n (void)
8045
+#define FNNAME(NAME) FNNAME1(NAME)
8051
+ /* vector_res = vqrdmulh_n(vector,val), then store the result. */
8052
+#define TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \
8053
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
8054
+ VECT_VAR(vector_res, T1, W, N) = \
8055
+ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \
8057
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \
8058
+ VECT_VAR(vector_res, T1, W, N)); \
8059
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
8061
+ /* Two auxliary macros are necessary to expand INSN */
8062
+#define TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \
8063
+ TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT)
8065
+#define TEST_VQRDMULH_N(Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \
8066
+ TEST_VQRDMULH_N1(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT)
8069
+ DECL_VARIABLE(vector, int, 16, 4);
8070
+ DECL_VARIABLE(vector, int, 32, 2);
8071
+ DECL_VARIABLE(vector, int, 16, 8);
8072
+ DECL_VARIABLE(vector, int, 32, 4);
8074
+ DECL_VARIABLE(vector_res, int, 16, 4);
8075
+ DECL_VARIABLE(vector_res, int, 32, 2);
8076
+ DECL_VARIABLE(vector_res, int, 16, 8);
8077
+ DECL_VARIABLE(vector_res, int, 32, 4);
8081
+ VLOAD(vector, buffer, , int, s, 16, 4);
8082
+ VLOAD(vector, buffer, , int, s, 32, 2);
8083
+ VLOAD(vector, buffer, q, int, s, 16, 8);
8084
+ VLOAD(vector, buffer, q, int, s, 32, 4);
8086
+ /* Choose multiplier arbitrarily. */
8088
+ TEST_VQRDMULH_N(, int, s, 16, 4, 0x2233, expected_cumulative_sat, CMT);
8089
+ TEST_VQRDMULH_N(, int, s, 32, 2, 0x12345678, expected_cumulative_sat, CMT);
8090
+ TEST_VQRDMULH_N(q, int, s, 16, 8, 0xCD12, expected_cumulative_sat, CMT);
8091
+ TEST_VQRDMULH_N(q, int, s, 32, 4, 0xFA23456, expected_cumulative_sat, CMT);
8093
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
8094
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
8095
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
8096
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
8098
+ /* Now use input values such that the multiplication causes
8100
+#define TEST_MSG_MUL " (check mul cumulative saturation)"
8101
+ VDUP(vector, , int, s, 16, 4, 0x8000);
8102
+ VDUP(vector, , int, s, 32, 2, 0x80000000);
8103
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
8104
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
8106
+ TEST_VQRDMULH_N(, int, s, 16, 4, 0x8000, expected_cumulative_sat_mul,
8108
+ TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000000, expected_cumulative_sat_mul,
8110
+ TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8000, expected_cumulative_sat_mul,
8112
+ TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000000, expected_cumulative_sat_mul,
8115
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL);
8116
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL);
8117
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL);
8118
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL);
8120
+ /* Use input values where rounding produces a result equal to the
8121
+ saturation value, but does not set the saturation flag. */
8122
+#define TEST_MSG_ROUND " (check rounding)"
8123
+ VDUP(vector, , int, s, 16, 4, 0x8000);
8124
+ VDUP(vector, , int, s, 32, 2, 0x80000000);
8125
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
8126
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
8128
+ TEST_VQRDMULH_N(, int, s, 16, 4, 0x8001, expected_cumulative_sat_round,
8130
+ TEST_VQRDMULH_N(, int, s, 32, 2, 0x80000001, expected_cumulative_sat_round,
8132
+ TEST_VQRDMULH_N(q, int, s, 16, 8, 0x8001, expected_cumulative_sat_round,
8134
+ TEST_VQRDMULH_N(q, int, s, 32, 4, 0x80000001, expected_cumulative_sat_round,
8137
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND);
8138
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND);
8139
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND);
8140
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND);
8145
+ exec_vqrdmulh_n ();
8149
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshl.c
8151
+#include <arm_neon.h>
8152
+#include "arm-neon-ref.h"
8153
+#include "compute-ref-data.h"
8155
+/* Expected values of cumulative_saturation flag with input=0. */
8156
+int VECT_VAR(expected_cumulative_sat_0,int,8,8) = 0;
8157
+int VECT_VAR(expected_cumulative_sat_0,int,16,4) = 0;
8158
+int VECT_VAR(expected_cumulative_sat_0,int,32,2) = 0;
8159
+int VECT_VAR(expected_cumulative_sat_0,int,64,1) = 0;
8160
+int VECT_VAR(expected_cumulative_sat_0,uint,8,8) = 0;
8161
+int VECT_VAR(expected_cumulative_sat_0,uint,16,4) = 0;
8162
+int VECT_VAR(expected_cumulative_sat_0,uint,32,2) = 0;
8163
+int VECT_VAR(expected_cumulative_sat_0,uint,64,1) = 0;
8164
+int VECT_VAR(expected_cumulative_sat_0,int,8,16) = 0;
8165
+int VECT_VAR(expected_cumulative_sat_0,int,16,8) = 0;
8166
+int VECT_VAR(expected_cumulative_sat_0,int,32,4) = 0;
8167
+int VECT_VAR(expected_cumulative_sat_0,int,64,2) = 0;
8168
+int VECT_VAR(expected_cumulative_sat_0,uint,8,16) = 0;
8169
+int VECT_VAR(expected_cumulative_sat_0,uint,16,8) = 0;
8170
+int VECT_VAR(expected_cumulative_sat_0,uint,32,4) = 0;
8171
+int VECT_VAR(expected_cumulative_sat_0,uint,64,2) = 0;
8173
+/* Expected results with input=0. */
8174
+VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8175
+ 0x0, 0x0, 0x0, 0x0 };
8176
+VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8177
+VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 };
8178
+VECT_VAR_DECL(expected_0,int,64,1) [] = { 0x0 };
8179
+VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8180
+ 0x0, 0x0, 0x0, 0x0 };
8181
+VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8182
+VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 };
8183
+VECT_VAR_DECL(expected_0,uint,64,1) [] = { 0x0 };
8184
+VECT_VAR_DECL(expected_0,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8185
+ 0x0, 0x0, 0x0, 0x0,
8186
+ 0x0, 0x0, 0x0, 0x0,
8187
+ 0x0, 0x0, 0x0, 0x0 };
8188
+VECT_VAR_DECL(expected_0,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8189
+ 0x0, 0x0, 0x0, 0x0 };
8190
+VECT_VAR_DECL(expected_0,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8191
+VECT_VAR_DECL(expected_0,int,64,2) [] = { 0x0, 0x0 };
8192
+VECT_VAR_DECL(expected_0,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8193
+ 0x0, 0x0, 0x0, 0x0,
8194
+ 0x0, 0x0, 0x0, 0x0,
8195
+ 0x0, 0x0, 0x0, 0x0 };
8196
+VECT_VAR_DECL(expected_0,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8197
+ 0x0, 0x0, 0x0, 0x0 };
8198
+VECT_VAR_DECL(expected_0,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8199
+VECT_VAR_DECL(expected_0,uint,64,2) [] = { 0x0, 0x0 };
8201
+/* Expected values of cumulative_saturation flag with input=0 and
8202
+ negative shift amount. */
8203
+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,8) = 0;
8204
+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,4) = 0;
8205
+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,2) = 0;
8206
+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,1) = 0;
8207
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,8) = 0;
8208
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,4) = 0;
8209
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,2) = 0;
8210
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,1) = 0;
8211
+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,16) = 0;
8212
+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,8) = 0;
8213
+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,4) = 0;
8214
+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,2) = 0;
8215
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,16) = 0;
8216
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,8) = 0;
8217
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,4) = 0;
8218
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,2) = 0;
8220
+/* Expected results with input=0 and negative shift amount. */
8221
+VECT_VAR_DECL(expected_0_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8222
+ 0x0, 0x0, 0x0, 0x0 };
8223
+VECT_VAR_DECL(expected_0_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8224
+VECT_VAR_DECL(expected_0_neg,int,32,2) [] = { 0x0, 0x0 };
8225
+VECT_VAR_DECL(expected_0_neg,int,64,1) [] = { 0x0 };
8226
+VECT_VAR_DECL(expected_0_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8227
+ 0x0, 0x0, 0x0, 0x0 };
8228
+VECT_VAR_DECL(expected_0_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8229
+VECT_VAR_DECL(expected_0_neg,uint,32,2) [] = { 0x0, 0x0 };
8230
+VECT_VAR_DECL(expected_0_neg,uint,64,1) [] = { 0x0 };
8231
+VECT_VAR_DECL(expected_0_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8232
+ 0x0, 0x0, 0x0, 0x0,
8233
+ 0x0, 0x0, 0x0, 0x0,
8234
+ 0x0, 0x0, 0x0, 0x0 };
8235
+VECT_VAR_DECL(expected_0_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8236
+ 0x0, 0x0, 0x0, 0x0 };
8237
+VECT_VAR_DECL(expected_0_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8238
+VECT_VAR_DECL(expected_0_neg,int,64,2) [] = { 0x0, 0x0 };
8239
+VECT_VAR_DECL(expected_0_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8240
+ 0x0, 0x0, 0x0, 0x0,
8241
+ 0x0, 0x0, 0x0, 0x0,
8242
+ 0x0, 0x0, 0x0, 0x0 };
8243
+VECT_VAR_DECL(expected_0_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8244
+ 0x0, 0x0, 0x0, 0x0 };
8245
+VECT_VAR_DECL(expected_0_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8246
+VECT_VAR_DECL(expected_0_neg,uint,64,2) [] = { 0x0, 0x0 };
8248
+/* Expected values of cumulative_saturation flag. */
8249
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
8250
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
8251
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
8252
+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0;
8253
+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1;
8254
+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1;
8255
+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1;
8256
+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1;
8257
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 1;
8258
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 1;
8259
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1;
8260
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 1;
8261
+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1;
8262
+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1;
8263
+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1;
8264
+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1;
8266
+/* Expected results. */
8267
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6,
8268
+ 0xe8, 0xea, 0xec, 0xee };
8269
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 };
8270
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 };
8271
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff80 };
8272
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
8273
+ 0xff, 0xff, 0xff, 0xff };
8274
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
8275
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
8276
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff };
8277
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
8278
+ 0x80, 0x80, 0x80, 0x80,
8279
+ 0x80, 0x80, 0x80, 0x80,
8280
+ 0x80, 0x80, 0x80, 0x80 };
8281
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000,
8282
+ 0x8000, 0x8000, 0x8000, 0x8000 };
8283
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000,
8284
+ 0x80000000, 0x80000000 };
8285
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000, 0x8000000000000000 };
8286
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
8287
+ 0xff, 0xff, 0xff, 0xff,
8288
+ 0xff, 0xff, 0xff, 0xff,
8289
+ 0xff, 0xff, 0xff, 0xff };
8290
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
8291
+ 0xffff, 0xffff, 0xffff, 0xffff };
8292
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
8293
+ 0xffffffff, 0xffffffff };
8294
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff,
8295
+ 0xffffffffffffffff };
8297
+/* Expected values of cumulative_saturation flag with negative shift
8299
+int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 0;
8300
+int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 0;
8301
+int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 0;
8302
+int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 0;
8303
+int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 0;
8304
+int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 0;
8305
+int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 0;
8306
+int VECT_VAR(expected_cumulative_sat_neg,uint,64,1) = 0;
8307
+int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 0;
8308
+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0;
8309
+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0;
8310
+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 0;
8311
+int VECT_VAR(expected_cumulative_sat_neg,uint,8,16) = 0;
8312
+int VECT_VAR(expected_cumulative_sat_neg,uint,16,8) = 0;
8313
+int VECT_VAR(expected_cumulative_sat_neg,uint,32,4) = 0;
8314
+int VECT_VAR(expected_cumulative_sat_neg,uint,64,2) = 0;
8316
+/* Expected results with negative shift amount. */
8317
+VECT_VAR_DECL(expected_neg,int,8,8) [] = { 0xfc, 0xfc, 0xfd, 0xfd,
8318
+ 0xfd, 0xfd, 0xfe, 0xfe };
8319
+VECT_VAR_DECL(expected_neg,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffd, 0xfffd };
8320
+VECT_VAR_DECL(expected_neg,int,32,2) [] = { 0xfffffffe, 0xfffffffe };
8321
+VECT_VAR_DECL(expected_neg,int,64,1) [] = { 0xffffffffffffffff };
8322
+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x3c, 0x3c, 0x3d, 0x3d,
8323
+ 0x3d, 0x3d, 0x3e, 0x3e };
8324
+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffd, 0x3ffd };
8325
+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe };
8326
+VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0xfffffffffffffff };
8327
+VECT_VAR_DECL(expected_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8328
+ 0x0, 0x0, 0x0, 0x0,
8329
+ 0x0, 0x0, 0x0, 0x0,
8330
+ 0x0, 0x0, 0x0, 0x0 };
8331
+VECT_VAR_DECL(expected_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8332
+ 0x0, 0x0, 0x0, 0x0 };
8333
+VECT_VAR_DECL(expected_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8334
+VECT_VAR_DECL(expected_neg,int,64,2) [] = { 0x0, 0x0 };
8335
+VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x2, 0x2, 0x2, 0x2,
8336
+ 0x2, 0x2, 0x2, 0x2,
8337
+ 0x2, 0x2, 0x2, 0x2,
8338
+ 0x2, 0x2, 0x2, 0x2 };
8339
+VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x20, 0x20, 0x20, 0x20,
8340
+ 0x20, 0x20, 0x20, 0x20 };
8341
+VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x80000, 0x80000,
8342
+ 0x80000, 0x80000 };
8343
+VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0x100000000000, 0x100000000000 };
8345
+/* Expected values of cumulative_saturation flag with input=max and
8347
+int VECT_VAR(expected_cumulative_sat_minus1,int,8,8) = 0;
8348
+int VECT_VAR(expected_cumulative_sat_minus1,int,16,4) = 0;
8349
+int VECT_VAR(expected_cumulative_sat_minus1,int,32,2) = 0;
8350
+int VECT_VAR(expected_cumulative_sat_minus1,int,64,1) = 0;
8351
+int VECT_VAR(expected_cumulative_sat_minus1,uint,8,8) = 0;
8352
+int VECT_VAR(expected_cumulative_sat_minus1,uint,16,4) = 0;
8353
+int VECT_VAR(expected_cumulative_sat_minus1,uint,32,2) = 0;
8354
+int VECT_VAR(expected_cumulative_sat_minus1,uint,64,1) = 0;
8355
+int VECT_VAR(expected_cumulative_sat_minus1,int,8,16) = 0;
8356
+int VECT_VAR(expected_cumulative_sat_minus1,int,16,8) = 0;
8357
+int VECT_VAR(expected_cumulative_sat_minus1,int,32,4) = 0;
8358
+int VECT_VAR(expected_cumulative_sat_minus1,int,64,2) = 0;
8359
+int VECT_VAR(expected_cumulative_sat_minus1,uint,8,16) = 0;
8360
+int VECT_VAR(expected_cumulative_sat_minus1,uint,16,8) = 0;
8361
+int VECT_VAR(expected_cumulative_sat_minus1,uint,32,4) = 0;
8362
+int VECT_VAR(expected_cumulative_sat_minus1,uint,64,2) = 0;
8364
+/* Expected results with input=max and shift by -1. */
8365
+VECT_VAR_DECL(expected_minus1,int,8,8) [] = { 0x40, 0x40, 0x40, 0x40,
8366
+ 0x40, 0x40, 0x40, 0x40 };
8367
+VECT_VAR_DECL(expected_minus1,int,16,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 };
8368
+VECT_VAR_DECL(expected_minus1,int,32,2) [] = { 0x40000000, 0x40000000 };
8369
+VECT_VAR_DECL(expected_minus1,int,64,1) [] = { 0x4000000000000000 };
8370
+VECT_VAR_DECL(expected_minus1,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80,
8371
+ 0x80, 0x80, 0x80, 0x80 };
8372
+VECT_VAR_DECL(expected_minus1,uint,16,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 };
8373
+VECT_VAR_DECL(expected_minus1,uint,32,2) [] = { 0x80000000, 0x80000000 };
8374
+VECT_VAR_DECL(expected_minus1,uint,64,1) [] = { 0x8000000000000000 };
8375
+VECT_VAR_DECL(expected_minus1,int,8,16) [] = { 0x40, 0x40, 0x40, 0x40,
8376
+ 0x40, 0x40, 0x40, 0x40,
8377
+ 0x40, 0x40, 0x40, 0x40,
8378
+ 0x40, 0x40, 0x40, 0x40 };
8379
+VECT_VAR_DECL(expected_minus1,int,16,8) [] = { 0x4000, 0x4000, 0x4000, 0x4000,
8380
+ 0x4000, 0x4000, 0x4000, 0x4000 };
8381
+VECT_VAR_DECL(expected_minus1,int,32,4) [] = { 0x40000000, 0x40000000,
8382
+ 0x40000000, 0x40000000 };
8383
+VECT_VAR_DECL(expected_minus1,int,64,2) [] = { 0x4000000000000000,
8384
+ 0x4000000000000000 };
8385
+VECT_VAR_DECL(expected_minus1,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
8386
+ 0x80, 0x80, 0x80, 0x80,
8387
+ 0x80, 0x80, 0x80, 0x80,
8388
+ 0x80, 0x80, 0x80, 0x80 };
8389
+VECT_VAR_DECL(expected_minus1,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000,
8390
+ 0x8000, 0x8000, 0x8000, 0x8000 };
8391
+VECT_VAR_DECL(expected_minus1,uint,32,4) [] = { 0x80000000, 0x80000000,
8392
+ 0x80000000, 0x80000000 };
8393
+VECT_VAR_DECL(expected_minus1,uint,64,2) [] = { 0x8000000000000000,
8394
+ 0x8000000000000000 };
8396
+/* Expected values of cumulative_saturation flag with input=max and
8398
+int VECT_VAR(expected_cumulative_sat_minus3,int,8,8) = 0;
8399
+int VECT_VAR(expected_cumulative_sat_minus3,int,16,4) = 0;
8400
+int VECT_VAR(expected_cumulative_sat_minus3,int,32,2) = 0;
8401
+int VECT_VAR(expected_cumulative_sat_minus3,int,64,1) = 0;
8402
+int VECT_VAR(expected_cumulative_sat_minus3,uint,8,8) = 0;
8403
+int VECT_VAR(expected_cumulative_sat_minus3,uint,16,4) = 0;
8404
+int VECT_VAR(expected_cumulative_sat_minus3,uint,32,2) = 0;
8405
+int VECT_VAR(expected_cumulative_sat_minus3,uint,64,1) = 0;
8406
+int VECT_VAR(expected_cumulative_sat_minus3,int,8,16) = 0;
8407
+int VECT_VAR(expected_cumulative_sat_minus3,int,16,8) = 0;
8408
+int VECT_VAR(expected_cumulative_sat_minus3,int,32,4) = 0;
8409
+int VECT_VAR(expected_cumulative_sat_minus3,int,64,2) = 0;
8410
+int VECT_VAR(expected_cumulative_sat_minus3,uint,8,16) = 0;
8411
+int VECT_VAR(expected_cumulative_sat_minus3,uint,16,8) = 0;
8412
+int VECT_VAR(expected_cumulative_sat_minus3,uint,32,4) = 0;
8413
+int VECT_VAR(expected_cumulative_sat_minus3,uint,64,2) = 0;
8415
+/* Expected results with input=max and shift by -3. */
8416
+VECT_VAR_DECL(expected_minus3,int,8,8) [] = { 0x10, 0x10, 0x10, 0x10,
8417
+ 0x10, 0x10, 0x10, 0x10 };
8418
+VECT_VAR_DECL(expected_minus3,int,16,4) [] = { 0x1000, 0x1000, 0x1000, 0x1000 };
8419
+VECT_VAR_DECL(expected_minus3,int,32,2) [] = { 0x10000000, 0x10000000 };
8420
+VECT_VAR_DECL(expected_minus3,int,64,1) [] = { 0x1000000000000000 };
8421
+VECT_VAR_DECL(expected_minus3,uint,8,8) [] = { 0x20, 0x20, 0x20, 0x20,
8422
+ 0x20, 0x20, 0x20, 0x20 };
8423
+VECT_VAR_DECL(expected_minus3,uint,16,4) [] = { 0x2000, 0x2000, 0x2000, 0x2000 };
8424
+VECT_VAR_DECL(expected_minus3,uint,32,2) [] = { 0x20000000, 0x20000000 };
8425
+VECT_VAR_DECL(expected_minus3,uint,64,1) [] = { 0x2000000000000000 };
8426
+VECT_VAR_DECL(expected_minus3,int,8,16) [] = { 0x10, 0x10, 0x10, 0x10,
8427
+ 0x10, 0x10, 0x10, 0x10,
8428
+ 0x10, 0x10, 0x10, 0x10,
8429
+ 0x10, 0x10, 0x10, 0x10 };
8430
+VECT_VAR_DECL(expected_minus3,int,16,8) [] = { 0x1000, 0x1000, 0x1000, 0x1000,
8431
+ 0x1000, 0x1000, 0x1000, 0x1000 };
8432
+VECT_VAR_DECL(expected_minus3,int,32,4) [] = { 0x10000000, 0x10000000,
8433
+ 0x10000000, 0x10000000 };
8434
+VECT_VAR_DECL(expected_minus3,int,64,2) [] = { 0x1000000000000000,
8435
+ 0x1000000000000000 };
8436
+VECT_VAR_DECL(expected_minus3,uint,8,16) [] = { 0x20, 0x20, 0x20, 0x20,
8437
+ 0x20, 0x20, 0x20, 0x20,
8438
+ 0x20, 0x20, 0x20, 0x20,
8439
+ 0x20, 0x20, 0x20, 0x20 };
8440
+VECT_VAR_DECL(expected_minus3,uint,16,8) [] = { 0x2000, 0x2000, 0x2000, 0x2000,
8441
+ 0x2000, 0x2000, 0x2000, 0x2000 };
8442
+VECT_VAR_DECL(expected_minus3,uint,32,4) [] = { 0x20000000, 0x20000000,
8443
+ 0x20000000, 0x20000000 };
8444
+VECT_VAR_DECL(expected_minus3,uint,64,2) [] = { 0x2000000000000000,
8445
+ 0x2000000000000000 };
8447
+/* Expected values of cumulative_saturation flag with input=max and
8448
+ large shift amount. */
8449
+int VECT_VAR(expected_cumulative_sat_large_sh,int,8,8) = 1;
8450
+int VECT_VAR(expected_cumulative_sat_large_sh,int,16,4) = 1;
8451
+int VECT_VAR(expected_cumulative_sat_large_sh,int,32,2) = 1;
8452
+int VECT_VAR(expected_cumulative_sat_large_sh,int,64,1) = 1;
8453
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,8,8) = 1;
8454
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,16,4) = 1;
8455
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,32,2) = 1;
8456
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,64,1) = 1;
8457
+int VECT_VAR(expected_cumulative_sat_large_sh,int,8,16) = 1;
8458
+int VECT_VAR(expected_cumulative_sat_large_sh,int,16,8) = 1;
8459
+int VECT_VAR(expected_cumulative_sat_large_sh,int,32,4) = 1;
8460
+int VECT_VAR(expected_cumulative_sat_large_sh,int,64,2) = 1;
8461
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,8,16) = 1;
8462
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,16,8) = 1;
8463
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,32,4) = 1;
8464
+int VECT_VAR(expected_cumulative_sat_large_sh,uint,64,2) = 1;
8466
+/* Expected results with input=max and large shift amount. */
8467
+VECT_VAR_DECL(expected_large_sh,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
8468
+ 0x7f, 0x7f, 0x7f, 0x7f };
8469
+VECT_VAR_DECL(expected_large_sh,int,16,4) [] = { 0x7fff, 0x7fff,
8471
+VECT_VAR_DECL(expected_large_sh,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
8472
+VECT_VAR_DECL(expected_large_sh,int,64,1) [] = { 0x7fffffffffffffff };
8473
+VECT_VAR_DECL(expected_large_sh,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
8474
+ 0xff, 0xff, 0xff, 0xff };
8475
+VECT_VAR_DECL(expected_large_sh,uint,16,4) [] = { 0xffff, 0xffff,
8477
+VECT_VAR_DECL(expected_large_sh,uint,32,2) [] = { 0xffffffff, 0xffffffff };
8478
+VECT_VAR_DECL(expected_large_sh,uint,64,1) [] = { 0xffffffffffffffff };
8479
+VECT_VAR_DECL(expected_large_sh,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
8480
+ 0x7f, 0x7f, 0x7f, 0x7f,
8481
+ 0x7f, 0x7f, 0x7f, 0x7f,
8482
+ 0x7f, 0x7f, 0x7f, 0x7f };
8483
+VECT_VAR_DECL(expected_large_sh,int,16,8) [] = { 0x7fff, 0x7fff,
8487
+VECT_VAR_DECL(expected_large_sh,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
8488
+ 0x7fffffff, 0x7fffffff };
8489
+VECT_VAR_DECL(expected_large_sh,int,64,2) [] = { 0x7fffffffffffffff,
8490
+ 0x7fffffffffffffff };
8491
+VECT_VAR_DECL(expected_large_sh,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
8492
+ 0xff, 0xff, 0xff, 0xff,
8493
+ 0xff, 0xff, 0xff, 0xff,
8494
+ 0xff, 0xff, 0xff, 0xff };
8495
+VECT_VAR_DECL(expected_large_sh,uint,16,8) [] = { 0xffff, 0xffff,
8499
+VECT_VAR_DECL(expected_large_sh,uint,32,4) [] = { 0xffffffff, 0xffffffff,
8500
+ 0xffffffff, 0xffffffff };
8501
+VECT_VAR_DECL(expected_large_sh,uint,64,2) [] = { 0xffffffffffffffff,
8502
+ 0xffffffffffffffff };
8504
+/* Expected values of cumulative_saturation flag with negative input and
8505
+ large shift amount. */
8506
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,8,8) = 1;
8507
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,16,4) = 1;
8508
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,32,2) = 1;
8509
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,64,1) = 1;
8510
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,8,8) = 1;
8511
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,16,4) = 1;
8512
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,32,2) = 1;
8513
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,64,1) = 1;
8514
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,8,16) = 1;
8515
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,16,8) = 1;
8516
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,32,4) = 1;
8517
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,int,64,2) = 1;
8518
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,8,16) = 1;
8519
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,16,8) = 1;
8520
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,32,4) = 1;
8521
+int VECT_VAR(expected_cumulative_sat_neg_large_sh,uint,64,2) = 1;
8523
+/* Expected results with negative input and large shift amount. */
8524
+VECT_VAR_DECL(expected_neg_large_sh,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80,
8525
+ 0x80, 0x80, 0x80, 0x80 };
8526
+VECT_VAR_DECL(expected_neg_large_sh,int,16,4) [] = { 0x8000, 0x8000,
8528
+VECT_VAR_DECL(expected_neg_large_sh,int,32,2) [] = { 0x80000000, 0x80000000 };
8529
+VECT_VAR_DECL(expected_neg_large_sh,int,64,1) [] = { 0x8000000000000000 };
8530
+VECT_VAR_DECL(expected_neg_large_sh,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
8531
+ 0xff, 0xff, 0xff, 0xff };
8532
+VECT_VAR_DECL(expected_neg_large_sh,uint,16,4) [] = { 0xffff, 0xffff,
8534
+VECT_VAR_DECL(expected_neg_large_sh,uint,32,2) [] = { 0xffffffff,
8536
+VECT_VAR_DECL(expected_neg_large_sh,uint,64,1) [] = { 0xffffffffffffffff };
8537
+VECT_VAR_DECL(expected_neg_large_sh,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
8538
+ 0x80, 0x80, 0x80, 0x80,
8539
+ 0x80, 0x80, 0x80, 0x80,
8540
+ 0x80, 0x80, 0x80, 0x80 };
8541
+VECT_VAR_DECL(expected_neg_large_sh,int,16,8) [] = { 0x8000, 0x8000,
8545
+VECT_VAR_DECL(expected_neg_large_sh,int,32,4) [] = { 0x80000000, 0x80000000,
8546
+ 0x80000000, 0x80000000 };
8547
+VECT_VAR_DECL(expected_neg_large_sh,int,64,2) [] = { 0x8000000000000000,
8548
+ 0x8000000000000000 };
8549
+VECT_VAR_DECL(expected_neg_large_sh,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
8550
+ 0xff, 0xff, 0xff, 0xff,
8551
+ 0xff, 0xff, 0xff, 0xff,
8552
+ 0xff, 0xff, 0xff, 0xff };
8553
+VECT_VAR_DECL(expected_neg_large_sh,uint,16,8) [] = { 0xffff, 0xffff,
8557
+VECT_VAR_DECL(expected_neg_large_sh,uint,32,4) [] = { 0xffffffff,
8561
+VECT_VAR_DECL(expected_neg_large_sh,uint,64,2) [] = { 0xffffffffffffffff,
8562
+ 0xffffffffffffffff };
8564
+/* Expected values of cumulative_saturation flag with max/min input and
8565
+ large negative shift amount. */
8566
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,8,8) = 0;
8567
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,16,4) = 0;
8568
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,32,2) = 0;
8569
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,64,1) = 0;
8570
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,8,8) = 0;
8571
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,16,4) = 0;
8572
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,32,2) = 0;
8573
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,64,1) = 0;
8574
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,8,16) = 0;
8575
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,16,8) = 0;
8576
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,32,4) = 0;
8577
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,int,64,2) = 0;
8578
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,8,16) = 0;
8579
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,16,8) = 0;
8580
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,32,4) = 0;
8581
+int VECT_VAR(expected_cumulative_sat_large_neg_sh,uint,64,2) = 0;
8583
+/* Expected results with max/min input and large negative shift amount. */
8584
+VECT_VAR_DECL(expected_large_neg_sh,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8585
+ 0x0, 0x0, 0x0, 0x0 };
8586
+VECT_VAR_DECL(expected_large_neg_sh,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8587
+VECT_VAR_DECL(expected_large_neg_sh,int,32,2) [] = { 0x0, 0x0 };
8588
+VECT_VAR_DECL(expected_large_neg_sh,int,64,1) [] = { 0x0 };
8589
+VECT_VAR_DECL(expected_large_neg_sh,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8590
+ 0x0, 0x0, 0x0, 0x0 };
8591
+VECT_VAR_DECL(expected_large_neg_sh,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8592
+VECT_VAR_DECL(expected_large_neg_sh,uint,32,2) [] = { 0x0, 0x0 };
8593
+VECT_VAR_DECL(expected_large_neg_sh,uint,64,1) [] = { 0x0 };
8594
+VECT_VAR_DECL(expected_large_neg_sh,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8595
+ 0x0, 0x0, 0x0, 0x0,
8596
+ 0x0, 0x0, 0x0, 0x0,
8597
+ 0x0, 0x0, 0x0, 0x0 };
8598
+VECT_VAR_DECL(expected_large_neg_sh,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8599
+ 0x0, 0x0, 0x0, 0x0 };
8600
+VECT_VAR_DECL(expected_large_neg_sh,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8601
+VECT_VAR_DECL(expected_large_neg_sh,int,64,2) [] = { 0x0, 0x0 };
8602
+VECT_VAR_DECL(expected_large_neg_sh,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8603
+ 0x0, 0x0, 0x0, 0x0,
8604
+ 0x0, 0x0, 0x0, 0x0,
8605
+ 0x0, 0x0, 0x0, 0x0 };
8606
+VECT_VAR_DECL(expected_large_neg_sh,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8607
+ 0x0, 0x0, 0x0, 0x0 };
8608
+VECT_VAR_DECL(expected_large_neg_sh,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8609
+VECT_VAR_DECL(expected_large_neg_sh,uint,64,2) [] = { 0x0, 0x0 };
8611
+/* Expected values of cumulative_saturation flag with input=0 and
8612
+ large negative shift amount. */
8613
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,8,8) = 0;
8614
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,16,4) = 0;
8615
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,32,2) = 0;
8616
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,64,1) = 0;
8617
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,8,8) = 0;
8618
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,16,4) = 0;
8619
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,32,2) = 0;
8620
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,64,1) = 0;
8621
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,8,16) = 0;
8622
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,16,8) = 0;
8623
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,32,4) = 0;
8624
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,int,64,2) = 0;
8625
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,8,16) = 0;
8626
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,16,8) = 0;
8627
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,32,4) = 0;
8628
+int VECT_VAR(expected_cumulative_sat_0_large_neg_sh,uint,64,2) = 0;
8630
+/* Expected results with input=0 and large negative shift amount. */
8631
+VECT_VAR_DECL(expected_0_large_neg_sh,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8632
+ 0x0, 0x0, 0x0, 0x0 };
8633
+VECT_VAR_DECL(expected_0_large_neg_sh,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8634
+VECT_VAR_DECL(expected_0_large_neg_sh,int,32,2) [] = { 0x0, 0x0 };
8635
+VECT_VAR_DECL(expected_0_large_neg_sh,int,64,1) [] = { 0x0 };
8636
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
8637
+ 0x0, 0x0, 0x0, 0x0 };
8638
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8639
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,32,2) [] = { 0x0, 0x0 };
8640
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,64,1) [] = { 0x0 };
8641
+VECT_VAR_DECL(expected_0_large_neg_sh,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8642
+ 0x0, 0x0, 0x0, 0x0,
8643
+ 0x0, 0x0, 0x0, 0x0,
8644
+ 0x0, 0x0, 0x0, 0x0 };
8645
+VECT_VAR_DECL(expected_0_large_neg_sh,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8646
+ 0x0, 0x0, 0x0, 0x0 };
8647
+VECT_VAR_DECL(expected_0_large_neg_sh,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8648
+VECT_VAR_DECL(expected_0_large_neg_sh,int,64,2) [] = { 0x0, 0x0 };
8649
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
8650
+ 0x0, 0x0, 0x0, 0x0,
8651
+ 0x0, 0x0, 0x0, 0x0,
8652
+ 0x0, 0x0, 0x0, 0x0 };
8653
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
8654
+ 0x0, 0x0, 0x0, 0x0 };
8655
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
8656
+VECT_VAR_DECL(expected_0_large_neg_sh,uint,64,2) [] = { 0x0, 0x0 };
8658
+#define INSN vqrshl
8659
+#define TEST_MSG "VQRSHL/VQRSHLQ"
8661
+#define FNNAME1(NAME) void exec_ ## NAME (void)
8662
+#define FNNAME(NAME) FNNAME1(NAME)
8666
+ /* Basic test: v3=vqrshl(v1,v2), then store the result. */
8667
+#define TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
8668
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
8669
+ VECT_VAR(vector_res, T1, W, N) = \
8670
+ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \
8671
+ VECT_VAR(vector_shift, T3, W, N)); \
8672
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \
8673
+ VECT_VAR(vector_res, T1, W, N)); \
8674
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
8676
+ /* Two auxliary macros are necessary to expand INSN */
8677
+#define TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
8678
+ TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
8680
+#define TEST_VQRSHL(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
8681
+ TEST_VQRSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
8683
+ DECL_VARIABLE_ALL_VARIANTS(vector);
8684
+ DECL_VARIABLE_ALL_VARIANTS(vector_res);
8686
+ DECL_VARIABLE_SIGNED_VARIANTS(vector_shift);
8690
+ /* Fill input vector with 0, to check saturation on limits. */
8691
+ VDUP(vector, , int, s, 8, 8, 0);
8692
+ VDUP(vector, , int, s, 16, 4, 0);
8693
+ VDUP(vector, , int, s, 32, 2, 0);
8694
+ VDUP(vector, , int, s, 64, 1, 0);
8695
+ VDUP(vector, , uint, u, 8, 8, 0);
8696
+ VDUP(vector, , uint, u, 16, 4, 0);
8697
+ VDUP(vector, , uint, u, 32, 2, 0);
8698
+ VDUP(vector, , uint, u, 64, 1, 0);
8699
+ VDUP(vector, q, int, s, 8, 16, 0);
8700
+ VDUP(vector, q, int, s, 16, 8, 0);
8701
+ VDUP(vector, q, int, s, 32, 4, 0);
8702
+ VDUP(vector, q, int, s, 64, 2, 0);
8703
+ VDUP(vector, q, uint, u, 8, 16, 0);
8704
+ VDUP(vector, q, uint, u, 16, 8, 0);
8705
+ VDUP(vector, q, uint, u, 32, 4, 0);
8706
+ VDUP(vector, q, uint, u, 64, 2, 0);
8708
+ /* Choose init value arbitrarily, will be used as shift amount */
8709
+ /* Use values equal to or one-less-than the type width to check
8710
+ behaviour on limits. */
8711
+ VDUP(vector_shift, , int, s, 8, 8, 7);
8712
+ VDUP(vector_shift, , int, s, 16, 4, 15);
8713
+ VDUP(vector_shift, , int, s, 32, 2, 31);
8714
+ VDUP(vector_shift, , int, s, 64, 1, 63);
8715
+ VDUP(vector_shift, q, int, s, 8, 16, 8);
8716
+ VDUP(vector_shift, q, int, s, 16, 8, 16);
8717
+ VDUP(vector_shift, q, int, s, 32, 4, 32);
8718
+ VDUP(vector_shift, q, int, s, 64, 2, 64);
8720
+#define CMT " (with input = 0)"
8721
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_0, CMT);
8722
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_0, CMT);
8723
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_0, CMT);
8724
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_0, CMT);
8725
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0, CMT);
8726
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0, CMT);
8727
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0, CMT);
8728
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0, CMT);
8729
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0, CMT);
8730
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0, CMT);
8731
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0, CMT);
8732
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0, CMT);
8733
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0, CMT);
8734
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0, CMT);
8735
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0, CMT);
8736
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0, CMT);
8738
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT);
8739
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT);
8740
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT);
8741
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0, CMT);
8742
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT);
8743
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT);
8744
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT);
8745
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0, CMT);
8746
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0, CMT);
8747
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0, CMT);
8748
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0, CMT);
8749
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0, CMT);
8750
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0, CMT);
8751
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0, CMT);
8752
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0, CMT);
8753
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0, CMT);
8756
+ /* Use negative shift amounts. */
8757
+ VDUP(vector_shift, , int, s, 8, 8, -1);
8758
+ VDUP(vector_shift, , int, s, 16, 4, -2);
8759
+ VDUP(vector_shift, , int, s, 32, 2, -3);
8760
+ VDUP(vector_shift, , int, s, 64, 1, -4);
8761
+ VDUP(vector_shift, q, int, s, 8, 16, -7);
8762
+ VDUP(vector_shift, q, int, s, 16, 8, -11);
8763
+ VDUP(vector_shift, q, int, s, 32, 4, -13);
8764
+ VDUP(vector_shift, q, int, s, 64, 2, -20);
8767
+#define CMT " (input 0 and negative shift amount)"
8768
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_0_neg, CMT);
8769
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_0_neg, CMT);
8770
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_0_neg, CMT);
8771
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_0_neg, CMT);
8772
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0_neg, CMT);
8773
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0_neg, CMT);
8774
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0_neg, CMT);
8775
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0_neg, CMT);
8776
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0_neg, CMT);
8777
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0_neg, CMT);
8778
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0_neg, CMT);
8779
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0_neg, CMT);
8780
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0_neg, CMT);
8781
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0_neg, CMT);
8782
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0_neg, CMT);
8783
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0_neg, CMT);
8785
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0_neg, CMT);
8786
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0_neg, CMT);
8787
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0_neg, CMT);
8788
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0_neg, CMT);
8789
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0_neg, CMT);
8790
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0_neg, CMT);
8791
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0_neg, CMT);
8792
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0_neg, CMT);
8793
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0_neg, CMT);
8794
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0_neg, CMT);
8795
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0_neg, CMT);
8796
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0_neg, CMT);
8797
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0_neg, CMT);
8798
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0_neg, CMT);
8799
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0_neg, CMT);
8800
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0_neg, CMT);
8803
+ /* Test again, with predefined input values. */
8804
+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
8806
+ /* Choose init value arbitrarily, will be used as shift amount. */
8807
+ VDUP(vector_shift, , int, s, 8, 8, 1);
8808
+ VDUP(vector_shift, , int, s, 16, 4, 3);
8809
+ VDUP(vector_shift, , int, s, 32, 2, 8);
8810
+ VDUP(vector_shift, , int, s, 64, 1, 3);
8811
+ VDUP(vector_shift, q, int, s, 8, 16, 10);
8812
+ VDUP(vector_shift, q, int, s, 16, 8, 12);
8813
+ VDUP(vector_shift, q, int, s, 32, 4, 31);
8814
+ VDUP(vector_shift, q, int, s, 64, 2, 63);
8818
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat, CMT);
8819
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat, CMT);
8820
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat, CMT);
8821
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat, CMT);
8822
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat, CMT);
8823
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat, CMT);
8824
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat, CMT);
8825
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat, CMT);
8826
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat, CMT);
8827
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat, CMT);
8828
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat, CMT);
8829
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat, CMT);
8830
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat, CMT);
8831
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat, CMT);
8832
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat, CMT);
8833
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat, CMT);
8835
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT);
8836
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
8837
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
8838
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT);
8839
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
8840
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
8841
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
8842
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT);
8843
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT);
8844
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
8845
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
8846
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT);
8847
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT);
8848
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT);
8849
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
8850
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT);
8853
+ /* Use negative shift amounts. */
8854
+ VDUP(vector_shift, , int, s, 8, 8, -2);
8855
+ VDUP(vector_shift, , int, s, 16, 4, -2);
8856
+ VDUP(vector_shift, , int, s, 32, 2, -3);
8857
+ VDUP(vector_shift, , int, s, 64, 1, -4);
8858
+ VDUP(vector_shift, q, int, s, 8, 16, -7);
8859
+ VDUP(vector_shift, q, int, s, 16, 8, -11);
8860
+ VDUP(vector_shift, q, int, s, 32, 4, -13);
8861
+ VDUP(vector_shift, q, int, s, 64, 2, -20);
8864
+#define CMT " (negative shift amount)"
8865
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg, CMT);
8866
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg, CMT);
8867
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg, CMT);
8868
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg, CMT);
8869
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg, CMT);
8870
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg, CMT);
8871
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg, CMT);
8872
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg, CMT);
8873
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg, CMT);
8874
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg, CMT);
8875
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg, CMT);
8876
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg, CMT);
8877
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg, CMT);
8878
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg, CMT);
8879
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg, CMT);
8880
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg, CMT);
8882
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg, CMT);
8883
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg, CMT);
8884
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg, CMT);
8885
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg, CMT);
8886
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT);
8887
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT);
8888
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT);
8889
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT);
8890
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg, CMT);
8891
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg, CMT);
8892
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg, CMT);
8893
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg, CMT);
8894
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT);
8895
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT);
8896
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT);
8897
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT);
8900
+ /* Fill input vector with max value, to check saturation on
8902
+ VDUP(vector, , int, s, 8, 8, 0x7F);
8903
+ VDUP(vector, , int, s, 16, 4, 0x7FFF);
8904
+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF);
8905
+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL);
8906
+ VDUP(vector, , uint, u, 8, 8, 0xFF);
8907
+ VDUP(vector, , uint, u, 16, 4, 0xFFFF);
8908
+ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF);
8909
+ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL);
8910
+ VDUP(vector, q, int, s, 8, 16, 0x7F);
8911
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
8912
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
8913
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
8914
+ VDUP(vector, q, uint, u, 8, 16, 0xFF);
8915
+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF);
8916
+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF);
8917
+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL);
8919
+ /* Use -1 shift amount to check cumulative saturation with
8921
+ VDUP(vector_shift, , int, s, 8, 8, -1);
8922
+ VDUP(vector_shift, , int, s, 16, 4, -1);
8923
+ VDUP(vector_shift, , int, s, 32, 2, -1);
8924
+ VDUP(vector_shift, , int, s, 64, 1, -1);
8925
+ VDUP(vector_shift, q, int, s, 8, 16, -1);
8926
+ VDUP(vector_shift, q, int, s, 16, 8, -1);
8927
+ VDUP(vector_shift, q, int, s, 32, 4, -1);
8928
+ VDUP(vector_shift, q, int, s, 64, 2, -1);
8931
+#define CMT " (checking cumulative saturation: shift by -1)"
8932
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_minus1, CMT);
8933
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_minus1, CMT);
8934
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_minus1, CMT);
8935
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_minus1, CMT);
8936
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_minus1, CMT);
8937
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_minus1, CMT);
8938
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_minus1, CMT);
8939
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_minus1, CMT);
8940
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_minus1, CMT);
8941
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_minus1, CMT);
8942
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_minus1, CMT);
8943
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_minus1, CMT);
8944
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_minus1, CMT);
8945
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_minus1, CMT);
8946
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_minus1, CMT);
8947
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_minus1, CMT);
8949
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_minus1, CMT);
8950
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_minus1, CMT);
8951
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_minus1, CMT);
8952
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_minus1, CMT);
8953
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_minus1, CMT);
8954
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_minus1, CMT);
8955
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_minus1, CMT);
8956
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_minus1, CMT);
8957
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_minus1, CMT);
8958
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_minus1, CMT);
8959
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_minus1, CMT);
8960
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_minus1, CMT);
8961
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_minus1, CMT);
8962
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_minus1, CMT);
8963
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_minus1, CMT);
8964
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_minus1, CMT);
8967
+ /* Use -3 shift amount to check cumulative saturation with
8969
+ VDUP(vector_shift, , int, s, 8, 8, -3);
8970
+ VDUP(vector_shift, , int, s, 16, 4, -3);
8971
+ VDUP(vector_shift, , int, s, 32, 2, -3);
8972
+ VDUP(vector_shift, , int, s, 64, 1, -3);
8973
+ VDUP(vector_shift, q, int, s, 8, 16, -3);
8974
+ VDUP(vector_shift, q, int, s, 16, 8, -3);
8975
+ VDUP(vector_shift, q, int, s, 32, 4, -3);
8976
+ VDUP(vector_shift, q, int, s, 64, 2, -3);
8979
+#define CMT " (checking cumulative saturation: shift by -3)"
8980
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_minus3, CMT);
8981
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_minus3, CMT);
8982
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_minus3, CMT);
8983
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_minus3, CMT);
8984
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_minus3, CMT);
8985
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_minus3, CMT);
8986
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_minus3, CMT);
8987
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_minus3, CMT);
8988
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_minus3, CMT);
8989
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_minus3, CMT);
8990
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_minus3, CMT);
8991
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_minus3, CMT);
8992
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_minus3, CMT);
8993
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_minus3, CMT);
8994
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_minus3, CMT);
8995
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_minus3, CMT);
8997
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_minus3, CMT);
8998
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_minus3, CMT);
8999
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_minus3, CMT);
9000
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_minus3, CMT);
9001
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_minus3, CMT);
9002
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_minus3, CMT);
9003
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_minus3, CMT);
9004
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_minus3, CMT);
9005
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_minus3, CMT);
9006
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_minus3, CMT);
9007
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_minus3, CMT);
9008
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_minus3, CMT);
9009
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_minus3, CMT);
9010
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_minus3, CMT);
9011
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_minus3, CMT);
9012
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_minus3, CMT);
9015
+ /* Use large shift amount. */
9016
+ VDUP(vector_shift, , int, s, 8, 8, 10);
9017
+ VDUP(vector_shift, , int, s, 16, 4, 20);
9018
+ VDUP(vector_shift, , int, s, 32, 2, 40);
9019
+ VDUP(vector_shift, , int, s, 64, 1, 70);
9020
+ VDUP(vector_shift, q, int, s, 8, 16, 10);
9021
+ VDUP(vector_shift, q, int, s, 16, 8, 20);
9022
+ VDUP(vector_shift, q, int, s, 32, 4, 40);
9023
+ VDUP(vector_shift, q, int, s, 64, 2, 70);
9026
+#define CMT " (checking cumulative saturation: large shift amount)"
9027
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_sh, CMT);
9028
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_sh, CMT);
9029
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_sh, CMT);
9030
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_sh, CMT);
9031
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_sh, CMT);
9032
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_sh, CMT);
9033
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_sh, CMT);
9034
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_sh, CMT);
9035
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_sh, CMT);
9036
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_sh, CMT);
9037
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_sh, CMT);
9038
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_sh, CMT);
9039
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_sh, CMT);
9040
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_sh, CMT);
9041
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_sh, CMT);
9042
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_sh, CMT);
9044
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_sh, CMT);
9045
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_sh, CMT);
9046
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_sh, CMT);
9047
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_sh, CMT);
9048
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_sh, CMT);
9049
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_sh, CMT);
9050
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_sh, CMT);
9051
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_sh, CMT);
9052
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_sh, CMT);
9053
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_sh, CMT);
9054
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_sh, CMT);
9055
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_sh, CMT);
9056
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_sh, CMT);
9057
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_sh, CMT);
9058
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_sh, CMT);
9059
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_sh, CMT);
9062
+ /* Fill input vector with negative values, to check saturation on
9064
+ VDUP(vector, , int, s, 8, 8, 0x80);
9065
+ VDUP(vector, , int, s, 16, 4, 0x8000);
9066
+ VDUP(vector, , int, s, 32, 2, 0x80000000);
9067
+ VDUP(vector, , int, s, 64, 1, 0x8000000000000000LL);
9068
+ VDUP(vector, q, int, s, 8, 16, 0x80);
9069
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
9070
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
9071
+ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL);
9073
+ /* Use large shift amount. */
9074
+ VDUP(vector_shift, , int, s, 8, 8, 10);
9075
+ VDUP(vector_shift, , int, s, 16, 4, 20);
9076
+ VDUP(vector_shift, , int, s, 32, 2, 40);
9077
+ VDUP(vector_shift, , int, s, 64, 1, 70);
9078
+ VDUP(vector_shift, q, int, s, 8, 16, 10);
9079
+ VDUP(vector_shift, q, int, s, 16, 8, 20);
9080
+ VDUP(vector_shift, q, int, s, 32, 4, 40);
9081
+ VDUP(vector_shift, q, int, s, 64, 2, 70);
9084
+#define CMT " (checking cumulative saturation: large shift amount with negative input)"
9085
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg_large_sh, CMT);
9086
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg_large_sh, CMT);
9087
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg_large_sh, CMT);
9088
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg_large_sh, CMT);
9089
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg_large_sh, CMT);
9090
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg_large_sh, CMT);
9091
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg_large_sh, CMT);
9092
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg_large_sh, CMT);
9093
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg_large_sh, CMT);
9094
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg_large_sh, CMT);
9095
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg_large_sh, CMT);
9096
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg_large_sh, CMT);
9097
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg_large_sh, CMT);
9098
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg_large_sh, CMT);
9099
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg_large_sh, CMT);
9100
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg_large_sh, CMT);
9102
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg_large_sh, CMT);
9103
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg_large_sh, CMT);
9104
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg_large_sh, CMT);
9105
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg_large_sh, CMT);
9106
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg_large_sh, CMT);
9107
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg_large_sh, CMT);
9108
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg_large_sh, CMT);
9109
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg_large_sh, CMT);
9110
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg_large_sh, CMT);
9111
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg_large_sh, CMT);
9112
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg_large_sh, CMT);
9113
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg_large_sh, CMT);
9114
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg_large_sh, CMT);
9115
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg_large_sh, CMT);
9116
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg_large_sh, CMT);
9117
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg_large_sh, CMT);
9120
+ /* Fill input vector with negative and positive values, to check
9121
+ * saturation on limits */
9122
+ VDUP(vector, , int, s, 8, 8, 0x7F);
9123
+ VDUP(vector, , int, s, 16, 4, 0x7FFF);
9124
+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF);
9125
+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL);
9126
+ VDUP(vector, q, int, s, 8, 16, 0x80);
9127
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
9128
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
9129
+ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL);
9131
+ /* Use large negative shift amount */
9132
+ VDUP(vector_shift, , int, s, 8, 8, -10);
9133
+ VDUP(vector_shift, , int, s, 16, 4, -20);
9134
+ VDUP(vector_shift, , int, s, 32, 2, -40);
9135
+ VDUP(vector_shift, , int, s, 64, 1, -70);
9136
+ VDUP(vector_shift, q, int, s, 8, 16, -10);
9137
+ VDUP(vector_shift, q, int, s, 16, 8, -20);
9138
+ VDUP(vector_shift, q, int, s, 32, 4, -40);
9139
+ VDUP(vector_shift, q, int, s, 64, 2, -70);
9142
+#define CMT " (checking cumulative saturation: large negative shift amount)"
9143
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_neg_sh, CMT);
9144
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_neg_sh, CMT);
9145
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_neg_sh, CMT);
9146
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_neg_sh, CMT);
9147
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_neg_sh, CMT);
9148
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_neg_sh, CMT);
9149
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_neg_sh, CMT);
9150
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_neg_sh, CMT);
9151
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_neg_sh, CMT);
9152
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_neg_sh, CMT);
9153
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_neg_sh, CMT);
9154
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_neg_sh, CMT);
9155
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_neg_sh, CMT);
9156
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_neg_sh, CMT);
9157
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_neg_sh, CMT);
9158
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_neg_sh, CMT);
9160
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_neg_sh, CMT);
9161
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_neg_sh, CMT);
9162
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_neg_sh, CMT);
9163
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_neg_sh, CMT);
9164
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_neg_sh, CMT);
9165
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_neg_sh, CMT);
9166
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_neg_sh, CMT);
9167
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_neg_sh, CMT);
9168
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_neg_sh, CMT);
9169
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_neg_sh, CMT);
9170
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_neg_sh, CMT);
9171
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_neg_sh, CMT);
9172
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_neg_sh, CMT);
9173
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_neg_sh, CMT);
9174
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_neg_sh, CMT);
9175
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_neg_sh, CMT);
9178
+ /* Fill input vector with 0, to check saturation in case of large
9180
+ VDUP(vector, , int, s, 8, 8, 0);
9181
+ VDUP(vector, , int, s, 16, 4, 0);
9182
+ VDUP(vector, , int, s, 32, 2, 0);
9183
+ VDUP(vector, , int, s, 64, 1, 0);
9184
+ VDUP(vector, q, int, s, 8, 16, 0);
9185
+ VDUP(vector, q, int, s, 16, 8, 0);
9186
+ VDUP(vector, q, int, s, 32, 4, 0);
9187
+ VDUP(vector, q, int, s, 64, 2, 0);
9189
+ /* Use large shift amount */
9190
+ VDUP(vector_shift, , int, s, 8, 8, -10);
9191
+ VDUP(vector_shift, , int, s, 16, 4, -20);
9192
+ VDUP(vector_shift, , int, s, 32, 2, -40);
9193
+ VDUP(vector_shift, , int, s, 64, 1, -70);
9194
+ VDUP(vector_shift, q, int, s, 8, 16, -10);
9195
+ VDUP(vector_shift, q, int, s, 16, 8, -20);
9196
+ VDUP(vector_shift, q, int, s, 32, 4, -40);
9197
+ VDUP(vector_shift, q, int, s, 64, 2, -70);
9200
+#define CMT " (checking cumulative saturation: large negative shift amount with 0 input)"
9201
+ TEST_VQRSHL(int, , int, s, 8, 8, expected_cumulative_sat_large_neg_sh, CMT);
9202
+ TEST_VQRSHL(int, , int, s, 16, 4, expected_cumulative_sat_large_neg_sh, CMT);
9203
+ TEST_VQRSHL(int, , int, s, 32, 2, expected_cumulative_sat_large_neg_sh, CMT);
9204
+ TEST_VQRSHL(int, , int, s, 64, 1, expected_cumulative_sat_large_neg_sh, CMT);
9205
+ TEST_VQRSHL(int, , uint, u, 8, 8, expected_cumulative_sat_large_neg_sh, CMT);
9206
+ TEST_VQRSHL(int, , uint, u, 16, 4, expected_cumulative_sat_large_neg_sh, CMT);
9207
+ TEST_VQRSHL(int, , uint, u, 32, 2, expected_cumulative_sat_large_neg_sh, CMT);
9208
+ TEST_VQRSHL(int, , uint, u, 64, 1, expected_cumulative_sat_large_neg_sh, CMT);
9209
+ TEST_VQRSHL(int, q, int, s, 8, 16, expected_cumulative_sat_large_neg_sh, CMT);
9210
+ TEST_VQRSHL(int, q, int, s, 16, 8, expected_cumulative_sat_large_neg_sh, CMT);
9211
+ TEST_VQRSHL(int, q, int, s, 32, 4, expected_cumulative_sat_large_neg_sh, CMT);
9212
+ TEST_VQRSHL(int, q, int, s, 64, 2, expected_cumulative_sat_large_neg_sh, CMT);
9213
+ TEST_VQRSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_large_neg_sh, CMT);
9214
+ TEST_VQRSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_large_neg_sh, CMT);
9215
+ TEST_VQRSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_large_neg_sh, CMT);
9216
+ TEST_VQRSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_large_neg_sh, CMT);
9218
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_large_neg_sh, CMT);
9219
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_large_neg_sh, CMT);
9220
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_large_neg_sh, CMT);
9221
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_large_neg_sh, CMT);
9222
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_large_neg_sh, CMT);
9223
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_large_neg_sh, CMT);
9224
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_large_neg_sh, CMT);
9225
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_large_neg_sh, CMT);
9226
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_large_neg_sh, CMT);
9227
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_large_neg_sh, CMT);
9228
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_large_neg_sh, CMT);
9229
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_large_neg_sh, CMT);
9230
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_large_neg_sh, CMT);
9231
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_large_neg_sh, CMT);
9232
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_large_neg_sh, CMT);
9233
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_large_neg_sh, CMT);
9242
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_n.c
9244
+#include <arm_neon.h>
9245
+#include "arm-neon-ref.h"
9246
+#include "compute-ref-data.h"
9248
+/* Expected values of cumulative_saturation flag. */
9249
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
9250
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
9251
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
9252
+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1;
9253
+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1;
9254
+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1;
9256
+/* Expected results. */
9257
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xf9, 0xfa,
9258
+ 0xfa, 0xfb, 0xfb, 0xfc };
9259
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff9, 0xfff9, 0xfffa };
9260
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc };
9261
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
9262
+ 0xff, 0xff, 0xff, 0xff };
9263
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
9264
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
9266
+/* Expected values of cumulative_saturation flag with shift by 3. */
9267
+int VECT_VAR(expected_cumulative_sat_sh3,int,16,8) = 1;
9268
+int VECT_VAR(expected_cumulative_sat_sh3,int,32,4) = 1;
9269
+int VECT_VAR(expected_cumulative_sat_sh3,int,64,2) = 1;
9270
+int VECT_VAR(expected_cumulative_sat_sh3,uint,16,8) = 1;
9271
+int VECT_VAR(expected_cumulative_sat_sh3,uint,32,4) = 1;
9272
+int VECT_VAR(expected_cumulative_sat_sh3,uint,64,2) = 1;
9274
+/* Expected results with shift by 3. */
9275
+VECT_VAR_DECL(expected_sh3,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
9276
+ 0x7f, 0x7f, 0x7f, 0x7f };
9277
+VECT_VAR_DECL(expected_sh3,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
9278
+VECT_VAR_DECL(expected_sh3,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
9279
+VECT_VAR_DECL(expected_sh3,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
9280
+ 0xff, 0xff, 0xff, 0xff };
9281
+VECT_VAR_DECL(expected_sh3,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
9282
+VECT_VAR_DECL(expected_sh3,uint,32,2) [] = { 0xffffffff, 0xffffffff };
9284
+/* Expected values of cumulative_saturation flag with shift by max
9286
+int VECT_VAR(expected_cumulative_sat_shmax,int,16,8) = 1;
9287
+int VECT_VAR(expected_cumulative_sat_shmax,int,32,4) = 1;
9288
+int VECT_VAR(expected_cumulative_sat_shmax,int,64,2) = 1;
9289
+int VECT_VAR(expected_cumulative_sat_shmax,uint,16,8) = 1;
9290
+int VECT_VAR(expected_cumulative_sat_shmax,uint,32,4) = 1;
9291
+int VECT_VAR(expected_cumulative_sat_shmax,uint,64,2) = 1;
9293
+/* Expected results with shift by max amount. */
9294
+VECT_VAR_DECL(expected_shmax,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
9295
+ 0x7f, 0x7f, 0x7f, 0x7f };
9296
+VECT_VAR_DECL(expected_shmax,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
9297
+VECT_VAR_DECL(expected_shmax,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
9298
+VECT_VAR_DECL(expected_shmax,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
9299
+ 0xff, 0xff, 0xff, 0xff };
9300
+VECT_VAR_DECL(expected_shmax,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
9301
+VECT_VAR_DECL(expected_shmax,uint,32,2) [] = { 0xffffffff, 0xffffffff };
9303
+#define INSN vqrshrn_n
9304
+#define TEST_MSG "VQRSHRN_N"
9306
+#define FNNAME1(NAME) void exec_ ## NAME (void)
9307
+#define FNNAME(NAME) FNNAME1(NAME)
9311
+ /* Basic test: y=vqrshrn_n(x,v), then store the result. */
9312
+#define TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
9313
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \
9314
+ VECT_VAR(vector_res, T1, W2, N) = \
9315
+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \
9317
+ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \
9318
+ VECT_VAR(vector_res, T1, W2, N)); \
9319
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
9321
+ /* Two auxliary macros are necessary to expand INSN */
9322
+#define TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
9323
+ TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
9325
+#define TEST_VQRSHRN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
9326
+ TEST_VQRSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
9329
+ /* vector is twice as large as vector_res. */
9330
+ DECL_VARIABLE(vector, int, 16, 8);
9331
+ DECL_VARIABLE(vector, int, 32, 4);
9332
+ DECL_VARIABLE(vector, int, 64, 2);
9333
+ DECL_VARIABLE(vector, uint, 16, 8);
9334
+ DECL_VARIABLE(vector, uint, 32, 4);
9335
+ DECL_VARIABLE(vector, uint, 64, 2);
9337
+ DECL_VARIABLE(vector_res, int, 8, 8);
9338
+ DECL_VARIABLE(vector_res, int, 16, 4);
9339
+ DECL_VARIABLE(vector_res, int, 32, 2);
9340
+ DECL_VARIABLE(vector_res, uint, 8, 8);
9341
+ DECL_VARIABLE(vector_res, uint, 16, 4);
9342
+ DECL_VARIABLE(vector_res, uint, 32, 2);
9346
+ VLOAD(vector, buffer, q, int, s, 16, 8);
9347
+ VLOAD(vector, buffer, q, int, s, 32, 4);
9348
+ VLOAD(vector, buffer, q, int, s, 64, 2);
9349
+ VLOAD(vector, buffer, q, uint, u, 16, 8);
9350
+ VLOAD(vector, buffer, q, uint, u, 32, 4);
9351
+ VLOAD(vector, buffer, q, uint, u, 64, 2);
9353
+ /* Choose shift amount arbitrarily. */
9355
+ TEST_VQRSHRN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat, CMT);
9356
+ TEST_VQRSHRN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat, CMT);
9357
+ TEST_VQRSHRN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat, CMT);
9358
+ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 2, expected_cumulative_sat, CMT);
9359
+ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat, CMT);
9360
+ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat, CMT);
9362
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT);
9363
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
9364
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
9365
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
9366
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
9367
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
9370
+ /* Another set of tests, shifting max value by 3. */
9371
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
9372
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
9373
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
9374
+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF);
9375
+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF);
9376
+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL);
9379
+#define CMT " (check saturation: shift by 3)"
9380
+ TEST_VQRSHRN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_sh3, CMT);
9381
+ TEST_VQRSHRN_N(int, s, 32, 16, 4, 3, expected_cumulative_sat_sh3, CMT);
9382
+ TEST_VQRSHRN_N(int, s, 64, 32, 2, 3, expected_cumulative_sat_sh3, CMT);
9383
+ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 3, expected_cumulative_sat_sh3, CMT);
9384
+ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat_sh3, CMT);
9385
+ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat_sh3, CMT);
9387
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_sh3, CMT);
9388
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_sh3, CMT);
9389
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_sh3, CMT);
9390
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh3, CMT);
9391
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh3, CMT);
9392
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh3, CMT);
9395
+ /* Shift by max amount. */
9397
+#define CMT " (check saturation: shift by max)"
9398
+ TEST_VQRSHRN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_shmax, CMT);
9399
+ TEST_VQRSHRN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_shmax, CMT);
9400
+ TEST_VQRSHRN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_shmax, CMT);
9401
+ TEST_VQRSHRN_N(uint, u, 16, 8, 8, 8, expected_cumulative_sat_shmax, CMT);
9402
+ TEST_VQRSHRN_N(uint, u, 32, 16, 4, 16, expected_cumulative_sat_shmax, CMT);
9403
+ TEST_VQRSHRN_N(uint, u, 64, 32, 2, 32, expected_cumulative_sat_shmax, CMT);
9405
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_shmax, CMT);
9406
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_shmax, CMT);
9407
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_shmax, CMT);
9408
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_shmax, CMT);
9409
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_shmax, CMT);
9410
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_shmax, CMT);
9415
+ exec_vqrshrn_n ();
9419
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_n.c
9421
+#include <arm_neon.h>
9422
+#include "arm-neon-ref.h"
9423
+#include "compute-ref-data.h"
9425
+/* Expected values of cumulative_saturation flag with negative unput. */
9426
+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0;
9427
+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0;
9428
+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1;
9430
+/* Expected results with negative input. */
9431
+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
9432
+ 0x0, 0x0, 0x0, 0x0 };
9433
+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9434
+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 };
9436
+/* Expected values of cumulative_saturation flag with max input value
9438
+int VECT_VAR(expected_cumulative_sat_max_sh1,int,16,8) = 1;
9439
+int VECT_VAR(expected_cumulative_sat_max_sh1,int,32,4) = 1;
9440
+int VECT_VAR(expected_cumulative_sat_max_sh1,int,64,2) = 1;
9442
+/* Expected results with max input value shifted by 1. */
9443
+VECT_VAR_DECL(expected_max_sh1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
9444
+ 0xff, 0xff, 0xff, 0xff };
9445
+VECT_VAR_DECL(expected_max_sh1,uint,16,4) [] = { 0xffff, 0xffff,
9447
+VECT_VAR_DECL(expected_max_sh1,uint,32,2) [] = { 0xffffffff, 0xffffffff };
9448
+VECT_VAR_DECL(expected_max_sh1,uint,64,1) [] = { 0x3333333333333333 };
9450
+/* Expected values of cumulative_saturation flag with max input value
9451
+ shifted by max amount. */
9452
+int VECT_VAR(expected_cumulative_sat_max_shmax,int,16,8) = 0;
9453
+int VECT_VAR(expected_cumulative_sat_max_shmax,int,32,4) = 0;
9454
+int VECT_VAR(expected_cumulative_sat_max_shmax,int,64,2) = 0;
9456
+/* Expected results with max input value shifted by max amount. */
9457
+VECT_VAR_DECL(expected_max_shmax,uint,8,8) [] = { 0x80, 0x80, 0x80, 0x80,
9458
+ 0x80, 0x80, 0x80, 0x80 };
9459
+VECT_VAR_DECL(expected_max_shmax,uint,16,4) [] = { 0x8000, 0x8000,
9461
+VECT_VAR_DECL(expected_max_shmax,uint,32,2) [] = { 0x80000000, 0x80000000 };
9463
+/* Expected values of cumulative_saturation flag with min input value
9464
+ shifted by max amount. */
9465
+int VECT_VAR(expected_cumulative_sat_min_shmax,int,16,8) = 1;
9466
+int VECT_VAR(expected_cumulative_sat_min_shmax,int,32,4) = 1;
9467
+int VECT_VAR(expected_cumulative_sat_min_shmax,int,64,2) = 1;
9469
+/* Expected results with min input value shifted by max amount. */
9470
+VECT_VAR_DECL(expected_min_shmax,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
9471
+ 0x0, 0x0, 0x0, 0x0 };
9472
+VECT_VAR_DECL(expected_min_shmax,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9473
+VECT_VAR_DECL(expected_min_shmax,uint,32,2) [] = { 0x0, 0x0 };
9475
+/* Expected values of cumulative_saturation flag with inputs in usual
9477
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
9478
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1;
9479
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
9481
+/* Expected results with inputs in usual range. */
9482
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x49, 0x49, 0x49, 0x49,
9483
+ 0x49, 0x49, 0x49, 0x49 };
9484
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9485
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbf, 0xdeadbf };
9487
+#define INSN vqrshrun_n
9488
+#define TEST_MSG "VQRSHRUN_N"
9490
+#define FNNAME1(NAME) void exec_ ## NAME (void)
9491
+#define FNNAME(NAME) FNNAME1(NAME)
9495
+ /* Basic test: y=vqrshrun_n(x,v), then store the result. */
9496
+#define TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
9497
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \
9498
+ VECT_VAR(vector_res, uint, W2, N) = \
9499
+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \
9501
+ vst1_u##W2(VECT_VAR(result, uint, W2, N), \
9502
+ VECT_VAR(vector_res, uint, W2, N)); \
9503
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
9505
+ /* Two auxliary macros are necessary to expand INSN */
9506
+#define TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
9507
+ TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
9509
+#define TEST_VQRSHRUN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
9510
+ TEST_VQRSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
9513
+ /* vector is twice as large as vector_res. */
9514
+ DECL_VARIABLE(vector, int, 16, 8);
9515
+ DECL_VARIABLE(vector, int, 32, 4);
9516
+ DECL_VARIABLE(vector, int, 64, 2);
9518
+ DECL_VARIABLE(vector_res, uint, 8, 8);
9519
+ DECL_VARIABLE(vector_res, uint, 16, 4);
9520
+ DECL_VARIABLE(vector_res, uint, 32, 2);
9524
+ /* Fill input vector with negative values, to check saturation on
9526
+ VDUP(vector, q, int, s, 16, 8, -2);
9527
+ VDUP(vector, q, int, s, 32, 4, -3);
9528
+ VDUP(vector, q, int, s, 64, 2, -4);
9530
+ /* Choose shift amount arbitrarily. */
9531
+#define CMT " (negative input)"
9532
+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_neg, CMT);
9533
+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 4, expected_cumulative_sat_neg, CMT);
9534
+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat_neg, CMT);
9536
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT);
9537
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT);
9538
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT);
9541
+ /* Fill input vector with max value, to check saturation on
9543
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
9544
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
9545
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
9549
+#define CMT " (check cumulative saturation: shift by 1)"
9550
+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat_max_sh1, CMT);
9551
+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat_max_sh1, CMT);
9552
+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 1, expected_cumulative_sat_max_sh1, CMT);
9554
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh1, CMT);
9555
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh1, CMT);
9556
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh1, CMT);
9559
+ /* shift by max. */
9561
+#define CMT " (check cumulative saturation: shift by max, positive input)"
9562
+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT);
9563
+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT);
9564
+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT);
9566
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shmax, CMT);
9567
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shmax, CMT);
9568
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shmax, CMT);
9571
+ /* Fill input vector with min value, to check saturation on limits. */
9572
+ VDUP(vector, q, int, s, 16, 8, 0x8000);
9573
+ VDUP(vector, q, int, s, 32, 4, 0x80000000);
9574
+ VDUP(vector, q, int, s, 64, 2, 0x8000000000000000LL);
9576
+ /* shift by max */
9578
+#define CMT " (check cumulative saturation: shift by max, negative input)"
9579
+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_min_shmax, CMT);
9580
+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_min_shmax, CMT);
9581
+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_min_shmax, CMT);
9583
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_min_shmax, CMT);
9584
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_min_shmax, CMT);
9585
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_min_shmax, CMT);
9588
+ /* Fill input vector with positive values, to check normal case. */
9589
+ VDUP(vector, q, int, s, 16, 8, 0x1234);
9590
+ VDUP(vector, q, int, s, 32, 4, 0x87654321);
9591
+ VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF);
9593
+ /* shift arbitrary amount. */
9596
+ TEST_VQRSHRUN_N(int, s, 16, 8, 8, 6, expected_cumulative_sat, CMT);
9597
+ TEST_VQRSHRUN_N(int, s, 32, 16, 4, 7, expected_cumulative_sat, CMT);
9598
+ TEST_VQRSHRUN_N(int, s, 64, 32, 2, 8, expected_cumulative_sat, CMT);
9600
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
9601
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
9602
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
9607
+ exec_vqrshrun_n ();
9611
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshl.c
9613
+#include <arm_neon.h>
9614
+#include "arm-neon-ref.h"
9615
+#include "compute-ref-data.h"
9617
+/* Expected values of cumulative_saturation flag with input=0. */
9618
+int VECT_VAR(expected_cumulative_sat_0,int,8,8) = 0;
9619
+int VECT_VAR(expected_cumulative_sat_0,int,16,4) = 0;
9620
+int VECT_VAR(expected_cumulative_sat_0,int,32,2) = 0;
9621
+int VECT_VAR(expected_cumulative_sat_0,int,64,1) = 0;
9622
+int VECT_VAR(expected_cumulative_sat_0,uint,8,8) = 0;
9623
+int VECT_VAR(expected_cumulative_sat_0,uint,16,4) = 0;
9624
+int VECT_VAR(expected_cumulative_sat_0,uint,32,2) = 0;
9625
+int VECT_VAR(expected_cumulative_sat_0,uint,64,1) = 0;
9626
+int VECT_VAR(expected_cumulative_sat_0,int,8,16) = 0;
9627
+int VECT_VAR(expected_cumulative_sat_0,int,16,8) = 0;
9628
+int VECT_VAR(expected_cumulative_sat_0,int,32,4) = 0;
9629
+int VECT_VAR(expected_cumulative_sat_0,int,64,2) = 0;
9630
+int VECT_VAR(expected_cumulative_sat_0,uint,8,16) = 0;
9631
+int VECT_VAR(expected_cumulative_sat_0,uint,16,8) = 0;
9632
+int VECT_VAR(expected_cumulative_sat_0,uint,32,4) = 0;
9633
+int VECT_VAR(expected_cumulative_sat_0,uint,64,2) = 0;
9635
+/* Expected results with input=0. */
9636
+VECT_VAR_DECL(expected_0,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
9637
+ 0x0, 0x0, 0x0, 0x0 };
9638
+VECT_VAR_DECL(expected_0,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9639
+VECT_VAR_DECL(expected_0,int,32,2) [] = { 0x0, 0x0 };
9640
+VECT_VAR_DECL(expected_0,int,64,1) [] = { 0x0 };
9641
+VECT_VAR_DECL(expected_0,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
9642
+ 0x0, 0x0, 0x0, 0x0 };
9643
+VECT_VAR_DECL(expected_0,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9644
+VECT_VAR_DECL(expected_0,uint,32,2) [] = { 0x0, 0x0 };
9645
+VECT_VAR_DECL(expected_0,uint,64,1) [] = { 0x0 };
9646
+VECT_VAR_DECL(expected_0,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
9647
+ 0x0, 0x0, 0x0, 0x0,
9648
+ 0x0, 0x0, 0x0, 0x0,
9649
+ 0x0, 0x0, 0x0, 0x0 };
9650
+VECT_VAR_DECL(expected_0,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
9651
+ 0x0, 0x0, 0x0, 0x0 };
9652
+VECT_VAR_DECL(expected_0,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9653
+VECT_VAR_DECL(expected_0,int,64,2) [] = { 0x0, 0x0 };
9654
+VECT_VAR_DECL(expected_0,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
9655
+ 0x0, 0x0, 0x0, 0x0,
9656
+ 0x0, 0x0, 0x0, 0x0,
9657
+ 0x0, 0x0, 0x0, 0x0 };
9658
+VECT_VAR_DECL(expected_0,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
9659
+ 0x0, 0x0, 0x0, 0x0 };
9660
+VECT_VAR_DECL(expected_0,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9661
+VECT_VAR_DECL(expected_0,uint,64,2) [] = { 0x0, 0x0 };
9663
+/* Expected values of cumulative_saturation flag with input=0 and
9664
+ negative shift amount. */
9665
+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,8) = 0;
9666
+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,4) = 0;
9667
+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,2) = 0;
9668
+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,1) = 0;
9669
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,8) = 0;
9670
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,4) = 0;
9671
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,2) = 0;
9672
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,1) = 0;
9673
+int VECT_VAR(expected_cumulative_sat_0_neg,int,8,16) = 0;
9674
+int VECT_VAR(expected_cumulative_sat_0_neg,int,16,8) = 0;
9675
+int VECT_VAR(expected_cumulative_sat_0_neg,int,32,4) = 0;
9676
+int VECT_VAR(expected_cumulative_sat_0_neg,int,64,2) = 0;
9677
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,8,16) = 0;
9678
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,16,8) = 0;
9679
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,32,4) = 0;
9680
+int VECT_VAR(expected_cumulative_sat_0_neg,uint,64,2) = 0;
9682
+/* Expected results with input=0 and negative shift amount. */
9683
+VECT_VAR_DECL(expected_0_neg,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
9684
+ 0x0, 0x0, 0x0, 0x0 };
9685
+VECT_VAR_DECL(expected_0_neg,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9686
+VECT_VAR_DECL(expected_0_neg,int,32,2) [] = { 0x0, 0x0 };
9687
+VECT_VAR_DECL(expected_0_neg,int,64,1) [] = { 0x0 };
9688
+VECT_VAR_DECL(expected_0_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
9689
+ 0x0, 0x0, 0x0, 0x0 };
9690
+VECT_VAR_DECL(expected_0_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9691
+VECT_VAR_DECL(expected_0_neg,uint,32,2) [] = { 0x0, 0x0 };
9692
+VECT_VAR_DECL(expected_0_neg,uint,64,1) [] = { 0x0 };
9693
+VECT_VAR_DECL(expected_0_neg,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
9694
+ 0x0, 0x0, 0x0, 0x0,
9695
+ 0x0, 0x0, 0x0, 0x0,
9696
+ 0x0, 0x0, 0x0, 0x0 };
9697
+VECT_VAR_DECL(expected_0_neg,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
9698
+ 0x0, 0x0, 0x0, 0x0 };
9699
+VECT_VAR_DECL(expected_0_neg,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9700
+VECT_VAR_DECL(expected_0_neg,int,64,2) [] = { 0x0, 0x0 };
9701
+VECT_VAR_DECL(expected_0_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
9702
+ 0x0, 0x0, 0x0, 0x0,
9703
+ 0x0, 0x0, 0x0, 0x0,
9704
+ 0x0, 0x0, 0x0, 0x0 };
9705
+VECT_VAR_DECL(expected_0_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
9706
+ 0x0, 0x0, 0x0, 0x0 };
9707
+VECT_VAR_DECL(expected_0_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
9708
+VECT_VAR_DECL(expected_0_neg,uint,64,2) [] = { 0x0, 0x0 };
9710
+/* Expected values of cumulative_saturation flag. */
9711
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
9712
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
9713
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
9714
+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0;
9715
+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1;
9716
+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1;
9717
+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1;
9718
+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0;
9719
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 1;
9720
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 1;
9721
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1;
9722
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 1;
9723
+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1;
9724
+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1;
9725
+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1;
9726
+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1;
9728
+/* Expected results. */
9729
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6,
9730
+ 0xe8, 0xea, 0xec, 0xee };
9731
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 };
9732
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 };
9733
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffe };
9734
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
9735
+ 0xff, 0xff, 0xff, 0xff };
9736
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
9737
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
9738
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffffffffffe };
9739
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
9740
+ 0x80, 0x80, 0x80, 0x80,
9741
+ 0x80, 0x80, 0x80, 0x80,
9742
+ 0x80, 0x80, 0x80, 0x80 };
9743
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000,
9744
+ 0x8000, 0x8000, 0x8000, 0x8000 };
9745
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x80000000, 0x80000000,
9746
+ 0x80000000, 0x80000000 };
9747
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x8000000000000000,
9748
+ 0x8000000000000000 };
9749
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
9750
+ 0xff, 0xff, 0xff, 0xff,
9751
+ 0xff, 0xff, 0xff, 0xff,
9752
+ 0xff, 0xff, 0xff, 0xff };
9753
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
9754
+ 0xffff, 0xffff, 0xffff, 0xffff };
9755
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
9756
+ 0xffffffff, 0xffffffff };
9757
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff,
9758
+ 0xffffffffffffffff };
9760
+/* Expected values of cumulative_sat_saturation flag with negative shift
9762
+int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 0;
9763
+int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 0;
9764
+int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 0;
9765
+int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 0;
9766
+int VECT_VAR(expected_cumulative_sat_neg,uint,8,8) = 0;
9767
+int VECT_VAR(expected_cumulative_sat_neg,uint,16,4) = 0;
9768
+int VECT_VAR(expected_cumulative_sat_neg,uint,32,2) = 0;
9769
+int VECT_VAR(expected_cumulative_sat_neg,uint,64,1) = 0;
9770
+int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 0;
9771
+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 0;
9772
+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 0;
9773
+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 0;
9774
+int VECT_VAR(expected_cumulative_sat_neg,uint,8,16) = 0;
9775
+int VECT_VAR(expected_cumulative_sat_neg,uint,16,8) = 0;
9776
+int VECT_VAR(expected_cumulative_sat_neg,uint,32,4) = 0;
9777
+int VECT_VAR(expected_cumulative_sat_neg,uint,64,2) = 0;
9779
+/* Expected results with negative shift amount. */
9780
+VECT_VAR_DECL(expected_neg,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9,
9781
+ 0xfa, 0xfa, 0xfb, 0xfb };
9782
+VECT_VAR_DECL(expected_neg,int,16,4) [] = { 0xfffc, 0xfffc, 0xfffc, 0xfffc };
9783
+VECT_VAR_DECL(expected_neg,int,32,2) [] = { 0xfffffffe, 0xfffffffe };
9784
+VECT_VAR_DECL(expected_neg,int,64,1) [] = { 0xffffffffffffffff };
9785
+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x78, 0x78, 0x79, 0x79,
9786
+ 0x7a, 0x7a, 0x7b, 0x7b };
9787
+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x3ffc, 0x3ffc, 0x3ffc, 0x3ffc };
9788
+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x1ffffffe, 0x1ffffffe };
9789
+VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0xfffffffffffffff };
9790
+VECT_VAR_DECL(expected_neg,int,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
9791
+ 0xff, 0xff, 0xff, 0xff,
9792
+ 0xff, 0xff, 0xff, 0xff,
9793
+ 0xff, 0xff, 0xff, 0xff };
9794
+VECT_VAR_DECL(expected_neg,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
9795
+ 0xffff, 0xffff, 0xffff, 0xffff };
9796
+VECT_VAR_DECL(expected_neg,int,32,4) [] = { 0xffffffff, 0xffffffff,
9797
+ 0xffffffff, 0xffffffff };
9798
+VECT_VAR_DECL(expected_neg,int,64,2) [] = { 0xffffffffffffffff,
9799
+ 0xffffffffffffffff };
9800
+VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x1, 0x1, 0x1, 0x1,
9801
+ 0x1, 0x1, 0x1, 0x1,
9802
+ 0x1, 0x1, 0x1, 0x1,
9803
+ 0x1, 0x1, 0x1, 0x1 };
9804
+VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x1f, 0x1f, 0x1f, 0x1f,
9805
+ 0x1f, 0x1f, 0x1f, 0x1f };
9806
+VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x7ffff, 0x7ffff,
9807
+ 0x7ffff, 0x7ffff };
9808
+VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0xfffffffffff, 0xfffffffffff };
9810
+/* Expected values of cumulative_sat_saturation flag with negative
9811
+ input and large shift amount. */
9812
+int VECT_VAR(expected_cumulative_sat_neg_large,int,8,8) = 1;
9813
+int VECT_VAR(expected_cumulative_sat_neg_large,int,16,4) = 1;
9814
+int VECT_VAR(expected_cumulative_sat_neg_large,int,32,2) = 1;
9815
+int VECT_VAR(expected_cumulative_sat_neg_large,int,64,1) = 1;
9816
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,8,8) = 1;
9817
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,16,4) = 1;
9818
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,32,2) = 1;
9819
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,64,1) = 1;
9820
+int VECT_VAR(expected_cumulative_sat_neg_large,int,8,16) = 1;
9821
+int VECT_VAR(expected_cumulative_sat_neg_large,int,16,8) = 1;
9822
+int VECT_VAR(expected_cumulative_sat_neg_large,int,32,4) = 1;
9823
+int VECT_VAR(expected_cumulative_sat_neg_large,int,64,2) = 1;
9824
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,8,16) = 1;
9825
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,16,8) = 1;
9826
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,32,4) = 1;
9827
+int VECT_VAR(expected_cumulative_sat_neg_large,uint,64,2) = 1;
9829
+/* Expected results with negative input and large shift amount. */
9830
+VECT_VAR_DECL(expected_neg_large,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80,
9831
+ 0x80, 0x80, 0x80, 0x80 };
9832
+VECT_VAR_DECL(expected_neg_large,int,16,4) [] = { 0x8000, 0x8000,
9834
+VECT_VAR_DECL(expected_neg_large,int,32,2) [] = { 0x80000000, 0x80000000 };
9835
+VECT_VAR_DECL(expected_neg_large,int,64,1) [] = { 0x8000000000000000 };
9836
+VECT_VAR_DECL(expected_neg_large,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
9837
+ 0xff, 0xff, 0xff, 0xff };
9838
+VECT_VAR_DECL(expected_neg_large,uint,16,4) [] = { 0xffff, 0xffff,
9840
+VECT_VAR_DECL(expected_neg_large,uint,32,2) [] = { 0xffffffff, 0xffffffff };
9841
+VECT_VAR_DECL(expected_neg_large,uint,64,1) [] = { 0xffffffffffffffff };
9842
+VECT_VAR_DECL(expected_neg_large,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
9843
+ 0x80, 0x80, 0x80, 0x80,
9844
+ 0x80, 0x80, 0x80, 0x80,
9845
+ 0x80, 0x80, 0x80, 0x80 };
9846
+VECT_VAR_DECL(expected_neg_large,int,16,8) [] = { 0x8000, 0x8000,
9850
+VECT_VAR_DECL(expected_neg_large,int,32,4) [] = { 0x80000000, 0x80000000,
9851
+ 0x80000000, 0x80000000 };
9852
+VECT_VAR_DECL(expected_neg_large,int,64,2) [] = { 0x8000000000000000,
9853
+ 0x8000000000000000 };
9854
+VECT_VAR_DECL(expected_neg_large,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
9855
+ 0xff, 0xff, 0xff, 0xff,
9856
+ 0xff, 0xff, 0xff, 0xff,
9857
+ 0xff, 0xff, 0xff, 0xff };
9858
+VECT_VAR_DECL(expected_neg_large,uint,16,8) [] = { 0xffff, 0xffff,
9862
+VECT_VAR_DECL(expected_neg_large,uint,32,4) [] = { 0xffffffff, 0xffffffff,
9863
+ 0xffffffff, 0xffffffff };
9864
+VECT_VAR_DECL(expected_neg_large,uint,64,2) [] = { 0xffffffffffffffff,
9865
+ 0xffffffffffffffff };
9867
+/* Expected values of cumulative_sat_saturation flag with max input
9868
+ and shift by -1. */
9869
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,8,8) = 0;
9870
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,16,4) = 0;
9871
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,32,2) = 0;
9872
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,64,1) = 0;
9873
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,8,8) = 0;
9874
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,16,4) = 0;
9875
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,32,2) = 0;
9876
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,64,1) = 0;
9877
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,8,16) = 0;
9878
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,16,8) = 0;
9879
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,32,4) = 0;
9880
+int VECT_VAR(expected_cumulative_sat_max_minus1,int,64,2) = 0;
9881
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,8,16) = 0;
9882
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,16,8) = 0;
9883
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,32,4) = 0;
9884
+int VECT_VAR(expected_cumulative_sat_max_minus1,uint,64,2) = 0;
9886
+/* Expected results with max input and shift by -1. */
9887
+VECT_VAR_DECL(expected_max_minus1,int,8,8) [] = { 0x3f, 0x3f, 0x3f, 0x3f,
9888
+ 0x3f, 0x3f, 0x3f, 0x3f };
9889
+VECT_VAR_DECL(expected_max_minus1,int,16,4) [] = { 0x3fff, 0x3fff,
9891
+VECT_VAR_DECL(expected_max_minus1,int,32,2) [] = { 0x3fffffff, 0x3fffffff };
9892
+VECT_VAR_DECL(expected_max_minus1,int,64,1) [] = { 0x3fffffffffffffff };
9893
+VECT_VAR_DECL(expected_max_minus1,uint,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
9894
+ 0x7f, 0x7f, 0x7f, 0x7f };
9895
+VECT_VAR_DECL(expected_max_minus1,uint,16,4) [] = { 0x7fff, 0x7fff,
9897
+VECT_VAR_DECL(expected_max_minus1,uint,32,2) [] = { 0x7fffffff, 0x7fffffff };
9898
+VECT_VAR_DECL(expected_max_minus1,uint,64,1) [] = { 0x7fffffffffffffff };
9899
+VECT_VAR_DECL(expected_max_minus1,int,8,16) [] = { 0x3f, 0x3f, 0x3f, 0x3f,
9900
+ 0x3f, 0x3f, 0x3f, 0x3f,
9901
+ 0x3f, 0x3f, 0x3f, 0x3f,
9902
+ 0x3f, 0x3f, 0x3f, 0x3f };
9903
+VECT_VAR_DECL(expected_max_minus1,int,16,8) [] = { 0x3fff, 0x3fff,
9907
+VECT_VAR_DECL(expected_max_minus1,int,32,4) [] = { 0x3fffffff, 0x3fffffff,
9908
+ 0x3fffffff, 0x3fffffff };
9909
+VECT_VAR_DECL(expected_max_minus1,int,64,2) [] = { 0x3fffffffffffffff,
9910
+ 0x3fffffffffffffff };
9911
+VECT_VAR_DECL(expected_max_minus1,uint,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
9912
+ 0x7f, 0x7f, 0x7f, 0x7f,
9913
+ 0x7f, 0x7f, 0x7f, 0x7f,
9914
+ 0x7f, 0x7f, 0x7f, 0x7f };
9915
+VECT_VAR_DECL(expected_max_minus1,uint,16,8) [] = { 0x7fff, 0x7fff,
9919
+VECT_VAR_DECL(expected_max_minus1,uint,32,4) [] = { 0x7fffffff, 0x7fffffff,
9920
+ 0x7fffffff, 0x7fffffff };
9921
+VECT_VAR_DECL(expected_max_minus1,uint,64,2) [] = { 0x7fffffffffffffff,
9922
+ 0x7fffffffffffffff };
9924
+/* Expected values of cumulative_sat_saturation flag with max input
9925
+ and large shift amount. */
9926
+int VECT_VAR(expected_cumulative_sat_max_large,int,8,8) = 1;
9927
+int VECT_VAR(expected_cumulative_sat_max_large,int,16,4) = 1;
9928
+int VECT_VAR(expected_cumulative_sat_max_large,int,32,2) = 1;
9929
+int VECT_VAR(expected_cumulative_sat_max_large,int,64,1) = 1;
9930
+int VECT_VAR(expected_cumulative_sat_max_large,uint,8,8) = 1;
9931
+int VECT_VAR(expected_cumulative_sat_max_large,uint,16,4) = 1;
9932
+int VECT_VAR(expected_cumulative_sat_max_large,uint,32,2) = 1;
9933
+int VECT_VAR(expected_cumulative_sat_max_large,uint,64,1) = 1;
9934
+int VECT_VAR(expected_cumulative_sat_max_large,int,8,16) = 1;
9935
+int VECT_VAR(expected_cumulative_sat_max_large,int,16,8) = 1;
9936
+int VECT_VAR(expected_cumulative_sat_max_large,int,32,4) = 1;
9937
+int VECT_VAR(expected_cumulative_sat_max_large,int,64,2) = 1;
9938
+int VECT_VAR(expected_cumulative_sat_max_large,uint,8,16) = 1;
9939
+int VECT_VAR(expected_cumulative_sat_max_large,uint,16,8) = 1;
9940
+int VECT_VAR(expected_cumulative_sat_max_large,uint,32,4) = 1;
9941
+int VECT_VAR(expected_cumulative_sat_max_large,uint,64,2) = 1;
9943
+/* Expected results with max input and large shift amount. */
9944
+VECT_VAR_DECL(expected_max_large,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
9945
+ 0x7f, 0x7f, 0x7f, 0x7f };
9946
+VECT_VAR_DECL(expected_max_large,int,16,4) [] = { 0x7fff, 0x7fff,
9948
+VECT_VAR_DECL(expected_max_large,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
9949
+VECT_VAR_DECL(expected_max_large,int,64,1) [] = { 0x7fffffffffffffff };
9950
+VECT_VAR_DECL(expected_max_large,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
9951
+ 0xff, 0xff, 0xff, 0xff };
9952
+VECT_VAR_DECL(expected_max_large,uint,16,4) [] = { 0xffff, 0xffff,
9954
+VECT_VAR_DECL(expected_max_large,uint,32,2) [] = { 0xffffffff, 0xffffffff };
9955
+VECT_VAR_DECL(expected_max_large,uint,64,1) [] = { 0xffffffffffffffff };
9956
+VECT_VAR_DECL(expected_max_large,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
9957
+ 0x7f, 0x7f, 0x7f, 0x7f,
9958
+ 0x7f, 0x7f, 0x7f, 0x7f,
9959
+ 0x7f, 0x7f, 0x7f, 0x7f };
9960
+VECT_VAR_DECL(expected_max_large,int,16,8) [] = { 0x7fff, 0x7fff,
9964
+VECT_VAR_DECL(expected_max_large,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
9965
+ 0x7fffffff, 0x7fffffff };
9966
+VECT_VAR_DECL(expected_max_large,int,64,2) [] = { 0x7fffffffffffffff,
9967
+ 0x7fffffffffffffff };
9968
+VECT_VAR_DECL(expected_max_large,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
9969
+ 0xff, 0xff, 0xff, 0xff,
9970
+ 0xff, 0xff, 0xff, 0xff,
9971
+ 0xff, 0xff, 0xff, 0xff };
9972
+VECT_VAR_DECL(expected_max_large,uint,16,8) [] = { 0xffff, 0xffff,
9976
+VECT_VAR_DECL(expected_max_large,uint,32,4) [] = { 0xffffffff, 0xffffffff,
9977
+ 0xffffffff, 0xffffffff };
9978
+VECT_VAR_DECL(expected_max_large,uint,64,2) [] = { 0xffffffffffffffff,
9979
+ 0xffffffffffffffff };
9981
+/* Expected values of cumulative_sat_saturation flag with saturation
9982
+ on 64-bits values. */
9983
+int VECT_VAR(expected_cumulative_sat_64,int,64,1) = 1;
9984
+int VECT_VAR(expected_cumulative_sat_64,int,64,2) = 1;
9986
+/* Expected results with saturation on 64-bits values.. */
9987
+VECT_VAR_DECL(expected_64,int,64,1) [] = { 0x8000000000000000 };
9988
+VECT_VAR_DECL(expected_64,int,64,2) [] = { 0x7fffffffffffffff,
9989
+ 0x7fffffffffffffff };
9992
+#define TEST_MSG "VQSHL/VQSHLQ"
9994
+#define FNNAME1(NAME) void exec_ ## NAME (void)
9995
+#define FNNAME(NAME) FNNAME1(NAME)
9999
+ /* Basic test: v3=vqshl(v1,v2), then store the result. */
10000
+#define TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
10001
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
10002
+ VECT_VAR(vector_res, T1, W, N) = \
10003
+ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \
10004
+ VECT_VAR(vector_shift, T3, W, N)); \
10005
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \
10006
+ VECT_VAR(vector_res, T1, W, N)); \
10007
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
10009
+ /* Two auxliary macros are necessary to expand INSN */
10010
+#define TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
10011
+ TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
10013
+#define TEST_VQSHL(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
10014
+ TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
10017
+ DECL_VARIABLE_ALL_VARIANTS(vector);
10018
+ DECL_VARIABLE_ALL_VARIANTS(vector_res);
10020
+ DECL_VARIABLE_SIGNED_VARIANTS(vector_shift);
10022
+ clean_results ();
10024
+ /* Fill input vector with 0, to check saturation on limits. */
10025
+ VDUP(vector, , int, s, 8, 8, 0);
10026
+ VDUP(vector, , int, s, 16, 4, 0);
10027
+ VDUP(vector, , int, s, 32, 2, 0);
10028
+ VDUP(vector, , int, s, 64, 1, 0);
10029
+ VDUP(vector, , uint, u, 8, 8, 0);
10030
+ VDUP(vector, , uint, u, 16, 4, 0);
10031
+ VDUP(vector, , uint, u, 32, 2, 0);
10032
+ VDUP(vector, , uint, u, 64, 1, 0);
10033
+ VDUP(vector, q, int, s, 8, 16, 0);
10034
+ VDUP(vector, q, int, s, 16, 8, 0);
10035
+ VDUP(vector, q, int, s, 32, 4, 0);
10036
+ VDUP(vector, q, int, s, 64, 2, 0);
10037
+ VDUP(vector, q, uint, u, 8, 16, 0);
10038
+ VDUP(vector, q, uint, u, 16, 8, 0);
10039
+ VDUP(vector, q, uint, u, 32, 4, 0);
10040
+ VDUP(vector, q, uint, u, 64, 2, 0);
10042
+ /* Choose init value arbitrarily, will be used as shift amount */
10043
+ /* Use values equal or one-less-than the type width to check
10044
+ behaviour on limits. */
10046
+ /* 64-bits vectors first. */
10047
+ /* Shift 8-bits lanes by 7... */
10048
+ VDUP(vector_shift, , int, s, 8, 8, 7);
10049
+ /* ... except: lane 0 (by 6), lane 1 (by 8) and lane 2 (by 9). */
10050
+ VSET_LANE(vector_shift, , int, s, 8, 8, 0, 6);
10051
+ VSET_LANE(vector_shift, , int, s, 8, 8, 1, 8);
10052
+ VSET_LANE(vector_shift, , int, s, 8, 8, 2, 9);
10054
+ /* Shift 16-bits lanes by 15... */
10055
+ VDUP(vector_shift, , int, s, 16, 4, 15);
10056
+ /* ... except: lane 0 (by 14), lane 1 (by 16), and lane 2 (by 17). */
10057
+ VSET_LANE(vector_shift, , int, s, 16, 4, 0, 14);
10058
+ VSET_LANE(vector_shift, , int, s, 16, 4, 1, 16);
10059
+ VSET_LANE(vector_shift, , int, s, 16, 4, 2, 17);
10061
+ /* Shift 32-bits lanes by 31... */
10062
+ VDUP(vector_shift, , int, s, 32, 2, 31);
10063
+ /* ... except lane 1 (by 30). */
10064
+ VSET_LANE(vector_shift, , int, s, 32, 2, 1, 30);
10066
+ /* Shift 64 bits lane by 63. */
10067
+ VDUP(vector_shift, , int, s, 64, 1, 63);
10069
+ /* 128-bits vectors. */
10070
+ /* Shift 8-bits lanes by 8. */
10071
+ VDUP(vector_shift, q, int, s, 8, 16, 8);
10072
+ /* Shift 16-bits lanes by 16. */
10073
+ VDUP(vector_shift, q, int, s, 16, 8, 16);
10074
+ /* Shift 32-bits lanes by 32... */
10075
+ VDUP(vector_shift, q, int, s, 32, 4, 32);
10076
+ /* ... except lane 1 (by 33). */
10077
+ VSET_LANE(vector_shift, q, int, s, 32, 4, 1, 33);
10079
+ /* Shift 64-bits lanes by 64... */
10080
+ VDUP(vector_shift, q, int, s, 64, 2, 64);
10081
+ /* ... except lane 1 (by 62). */
10082
+ VSET_LANE(vector_shift, q, int, s, 64, 2, 1, 62);
10084
+#define CMT " (with input = 0)"
10085
+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_0, CMT);
10086
+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_0, CMT);
10087
+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_0, CMT);
10088
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_0, CMT);
10089
+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0, CMT);
10090
+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0, CMT);
10091
+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0, CMT);
10092
+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0, CMT);
10093
+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0, CMT);
10094
+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0, CMT);
10095
+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0, CMT);
10096
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0, CMT);
10097
+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0, CMT);
10098
+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0, CMT);
10099
+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0, CMT);
10100
+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0, CMT);
10102
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0, CMT);
10103
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0, CMT);
10104
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0, CMT);
10105
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0, CMT);
10106
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0, CMT);
10107
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0, CMT);
10108
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0, CMT);
10109
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0, CMT);
10110
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0, CMT);
10111
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0, CMT);
10112
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0, CMT);
10113
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0, CMT);
10114
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0, CMT);
10115
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0, CMT);
10116
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0, CMT);
10117
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0, CMT);
10120
+ /* Use negative shift amounts */
10121
+ VDUP(vector_shift, , int, s, 8, 8, -1);
10122
+ VDUP(vector_shift, , int, s, 16, 4, -2);
10123
+ VDUP(vector_shift, , int, s, 32, 2, -3);
10124
+ VDUP(vector_shift, , int, s, 64, 1, -4);
10125
+ VDUP(vector_shift, q, int, s, 8, 16, -7);
10126
+ VDUP(vector_shift, q, int, s, 16, 8, -11);
10127
+ VDUP(vector_shift, q, int, s, 32, 4, -13);
10128
+ VDUP(vector_shift, q, int, s, 64, 2, -20);
10131
+#define CMT " (input 0 and negative shift amount)"
10132
+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_0_neg, CMT);
10133
+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_0_neg, CMT);
10134
+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_0_neg, CMT);
10135
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_0_neg, CMT);
10136
+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_0_neg, CMT);
10137
+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_0_neg, CMT);
10138
+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_0_neg, CMT);
10139
+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_0_neg, CMT);
10140
+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_0_neg, CMT);
10141
+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_0_neg, CMT);
10142
+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_0_neg, CMT);
10143
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_0_neg, CMT);
10144
+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_0_neg, CMT);
10145
+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_0_neg, CMT);
10146
+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_0_neg, CMT);
10147
+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_0_neg, CMT);
10149
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_0_neg, CMT);
10150
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_0_neg, CMT);
10151
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_0_neg, CMT);
10152
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_0_neg, CMT);
10153
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_0_neg, CMT);
10154
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_0_neg, CMT);
10155
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_0_neg, CMT);
10156
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_0_neg, CMT);
10157
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_0_neg, CMT);
10158
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_0_neg, CMT);
10159
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_0_neg, CMT);
10160
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_0_neg, CMT);
10161
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_0_neg, CMT);
10162
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_0_neg, CMT);
10163
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_0_neg, CMT);
10164
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_0_neg, CMT);
10166
+ /* Test again, with predefined input values. */
10167
+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
10169
+ /* Choose init value arbitrarily, will be used as shift amount. */
10170
+ VDUP(vector_shift, , int, s, 8, 8, 1);
10171
+ VDUP(vector_shift, , int, s, 16, 4, 3);
10172
+ VDUP(vector_shift, , int, s, 32, 2, 8);
10173
+ VDUP(vector_shift, , int, s, 64, 1, -3);
10174
+ VDUP(vector_shift, q, int, s, 8, 16, 10);
10175
+ VDUP(vector_shift, q, int, s, 16, 8, 12);
10176
+ VDUP(vector_shift, q, int, s, 32, 4, 32);
10177
+ VDUP(vector_shift, q, int, s, 64, 2, 63);
10181
+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat, CMT);
10182
+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat, CMT);
10183
+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat, CMT);
10184
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat, CMT);
10185
+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat, CMT);
10186
+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat, CMT);
10187
+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat, CMT);
10188
+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat, CMT);
10189
+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat, CMT);
10190
+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat, CMT);
10191
+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat, CMT);
10192
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat, CMT);
10193
+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat, CMT);
10194
+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat, CMT);
10195
+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat, CMT);
10196
+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat, CMT);
10198
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT);
10199
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
10200
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
10201
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT);
10202
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
10203
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
10204
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
10205
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT);
10206
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT);
10207
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
10208
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
10209
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT);
10210
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT);
10211
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT);
10212
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
10213
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT);
10216
+ /* Use negative shift amounts */
10217
+ VDUP(vector_shift, , int, s, 8, 8, -1);
10218
+ VDUP(vector_shift, , int, s, 16, 4, -2);
10219
+ VDUP(vector_shift, , int, s, 32, 2, -3);
10220
+ VDUP(vector_shift, , int, s, 64, 1, -4);
10221
+ VDUP(vector_shift, q, int, s, 8, 16, -7);
10222
+ VDUP(vector_shift, q, int, s, 16, 8, -11);
10223
+ VDUP(vector_shift, q, int, s, 32, 4, -13);
10224
+ VDUP(vector_shift, q, int, s, 64, 2, -20);
10227
+#define CMT " (negative shift amount)"
10228
+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg, CMT);
10229
+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg, CMT);
10230
+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg, CMT);
10231
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg, CMT);
10232
+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg, CMT);
10233
+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg, CMT);
10234
+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg, CMT);
10235
+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg, CMT);
10236
+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg, CMT);
10237
+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg, CMT);
10238
+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg, CMT);
10239
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg, CMT);
10240
+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg, CMT);
10241
+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg, CMT);
10242
+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg, CMT);
10243
+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg, CMT);
10245
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg, CMT);
10246
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg, CMT);
10247
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg, CMT);
10248
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg, CMT);
10249
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT);
10250
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT);
10251
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT);
10252
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT);
10253
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg, CMT);
10254
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg, CMT);
10255
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg, CMT);
10256
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg, CMT);
10257
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT);
10258
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT);
10259
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT);
10260
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT);
10263
+ /* Use large shift amounts. */
10264
+ VDUP(vector_shift, , int, s, 8, 8, 8);
10265
+ VDUP(vector_shift, , int, s, 16, 4, 16);
10266
+ VDUP(vector_shift, , int, s, 32, 2, 32);
10267
+ VDUP(vector_shift, , int, s, 64, 1, 64);
10268
+ VDUP(vector_shift, q, int, s, 8, 16, 8);
10269
+ VDUP(vector_shift, q, int, s, 16, 8, 16);
10270
+ VDUP(vector_shift, q, int, s, 32, 4, 32);
10271
+ VDUP(vector_shift, q, int, s, 64, 2, 64);
10274
+#define CMT " (large shift amount, negative input)"
10275
+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_neg_large, CMT);
10276
+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_neg_large, CMT);
10277
+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_neg_large, CMT);
10278
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_neg_large, CMT);
10279
+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_neg_large, CMT);
10280
+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_neg_large, CMT);
10281
+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_neg_large, CMT);
10282
+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_neg_large, CMT);
10283
+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_neg_large, CMT);
10284
+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_neg_large, CMT);
10285
+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_neg_large, CMT);
10286
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_neg_large, CMT);
10287
+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_neg_large, CMT);
10288
+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_neg_large, CMT);
10289
+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_neg_large, CMT);
10290
+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_neg_large, CMT);
10292
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_neg_large, CMT);
10293
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_neg_large, CMT);
10294
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_neg_large, CMT);
10295
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_neg_large, CMT);
10296
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg_large, CMT);
10297
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg_large, CMT);
10298
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg_large, CMT);
10299
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg_large, CMT);
10300
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_neg_large, CMT);
10301
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_neg_large, CMT);
10302
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_neg_large, CMT);
10303
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_neg_large, CMT);
10304
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg_large, CMT);
10305
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg_large, CMT);
10306
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg_large, CMT);
10307
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg_large, CMT);
10310
+ /* Fill input vector with max value, to check saturation on limits */
10311
+ VDUP(vector, , int, s, 8, 8, 0x7F);
10312
+ VDUP(vector, , int, s, 16, 4, 0x7FFF);
10313
+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF);
10314
+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL);
10315
+ VDUP(vector, , uint, u, 8, 8, 0xFF);
10316
+ VDUP(vector, , uint, u, 16, 4, 0xFFFF);
10317
+ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF);
10318
+ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL);
10319
+ VDUP(vector, q, int, s, 8, 16, 0x7F);
10320
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
10321
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
10322
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
10323
+ VDUP(vector, q, uint, u, 8, 16, 0xFF);
10324
+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF);
10325
+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF);
10326
+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL);
10328
+ /* Shift by -1 */
10329
+ VDUP(vector_shift, , int, s, 8, 8, -1);
10330
+ VDUP(vector_shift, , int, s, 16, 4, -1);
10331
+ VDUP(vector_shift, , int, s, 32, 2, -1);
10332
+ VDUP(vector_shift, , int, s, 64, 1, -1);
10333
+ VDUP(vector_shift, q, int, s, 8, 16, -1);
10334
+ VDUP(vector_shift, q, int, s, 16, 8, -1);
10335
+ VDUP(vector_shift, q, int, s, 32, 4, -1);
10336
+ VDUP(vector_shift, q, int, s, 64, 2, -1);
10339
+#define CMT " (max input, shift by -1)"
10340
+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_max_minus1, CMT);
10341
+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_max_minus1, CMT);
10342
+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_max_minus1, CMT);
10343
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_max_minus1, CMT);
10344
+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_max_minus1, CMT);
10345
+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_max_minus1, CMT);
10346
+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_max_minus1, CMT);
10347
+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_max_minus1, CMT);
10348
+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_max_minus1, CMT);
10349
+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_max_minus1, CMT);
10350
+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_max_minus1, CMT);
10351
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_max_minus1, CMT);
10352
+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_max_minus1, CMT);
10353
+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_max_minus1, CMT);
10354
+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_max_minus1, CMT);
10355
+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_max_minus1, CMT);
10357
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_minus1, CMT);
10358
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_minus1, CMT);
10359
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_minus1, CMT);
10360
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_minus1, CMT);
10361
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_minus1, CMT);
10362
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_minus1, CMT);
10363
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_minus1, CMT);
10364
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_minus1, CMT);
10365
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_minus1, CMT);
10366
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_minus1, CMT);
10367
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_minus1, CMT);
10368
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_minus1, CMT);
10369
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_minus1, CMT);
10370
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_minus1, CMT);
10371
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_minus1, CMT);
10372
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_minus1, CMT);
10375
+ /* Use large shift amounts */
10376
+ VDUP(vector_shift, , int, s, 8, 8, 8);
10377
+ VDUP(vector_shift, , int, s, 16, 4, 16);
10378
+ VDUP(vector_shift, , int, s, 32, 2, 32);
10379
+ VDUP(vector_shift, , int, s, 64, 1, 64);
10380
+ VDUP(vector_shift, q, int, s, 8, 16, 8);
10381
+ VDUP(vector_shift, q, int, s, 16, 8, 16);
10382
+ VDUP(vector_shift, q, int, s, 32, 4, 32);
10383
+ VDUP(vector_shift, q, int, s, 64, 2, 64);
10386
+#define CMT " (max input, large shift amount)"
10387
+ TEST_VQSHL(int, , int, s, 8, 8, expected_cumulative_sat_max_large, CMT);
10388
+ TEST_VQSHL(int, , int, s, 16, 4, expected_cumulative_sat_max_large, CMT);
10389
+ TEST_VQSHL(int, , int, s, 32, 2, expected_cumulative_sat_max_large, CMT);
10390
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_max_large, CMT);
10391
+ TEST_VQSHL(int, , uint, u, 8, 8, expected_cumulative_sat_max_large, CMT);
10392
+ TEST_VQSHL(int, , uint, u, 16, 4, expected_cumulative_sat_max_large, CMT);
10393
+ TEST_VQSHL(int, , uint, u, 32, 2, expected_cumulative_sat_max_large, CMT);
10394
+ TEST_VQSHL(int, , uint, u, 64, 1, expected_cumulative_sat_max_large, CMT);
10395
+ TEST_VQSHL(int, q, int, s, 8, 16, expected_cumulative_sat_max_large, CMT);
10396
+ TEST_VQSHL(int, q, int, s, 16, 8, expected_cumulative_sat_max_large, CMT);
10397
+ TEST_VQSHL(int, q, int, s, 32, 4, expected_cumulative_sat_max_large, CMT);
10398
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_max_large, CMT);
10399
+ TEST_VQSHL(int, q, uint, u, 8, 16, expected_cumulative_sat_max_large, CMT);
10400
+ TEST_VQSHL(int, q, uint, u, 16, 8, expected_cumulative_sat_max_large, CMT);
10401
+ TEST_VQSHL(int, q, uint, u, 32, 4, expected_cumulative_sat_max_large, CMT);
10402
+ TEST_VQSHL(int, q, uint, u, 64, 2, expected_cumulative_sat_max_large, CMT);
10404
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_large, CMT);
10405
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_large, CMT);
10406
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_large, CMT);
10407
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max_large, CMT);
10408
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_large, CMT);
10409
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_large, CMT);
10410
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_large, CMT);
10411
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_large, CMT);
10412
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_large, CMT);
10413
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_large, CMT);
10414
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_large, CMT);
10415
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_large, CMT);
10416
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_large, CMT);
10417
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_large, CMT);
10418
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_large, CMT);
10419
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_large, CMT);
10422
+ /* Check 64 bits saturation. */
10423
+ VDUP(vector, , int, s, 64, 1, -10);
10424
+ VDUP(vector_shift, , int, s, 64, 1, 64);
10425
+ VDUP(vector, q, int, s, 64, 2, 10);
10426
+ VDUP(vector_shift, q, int, s, 64, 2, 64);
10429
+#define CMT " (check saturation on 64 bits)"
10430
+ TEST_VQSHL(int, , int, s, 64, 1, expected_cumulative_sat_64, CMT);
10431
+ TEST_VQSHL(int, q, int, s, 64, 2, expected_cumulative_sat_64, CMT);
10433
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64, CMT);
10434
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64, CMT);
10442
--- a/src//dev/null
10443
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshl_n.c
10445
+#include <arm_neon.h>
10446
+#include "arm-neon-ref.h"
10447
+#include "compute-ref-data.h"
10449
+/* Expected values of cumulative_saturation flag. */
10450
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
10451
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
10452
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
10453
+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0;
10454
+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1;
10455
+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1;
10456
+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1;
10457
+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1;
10458
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0;
10459
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
10460
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
10461
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
10462
+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1;
10463
+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1;
10464
+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1;
10465
+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1;
10467
+/* Expected results. */
10468
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xc0, 0xc4, 0xc8, 0xcc,
10469
+ 0xd0, 0xd4, 0xd8, 0xdc };
10470
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6 };
10471
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffe0, 0xffffffe2 };
10472
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffc0 };
10473
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
10474
+ 0xff, 0xff, 0xff, 0xff };
10475
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
10476
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
10477
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff };
10478
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xc0, 0xc4, 0xc8, 0xcc,
10479
+ 0xd0, 0xd4, 0xd8, 0xdc,
10480
+ 0xe0, 0xe4, 0xe8, 0xec,
10481
+ 0xf0, 0xf4, 0xf8, 0xfc };
10482
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6,
10483
+ 0xffe8, 0xffea, 0xffec, 0xffee };
10484
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe2,
10485
+ 0xffffffe4, 0xffffffe6 };
10486
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffc0, 0xffffffffffffffc4 };
10487
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
10488
+ 0xff, 0xff, 0xff, 0xff,
10489
+ 0xff, 0xff, 0xff, 0xff,
10490
+ 0xff, 0xff, 0xff, 0xff };
10491
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
10492
+ 0xffff, 0xffff, 0xffff, 0xffff };
10493
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
10494
+ 0xffffffff, 0xffffffff };
10495
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff,
10496
+ 0xffffffffffffffff };
10498
+/* Expected values of cumulative_saturation flag with max positive input. */
10499
+int VECT_VAR(expected_cumulative_sat_max,int,8,8) = 1;
10500
+int VECT_VAR(expected_cumulative_sat_max,int,16,4) = 1;
10501
+int VECT_VAR(expected_cumulative_sat_max,int,32,2) = 1;
10502
+int VECT_VAR(expected_cumulative_sat_max,int,64,1) = 1;
10503
+int VECT_VAR(expected_cumulative_sat_max,uint,8,8) = 1;
10504
+int VECT_VAR(expected_cumulative_sat_max,uint,16,4) = 1;
10505
+int VECT_VAR(expected_cumulative_sat_max,uint,32,2) = 1;
10506
+int VECT_VAR(expected_cumulative_sat_max,uint,64,1) = 1;
10507
+int VECT_VAR(expected_cumulative_sat_max,int,8,16) = 1;
10508
+int VECT_VAR(expected_cumulative_sat_max,int,16,8) = 1;
10509
+int VECT_VAR(expected_cumulative_sat_max,int,32,4) = 1;
10510
+int VECT_VAR(expected_cumulative_sat_max,int,64,2) = 1;
10511
+int VECT_VAR(expected_cumulative_sat_max,uint,8,16) = 1;
10512
+int VECT_VAR(expected_cumulative_sat_max,uint,16,8) = 1;
10513
+int VECT_VAR(expected_cumulative_sat_max,uint,32,4) = 1;
10514
+int VECT_VAR(expected_cumulative_sat_max,uint,64,2) = 1;
10516
+/* Expected results with max positive input. */
10517
+VECT_VAR_DECL(expected_max,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
10518
+ 0x7f, 0x7f, 0x7f, 0x7f };
10519
+VECT_VAR_DECL(expected_max,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
10520
+VECT_VAR_DECL(expected_max,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
10521
+VECT_VAR_DECL(expected_max,int,64,1) [] = { 0x7fffffffffffffff };
10522
+VECT_VAR_DECL(expected_max,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
10523
+ 0xff, 0xff, 0xff, 0xff };
10524
+VECT_VAR_DECL(expected_max,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
10525
+VECT_VAR_DECL(expected_max,uint,32,2) [] = { 0xffffffff, 0xffffffff };
10526
+VECT_VAR_DECL(expected_max,uint,64,1) [] = { 0xffffffffffffffff };
10527
+VECT_VAR_DECL(expected_max,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
10528
+ 0x7f, 0x7f, 0x7f, 0x7f,
10529
+ 0x7f, 0x7f, 0x7f, 0x7f,
10530
+ 0x7f, 0x7f, 0x7f, 0x7f };
10531
+VECT_VAR_DECL(expected_max,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
10532
+ 0x7fff, 0x7fff, 0x7fff, 0x7fff };
10533
+VECT_VAR_DECL(expected_max,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
10534
+ 0x7fffffff, 0x7fffffff };
10535
+VECT_VAR_DECL(expected_max,int,64,2) [] = { 0x7fffffffffffffff,
10536
+ 0x7fffffffffffffff };
10537
+VECT_VAR_DECL(expected_max,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
10538
+ 0xff, 0xff, 0xff, 0xff,
10539
+ 0xff, 0xff, 0xff, 0xff,
10540
+ 0xff, 0xff, 0xff, 0xff };
10541
+VECT_VAR_DECL(expected_max,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
10542
+ 0xffff, 0xffff, 0xffff, 0xffff };
10543
+VECT_VAR_DECL(expected_max,uint,32,4) [] = { 0xffffffff, 0xffffffff,
10544
+ 0xffffffff, 0xffffffff };
10545
+VECT_VAR_DECL(expected_max,uint,64,2) [] = { 0xffffffffffffffff,
10546
+ 0xffffffffffffffff };
10548
+#define INSN vqshl
10549
+#define TEST_MSG "VQSHL_N/VQSHLQ_N"
10551
+#define FNNAME1(NAME) void exec_ ## NAME ##_n (void)
10552
+#define FNNAME(NAME) FNNAME1(NAME)
10556
+ /* Basic test: v2=vqshl_n(v1,v), then store the result. */
10557
+#define TEST_VQSHL_N2(INSN, Q, T1, T2, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
10558
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \
10559
+ VECT_VAR(vector_res, T1, W, N) = \
10560
+ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \
10562
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \
10563
+ VECT_VAR(vector_res, T1, W, N)); \
10564
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
10566
+ /* Two auxliary macros are necessary to expand INSN */
10567
+#define TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
10568
+ TEST_VQSHL_N2(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
10570
+#define TEST_VQSHL_N(T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
10571
+ TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
10573
+ DECL_VARIABLE_ALL_VARIANTS(vector);
10574
+ DECL_VARIABLE_ALL_VARIANTS(vector_res);
10576
+ clean_results ();
10578
+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
10580
+ /* Choose shift amount arbitrarily. */
10582
+ TEST_VQSHL_N(, int, s, 8, 8, 2, expected_cumulative_sat, CMT);
10583
+ TEST_VQSHL_N(, int, s, 16, 4, 1, expected_cumulative_sat, CMT);
10584
+ TEST_VQSHL_N(, int, s, 32, 2, 1, expected_cumulative_sat, CMT);
10585
+ TEST_VQSHL_N(, int, s, 64, 1, 2, expected_cumulative_sat, CMT);
10586
+ TEST_VQSHL_N(, uint, u, 8, 8, 3, expected_cumulative_sat, CMT);
10587
+ TEST_VQSHL_N(, uint, u, 16, 4, 2, expected_cumulative_sat, CMT);
10588
+ TEST_VQSHL_N(, uint, u, 32, 2, 3, expected_cumulative_sat, CMT);
10589
+ TEST_VQSHL_N(, uint, u, 64, 1, 3, expected_cumulative_sat, CMT);
10591
+ TEST_VQSHL_N(q, int, s, 8, 16, 2, expected_cumulative_sat, CMT);
10592
+ TEST_VQSHL_N(q, int, s, 16, 8, 1, expected_cumulative_sat, CMT);
10593
+ TEST_VQSHL_N(q, int, s, 32, 4, 1, expected_cumulative_sat, CMT);
10594
+ TEST_VQSHL_N(q, int, s, 64, 2, 2, expected_cumulative_sat, CMT);
10595
+ TEST_VQSHL_N(q, uint, u, 8, 16, 3, expected_cumulative_sat, CMT);
10596
+ TEST_VQSHL_N(q, uint, u, 16, 8, 2, expected_cumulative_sat, CMT);
10597
+ TEST_VQSHL_N(q, uint, u, 32, 4, 3, expected_cumulative_sat, CMT);
10598
+ TEST_VQSHL_N(q, uint, u, 64, 2, 3, expected_cumulative_sat, CMT);
10600
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT);
10601
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
10602
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
10603
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT);
10604
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
10605
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
10606
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
10607
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT);
10608
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT);
10609
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
10610
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
10611
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT);
10612
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT);
10613
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT);
10614
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
10615
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT);
10618
+ /* Fill input vector with max value, to check saturation on limits. */
10619
+ VDUP(vector, , int, s, 8, 8, 0x7F);
10620
+ VDUP(vector, , int, s, 16, 4, 0x7FFF);
10621
+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF);
10622
+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL);
10623
+ VDUP(vector, , uint, u, 8, 8, 0xFF);
10624
+ VDUP(vector, , uint, u, 16, 4, 0xFFFF);
10625
+ VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF);
10626
+ VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL);
10627
+ VDUP(vector, q, int, s, 8, 16, 0x7F);
10628
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
10629
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
10630
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
10631
+ VDUP(vector, q, uint, u, 8, 16, 0xFF);
10632
+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF);
10633
+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF);
10634
+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL);
10637
+#define CMT " (with max input)"
10638
+ TEST_VQSHL_N(, int, s, 8, 8, 2, expected_cumulative_sat_max, CMT);
10639
+ TEST_VQSHL_N(, int, s, 16, 4, 1, expected_cumulative_sat_max, CMT);
10640
+ TEST_VQSHL_N(, int, s, 32, 2, 1, expected_cumulative_sat_max, CMT);
10641
+ TEST_VQSHL_N(, int, s, 64, 1, 2, expected_cumulative_sat_max, CMT);
10642
+ TEST_VQSHL_N(, uint, u, 8, 8, 3, expected_cumulative_sat_max, CMT);
10643
+ TEST_VQSHL_N(, uint, u, 16, 4, 2, expected_cumulative_sat_max, CMT);
10644
+ TEST_VQSHL_N(, uint, u, 32, 2, 3, expected_cumulative_sat_max, CMT);
10645
+ TEST_VQSHL_N(, uint, u, 64, 1, 3, expected_cumulative_sat_max, CMT);
10647
+ TEST_VQSHL_N(q, int, s, 8, 16, 2, expected_cumulative_sat_max, CMT);
10648
+ TEST_VQSHL_N(q, int, s, 16, 8, 1, expected_cumulative_sat_max, CMT);
10649
+ TEST_VQSHL_N(q, int, s, 32, 4, 1, expected_cumulative_sat_max, CMT);
10650
+ TEST_VQSHL_N(q, int, s, 64, 2, 2, expected_cumulative_sat_max, CMT);
10651
+ TEST_VQSHL_N(q, uint, u, 8, 16, 3, expected_cumulative_sat_max, CMT);
10652
+ TEST_VQSHL_N(q, uint, u, 16, 8, 2, expected_cumulative_sat_max, CMT);
10653
+ TEST_VQSHL_N(q, uint, u, 32, 4, 3, expected_cumulative_sat_max, CMT);
10654
+ TEST_VQSHL_N(q, uint, u, 64, 2, 3, expected_cumulative_sat_max, CMT);
10656
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max, CMT);
10657
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max, CMT);
10658
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max, CMT);
10659
+ CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max, CMT);
10660
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max, CMT);
10661
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max, CMT);
10662
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max, CMT);
10663
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max, CMT);
10664
+ CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max, CMT);
10665
+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max, CMT);
10666
+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max, CMT);
10667
+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max, CMT);
10668
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max, CMT);
10669
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max, CMT);
10670
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max, CMT);
10671
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max, CMT);
10679
--- a/src//dev/null
10680
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshlu_n.c
10682
+#include <arm_neon.h>
10683
+#include "arm-neon-ref.h"
10684
+#include "compute-ref-data.h"
10686
+/* Expected values of cumulative_saturation flag with negative
10688
+int VECT_VAR(expected_cumulative_sat_neg,int,8,8) = 1;
10689
+int VECT_VAR(expected_cumulative_sat_neg,int,16,4) = 1;
10690
+int VECT_VAR(expected_cumulative_sat_neg,int,32,2) = 1;
10691
+int VECT_VAR(expected_cumulative_sat_neg,int,64,1) = 1;
10692
+int VECT_VAR(expected_cumulative_sat_neg,int,8,16) = 1;
10693
+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 1;
10694
+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 1;
10695
+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1;
10697
+/* Expected results with negative input. */
10698
+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
10699
+ 0x0, 0x0, 0x0, 0x0 };
10700
+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
10701
+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 };
10702
+VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0x0 };
10703
+VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
10704
+ 0x0, 0x0, 0x0, 0x0,
10705
+ 0x0, 0x0, 0x0, 0x0,
10706
+ 0x0, 0x0, 0x0, 0x0 };
10707
+VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
10708
+ 0x0, 0x0, 0x0, 0x0 };
10709
+VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
10710
+VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0x0, 0x0 };
10712
+/* Expected values of cumulative_saturation flag with shift by 1. */
10713
+int VECT_VAR(expected_cumulative_sat_sh1,int,8,8) = 0;
10714
+int VECT_VAR(expected_cumulative_sat_sh1,int,16,4) = 0;
10715
+int VECT_VAR(expected_cumulative_sat_sh1,int,32,2) = 0;
10716
+int VECT_VAR(expected_cumulative_sat_sh1,int,64,1) = 0;
10717
+int VECT_VAR(expected_cumulative_sat_sh1,int,8,16) = 0;
10718
+int VECT_VAR(expected_cumulative_sat_sh1,int,16,8) = 0;
10719
+int VECT_VAR(expected_cumulative_sat_sh1,int,32,4) = 0;
10720
+int VECT_VAR(expected_cumulative_sat_sh1,int,64,2) = 0;
10722
+/* Expected results with shift by 1. */
10723
+VECT_VAR_DECL(expected_sh1,uint,8,8) [] = { 0xfe, 0xfe, 0xfe, 0xfe,
10724
+ 0xfe, 0xfe, 0xfe, 0xfe };
10725
+VECT_VAR_DECL(expected_sh1,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe };
10726
+VECT_VAR_DECL(expected_sh1,uint,32,2) [] = { 0xfffffffe, 0xfffffffe };
10727
+VECT_VAR_DECL(expected_sh1,uint,64,1) [] = { 0xfffffffffffffffe };
10728
+VECT_VAR_DECL(expected_sh1,uint,8,16) [] = { 0xfe, 0xfe, 0xfe, 0xfe,
10729
+ 0xfe, 0xfe, 0xfe, 0xfe,
10730
+ 0xfe, 0xfe, 0xfe, 0xfe,
10731
+ 0xfe, 0xfe, 0xfe, 0xfe };
10732
+VECT_VAR_DECL(expected_sh1,uint,16,8) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe,
10733
+ 0xfffe, 0xfffe, 0xfffe, 0xfffe };
10734
+VECT_VAR_DECL(expected_sh1,uint,32,4) [] = { 0xfffffffe, 0xfffffffe,
10735
+ 0xfffffffe, 0xfffffffe };
10736
+VECT_VAR_DECL(expected_sh1,uint,64,2) [] = { 0xfffffffffffffffe,
10737
+ 0xfffffffffffffffe };
10739
+/* Expected values of cumulative_saturation flag with shift by 2. */
10740
+int VECT_VAR(expected_cumulative_sat_sh2,int,8,8) = 1;
10741
+int VECT_VAR(expected_cumulative_sat_sh2,int,16,4) = 1;
10742
+int VECT_VAR(expected_cumulative_sat_sh2,int,32,2) = 1;
10743
+int VECT_VAR(expected_cumulative_sat_sh2,int,64,1) = 1;
10744
+int VECT_VAR(expected_cumulative_sat_sh2,int,8,16) = 1;
10745
+int VECT_VAR(expected_cumulative_sat_sh2,int,16,8) = 1;
10746
+int VECT_VAR(expected_cumulative_sat_sh2,int,32,4) = 1;
10747
+int VECT_VAR(expected_cumulative_sat_sh2,int,64,2) = 1;
10749
+/* Expected results with shift by 2. */
10750
+VECT_VAR_DECL(expected_sh2,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
10751
+ 0xff, 0xff, 0xff, 0xff };
10752
+VECT_VAR_DECL(expected_sh2,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
10753
+VECT_VAR_DECL(expected_sh2,uint,32,2) [] = { 0xffffffff, 0xffffffff };
10754
+VECT_VAR_DECL(expected_sh2,uint,64,1) [] = { 0xffffffffffffffff };
10755
+VECT_VAR_DECL(expected_sh2,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
10756
+ 0xff, 0xff, 0xff, 0xff,
10757
+ 0xff, 0xff, 0xff, 0xff,
10758
+ 0xff, 0xff, 0xff, 0xff };
10759
+VECT_VAR_DECL(expected_sh2,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
10760
+ 0xffff, 0xffff, 0xffff, 0xffff };
10761
+VECT_VAR_DECL(expected_sh2,uint,32,4) [] = { 0xffffffff, 0xffffffff,
10762
+ 0xffffffff, 0xffffffff };
10763
+VECT_VAR_DECL(expected_sh2,uint,64,2) [] = { 0xffffffffffffffff,
10764
+ 0xffffffffffffffff };
10766
+/* Expected values of cumulative_saturation flag. */
10767
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
10768
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
10769
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
10770
+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0;
10771
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0;
10772
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
10773
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
10774
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
10776
+/* Expected results. */
10777
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 };
10778
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8, 0x8, 0x8, 0x8 };
10779
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x18, 0x18 };
10780
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x40 };
10781
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xa0, 0xa0, 0xa0, 0xa0,
10782
+ 0xa0, 0xa0, 0xa0, 0xa0,
10783
+ 0xa0, 0xa0, 0xa0, 0xa0,
10784
+ 0xa0, 0xa0, 0xa0, 0xa0 };
10785
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x180, 0x180, 0x180, 0x180,
10786
+ 0x180, 0x180, 0x180, 0x180 };
10787
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x380, 0x380, 0x380, 0x380 };
10788
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x800, 0x800 };
10791
+#define INSN vqshlu
10792
+#define TEST_MSG "VQSHLU_N/VQSHLUQ_N"
10794
+#define FNNAME1(NAME) void exec_ ## NAME ## _n(void)
10795
+#define FNNAME(NAME) FNNAME1(NAME)
10799
+ /* Basic test: v2=vqshlu_n(v1,v), then store the result. */
10800
+#define TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
10801
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T3, W, N)); \
10802
+ VECT_VAR(vector_res, T3, W, N) = \
10803
+ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \
10805
+ vst1##Q##_##T4##W(VECT_VAR(result, T3, W, N), \
10806
+ VECT_VAR(vector_res, T3, W, N)); \
10807
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
10809
+ /* Two auxliary macros are necessary to expand INSN */
10810
+#define TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
10811
+ TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
10813
+#define TEST_VQSHLU_N(Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
10814
+ TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
10817
+ DECL_VARIABLE_ALL_VARIANTS(vector);
10818
+ DECL_VARIABLE_ALL_VARIANTS(vector_res);
10820
+ clean_results ();
10822
+ /* Fill input vector with negative values, to check saturation on
10824
+ VDUP(vector, , int, s, 8, 8, -1);
10825
+ VDUP(vector, , int, s, 16, 4, -2);
10826
+ VDUP(vector, , int, s, 32, 2, -3);
10827
+ VDUP(vector, , int, s, 64, 1, -4);
10828
+ VDUP(vector, q, int, s, 8, 16, -1);
10829
+ VDUP(vector, q, int, s, 16, 8, -2);
10830
+ VDUP(vector, q, int, s, 32, 4, -3);
10831
+ VDUP(vector, q, int, s, 64, 2, -4);
10833
+ /* Choose shift amount arbitrarily. */
10834
+#define CMT " (negative input)"
10835
+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2, expected_cumulative_sat_neg, CMT);
10836
+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1, expected_cumulative_sat_neg, CMT);
10837
+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1, expected_cumulative_sat_neg, CMT);
10838
+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2, expected_cumulative_sat_neg, CMT);
10839
+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2, expected_cumulative_sat_neg, CMT);
10840
+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1, expected_cumulative_sat_neg, CMT);
10841
+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1, expected_cumulative_sat_neg, CMT);
10842
+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2, expected_cumulative_sat_neg, CMT);
10844
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT);
10845
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT);
10846
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT);
10847
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT);
10848
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT);
10849
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT);
10850
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT);
10851
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT);
10854
+ /* Fill input vector with max value, to check saturation on
10856
+ VDUP(vector, , int, s, 8, 8, 0x7F);
10857
+ VDUP(vector, , int, s, 16, 4, 0x7FFF);
10858
+ VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF);
10859
+ VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL);
10860
+ VDUP(vector, q, int, s, 8, 16, 0x7F);
10861
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
10862
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
10863
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFULL);
10865
+ /* shift by 1. */
10867
+#define CMT " (shift by 1)"
10868
+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1, expected_cumulative_sat_sh1, CMT);
10869
+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1, expected_cumulative_sat_sh1, CMT);
10870
+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1, expected_cumulative_sat_sh1, CMT);
10871
+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 1, expected_cumulative_sat_sh1, CMT);
10872
+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 1, expected_cumulative_sat_sh1, CMT);
10873
+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1, expected_cumulative_sat_sh1, CMT);
10874
+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1, expected_cumulative_sat_sh1, CMT);
10875
+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 1, expected_cumulative_sat_sh1, CMT);
10877
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh1, CMT);
10878
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh1, CMT);
10879
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh1, CMT);
10880
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh1, CMT);
10881
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh1, CMT);
10882
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh1, CMT);
10883
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh1, CMT);
10884
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh1, CMT);
10886
+ /* shift by 2 to force saturation. */
10888
+#define CMT " (shift by 2)"
10889
+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2, expected_cumulative_sat_sh2, CMT);
10890
+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2, expected_cumulative_sat_sh2, CMT);
10891
+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 2, expected_cumulative_sat_sh2, CMT);
10892
+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2, expected_cumulative_sat_sh2, CMT);
10893
+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2, expected_cumulative_sat_sh2, CMT);
10894
+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 2, expected_cumulative_sat_sh2, CMT);
10895
+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 2, expected_cumulative_sat_sh2, CMT);
10896
+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2, expected_cumulative_sat_sh2, CMT);
10898
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh2, CMT);
10899
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh2, CMT);
10900
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh2, CMT);
10901
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh2, CMT);
10902
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh2, CMT);
10903
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh2, CMT);
10904
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh2, CMT);
10905
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh2, CMT);
10908
+ /* Fill input vector with positive values, to check normal case. */
10909
+ VDUP(vector, , int, s, 8, 8, 1);
10910
+ VDUP(vector, , int, s, 16, 4, 2);
10911
+ VDUP(vector, , int, s, 32, 2, 3);
10912
+ VDUP(vector, , int, s, 64, 1, 4);
10913
+ VDUP(vector, q, int, s, 8, 16, 5);
10914
+ VDUP(vector, q, int, s, 16, 8, 6);
10915
+ VDUP(vector, q, int, s, 32, 4, 7);
10916
+ VDUP(vector, q, int, s, 64, 2, 8);
10918
+ /* Arbitrary shift amount. */
10921
+ TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1, expected_cumulative_sat, CMT);
10922
+ TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2, expected_cumulative_sat, CMT);
10923
+ TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 3, expected_cumulative_sat, CMT);
10924
+ TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 4, expected_cumulative_sat, CMT);
10925
+ TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 5, expected_cumulative_sat, CMT);
10926
+ TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 6, expected_cumulative_sat, CMT);
10927
+ TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 7, expected_cumulative_sat, CMT);
10928
+ TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 8, expected_cumulative_sat, CMT);
10930
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
10931
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
10932
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
10933
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT);
10934
+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT);
10935
+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT);
10936
+ CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
10937
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT);
10942
+ exec_vqshlu_n ();
10945
--- a/src//dev/null
10946
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_n.c
10948
+#include <arm_neon.h>
10949
+#include "arm-neon-ref.h"
10950
+#include "compute-ref-data.h"
10952
+/* Expected values of cumulative_saturation flag. */
10953
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
10954
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
10955
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
10956
+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1;
10957
+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1;
10958
+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1;
10960
+/* Expected results. */
10961
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9,
10962
+ 0xfa, 0xfa, 0xfb, 0xfb };
10963
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff8, 0xfff8, 0xfff9, 0xfff9 };
10964
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffc };
10965
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
10966
+ 0xff, 0xff, 0xff, 0xff };
10967
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
10968
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
10970
+/* Expected values of cumulative_saturation flag with max input value
10972
+int VECT_VAR(expected_cumulative_sat_max_sh3,int,16,8) = 1;
10973
+int VECT_VAR(expected_cumulative_sat_max_sh3,int,32,4) = 1;
10974
+int VECT_VAR(expected_cumulative_sat_max_sh3,int,64,2) = 1;
10975
+int VECT_VAR(expected_cumulative_sat_max_sh3,uint,16,8) = 1;
10976
+int VECT_VAR(expected_cumulative_sat_max_sh3,uint,32,4) = 1;
10977
+int VECT_VAR(expected_cumulative_sat_max_sh3,uint,64,2) = 1;
10979
+/* Expected results with max input value shifted by 3. */
10980
+VECT_VAR_DECL(expected_max_sh3,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
10981
+ 0x7f, 0x7f, 0x7f, 0x7f };
10982
+VECT_VAR_DECL(expected_max_sh3,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
10983
+VECT_VAR_DECL(expected_max_sh3,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
10984
+VECT_VAR_DECL(expected_max_sh3,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
10985
+ 0xff, 0xff, 0xff, 0xff };
10986
+VECT_VAR_DECL(expected_max_sh3,uint,16,4) [] = { 0xffff, 0xffff,
10987
+ 0xffff, 0xffff };
10988
+VECT_VAR_DECL(expected_max_sh3,uint,32,2) [] = { 0xffffffff, 0xffffffff };
10990
+/* Expected values of cumulative_saturation flag with max input value
10991
+ shifted by type size. */
10992
+int VECT_VAR(expected_cumulative_sat_max_shmax,int,16,8) = 0;
10993
+int VECT_VAR(expected_cumulative_sat_max_shmax,int,32,4) = 0;
10994
+int VECT_VAR(expected_cumulative_sat_max_shmax,int,64,2) = 0;
10995
+int VECT_VAR(expected_cumulative_sat_max_shmax,uint,16,8) = 0;
10996
+int VECT_VAR(expected_cumulative_sat_max_shmax,uint,32,4) = 0;
10997
+int VECT_VAR(expected_cumulative_sat_max_shmax,uint,64,2) = 0;
10999
+/* Expected results with max input value shifted by type size. */
11000
+VECT_VAR_DECL(expected_max_shmax,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
11001
+ 0x7f, 0x7f, 0x7f, 0x7f };
11002
+VECT_VAR_DECL(expected_max_shmax,int,16,4) [] = { 0x7fff, 0x7fff,
11003
+ 0x7fff, 0x7fff };
11004
+VECT_VAR_DECL(expected_max_shmax,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
11005
+VECT_VAR_DECL(expected_max_shmax,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
11006
+ 0xff, 0xff, 0xff, 0xff };
11007
+VECT_VAR_DECL(expected_max_shmax,uint,16,4) [] = { 0xffff, 0xffff,
11008
+ 0xffff, 0xffff };
11009
+VECT_VAR_DECL(expected_max_shmax,uint,32,2) [] = { 0xffffffff, 0xffffffff };
11011
+#define INSN vqshrn_n
11012
+#define TEST_MSG "VQSHRN_N"
11014
+#define FNNAME1(NAME) void exec_ ## NAME (void)
11015
+#define FNNAME(NAME) FNNAME1(NAME)
11019
+ /* Basic test: y=vqshrn_n(x,v), then store the result. */
11020
+#define TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
11021
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \
11022
+ VECT_VAR(vector_res, T1, W2, N) = \
11023
+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \
11025
+ vst1_##T2##W2(VECT_VAR(result, T1, W2, N), \
11026
+ VECT_VAR(vector_res, T1, W2, N)); \
11027
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
11029
+ /* Two auxiliary macros are necessary to expand INSN */
11030
+#define TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
11031
+ TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
11033
+#define TEST_VQSHRN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
11034
+ TEST_VQSHRN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
11037
+ /* vector is twice as large as vector_res. */
11038
+ DECL_VARIABLE(vector, int, 16, 8);
11039
+ DECL_VARIABLE(vector, int, 32, 4);
11040
+ DECL_VARIABLE(vector, int, 64, 2);
11041
+ DECL_VARIABLE(vector, uint, 16, 8);
11042
+ DECL_VARIABLE(vector, uint, 32, 4);
11043
+ DECL_VARIABLE(vector, uint, 64, 2);
11045
+ DECL_VARIABLE(vector_res, int, 8, 8);
11046
+ DECL_VARIABLE(vector_res, int, 16, 4);
11047
+ DECL_VARIABLE(vector_res, int, 32, 2);
11048
+ DECL_VARIABLE(vector_res, uint, 8, 8);
11049
+ DECL_VARIABLE(vector_res, uint, 16, 4);
11050
+ DECL_VARIABLE(vector_res, uint, 32, 2);
11052
+ clean_results ();
11054
+ VLOAD(vector, buffer, q, int, s, 16, 8);
11055
+ VLOAD(vector, buffer, q, int, s, 32, 4);
11056
+ VLOAD(vector, buffer, q, int, s, 64, 2);
11057
+ VLOAD(vector, buffer, q, uint, u, 16, 8);
11058
+ VLOAD(vector, buffer, q, uint, u, 32, 4);
11059
+ VLOAD(vector, buffer, q, uint, u, 64, 2);
11061
+ /* Choose shift amount arbitrarily. */
11063
+ TEST_VQSHRN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat, CMT);
11064
+ TEST_VQSHRN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat, CMT);
11065
+ TEST_VQSHRN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat, CMT);
11066
+ TEST_VQSHRN_N(uint, u, 16, 8, 8, 2, expected_cumulative_sat, CMT);
11067
+ TEST_VQSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat, CMT);
11068
+ TEST_VQSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat, CMT);
11070
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT);
11071
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
11072
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
11073
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
11074
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
11075
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
11078
+ /* Use max possible value as input. */
11079
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
11080
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
11081
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
11082
+ VDUP(vector, q, uint, u, 16, 8, 0xFFFF);
11083
+ VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF);
11084
+ VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL);
11087
+#define CMT " (check saturation: shift by 3)"
11088
+ TEST_VQSHRN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_max_sh3, CMT);
11089
+ TEST_VQSHRN_N(int, s, 32, 16, 4, 3, expected_cumulative_sat_max_sh3, CMT);
11090
+ TEST_VQSHRN_N(int, s, 64, 32, 2, 3, expected_cumulative_sat_max_sh3, CMT);
11091
+ TEST_VQSHRN_N(uint, u, 16, 8, 8, 3, expected_cumulative_sat_max_sh3, CMT);
11092
+ TEST_VQSHRN_N(uint, u, 32, 16, 4, 3, expected_cumulative_sat_max_sh3, CMT);
11093
+ TEST_VQSHRN_N(uint, u, 64, 32, 2, 3, expected_cumulative_sat_max_sh3, CMT);
11095
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_sh3, CMT);
11096
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_sh3, CMT);
11097
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_sh3, CMT);
11098
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh3, CMT);
11099
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh3, CMT);
11100
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh3, CMT);
11104
+#define CMT " (check saturation: shift by max)"
11105
+ TEST_VQSHRN_N(int, s, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT);
11106
+ TEST_VQSHRN_N(int, s, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT);
11107
+ TEST_VQSHRN_N(int, s, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT);
11108
+ TEST_VQSHRN_N(uint, u, 16, 8, 8, 8, expected_cumulative_sat_max_shmax, CMT);
11109
+ TEST_VQSHRN_N(uint, u, 32, 16, 4, 16, expected_cumulative_sat_max_shmax, CMT);
11110
+ TEST_VQSHRN_N(uint, u, 64, 32, 2, 32, expected_cumulative_sat_max_shmax, CMT);
11112
+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max_shmax, CMT);
11113
+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max_shmax, CMT);
11114
+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max_shmax, CMT);
11115
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_shmax, CMT);
11116
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shmax, CMT);
11117
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shmax, CMT);
11122
+ exec_vqshrn_n ();
11125
--- a/src//dev/null
11126
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_n.c
11128
+#include <arm_neon.h>
11129
+#include "arm-neon-ref.h"
11130
+#include "compute-ref-data.h"
11132
+/* Expected values of cumulative_saturation flag with negative input. */
11133
+int VECT_VAR(expected_cumulative_sat_neg,int,16,8) = 1;
11134
+int VECT_VAR(expected_cumulative_sat_neg,int,32,4) = 1;
11135
+int VECT_VAR(expected_cumulative_sat_neg,int,64,2) = 1;
11137
+/* Expected results with negative input. */
11138
+VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
11139
+ 0x0, 0x0, 0x0, 0x0 };
11140
+VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
11141
+VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 };
11143
+/* Expected values of cumulative_saturation flag with max input value
11145
+int VECT_VAR(expected_cumulative_sat_max_sh1,int,16,8) = 1;
11146
+int VECT_VAR(expected_cumulative_sat_max_sh1,int,32,4) = 1;
11147
+int VECT_VAR(expected_cumulative_sat_max_sh1,int,64,2) = 1;
11149
+/* Expected results with max input value shifted by 1. */
11150
+VECT_VAR_DECL(expected_max_sh1,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
11151
+ 0xff, 0xff, 0xff, 0xff };
11152
+VECT_VAR_DECL(expected_max_sh1,uint,16,4) [] = { 0xffff, 0xffff,
11153
+ 0xffff, 0xffff };
11154
+VECT_VAR_DECL(expected_max_sh1,uint,32,2) [] = { 0xffffffff, 0xffffffff };
11155
+VECT_VAR_DECL(expected_max_sh1,uint,64,1) [] = { 0x3333333333333333 };
11157
+/* Expected values of cumulative_saturation flag. */
11158
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
11159
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 1;
11160
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
11162
+/* Expected results. */
11163
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x48, 0x48, 0x48, 0x48,
11164
+ 0x48, 0x48, 0x48, 0x48 };
11165
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
11166
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xdeadbe, 0xdeadbe };
11169
+#define INSN vqshrun_n
11170
+#define TEST_MSG "VQSHRUN_N"
11172
+#define FNNAME1(NAME) void exec_ ## NAME (void)
11173
+#define FNNAME(NAME) FNNAME1(NAME)
11177
+ /* Basic test: y=vqshrun_n(x,v), then store the result. */
11178
+#define TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
11179
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \
11180
+ VECT_VAR(vector_res, uint, W2, N) = \
11181
+ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \
11183
+ vst1_u##W2(VECT_VAR(result, uint, W2, N), \
11184
+ VECT_VAR(vector_res, uint, W2, N)); \
11185
+ CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
11187
+ /* Two auxiliary macros are necessary to expand INSN */
11188
+#define TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
11189
+ TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
11191
+#define TEST_VQSHRUN_N(T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
11192
+ TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
11195
+ /* vector is twice as large as vector_res. */
11196
+ DECL_VARIABLE(vector, int, 16, 8);
11197
+ DECL_VARIABLE(vector, int, 32, 4);
11198
+ DECL_VARIABLE(vector, int, 64, 2);
11200
+ DECL_VARIABLE(vector_res, uint, 8, 8);
11201
+ DECL_VARIABLE(vector_res, uint, 16, 4);
11202
+ DECL_VARIABLE(vector_res, uint, 32, 2);
11204
+ clean_results ();
11206
+ /* Fill input vector with negative values, to check saturation on
11208
+ VDUP(vector, q, int, s, 16, 8, -2);
11209
+ VDUP(vector, q, int, s, 32, 4, -3);
11210
+ VDUP(vector, q, int, s, 64, 2, -4);
11212
+ /* Choose shift amount arbitrarily. */
11213
+#define CMT " (negative input)"
11214
+ TEST_VQSHRUN_N(int, s, 16, 8, 8, 3, expected_cumulative_sat_neg, CMT);
11215
+ TEST_VQSHRUN_N(int, s, 32, 16, 4, 4, expected_cumulative_sat_neg, CMT);
11216
+ TEST_VQSHRUN_N(int, s, 64, 32, 2, 2, expected_cumulative_sat_neg, CMT);
11218
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT);
11219
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT);
11220
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT);
11223
+ /* Fill input vector with max value, to check saturation on
11225
+ VDUP(vector, q, int, s, 16, 8, 0x7FFF);
11226
+ VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
11227
+ VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
11230
+#define CMT " (check cumulative saturation)"
11231
+ TEST_VQSHRUN_N(int, s, 16, 8, 8, 1, expected_cumulative_sat_max_sh1, CMT);
11232
+ TEST_VQSHRUN_N(int, s, 32, 16, 4, 1, expected_cumulative_sat_max_sh1, CMT);
11233
+ TEST_VQSHRUN_N(int, s, 64, 32, 2, 1, expected_cumulative_sat_max_sh1, CMT);
11235
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max_sh1, CMT);
11236
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_sh1, CMT);
11237
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_sh1, CMT);
11240
+ /* Fill input vector with positive values, to check normal case. */
11241
+ VDUP(vector, q, int, s, 16, 8, 0x1234);
11242
+ VDUP(vector, q, int, s, 32, 4, 0x87654321);
11243
+ VDUP(vector, q, int, s, 64, 2, 0xDEADBEEF);
11247
+ TEST_VQSHRUN_N(int, s, 16, 8, 8, 6, expected_cumulative_sat, CMT);
11248
+ TEST_VQSHRUN_N(int, s, 32, 16, 4, 7, expected_cumulative_sat, CMT);
11249
+ TEST_VQSHRUN_N(int, s, 64, 32, 2, 8, expected_cumulative_sat, CMT);
11251
+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
11252
+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
11253
+ CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
11258
+ exec_vqshrun_n ();
11261
--- a/src//dev/null
11262
+++ b/src/gcc/testsuite/gcc.target/aarch64/c-output-template-4.c
11264
+/* { dg-do compile } */
11265
+/* { dg-options "-O0" } */
11270
+ __asm__ ("@ %c0" : : "S" (&test + 4));
11273
+/* { dg-final { scan-assembler "@ test\\+4" } } */
11274
--- a/src//dev/null
11275
+++ b/src/gcc/testsuite/gcc.target/aarch64/pow-sqrt-synth-1.c
11277
+/* { dg-do compile } */
11278
+/* { dg-options "-fdump-tree-sincos -Ofast --param max-pow-sqrt-depth=8" } */
11284
+ return __builtin_pow (a, -5.875);
11290
+ return __builtin_pow (a, 0.75f);
11296
+ return __builtin_pow (a, 1.0 + 0.00390625);
11302
+ return __builtin_pow (a, -1.25) + __builtin_pow (a, 5.75) - __builtin_pow (a, 3.375);
11307
+vecfoo (double *a)
11309
+ for (int i = 0; i < N; i++)
11310
+ a[i] = __builtin_pow (a[i], 1.25);
11313
+/* { dg-final { scan-tree-dump-times "synthesizing" 7 "sincos" } } */
11314
+/* { dg-final { cleanup-tree-dump "sincos" } } */
11315
\ No newline at end of file
11316
--- a/src//dev/null
11317
+++ b/src/gcc/testsuite/gcc.target/aarch64/pr65491_1.c
11319
+/* { dg-do compile } */
11320
+/* { dg-options "-O2" } */
11322
+typedef long double a __attribute__((vector_size (16)));
11325
+sum (a first, a second)
11327
+ return first + second;
11330
--- a/src/gcc/testsuite/gcc.target/aarch64/singleton_intrinsics_1.c
11331
+++ b/src/gcc/testsuite/gcc.target/aarch64/singleton_intrinsics_1.c
11332
@@ -235,8 +235,8 @@ test_vrshl_u64 (uint64x1_t a, int64x1_t b)
11333
return vrshl_u64 (a, b);
11336
-/* For int64x1_t, sshr...#63 is output instead of the equivalent cmlt...#0. */
11337
-/* { dg-final { scan-assembler-times "\\tsshr\\td\[0-9\]+" 2 } } */
11338
+/* For int64x1_t, sshr...#63 is equivalent to cmlt...#0. */
11339
+/* { dg-final { scan-assembler-times "\\t(?:sshr|cmlt)\\td\[0-9\]+" 2 } } */
11342
test_vshr_n_s64 (int64x1_t a)
11343
--- a/src//dev/null
11344
+++ b/src/gcc/testsuite/gcc.target/aarch64/unsigned-float.c
11346
+/* { dg-do compile } */
11347
+/* { dg-options "-O1" } */
11349
+#include <stdint.h>
11354
+ return (double)(float)x;
11360
+ return (float)(double)x;
11363
+/* { dg-final { scan-assembler-not "fcvt" } } */
11364
--- a/src//dev/null
11365
+++ b/src/gcc/testsuite/gcc.target/aarch64/vec_init_1.c
11367
+/* { dg-do run } */
11368
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-inline" } */
11370
+extern void abort (void);
11372
+typedef float float16x4_t __attribute__ ((vector_size ((16))));
11380
+ return (float16x4_t) { 0, 0, a, b };
11384
+main (int argc, char **argv)
11388
+ float16x4_t vec = make_vector ();
11389
+ if (vec[0] != 0 || vec[1] != 0 || vec[2] != a || vec[3] != b)
11394
+/* { dg-final { scan-assembler-times "ins\\t" 2 } } */
11395
+/* What we want to check, is that make_vector does not stp the whole vector
11396
+ to the stack. Unfortunately here we scan the body of main() too, which may
11397
+ be a bit fragile - the test is currently passing only because of the option
11398
+ -fomit-frame-pointer which avoids use of stp in the prologue to main(). */
11399
+/* { dg-final { scan-assembler-not "stp\\t" } } */
11400
+/* { dg-final { cleanup-saved-temps } } */
11401
--- a/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c
11402
+++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c
11403
@@ -54,11 +54,11 @@ test_vld##STRUCT##Q##_lane##SUFFIX (const BASE##_t *data, \
11407
-/* Tests of vld2_dup and vld2q_dup. */
11408
+/* Tests of vld2_lane and vld2q_lane. */
11409
VARIANTS (TESTMETH, 2)
11410
-/* Tests of vld3_dup and vld3q_dup. */
11411
+/* Tests of vld3_lane and vld3q_lane. */
11412
VARIANTS (TESTMETH, 3)
11413
-/* Tests of vld4_dup and vld4q_dup. */
11414
+/* Tests of vld4_lane and vld4q_lane. */
11415
VARIANTS (TESTMETH, 4)
11417
#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \
11418
--- a/src//dev/null
11419
+++ b/src/gcc/testsuite/gcc.target/aarch64/vstN_lane_1.c
11421
+/* { dg-do run } */
11422
+/* { dg-options "-O3 -fno-inline" } */
11424
+#include <arm_neon.h>
11426
+extern void abort (void);
11428
+#define VARIANTS(VARIANT, STRUCT) \
11429
+VARIANT (uint8, , 8, _u8, 6, STRUCT) \
11430
+VARIANT (uint16, , 4, _u16, 3, STRUCT) \
11431
+VARIANT (uint32, , 2, _u32, 1, STRUCT) \
11432
+VARIANT (uint64, , 1, _u64, 0, STRUCT) \
11433
+VARIANT (int8, , 8, _s8, 5, STRUCT) \
11434
+VARIANT (int16, , 4, _s16, 2, STRUCT) \
11435
+VARIANT (int32, , 2, _s32, 0, STRUCT) \
11436
+VARIANT (int64, , 1, _s64, 0, STRUCT) \
11437
+VARIANT (poly8, , 8, _p8, 7, STRUCT) \
11438
+VARIANT (poly16, , 4, _p16, 1, STRUCT) \
11439
+VARIANT (float32, , 2, _f32, 1, STRUCT) \
11440
+VARIANT (float64, , 1, _f64, 0, STRUCT) \
11441
+VARIANT (uint8, q, 16, _u8, 14, STRUCT) \
11442
+VARIANT (uint16, q, 8, _u16, 4, STRUCT) \
11443
+VARIANT (uint32, q, 4, _u32, 3, STRUCT) \
11444
+VARIANT (uint64, q, 2, _u64, 0, STRUCT) \
11445
+VARIANT (int8, q, 16, _s8, 13, STRUCT) \
11446
+VARIANT (int16, q, 8, _s16, 6, STRUCT) \
11447
+VARIANT (int32, q, 4, _s32, 2, STRUCT) \
11448
+VARIANT (int64, q, 2, _s64, 1, STRUCT) \
11449
+VARIANT (poly8, q, 16, _p8, 12, STRUCT) \
11450
+VARIANT (poly16, q, 8, _p16, 5, STRUCT) \
11451
+VARIANT (float32, q, 4, _f32, 1, STRUCT)\
11452
+VARIANT (float64, q, 2, _f64, 0, STRUCT)
11454
+#define TESTMETH(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \
11456
+test_vst##STRUCT##Q##_lane##SUFFIX (const BASE##_t *data) \
11458
+ BASE##x##ELTS##x##STRUCT##_t vectors; \
11459
+ for (int i = 0; i < STRUCT; i++, data += ELTS) \
11460
+ vectors.val[i] = vld1##Q##SUFFIX (data); \
11461
+ BASE##_t temp[STRUCT]; \
11462
+ vst##STRUCT##Q##_lane##SUFFIX (temp, vectors, LANE); \
11463
+ for (int i = 0; i < STRUCT; i++) \
11465
+ if (temp[i] != vget##Q##_lane##SUFFIX (vectors.val[i], LANE)) \
11471
+/* Tests of vst2_lane and vst2q_lane. */
11472
+VARIANTS (TESTMETH, 2)
11473
+/* Tests of vst3_lane and vst3q_lane. */
11474
+VARIANTS (TESTMETH, 3)
11475
+/* Tests of vst4_lane and vst4q_lane. */
11476
+VARIANTS (TESTMETH, 4)
11478
+#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \
11479
+ if (test_vst##STRUCT##Q##_lane##SUFFIX ((const BASE##_t *)orig_data)) \
11483
+main (int argc, char **argv)
11485
+ /* Original data for all vector formats. */
11486
+ uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL,
11487
+ 0x012389ab4567cdefULL, 0xfeeddadacafe0431ULL,
11488
+ 0x1032547698badcfeULL, 0xbadbadbadbad0badULL,
11489
+ 0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL};
11491
+ VARIANTS (CHECK, 2);
11492
+ VARIANTS (CHECK, 3);
11493
+ VARIANTS (CHECK, 4);
11496
--- a/src//dev/null
11497
+++ b/src/gcc/testsuite/gcc.target/arm/bics_1.c
11499
+/* { dg-do run } */
11500
+/* { dg-options "-O2 --save-temps -fno-inline" } */
11501
+/* { dg-require-effective-target arm32 } */
11503
+extern void abort (void);
11506
+bics_si_test1 (int a, int b, int c)
11510
+ /* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 2 } } */
11514
+ return b + d + c;
11518
+bics_si_test2 (int a, int b, int c)
11520
+ int d = a & ~(b << 3);
11522
+ /* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+, .sl \#3" 1 } } */
11526
+ return b + d + c;
11534
+ x = bics_si_test1 (29, ~4, 5);
11535
+ if (x != ((29 & 4) + ~4 + 5))
11538
+ x = bics_si_test1 (5, ~2, 20);
11542
+ x = bics_si_test2 (35, ~4, 5);
11543
+ if (x != ((35 & ~(~4 << 3)) + ~4 + 5))
11546
+ x = bics_si_test2 (96, ~2, 20);
11552
+/* { dg-final { cleanup-saved-temps } } */
11553
--- a/src//dev/null
11554
+++ b/src/gcc/testsuite/gcc.target/arm/bics_2.c
11556
+/* { dg-do run } */
11557
+/* { dg-options "-O2 --save-temps -fno-inline" } */
11558
+/* { dg-require-effective-target arm32 } */
11560
+extern void abort (void);
11563
+bics_si_test1 (int a, int b, int c)
11567
+ /* { dg-final { scan-assembler-not "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" } } */
11568
+ /* { dg-final { scan-assembler-times "bic\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 2 } } */
11572
+ return b + d + c;
11576
+bics_si_test2 (int a, int b, int c)
11578
+ int d = a & ~(b << 3);
11580
+ /* { dg-final { scan-assembler-not "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+, .sl \#3" } } */
11581
+ /* { dg-final { scan-assembler "bic\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+, .sl \#3" } } */
11585
+ return b + d + c;
11593
+ x = bics_si_test1 (29, ~4, 5);
11594
+ if (x != ((29 & 4) + ~4 + 5))
11597
+ x = bics_si_test1 (5, ~2, 20);
11601
+ x = bics_si_test2 (35, ~4, 5);
11602
+ if (x != ((35 & ~(~4 << 3)) + ~4 + 5))
11605
+ x = bics_si_test2 (96, ~2, 20);
11612
+/* { dg-final { cleanup-saved-temps } } */
11613
--- a/src//dev/null
11614
+++ b/src/gcc/testsuite/gcc.target/arm/bics_3.c
11616
+/* { dg-do run } */
11617
+/* { dg-options "-O2 --save-temps -fno-inline" } */
11618
+/* { dg-require-effective-target arm32 } */
11620
+extern void abort (void);
11623
+bics_si_test (int a, int b)
11632
+bics_si_test2 (int a, int b)
11634
+ if (a & ~ (b << 2))
11646
+ if (bics_si_test (a, b))
11648
+ if (bics_si_test2 (c, b))
11653
+/* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 2 } } */
11654
+/* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+, .sl #2" 1 } } */
11656
+/* { dg-final { cleanup-saved-temps } } */
11657
--- a/src//dev/null
11658
+++ b/src/gcc/testsuite/gcc.target/arm/bics_4.c
11660
+/* { dg-do run } */
11661
+/* { dg-options "-O2 --save-temps -fno-inline" } */
11662
+/* { dg-require-effective-target arm32 } */
11664
+extern void abort (void);
11667
+bics_si_test1 (int a, int b, int c)
11669
+ if ((a & b) == a)
11676
+bics_si_test2 (int a, int b, int c)
11678
+ if ((a & b) == b)
11688
+ x = bics_si_test1 (0xf00d, 0xf11f, 0);
11692
+ x = bics_si_test1 (0xf11f, 0xf00d, 0);
11696
+ x = bics_si_test2 (0xf00d, 0xf11f, 0);
11700
+ x = bics_si_test2 (0xf11f, 0xf00d, 0);
11707
+/* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 2 } } */
11708
+/* { dg-final { cleanup-saved-temps } } */
11709
--- a/src/gcc/testsuite/gcc.target/arm/neon/pr51534.c
11710
+++ b/src/gcc/testsuite/gcc.target/arm/neon/pr51534.c
11711
@@ -58,18 +58,18 @@ GEN_COND_TESTS(vceq)
11712
/* { dg-final { scan-assembler-times "vcge\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */
11713
/* { dg-final { scan-assembler "vcge\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
11714
/* { dg-final { scan-assembler-times "vcge\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */
11715
-/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
11716
-/* { dg-final { scan-assembler "vcgt\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
11717
-/* { dg-final { scan-assembler "vcgt\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
11718
-/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
11719
-/* { dg-final { scan-assembler "vcgt\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
11720
-/* { dg-final { scan-assembler "vcgt\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
11721
-/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
11722
-/* { dg-final { scan-assembler "vcge\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
11723
-/* { dg-final { scan-assembler "vcge\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
11724
-/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
11725
-/* { dg-final { scan-assembler "vcge\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
11726
-/* { dg-final { scan-assembler "vcge\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
11727
+/* { dg-final { scan-assembler "vclt\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
11728
+/* { dg-final { scan-assembler "vclt\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
11729
+/* { dg-final { scan-assembler "vclt\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
11730
+/* { dg-final { scan-assembler "vclt\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
11731
+/* { dg-final { scan-assembler "vclt\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
11732
+/* { dg-final { scan-assembler "vclt\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
11733
+/* { dg-final { scan-assembler "vcle\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
11734
+/* { dg-final { scan-assembler "vcle\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
11735
+/* { dg-final { scan-assembler "vcle\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
11736
+/* { dg-final { scan-assembler "vcle\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
11737
+/* { dg-final { scan-assembler "vcle\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
11738
+/* { dg-final { scan-assembler "vcle\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
11739
/* { dg-final { scan-assembler-times "vceq\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */
11740
/* { dg-final { scan-assembler-times "vceq\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */
11741
/* { dg-final { scan-assembler-times "vceq\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */
11742
--- a/src//dev/null
11743
+++ b/src/gcc/testsuite/gcc.target/arm/pr26702.c
11745
+/* { dg-do compile { target arm*-*-eabi* } } */
11746
+/* { dg-final { scan-assembler "\\.size\[\\t \]+static_foo, 4" } } */
11748
+static int static_foo;
11749
--- a/src/gcc/testsuite/gcc.target/arm/pr42172-1.c
11750
+++ b/src/gcc/testsuite/gcc.target/arm/pr42172-1.c
11751
@@ -16,4 +16,4 @@ void init_A (struct A *this)
11755
-/* { dg-final { scan-assembler-times "ldr" 1 } } */
11756
+/* { dg-final { scan-assembler-times "str" 1 } } */
11757
--- a/src//dev/null
11758
+++ b/src/gcc/testsuite/gcc.target/arm/pr64208.c
11760
+/* { dg-do compile } */
11761
+/* { dg-skip-if "Test is specific to the iWMMXt" { arm*-*-* } { "-mcpu=*" } { "-mcpu=iwmmxt" } } */
11762
+/* { dg-skip-if "Test is specific to the iWMMXt" { arm*-*-* } { "-mabi=*" } { "-mabi=iwmmxt" } } */
11763
+/* { dg-skip-if "Test is specific to the iWMMXt" { arm*-*-* } { "-march=*" } { "-march=iwmmxt" } } */
11764
+/* { dg-skip-if "Test is specific to ARM mode" { arm*-*-* } { "-mthumb" } { "" } } */
11765
+/* { dg-require-effective-target arm32 } */
11766
+/* { dg-require-effective-target arm_iwmmxt_ok } */
11767
+/* { dg-options "-O1 -mcpu=iwmmxt" } */
11769
+long long x6(void);
11770
+void x7(long long, long long);
11771
+void x8(long long);
11777
+ long long *x3 = x1;
11779
+ long long x4 = x0, x5 = x6();
11785
--- a/src//dev/null
11786
+++ b/src/gcc/testsuite/gcc.target/arm/pr64616.c
11788
+/* { dg-do compile } */
11789
+/* { dg-options "-O2 -fdump-rtl-cprop2" } */
11792
+unsigned int glob;
11797
+ while (f (glob));
11801
+/* { dg-final { scan-rtl-dump "GLOBAL COPY-PROP" "cprop2" } } */
11802
+/* { dg-final { cleanup-rtl-dump "cprop2" } } */
11803
--- a/src//dev/null
11804
+++ b/src/gcc/testsuite/gcc.target/arm/pr64818.c
11806
+/* { dg-do compile } */
11807
+/* { dg-options "-O1" } */
11810
+extern int foo1 (void);
11820
+ register int a asm ("r0") = 5;
11821
+ register char *b asm ("r1") = temp;
11822
+ register int c asm ("r2") = len;
11823
+ asm volatile ("mov %[r0], %[r0]\n mov %[r1], %[r1]\n mov %[r2], %[r2]\n"
11825
+ : [r0]"r"(a), [r1]"r"(b), [r2]"r"(c));
11827
+ for (i = 0; i < len; i++)
11829
+ if (temp[i] == 10)
11835
+/* { dg-final { scan-assembler "\[\\t \]+mov\ r1,\ r1" } } */
11836
--- a/src/gcc/testsuite/gcc.target/arm/pr65067.c
11837
+++ b/src/gcc/testsuite/gcc.target/arm/pr65067.c
11839
/* { dg-do compile } */
11840
+/* { dg-require-effective-target arm_thumb2_ok } */
11841
/* { dg-options "-mthumb -mcpu=cortex-m3 -O2" } */
11844
--- a/src//dev/null
11845
+++ b/src/gcc/testsuite/gcc.target/arm/pr65710.c
11847
+/* { dg-do compile } */
11848
+/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } {"-mfloat-abi=soft" } } */
11849
+/* { dg-options "-mthumb -O2 -mfloat-abi=soft -w" } */
11850
+/* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */
11859
+enum { no_op, duplicate, pop_failure_jump, dummy_failure_jump };
11862
+ unsigned pointer;
11863
+} byte_fail_stack_elt_t;
11865
+typedef struct { unsigned avail; } byte_fail_stack_type;
11868
+ byte_fail_stack_elt_t word;
11870
+ unsigned match_null_string_p : 2;
11871
+ unsigned is_active : 1;
11872
+ unsigned ever_matched_something : 1;
11874
+} byte_register_info_type;
11882
+byte_re_match_2_internal_size2(const int p2, int p3, const int p4) {
11885
+ char k, l, m, n = h;
11886
+ byte_fail_stack_type o;
11887
+ byte_fail_stack_elt_t *q;
11888
+ unsigned int s = (unsigned int)h;
11890
+ char **v, *w, **x, **y, **t1;
11891
+ byte_register_info_type *z, *t2 = __builtin_alloca(s);
11892
+ x = __builtin_alloca(s);
11893
+ y = __builtin_alloca(s);
11894
+ z = __builtin_alloca(sizeof(byte_register_info_type));
11895
+ k = p4 + byte_re_match_2_internal_size2;
11899
+ if (h == h->used) {
11903
+ for (; i < s; i++)
11912
+ switch (*h->buffer++) {
11914
+ while (m && n ?: *g)
11916
+ y[*h->buffer] = z[*h->buffer].bits.match_null_string_p ? w == &a ?: w : w;
11920
+ while (r && z[r].bits.is_active)
11928
+ case dummy_failure_jump:
11931
+ if (z[*h->buffer].bits.ever_matched_something) {
11933
+ z[*h->buffer].bits.ever_matched_something = r = *h->buffer;
11934
+ for (; r + *(h->buffer + 1); r++) {
11940
+ case duplicate: {
11941
+ char *t3 = p2 + p3;
11945
+ if ((p3 ?: p4) == k)
11947
+ case pop_failure_jump:
11949
+ t2[c].word = q[o.avail];
11951
+ q = t4 = __builtin_allocamemcpy(t4 ?: (p <<= 1));
11958
+ for (; t5 >= t; t5--)
11959
+ v[t5] = q[--o.avail].pointer;
11960
+ switch (*h->buffer)
11961
+ case pop_failure_jump:
11967
--- a/src//dev/null
11968
+++ b/src/gcc/testsuite/gcc.target/arm/pr65729.c
11970
+/* { dg-do compile } */
11971
+/* { dg-require-effective-target arm_hard_vfp_ok } */
11972
+/* { dg-options "-O2 -march=armv7-a -mfloat-abi=hard -mfpu=vfpv3-d16" } */
11977
+ asm volatile ("" : "+gw" (x));
11980
--- a/src//dev/null
11981
+++ b/src/gcc/testsuite/gcc.target/arm/pr65924.c
11983
+/* { dg-do compile } */
11984
+/* { dg-require-effective-target arm_thumb2_ok } */
11985
+/* { dg-options "-O2 -mthumb" } */
11992
--- a/src/gcc/testsuite/gcc.target/arm/simd/simd.exp
11993
+++ b/src/gcc/testsuite/gcc.target/arm/simd/simd.exp
11994
@@ -27,9 +27,22 @@ load_lib gcc-dg.exp
11998
+# If the target hardware supports NEON, the default action is "run", otherwise
12000
+global dg-do-what-default
12001
+set save-dg-do-what-default ${dg-do-what-default}
12002
+if {![check_effective_target_arm_neon_ok]} then {
12004
+} elseif {[is-effective-target arm_neon_hw]} then {
12005
+ set dg-do-what-default run
12007
+ set dg-do-what-default compile
12011
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
12015
+set dg-do-what-default ${save-dg-do-what-default}
12017
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c
12018
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c
12020
/* Test the `vextQf32' ARM Neon intrinsic. */
12022
-/* { dg-do run } */
12023
-/* { dg-require-effective-target arm_neon_ok } */
12024
/* { dg-options "-save-temps -O3 -fno-inline" } */
12025
/* { dg-add-options arm_neon } */
12027
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c
12028
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c
12030
/* Test the `vextQp16' ARM Neon intrinsic. */
12032
-/* { dg-do run } */
12033
-/* { dg-require-effective-target arm_neon_ok } */
12034
/* { dg-options "-save-temps -O3 -fno-inline" } */
12035
/* { dg-add-options arm_neon } */
12037
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c
12038
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c
12040
/* Test the `vextQp64' ARM Neon intrinsic. */
12042
-/* { dg-do run } */
12043
/* { dg-require-effective-target arm_crypto_ok } */
12044
/* { dg-options "-save-temps -O3 -fno-inline" } */
12045
/* { dg-add-options arm_crypto } */
12046
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c
12047
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c
12049
/* Test the `vextQp8' ARM Neon intrinsic. */
12051
-/* { dg-do run } */
12052
-/* { dg-require-effective-target arm_neon_ok } */
12053
/* { dg-options "-save-temps -O3 -fno-inline" } */
12054
/* { dg-add-options arm_neon } */
12056
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c
12057
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c
12059
/* Test the `vextQs16' ARM Neon intrinsic. */
12061
-/* { dg-do run } */
12062
-/* { dg-require-effective-target arm_neon_ok } */
12063
/* { dg-options "-save-temps -O3 -fno-inline" } */
12064
/* { dg-add-options arm_neon } */
12066
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c
12067
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c
12069
/* Test the `vextQs32' ARM Neon intrinsic. */
12071
-/* { dg-do run } */
12072
-/* { dg-require-effective-target arm_neon_ok } */
12073
/* { dg-options "-save-temps -O3 -fno-inline" } */
12074
/* { dg-add-options arm_neon } */
12076
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c
12077
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c
12079
/* Test the `vextQs64' ARM Neon intrinsic. */
12081
-/* { dg-do run } */
12082
-/* { dg-require-effective-target arm_neon_ok } */
12083
/* { dg-options "-save-temps -O3 -fno-inline" } */
12084
/* { dg-add-options arm_neon } */
12086
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c
12087
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c
12089
/* Test the `vextQs8' ARM Neon intrinsic. */
12091
-/* { dg-do run } */
12092
-/* { dg-require-effective-target arm_neon_ok } */
12093
/* { dg-options "-save-temps -O3 -fno-inline" } */
12094
/* { dg-add-options arm_neon } */
12096
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c
12097
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c
12099
/* Test the `vextQu16' ARM Neon intrinsic. */
12101
-/* { dg-do run } */
12102
-/* { dg-require-effective-target arm_neon_ok } */
12103
/* { dg-options "-save-temps -O3 -fno-inline" } */
12104
/* { dg-add-options arm_neon } */
12106
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c
12107
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c
12109
/* Test the `vextQu32' ARM Neon intrinsic. */
12111
-/* { dg-do run } */
12112
-/* { dg-require-effective-target arm_neon_ok } */
12113
/* { dg-options "-save-temps -O3 -fno-inline" } */
12114
/* { dg-add-options arm_neon } */
12116
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c
12117
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c
12119
/* Test the `vextQu64' ARM Neon intrinsic. */
12121
-/* { dg-do run } */
12122
-/* { dg-require-effective-target arm_neon_ok } */
12123
/* { dg-options "-save-temps -O3 -fno-inline" } */
12124
/* { dg-add-options arm_neon } */
12126
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c
12127
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c
12129
/* Test the `vextQu8' ARM Neon intrinsic. */
12131
-/* { dg-do run } */
12132
-/* { dg-require-effective-target arm_neon_ok } */
12133
/* { dg-options "-save-temps -O3 -fno-inline" } */
12134
/* { dg-add-options arm_neon } */
12136
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c
12137
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c
12139
/* Test the `vextf32' ARM Neon intrinsic. */
12141
-/* { dg-do run } */
12142
-/* { dg-require-effective-target arm_neon_ok } */
12143
/* { dg-options "-save-temps -O3 -fno-inline" } */
12144
/* { dg-add-options arm_neon } */
12146
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c
12147
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c
12149
/* Test the `vextp16' ARM Neon intrinsic. */
12151
-/* { dg-do run } */
12152
-/* { dg-require-effective-target arm_neon_ok } */
12153
/* { dg-options "-save-temps -O3 -fno-inline" } */
12154
/* { dg-add-options arm_neon } */
12156
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c
12157
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c
12159
/* Test the `vextp64' ARM Neon intrinsic. */
12161
-/* { dg-do run } */
12162
/* { dg-require-effective-target arm_crypto_ok } */
12163
/* { dg-options "-save-temps -O3 -fno-inline" } */
12164
/* { dg-add-options arm_crypto } */
12165
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c
12166
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c
12168
/* Test the `vextp8' ARM Neon intrinsic. */
12170
-/* { dg-do run } */
12171
-/* { dg-require-effective-target arm_neon_ok } */
12172
/* { dg-options "-save-temps -O3 -fno-inline" } */
12173
/* { dg-add-options arm_neon } */
12175
--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c
12176
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c
12178
/* Test the `vexts16' ARM Neon intrinsic. */
12180
-/* { dg-do run } */
12181
-/* { dg-require-effective-target arm_neon_ok } */
12182
/* { dg-options "-save-temps -O3 -fno-inline" } */
12183
/* { dg-add-options arm_neon } */
12185
--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c
12186
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c
12188
/* Test the `vexts32' ARM Neon intrinsic. */
12190
-/* { dg-do run } */
12191
-/* { dg-require-effective-target arm_neon_ok } */
12192
/* { dg-options "-save-temps -O3 -fno-inline" } */
12193
/* { dg-add-options arm_neon } */
12195
--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c
12196
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c
12198
/* Test the `vexts64' ARM Neon intrinsic. */
12200
-/* { dg-do run } */
12201
-/* { dg-require-effective-target arm_neon_ok } */
12202
/* { dg-options "-save-temps -O3 -fno-inline" } */
12203
/* { dg-add-options arm_neon } */
12205
--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c
12206
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c
12208
/* Test the `vexts8' ARM Neon intrinsic. */
12210
-/* { dg-do run } */
12211
-/* { dg-require-effective-target arm_neon_ok } */
12212
/* { dg-options "-save-temps -O3 -fno-inline" } */
12213
/* { dg-add-options arm_neon } */
12215
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c
12216
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c
12218
/* Test the `vextu16' ARM Neon intrinsic. */
12220
-/* { dg-do run } */
12221
-/* { dg-require-effective-target arm_neon_ok } */
12222
/* { dg-options "-save-temps -O3 -fno-inline" } */
12223
/* { dg-add-options arm_neon } */
12225
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c
12226
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c
12228
/* Test the `vextu32' ARM Neon intrinsic. */
12230
-/* { dg-do run } */
12231
-/* { dg-require-effective-target arm_neon_ok } */
12232
/* { dg-options "-save-temps -O3 -fno-inline" } */
12233
/* { dg-add-options arm_neon } */
12235
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c
12236
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c
12238
/* Test the `vextu64' ARM Neon intrinsic. */
12240
-/* { dg-do run } */
12241
-/* { dg-require-effective-target arm_neon_ok } */
12242
/* { dg-options "-save-temps -O3 -fno-inline" } */
12243
/* { dg-add-options arm_neon } */
12245
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c
12246
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c
12248
/* Test the `vextu8' ARM Neon intrinsic. */
12250
-/* { dg-do run } */
12251
-/* { dg-require-effective-target arm_neon_ok } */
12252
/* { dg-options "-save-temps -O3 -fno-inline" } */
12253
/* { dg-add-options arm_neon } */
12255
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16p8_1.c
12256
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16p8_1.c
12258
/* Test the `vrev16p8' ARM Neon intrinsic. */
12260
-/* { dg-do run } */
12261
-/* { dg-require-effective-target arm_neon_ok } */
12262
/* { dg-options "-save-temps -fno-inline" } */
12263
/* { dg-add-options arm_neon } */
12265
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qp8_1.c
12266
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qp8_1.c
12268
/* Test the `vrev16q_p8' ARM Neon intrinsic. */
12270
-/* { dg-do run } */
12271
-/* { dg-require-effective-target arm_neon_ok } */
12272
/* { dg-options "-save-temps -fno-inline" } */
12273
/* { dg-add-options arm_neon } */
12275
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qs8_1.c
12276
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qs8_1.c
12278
/* Test the `vrev16q_s8' ARM Neon intrinsic. */
12280
-/* { dg-do run } */
12281
-/* { dg-require-effective-target arm_neon_ok } */
12282
/* { dg-options "-save-temps -fno-inline" } */
12283
/* { dg-add-options arm_neon } */
12285
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qu8_1.c
12286
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qu8_1.c
12288
/* Test the `vrev16q_u8' ARM Neon intrinsic. */
12290
-/* { dg-do run } */
12291
-/* { dg-require-effective-target arm_neon_ok } */
12292
/* { dg-options "-save-temps -fno-inline" } */
12293
/* { dg-add-options arm_neon } */
12295
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16s8_1.c
12296
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16s8_1.c
12298
/* Test the `vrev16s8' ARM Neon intrinsic. */
12300
-/* { dg-do run } */
12301
-/* { dg-require-effective-target arm_neon_ok } */
12302
/* { dg-options "-save-temps -fno-inline" } */
12303
/* { dg-add-options arm_neon } */
12305
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16u8_1.c
12306
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16u8_1.c
12308
/* Test the `vrev16u8' ARM Neon intrinsic. */
12310
-/* { dg-do run } */
12311
-/* { dg-require-effective-target arm_neon_ok } */
12312
/* { dg-options "-save-temps -fno-inline" } */
12313
/* { dg-add-options arm_neon } */
12315
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32p16_1.c
12316
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32p16_1.c
12318
/* Test the `vrev32p16' ARM Neon intrinsic. */
12320
-/* { dg-do run } */
12321
-/* { dg-require-effective-target arm_neon_ok } */
12322
/* { dg-options "-save-temps -fno-inline" } */
12323
/* { dg-add-options arm_neon } */
12325
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32p8_1.c
12326
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32p8_1.c
12328
/* Test the `vrev32p8' ARM Neon intrinsic. */
12330
-/* { dg-do run } */
12331
-/* { dg-require-effective-target arm_neon_ok } */
12332
/* { dg-options "-save-temps -fno-inline" } */
12333
/* { dg-add-options arm_neon } */
12335
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp16_1.c
12336
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp16_1.c
12338
/* Test the `vrev32q_p16' ARM Neon intrinsic. */
12340
-/* { dg-do run } */
12341
-/* { dg-require-effective-target arm_neon_ok } */
12342
/* { dg-options "-save-temps -fno-inline" } */
12343
/* { dg-add-options arm_neon } */
12345
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp8_1.c
12346
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp8_1.c
12348
/* Test the `vrev32q_p8' ARM Neon intrinsic. */
12350
-/* { dg-do run } */
12351
-/* { dg-require-effective-target arm_neon_ok } */
12352
/* { dg-options "-save-temps -fno-inline" } */
12353
/* { dg-add-options arm_neon } */
12355
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs16_1.c
12356
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs16_1.c
12358
/* Test the `vrev32q_s16' ARM Neon intrinsic. */
12360
-/* { dg-do run } */
12361
-/* { dg-require-effective-target arm_neon_ok } */
12362
/* { dg-options "-save-temps -fno-inline" } */
12363
/* { dg-add-options arm_neon } */
12365
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs8_1.c
12366
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs8_1.c
12368
/* Test the `vrev32q_s8' ARM Neon intrinsic. */
12370
-/* { dg-do run } */
12371
-/* { dg-require-effective-target arm_neon_ok } */
12372
/* { dg-options "-save-temps -fno-inline" } */
12373
/* { dg-add-options arm_neon } */
12375
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu16_1.c
12376
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu16_1.c
12378
/* Test the `vrev32q_u16' ARM Neon intrinsic. */
12380
-/* { dg-do run } */
12381
-/* { dg-require-effective-target arm_neon_ok } */
12382
/* { dg-options "-save-temps -fno-inline" } */
12383
/* { dg-add-options arm_neon } */
12385
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu8_1.c
12386
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu8_1.c
12388
/* Test the `vrev32q_u8' ARM Neon intrinsic. */
12390
-/* { dg-do run } */
12391
-/* { dg-require-effective-target arm_neon_ok } */
12392
/* { dg-options "-save-temps -fno-inline" } */
12393
/* { dg-add-options arm_neon } */
12395
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32s16_1.c
12396
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32s16_1.c
12398
/* Test the `vrev32s16' ARM Neon intrinsic. */
12400
-/* { dg-do run } */
12401
-/* { dg-require-effective-target arm_neon_ok } */
12402
/* { dg-options "-save-temps -fno-inline" } */
12403
/* { dg-add-options arm_neon } */
12405
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32s8_1.c
12406
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32s8_1.c
12408
/* Test the `vrev32s8' ARM Neon intrinsic. */
12410
-/* { dg-do run } */
12411
-/* { dg-require-effective-target arm_neon_ok } */
12412
/* { dg-options "-save-temps -fno-inline" } */
12413
/* { dg-add-options arm_neon } */
12415
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32u16_1.c
12416
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32u16_1.c
12418
/* Test the `vrev32u16' ARM Neon intrinsic. */
12420
-/* { dg-do run } */
12421
-/* { dg-require-effective-target arm_neon_ok } */
12422
/* { dg-options "-save-temps -fno-inline" } */
12423
/* { dg-add-options arm_neon } */
12425
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32u8_1.c
12426
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32u8_1.c
12428
/* Test the `vrev32u8' ARM Neon intrinsic. */
12430
-/* { dg-do run } */
12431
-/* { dg-require-effective-target arm_neon_ok } */
12432
/* { dg-options "-save-temps -fno-inline" } */
12433
/* { dg-add-options arm_neon } */
12435
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64f32_1.c
12436
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64f32_1.c
12438
/* Test the `vrev64f32' ARM Neon intrinsic. */
12440
-/* { dg-do run } */
12441
-/* { dg-require-effective-target arm_neon_ok } */
12442
/* { dg-options "-save-temps -fno-inline" } */
12443
/* { dg-add-options arm_neon } */
12445
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64p16_1.c
12446
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64p16_1.c
12448
/* Test the `vrev64p16' ARM Neon intrinsic. */
12450
-/* { dg-do run } */
12451
-/* { dg-require-effective-target arm_neon_ok } */
12452
/* { dg-options "-save-temps -fno-inline" } */
12453
/* { dg-add-options arm_neon } */
12455
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64p8_1.c
12456
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64p8_1.c
12458
/* Test the `vrev64p8' ARM Neon intrinsic. */
12460
-/* { dg-do run } */
12461
-/* { dg-require-effective-target arm_neon_ok } */
12462
/* { dg-options "-save-temps -fno-inline" } */
12463
/* { dg-add-options arm_neon } */
12465
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qf32_1.c
12466
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qf32_1.c
12468
/* Test the `vrev64q_f32' ARM Neon intrinsic. */
12470
-/* { dg-do run } */
12471
-/* { dg-require-effective-target arm_neon_ok } */
12472
/* { dg-options "-save-temps -fno-inline" } */
12473
/* { dg-add-options arm_neon } */
12475
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp16_1.c
12476
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp16_1.c
12478
/* Test the `vrev64q_p16' ARM Neon intrinsic. */
12480
-/* { dg-do run } */
12481
-/* { dg-require-effective-target arm_neon_ok } */
12482
/* { dg-options "-save-temps -fno-inline" } */
12483
/* { dg-add-options arm_neon } */
12485
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp8_1.c
12486
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp8_1.c
12488
/* Test the `vrev64q_p8' ARM Neon intrinsic. */
12490
-/* { dg-do run } */
12491
-/* { dg-require-effective-target arm_neon_ok } */
12492
/* { dg-options "-save-temps -fno-inline" } */
12493
/* { dg-add-options arm_neon } */
12495
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs16_1.c
12496
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs16_1.c
12498
/* Test the `vrev64q_s16' ARM Neon intrinsic. */
12500
-/* { dg-do run } */
12501
-/* { dg-require-effective-target arm_neon_ok } */
12502
/* { dg-options "-save-temps -fno-inline" } */
12503
/* { dg-add-options arm_neon } */
12505
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs32_1.c
12506
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs32_1.c
12508
/* Test the `vrev64q_s32' ARM Neon intrinsic. */
12510
-/* { dg-do run } */
12511
-/* { dg-require-effective-target arm_neon_ok } */
12512
/* { dg-options "-save-temps -fno-inline" } */
12513
/* { dg-add-options arm_neon } */
12515
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs8_1.c
12516
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs8_1.c
12518
/* Test the `vrev64q_s8' ARM Neon intrinsic. */
12520
-/* { dg-do run } */
12521
-/* { dg-require-effective-target arm_neon_ok } */
12522
/* { dg-options "-save-temps -fno-inline" } */
12523
/* { dg-add-options arm_neon } */
12525
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu16_1.c
12526
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu16_1.c
12528
/* Test the `vrev64q_u16' ARM Neon intrinsic. */
12530
-/* { dg-do run } */
12531
-/* { dg-require-effective-target arm_neon_ok } */
12532
/* { dg-options "-save-temps -fno-inline" } */
12533
/* { dg-add-options arm_neon } */
12535
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu32_1.c
12536
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu32_1.c
12538
/* Test the `vrev64q_u32' ARM Neon intrinsic. */
12540
-/* { dg-do run } */
12541
-/* { dg-require-effective-target arm_neon_ok } */
12542
/* { dg-options "-save-temps -fno-inline" } */
12543
/* { dg-add-options arm_neon } */
12545
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu8_1.c
12546
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu8_1.c
12548
/* Test the `vrev64q_u8' ARM Neon intrinsic. */
12550
-/* { dg-do run } */
12551
-/* { dg-require-effective-target arm_neon_ok } */
12552
/* { dg-options "-save-temps -fno-inline" } */
12553
/* { dg-add-options arm_neon } */
12555
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s16_1.c
12556
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s16_1.c
12558
/* Test the `vrev64s16' ARM Neon intrinsic. */
12560
-/* { dg-do run } */
12561
-/* { dg-require-effective-target arm_neon_ok } */
12562
/* { dg-options "-save-temps -fno-inline" } */
12563
/* { dg-add-options arm_neon } */
12565
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s32_1.c
12566
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s32_1.c
12568
/* Test the `vrev64s32' ARM Neon intrinsic. */
12570
-/* { dg-do run } */
12571
-/* { dg-require-effective-target arm_neon_ok } */
12572
/* { dg-options "-save-temps -fno-inline" } */
12573
/* { dg-add-options arm_neon } */
12575
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s8_1.c
12576
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s8_1.c
12578
/* Test the `vrev64s8' ARM Neon intrinsic. */
12580
-/* { dg-do run } */
12581
-/* { dg-require-effective-target arm_neon_ok } */
12582
/* { dg-options "-save-temps -fno-inline" } */
12583
/* { dg-add-options arm_neon } */
12585
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u16_1.c
12586
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u16_1.c
12588
/* Test the `vrev64u16' ARM Neon intrinsic. */
12590
-/* { dg-do run } */
12591
-/* { dg-require-effective-target arm_neon_ok } */
12592
/* { dg-options "-save-temps -fno-inline" } */
12593
/* { dg-add-options arm_neon } */
12595
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u32_1.c
12596
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u32_1.c
12598
/* Test the `vrev64u32' ARM Neon intrinsic. */
12600
-/* { dg-do run } */
12601
-/* { dg-require-effective-target arm_neon_ok } */
12602
/* { dg-options "-save-temps -fno-inline" } */
12603
/* { dg-add-options arm_neon } */
12605
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u8_1.c
12606
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u8_1.c
12608
/* Test the `vrev64u8' ARM Neon intrinsic. */
12610
-/* { dg-do run } */
12611
-/* { dg-require-effective-target arm_neon_ok } */
12612
/* { dg-options "-save-temps -fno-inline" } */
12613
/* { dg-add-options arm_neon } */
12615
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c
12616
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c
12618
/* Test the `vtrnf32' ARM Neon intrinsic. */
12620
-/* { dg-do run } */
12621
-/* { dg-require-effective-target arm_neon_ok } */
12622
/* { dg-options "-save-temps -O1 -fno-inline" } */
12623
/* { dg-add-options arm_neon } */
12625
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c
12626
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c
12628
/* Test the `vtrnp16' ARM Neon intrinsic. */
12630
-/* { dg-do run } */
12631
-/* { dg-require-effective-target arm_neon_ok } */
12632
/* { dg-options "-save-temps -O1 -fno-inline" } */
12633
/* { dg-add-options arm_neon } */
12635
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c
12636
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c
12638
/* Test the `vtrnp8' ARM Neon intrinsic. */
12640
-/* { dg-do run } */
12641
-/* { dg-require-effective-target arm_neon_ok } */
12642
/* { dg-options "-save-temps -O1 -fno-inline" } */
12643
/* { dg-add-options arm_neon } */
12645
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c
12646
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c
12648
/* Test the `vtrnQf32' ARM Neon intrinsic. */
12650
-/* { dg-do run } */
12651
-/* { dg-require-effective-target arm_neon_ok } */
12652
/* { dg-options "-save-temps -O1 -fno-inline" } */
12653
/* { dg-add-options arm_neon } */
12655
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c
12656
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c
12658
/* Test the `vtrnQp16' ARM Neon intrinsic. */
12660
-/* { dg-do run } */
12661
-/* { dg-require-effective-target arm_neon_ok } */
12662
/* { dg-options "-save-temps -O1 -fno-inline" } */
12663
/* { dg-add-options arm_neon } */
12665
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c
12666
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c
12668
/* Test the `vtrnQp8' ARM Neon intrinsic. */
12670
-/* { dg-do run } */
12671
-/* { dg-require-effective-target arm_neon_ok } */
12672
/* { dg-options "-save-temps -O1 -fno-inline" } */
12673
/* { dg-add-options arm_neon } */
12675
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c
12676
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c
12678
/* Test the `vtrnQs16' ARM Neon intrinsic. */
12680
-/* { dg-do run } */
12681
-/* { dg-require-effective-target arm_neon_ok } */
12682
/* { dg-options "-save-temps -O1 -fno-inline" } */
12683
/* { dg-add-options arm_neon } */
12685
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c
12686
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c
12688
/* Test the `vtrnQs32' ARM Neon intrinsic. */
12690
-/* { dg-do run } */
12691
-/* { dg-require-effective-target arm_neon_ok } */
12692
/* { dg-options "-save-temps -O1 -fno-inline" } */
12693
/* { dg-add-options arm_neon } */
12695
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c
12696
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c
12698
/* Test the `vtrnQs8' ARM Neon intrinsic. */
12700
-/* { dg-do run } */
12701
-/* { dg-require-effective-target arm_neon_ok } */
12702
/* { dg-options "-save-temps -O1 -fno-inline" } */
12703
/* { dg-add-options arm_neon } */
12705
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c
12706
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c
12708
/* Test the `vtrnQu16' ARM Neon intrinsic. */
12710
-/* { dg-do run } */
12711
-/* { dg-require-effective-target arm_neon_ok } */
12712
/* { dg-options "-save-temps -O1 -fno-inline" } */
12713
/* { dg-add-options arm_neon } */
12715
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c
12716
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c
12718
/* Test the `vtrnQu32' ARM Neon intrinsic. */
12720
-/* { dg-do run } */
12721
-/* { dg-require-effective-target arm_neon_ok } */
12722
/* { dg-options "-save-temps -O1 -fno-inline" } */
12723
/* { dg-add-options arm_neon } */
12725
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c
12726
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c
12728
/* Test the `vtrnQu8' ARM Neon intrinsic. */
12730
-/* { dg-do run } */
12731
-/* { dg-require-effective-target arm_neon_ok } */
12732
/* { dg-options "-save-temps -O1 -fno-inline" } */
12733
/* { dg-add-options arm_neon } */
12735
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c
12736
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c
12738
/* Test the `vtrns16' ARM Neon intrinsic. */
12740
-/* { dg-do run } */
12741
-/* { dg-require-effective-target arm_neon_ok } */
12742
/* { dg-options "-save-temps -O1 -fno-inline" } */
12743
/* { dg-add-options arm_neon } */
12745
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c
12746
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c
12748
/* Test the `vtrns32' ARM Neon intrinsic. */
12750
-/* { dg-do run } */
12751
-/* { dg-require-effective-target arm_neon_ok } */
12752
/* { dg-options "-save-temps -O1 -fno-inline" } */
12753
/* { dg-add-options arm_neon } */
12755
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c
12756
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c
12758
/* Test the `vtrns8' ARM Neon intrinsic. */
12760
-/* { dg-do run } */
12761
-/* { dg-require-effective-target arm_neon_ok } */
12762
/* { dg-options "-save-temps -O1 -fno-inline" } */
12763
/* { dg-add-options arm_neon } */
12765
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c
12766
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c
12768
/* Test the `vtrnu16' ARM Neon intrinsic. */
12770
-/* { dg-do run } */
12771
-/* { dg-require-effective-target arm_neon_ok } */
12772
/* { dg-options "-save-temps -O1 -fno-inline" } */
12773
/* { dg-add-options arm_neon } */
12775
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c
12776
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c
12778
/* Test the `vtrnu32' ARM Neon intrinsic. */
12780
-/* { dg-do run } */
12781
-/* { dg-require-effective-target arm_neon_ok } */
12782
/* { dg-options "-save-temps -O1 -fno-inline" } */
12783
/* { dg-add-options arm_neon } */
12785
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c
12786
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c
12788
/* Test the `vtrnu8' ARM Neon intrinsic. */
12790
-/* { dg-do run } */
12791
-/* { dg-require-effective-target arm_neon_ok } */
12792
/* { dg-options "-save-temps -O1 -fno-inline" } */
12793
/* { dg-add-options arm_neon } */
12795
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c
12796
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c
12798
/* Test the `vuzpf32' ARM Neon intrinsic. */
12800
-/* { dg-do run } */
12801
-/* { dg-require-effective-target arm_neon_ok } */
12802
/* { dg-options "-save-temps -O1 -fno-inline" } */
12803
/* { dg-add-options arm_neon } */
12805
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c
12806
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c
12808
/* Test the `vuzpp16' ARM Neon intrinsic. */
12810
-/* { dg-do run } */
12811
-/* { dg-require-effective-target arm_neon_ok } */
12812
/* { dg-options "-save-temps -O1 -fno-inline" } */
12813
/* { dg-add-options arm_neon } */
12815
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c
12816
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c
12818
/* Test the `vuzpp8' ARM Neon intrinsic. */
12820
-/* { dg-do run } */
12821
-/* { dg-require-effective-target arm_neon_ok } */
12822
/* { dg-options "-save-temps -O1 -fno-inline" } */
12823
/* { dg-add-options arm_neon } */
12825
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c
12826
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c
12828
/* Test the `vuzpQf32' ARM Neon intrinsic. */
12830
-/* { dg-do run } */
12831
-/* { dg-require-effective-target arm_neon_ok } */
12832
/* { dg-options "-save-temps -O1 -fno-inline" } */
12833
/* { dg-add-options arm_neon } */
12835
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c
12836
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c
12838
/* Test the `vuzpQp16' ARM Neon intrinsic. */
12840
-/* { dg-do run } */
12841
-/* { dg-require-effective-target arm_neon_ok } */
12842
/* { dg-options "-save-temps -O1 -fno-inline" } */
12843
/* { dg-add-options arm_neon } */
12845
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c
12846
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c
12848
/* Test the `vuzpQp8' ARM Neon intrinsic. */
12850
-/* { dg-do run } */
12851
-/* { dg-require-effective-target arm_neon_ok } */
12852
/* { dg-options "-save-temps -O1 -fno-inline" } */
12853
/* { dg-add-options arm_neon } */
12855
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c
12856
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c
12858
/* Test the `vuzpQs16' ARM Neon intrinsic. */
12860
-/* { dg-do run } */
12861
-/* { dg-require-effective-target arm_neon_ok } */
12862
/* { dg-options "-save-temps -O1 -fno-inline" } */
12863
/* { dg-add-options arm_neon } */
12865
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c
12866
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c
12868
/* Test the `vuzpQs32' ARM Neon intrinsic. */
12870
-/* { dg-do run } */
12871
-/* { dg-require-effective-target arm_neon_ok } */
12872
/* { dg-options "-save-temps -O1 -fno-inline" } */
12873
/* { dg-add-options arm_neon } */
12875
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c
12876
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c
12878
/* Test the `vuzpQs8' ARM Neon intrinsic. */
12880
-/* { dg-do run } */
12881
-/* { dg-require-effective-target arm_neon_ok } */
12882
/* { dg-options "-save-temps -O1 -fno-inline" } */
12883
/* { dg-add-options arm_neon } */
12885
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c
12886
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c
12888
/* Test the `vuzpQu16' ARM Neon intrinsic. */
12890
-/* { dg-do run } */
12891
-/* { dg-require-effective-target arm_neon_ok } */
12892
/* { dg-options "-save-temps -O1 -fno-inline" } */
12893
/* { dg-add-options arm_neon } */
12895
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c
12896
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c
12898
/* Test the `vuzpQu32' ARM Neon intrinsic. */
12900
-/* { dg-do run } */
12901
-/* { dg-require-effective-target arm_neon_ok } */
12902
/* { dg-options "-save-temps -O1 -fno-inline" } */
12903
/* { dg-add-options arm_neon } */
12905
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c
12906
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c
12908
/* Test the `vuzpQu8' ARM Neon intrinsic. */
12910
-/* { dg-do run } */
12911
-/* { dg-require-effective-target arm_neon_ok } */
12912
/* { dg-options "-save-temps -O1 -fno-inline" } */
12913
/* { dg-add-options arm_neon } */
12915
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c
12916
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c
12918
/* Test the `vuzps16' ARM Neon intrinsic. */
12920
-/* { dg-do run } */
12921
-/* { dg-require-effective-target arm_neon_ok } */
12922
/* { dg-options "-save-temps -O1 -fno-inline" } */
12923
/* { dg-add-options arm_neon } */
12925
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c
12926
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c
12928
/* Test the `vuzps32' ARM Neon intrinsic. */
12930
-/* { dg-do run } */
12931
-/* { dg-require-effective-target arm_neon_ok } */
12932
/* { dg-options "-save-temps -O1 -fno-inline" } */
12933
/* { dg-add-options arm_neon } */
12935
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c
12936
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c
12938
/* Test the `vuzps8' ARM Neon intrinsic. */
12940
-/* { dg-do run } */
12941
-/* { dg-require-effective-target arm_neon_ok } */
12942
/* { dg-options "-save-temps -O1 -fno-inline" } */
12943
/* { dg-add-options arm_neon } */
12945
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c
12946
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c
12948
/* Test the `vuzpu16' ARM Neon intrinsic. */
12950
-/* { dg-do run } */
12951
-/* { dg-require-effective-target arm_neon_ok } */
12952
/* { dg-options "-save-temps -O1 -fno-inline" } */
12953
/* { dg-add-options arm_neon } */
12955
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c
12956
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c
12958
/* Test the `vuzpu32' ARM Neon intrinsic. */
12960
-/* { dg-do run } */
12961
-/* { dg-require-effective-target arm_neon_ok } */
12962
/* { dg-options "-save-temps -O1 -fno-inline" } */
12963
/* { dg-add-options arm_neon } */
12965
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c
12966
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c
12968
/* Test the `vuzpu8' ARM Neon intrinsic. */
12970
-/* { dg-do run } */
12971
-/* { dg-require-effective-target arm_neon_ok } */
12972
/* { dg-options "-save-temps -O1 -fno-inline" } */
12973
/* { dg-add-options arm_neon } */
12975
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c
12976
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c
12978
/* Test the `vzipf32' ARM Neon intrinsic. */
12980
-/* { dg-do run } */
12981
-/* { dg-require-effective-target arm_neon_ok } */
12982
/* { dg-options "-save-temps -O1 -fno-inline" } */
12983
/* { dg-add-options arm_neon } */
12985
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c
12986
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c
12988
/* Test the `vzipp16' ARM Neon intrinsic. */
12990
-/* { dg-do run } */
12991
-/* { dg-require-effective-target arm_neon_ok } */
12992
/* { dg-options "-save-temps -O1 -fno-inline" } */
12993
/* { dg-add-options arm_neon } */
12995
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c
12996
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c
12998
/* Test the `vzipp8' ARM Neon intrinsic. */
13000
-/* { dg-do run } */
13001
-/* { dg-require-effective-target arm_neon_ok } */
13002
/* { dg-options "-save-temps -O1 -fno-inline" } */
13003
/* { dg-add-options arm_neon } */
13005
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c
13006
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c
13008
/* Test the `vzipQf32' ARM Neon intrinsic. */
13010
-/* { dg-do run } */
13011
-/* { dg-require-effective-target arm_neon_ok } */
13012
/* { dg-options "-save-temps -O1 -fno-inline" } */
13013
/* { dg-add-options arm_neon } */
13015
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c
13016
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c
13018
/* Test the `vzipQp16' ARM Neon intrinsic. */
13020
-/* { dg-do run } */
13021
-/* { dg-require-effective-target arm_neon_ok } */
13022
/* { dg-options "-save-temps -O1 -fno-inline" } */
13023
/* { dg-add-options arm_neon } */
13025
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c
13026
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c
13028
/* Test the `vzipQp8' ARM Neon intrinsic. */
13030
-/* { dg-do run } */
13031
-/* { dg-require-effective-target arm_neon_ok } */
13032
/* { dg-options "-save-temps -O1 -fno-inline" } */
13033
/* { dg-add-options arm_neon } */
13035
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c
13036
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c
13038
/* Test the `vzipQs16' ARM Neon intrinsic. */
13040
-/* { dg-do run } */
13041
-/* { dg-require-effective-target arm_neon_ok } */
13042
/* { dg-options "-save-temps -O1 -fno-inline" } */
13043
/* { dg-add-options arm_neon } */
13045
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c
13046
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c
13048
/* Test the `vzipQs32' ARM Neon intrinsic. */
13050
-/* { dg-do run } */
13051
-/* { dg-require-effective-target arm_neon_ok } */
13052
/* { dg-options "-save-temps -O1 -fno-inline" } */
13053
/* { dg-add-options arm_neon } */
13055
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c
13056
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c
13058
/* Test the `vzipQs8' ARM Neon intrinsic. */
13060
-/* { dg-do run } */
13061
-/* { dg-require-effective-target arm_neon_ok } */
13062
/* { dg-options "-save-temps -O1 -fno-inline" } */
13063
/* { dg-add-options arm_neon } */
13065
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c
13066
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c
13068
/* Test the `vzipQu16' ARM Neon intrinsic. */
13070
-/* { dg-do run } */
13071
-/* { dg-require-effective-target arm_neon_ok } */
13072
/* { dg-options "-save-temps -O1 -fno-inline" } */
13073
/* { dg-add-options arm_neon } */
13075
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c
13076
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c
13078
/* Test the `vzipQu32' ARM Neon intrinsic. */
13080
-/* { dg-do run } */
13081
-/* { dg-require-effective-target arm_neon_ok } */
13082
/* { dg-options "-save-temps -O1 -fno-inline" } */
13083
/* { dg-add-options arm_neon } */
13085
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c
13086
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c
13088
/* Test the `vzipQu8' ARM Neon intrinsic. */
13090
-/* { dg-do run } */
13091
-/* { dg-require-effective-target arm_neon_ok } */
13092
/* { dg-options "-save-temps -O1 -fno-inline" } */
13093
/* { dg-add-options arm_neon } */
13095
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c
13096
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c
13098
/* Test the `vzips16' ARM Neon intrinsic. */
13100
-/* { dg-do run } */
13101
-/* { dg-require-effective-target arm_neon_ok } */
13102
/* { dg-options "-save-temps -O1 -fno-inline" } */
13103
/* { dg-add-options arm_neon } */
13105
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c
13106
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c
13108
/* Test the `vzips32' ARM Neon intrinsic. */
13110
-/* { dg-do run } */
13111
-/* { dg-require-effective-target arm_neon_ok } */
13112
/* { dg-options "-save-temps -O1 -fno-inline" } */
13113
/* { dg-add-options arm_neon } */
13115
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c
13116
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c
13118
/* Test the `vzips8' ARM Neon intrinsic. */
13120
-/* { dg-do run } */
13121
-/* { dg-require-effective-target arm_neon_ok } */
13122
/* { dg-options "-save-temps -O1 -fno-inline" } */
13123
/* { dg-add-options arm_neon } */
13125
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c
13126
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c
13128
/* Test the `vzipu16' ARM Neon intrinsic. */
13130
-/* { dg-do run } */
13131
-/* { dg-require-effective-target arm_neon_ok } */
13132
/* { dg-options "-save-temps -O1 -fno-inline" } */
13133
/* { dg-add-options arm_neon } */
13135
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c
13136
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c
13138
/* Test the `vzipu32' ARM Neon intrinsic. */
13140
-/* { dg-do run } */
13141
-/* { dg-require-effective-target arm_neon_ok } */
13142
/* { dg-options "-save-temps -O1 -fno-inline" } */
13143
/* { dg-add-options arm_neon } */
13145
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c
13146
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c
13148
/* Test the `vzipu8' ARM Neon intrinsic. */
13150
-/* { dg-do run } */
13151
-/* { dg-require-effective-target arm_neon_ok } */
13152
/* { dg-options "-save-temps -O1 -fno-inline" } */
13153
/* { dg-add-options arm_neon } */
13155
--- a/src//dev/null
13156
+++ b/src/gcc/testsuite/gcc.target/arm/unsigned-float.c
13158
+/* { dg-do compile } */
13159
+/* { dg-require-effective-target arm_vfp_ok } */
13160
+/* { dg-options "-march=armv7-a -O1 -mfloat-abi=softfp" } */
13161
+/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */
13163
+#include <stdint.h>
13168
+ return (double)(float)x;
13174
+ return (float)(double)x;
13177
+/* { dg-final { scan-assembler-not "vcvt.(f32.f64|f64.f32)" } } */
13178
--- a/src/gcc/tree-ssa-loop-ivopts.c
13179
+++ b/src/gcc/tree-ssa-loop-ivopts.c
13180
@@ -226,6 +226,7 @@ struct cost_pair
13183
unsigned id; /* The id of the use. */
13184
+ unsigned sub_id; /* The id of the sub use. */
13185
enum use_type type; /* Type of the use. */
13186
struct iv *iv; /* The induction variable it is based on. */
13187
gimple stmt; /* Statement in that it occurs. */
13188
@@ -239,6 +240,11 @@ struct iv_use
13190
struct iv_cand *selected;
13191
/* The selected candidate. */
13193
+ struct iv_use *next; /* The next sub use. */
13194
+ tree addr_base; /* Base address with const offset stripped. */
13195
+ unsigned HOST_WIDE_INT addr_offset;
13196
+ /* Const offset stripped from base address. */
13199
/* The position where the iv is computed. */
13200
@@ -555,7 +561,11 @@ dump_iv (FILE *file, struct iv *iv)
13202
dump_use (FILE *file, struct iv_use *use)
13204
- fprintf (file, "use %d\n", use->id);
13205
+ fprintf (file, "use %d", use->id);
13207
+ fprintf (file, ".%d", use->sub_id);
13209
+ fprintf (file, "\n");
13213
@@ -604,8 +614,12 @@ dump_uses (FILE *file, struct ivopts_data *data)
13214
for (i = 0; i < n_iv_uses (data); i++)
13216
use = iv_use (data, i);
13218
- dump_use (file, use);
13221
+ dump_use (file, use);
13225
fprintf (file, "\n");
13228
@@ -1326,33 +1340,84 @@ find_induction_variables (struct ivopts_data *data)
13232
-/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
13233
+/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
13234
+ For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
13235
+ is the const offset stripped from IV base. For uses of other types,
13236
+ ADDR_BASE and ADDR_OFFSET are zero by default. */
13238
static struct iv_use *
13239
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
13240
- gimple stmt, enum use_type use_type)
13241
+ gimple stmt, enum use_type use_type, tree addr_base = NULL,
13242
+ unsigned HOST_WIDE_INT addr_offset = 0)
13244
struct iv_use *use = XCNEW (struct iv_use);
13246
use->id = n_iv_uses (data);
13248
use->type = use_type;
13252
use->related_cands = BITMAP_ALLOC (NULL);
13253
+ use->next = NULL;
13254
+ use->addr_base = addr_base;
13255
+ use->addr_offset = addr_offset;
13257
/* To avoid showing ssa name in the dumps, if it was not reset by the
13259
iv->ssa_name = NULL_TREE;
13261
- if (dump_file && (dump_flags & TDF_DETAILS))
13262
- dump_use (dump_file, use);
13264
data->iv_uses.safe_push (use);
13269
+/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
13270
+ The sub use is recorded under the one whose use id is ID_GROUP. */
13272
+static struct iv_use *
13273
+record_sub_use (struct ivopts_data *data, tree *use_p,
13274
+ struct iv *iv, gimple stmt, enum use_type use_type,
13275
+ tree addr_base, unsigned HOST_WIDE_INT addr_offset,
13276
+ unsigned int id_group)
13278
+ struct iv_use *use = XCNEW (struct iv_use);
13279
+ struct iv_use *group = iv_use (data, id_group);
13281
+ use->id = group->id;
13283
+ use->type = use_type;
13285
+ use->stmt = stmt;
13286
+ use->op_p = use_p;
13287
+ use->related_cands = NULL;
13288
+ use->addr_base = addr_base;
13289
+ use->addr_offset = addr_offset;
13291
+ /* Sub use list is maintained in offset ascending order. */
13292
+ if (addr_offset <= group->addr_offset)
13294
+ use->related_cands = group->related_cands;
13295
+ group->related_cands = NULL;
13296
+ use->next = group;
13297
+ data->iv_uses[id_group] = use;
13301
+ struct iv_use *pre;
13305
+ group = group->next;
13307
+ while (group && addr_offset > group->addr_offset);
13308
+ use->next = pre->next;
13315
/* Checks whether OP is a loop-level invariant and if so, records it.
13316
NONLINEAR_USE is true if the invariant is used in a way we do not
13317
handle specially. */
13318
@@ -1837,6 +1902,50 @@ may_be_nonaddressable_p (tree expr)
13323
+strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
13325
+/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
13326
+ If there is an existing use which has same stripped iv base and step,
13327
+ this function records this one as a sub use to that; otherwise records
13328
+ it as a normal one. */
13330
+static struct iv_use *
13331
+record_group_use (struct ivopts_data *data, tree *use_p,
13332
+ struct iv *iv, gimple stmt, enum use_type use_type)
13335
+ struct iv_use *use;
13337
+ unsigned HOST_WIDE_INT addr_offset;
13339
+ /* Only support sub use for address type uses, that is, with base
13341
+ if (!iv->base_object)
13342
+ return record_use (data, use_p, iv, stmt, use_type);
13344
+ addr_base = strip_offset (iv->base, &addr_offset);
13345
+ for (i = 0; i < n_iv_uses (data); i++)
13347
+ use = iv_use (data, i);
13348
+ if (use->type != USE_ADDRESS || !use->iv->base_object)
13351
+ /* Check if it has the same stripped base and step. */
13352
+ if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
13353
+ && operand_equal_p (iv->step, use->iv->step, 0)
13354
+ && operand_equal_p (addr_base, use->addr_base, 0))
13358
+ if (i == n_iv_uses (data))
13359
+ return record_use (data, use_p, iv, stmt,
13360
+ use_type, addr_base, addr_offset);
13362
+ return record_sub_use (data, use_p, iv, stmt,
13363
+ use_type, addr_base, addr_offset, i);
13366
/* Finds addresses in *OP_P inside STMT. */
13369
@@ -1947,7 +2056,7 @@ find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p
13372
civ = alloc_iv (base, step);
13373
- record_use (data, op_p, civ, stmt, USE_ADDRESS);
13374
+ record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
13378
@@ -2133,6 +2242,172 @@ find_interesting_uses (struct ivopts_data *data)
13382
+/* Compute maximum offset of [base + offset] addressing mode
13383
+ for memory reference represented by USE. */
13385
+static HOST_WIDE_INT
13386
+compute_max_addr_offset (struct iv_use *use)
13390
+ HOST_WIDE_INT i, off;
13391
+ unsigned list_index, num;
13393
+ machine_mode mem_mode, addr_mode;
13394
+ static vec<HOST_WIDE_INT> max_offset_list;
13396
+ as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
13397
+ mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
13399
+ num = max_offset_list.length ();
13400
+ list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
13401
+ if (list_index >= num)
13403
+ max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
13404
+ for (; num < max_offset_list.length (); num++)
13405
+ max_offset_list[num] = -1;
13408
+ off = max_offset_list[list_index];
13412
+ addr_mode = targetm.addr_space.address_mode (as);
13413
+ reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
13414
+ addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
13416
+ width = GET_MODE_BITSIZE (addr_mode) - 1;
13417
+ if (width > (HOST_BITS_PER_WIDE_INT - 1))
13418
+ width = HOST_BITS_PER_WIDE_INT - 1;
13420
+ for (i = width; i > 0; i--)
13422
+ off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
13423
+ XEXP (addr, 1) = gen_int_mode (off, addr_mode);
13424
+ if (memory_address_addr_space_p (mem_mode, addr, as))
13427
+ /* For some strict-alignment targets, the offset must be naturally
13428
+ aligned. Try an aligned offset if mem_mode is not QImode. */
13429
+ off = ((unsigned HOST_WIDE_INT) 1 << i);
13430
+ if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
13432
+ off -= GET_MODE_SIZE (mem_mode);
13433
+ XEXP (addr, 1) = gen_int_mode (off, addr_mode);
13434
+ if (memory_address_addr_space_p (mem_mode, addr, as))
13441
+ max_offset_list[list_index] = off;
13445
+/* Check if all small groups should be split. Return true if and
13448
+ 1) At least one groups contain two uses with different offsets.
13449
+ 2) No group contains more than two uses with different offsets.
13451
+ Return false otherwise. We want to split such groups because:
13453
+ 1) Small groups don't have much benefit and may interfer with
13454
+ general candidate selection.
13455
+ 2) Size for problem with only small groups is usually small and
13456
+ general algorithm can handle it well.
13458
+ TODO -- Above claim may not hold when auto increment is supported. */
13461
+split_all_small_groups (struct ivopts_data *data)
13463
+ bool split_p = false;
13464
+ unsigned int i, n, distinct;
13465
+ struct iv_use *pre, *use;
13467
+ n = n_iv_uses (data);
13468
+ for (i = 0; i < n; i++)
13470
+ use = iv_use (data, i);
13475
+ gcc_assert (use->type == USE_ADDRESS);
13476
+ for (pre = use, use = use->next; use; pre = use, use = use->next)
13478
+ if (pre->addr_offset != use->addr_offset)
13481
+ if (distinct > 2)
13484
+ if (distinct == 2)
13491
+/* For each group of address type uses, this function further groups
13492
+ these uses according to the maximum offset supported by target's
13493
+ [base + offset] addressing mode. */
13496
+group_address_uses (struct ivopts_data *data)
13498
+ HOST_WIDE_INT max_offset = -1;
13499
+ unsigned int i, n, sub_id;
13500
+ struct iv_use *pre, *use;
13501
+ unsigned HOST_WIDE_INT addr_offset_first;
13503
+ /* Reset max offset to split all small groups. */
13504
+ if (split_all_small_groups (data))
13507
+ n = n_iv_uses (data);
13508
+ for (i = 0; i < n; i++)
13510
+ use = iv_use (data, i);
13514
+ gcc_assert (use->type == USE_ADDRESS);
13515
+ if (max_offset != 0)
13516
+ max_offset = compute_max_addr_offset (use);
13521
+ addr_offset_first = use->addr_offset;
13522
+ /* Only uses with offset that can fit in offset part against
13523
+ the first use can be grouped together. */
13524
+ for (pre = use, use = use->next;
13525
+ use && (use->addr_offset - addr_offset_first
13526
+ <= (unsigned HOST_WIDE_INT) max_offset);
13527
+ pre = use, use = use->next)
13529
+ use->id = pre->id;
13530
+ use->sub_id = ++sub_id;
13533
+ /* Break the list and create new group. */
13536
+ pre->next = NULL;
13537
+ use->id = n_iv_uses (data);
13538
+ use->related_cands = BITMAP_ALLOC (NULL);
13539
+ data->iv_uses.safe_push (use);
13544
+ if (dump_file && (dump_flags & TDF_DETAILS))
13545
+ dump_uses (dump_file, data);
13548
/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
13549
is true, assume we are inside an address. If TOP_COMPREF is true, assume
13550
we are at the top-level of the processed address. */
13551
@@ -2556,6 +2831,8 @@ static void
13552
add_candidate (struct ivopts_data *data,
13553
tree base, tree step, bool important, struct iv_use *use)
13555
+ gcc_assert (use == NULL || use->sub_id == 0);
13557
if (ip_normal_pos (data->current_loop))
13558
add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
13559
if (ip_end_pos (data->current_loop)
13560
@@ -2785,11 +3062,22 @@ new_cost (unsigned runtime, unsigned complexity)
13564
+/* Returns true if COST is infinite. */
13567
+infinite_cost_p (comp_cost cost)
13569
+ return cost.cost == INFTY;
13572
/* Adds costs COST1 and COST2. */
13575
add_costs (comp_cost cost1, comp_cost cost2)
13577
+ if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
13578
+ return infinite_cost;
13580
cost1.cost += cost2.cost;
13581
cost1.complexity += cost2.complexity;
13583
@@ -2818,14 +3106,6 @@ compare_costs (comp_cost cost1, comp_cost cost2)
13584
return cost1.cost - cost2.cost;
13587
-/* Returns true if COST is infinite. */
13590
-infinite_cost_p (comp_cost cost)
13592
- return cost.cost == INFTY;
13595
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
13596
on invariants DEPENDS_ON and that the value used in expressing it
13597
is VALUE, and in case of iv elimination the comparison operator is COMP. */
13598
@@ -4300,7 +4580,15 @@ get_computation_cost_at (struct ivopts_data *data,
13599
cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
13603
+ /* Set of invariants depended on by sub use has already been computed
13604
+ for the first use in the group. */
13608
+ if (depends_on && *depends_on)
13609
+ bitmap_clear (*depends_on);
13611
+ else if (inv_expr_id)
13614
get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
13615
@@ -4429,6 +4717,8 @@ determine_use_iv_cost_address (struct ivopts_data *data,
13618
int inv_expr_id = -1;
13619
+ struct iv_use *sub_use;
13620
+ comp_cost sub_cost;
13621
comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
13622
&can_autoinc, &inv_expr_id);
13624
@@ -4442,6 +4732,15 @@ determine_use_iv_cost_address (struct ivopts_data *data,
13625
else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
13626
cost = infinite_cost;
13628
+ for (sub_use = use->next;
13629
+ sub_use && !infinite_cost_p (cost);
13630
+ sub_use = sub_use->next)
13632
+ sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
13633
+ &can_autoinc, &inv_expr_id);
13634
+ cost = add_costs (cost, sub_cost);
13637
set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
13640
@@ -6588,8 +6887,8 @@ adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
13641
/* Rewrites USE (address that is an iv) using candidate CAND. */
13644
-rewrite_use_address (struct ivopts_data *data,
13645
- struct iv_use *use, struct iv_cand *cand)
13646
+rewrite_use_address_1 (struct ivopts_data *data,
13647
+ struct iv_use *use, struct iv_cand *cand)
13650
gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
13651
@@ -6624,6 +6923,28 @@ rewrite_use_address (struct ivopts_data *data,
13655
+/* Rewrites USE (address that is an iv) using candidate CAND. If it's the
13656
+ first use of a group, rewrites sub uses in the group too. */
13659
+rewrite_use_address (struct ivopts_data *data,
13660
+ struct iv_use *use, struct iv_cand *cand)
13662
+ struct iv_use *next;
13664
+ gcc_assert (use->sub_id == 0);
13665
+ rewrite_use_address_1 (data, use, cand);
13666
+ update_stmt (use->stmt);
13668
+ for (next = use->next; next != NULL; next = next->next)
13670
+ rewrite_use_address_1 (data, next, cand);
13671
+ update_stmt (next->stmt);
13677
/* Rewrites USE (the condition such that one of the arguments is an iv) using
13680
@@ -6899,6 +7220,18 @@ free_loop_data (struct ivopts_data *data)
13681
for (i = 0; i < n_iv_uses (data); i++)
13683
struct iv_use *use = iv_use (data, i);
13684
+ struct iv_use *pre = use, *sub = use->next;
13688
+ gcc_assert (sub->related_cands == NULL);
13689
+ gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
13698
BITMAP_FREE (use->related_cands);
13699
@@ -7025,6 +7358,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
13701
/* Finds interesting uses (item 1). */
13702
find_interesting_uses (data);
13703
+ group_address_uses (data);
13704
if (n_iv_uses (data) > MAX_CONSIDERED_USES)
13707
--- a/src/gcc/tree-ssa-math-opts.c
13708
+++ b/src/gcc/tree-ssa-math-opts.c
13709
@@ -143,6 +143,7 @@ along with GCC; see the file COPYING3. If not see
13710
#include "target.h"
13711
#include "gimple-pretty-print.h"
13712
#include "builtins.h"
13713
+#include "params.h"
13715
/* FIXME: RTL headers have to be included here for optabs. */
13716
#include "rtl.h" /* Because optabs.h wants enum rtx_code. */
13717
@@ -1148,6 +1149,357 @@ build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
13721
+struct pow_synth_sqrt_info
13724
+ unsigned int deepest;
13725
+ unsigned int num_mults;
13728
+/* Return true iff the real value C can be represented as a
13729
+ sum of powers of 0.5 up to N. That is:
13730
+ C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
13731
+ Record in INFO the various parameters of the synthesis algorithm such
13732
+ as the factors a[i], the maximum 0.5 power and the number of
13733
+ multiplications that will be required. */
13736
+representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
13737
+ struct pow_synth_sqrt_info *info)
13739
+ REAL_VALUE_TYPE factor = dconsthalf;
13740
+ REAL_VALUE_TYPE remainder = c;
13742
+ info->deepest = 0;
13743
+ info->num_mults = 0;
13744
+ memset (info->factors, 0, n * sizeof (bool));
13746
+ for (unsigned i = 0; i < n; i++)
13748
+ REAL_VALUE_TYPE res;
13750
+ /* If something inexact happened bail out now. */
13751
+ if (REAL_ARITHMETIC (res, MINUS_EXPR, remainder, factor))
13754
+ /* We have hit zero. The number is representable as a sum
13755
+ of powers of 0.5. */
13756
+ if (REAL_VALUES_EQUAL (res, dconst0))
13758
+ info->factors[i] = true;
13759
+ info->deepest = i + 1;
13762
+ else if (!REAL_VALUE_NEGATIVE (res))
13765
+ info->factors[i] = true;
13766
+ info->num_mults++;
13769
+ info->factors[i] = false;
13771
+ REAL_ARITHMETIC (factor, MULT_EXPR, factor, dconsthalf);
13776
+/* Return the tree corresponding to FN being applied
13777
+ to ARG N times at GSI and LOC.
13778
+ Look up previous results from CACHE if need be.
13779
+ cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
13782
+get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
13783
+ tree fn, location_t loc, tree *cache)
13785
+ tree res = cache[n];
13788
+ tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
13789
+ res = build_and_insert_call (gsi, loc, fn, prev);
13796
+/* Print to STREAM the repeated application of function FNAME to ARG
13797
+ N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
13798
+ "foo (foo (x))". */
13801
+print_nested_fn (FILE* stream, const char *fname, const char* arg,
13805
+ fprintf (stream, "%s", arg);
13808
+ fprintf (stream, "%s (", fname);
13809
+ print_nested_fn (stream, fname, arg, n - 1);
13810
+ fprintf (stream, ")");
13814
+/* Print to STREAM the fractional sequence of sqrt chains
13815
+ applied to ARG, described by INFO. Used for the dump file. */
13818
+dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
13819
+ struct pow_synth_sqrt_info *info)
13821
+ for (unsigned int i = 0; i < info->deepest; i++)
13823
+ bool is_set = info->factors[i];
13826
+ print_nested_fn (stream, "sqrt", arg, i + 1);
13827
+ if (i != info->deepest - 1)
13828
+ fprintf (stream, " * ");
13833
+/* Print to STREAM a representation of raising ARG to an integer
13834
+ power N. Used for the dump file. */
13837
+dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
13840
+ fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
13842
+ fprintf (stream, "%s", arg);
13845
+/* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
13846
+ square roots. Place at GSI and LOC. Limit the maximum depth
13847
+ of the sqrt chains to MAX_DEPTH. Return the tree holding the
13848
+ result of the expanded sequence or NULL_TREE if the expansion failed.
13850
+ This routine assumes that ARG1 is a real number with a fractional part
13851
+ (the integer exponent case will have been handled earlier in
13852
+ gimple_expand_builtin_pow).
13855
+ * For ARG1 composed of a whole part WHOLE_PART and a fractional part
13856
+ FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
13857
+ FRAC_PART == ARG1 - WHOLE_PART:
13858
+ Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
13859
+ POW (ARG0, FRAC_PART) is expanded as a product of square root chains
13860
+ if it can be expressed as such, that is if FRAC_PART satisfies:
13861
+ FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
13862
+ where integer a[i] is either 0 or 1.
13865
+ POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
13866
+ --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
13868
+ For ARG1 < 0.0 there are two approaches:
13869
+ * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
13870
+ is calculated as above.
13873
+ POW (x, -5.625) == 1.0 / POW (x, 5.625)
13874
+ --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
13876
+ * (B) : WHOLE_PART := - ceil (abs (ARG1))
13877
+ FRAC_PART := ARG1 - WHOLE_PART
13878
+ and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
13880
+ POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
13881
+ --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
13883
+ For ARG1 < 0.0 we choose between (A) and (B) depending on
13884
+ how many multiplications we'd have to do.
13885
+ So, for the example in (B): POW (x, -5.875), if we were to
13886
+ follow algorithm (A) we would produce:
13887
+ 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
13888
+ which contains more multiplications than approach (B).
13890
+ Hopefully, this approach will eliminate potentially expensive POW library
13891
+ calls when unsafe floating point math is enabled and allow the compiler to
13892
+ further optimise the multiplies, square roots and divides produced by this
13896
+expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
13897
+ tree arg0, tree arg1, HOST_WIDE_INT max_depth)
13899
+ tree type = TREE_TYPE (arg0);
13900
+ machine_mode mode = TYPE_MODE (type);
13901
+ tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
13902
+ bool one_over = true;
13905
+ return NULL_TREE;
13907
+ if (TREE_CODE (arg1) != REAL_CST)
13908
+ return NULL_TREE;
13910
+ REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
13912
+ gcc_assert (max_depth > 0);
13913
+ tree *cache = XALLOCAVEC (tree, max_depth + 1);
13915
+ struct pow_synth_sqrt_info synth_info;
13916
+ synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
13917
+ synth_info.deepest = 0;
13918
+ synth_info.num_mults = 0;
13920
+ bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
13921
+ REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
13923
+ /* The whole and fractional parts of exp. */
13924
+ REAL_VALUE_TYPE whole_part;
13925
+ REAL_VALUE_TYPE frac_part;
13927
+ real_floor (&whole_part, mode, &exp);
13928
+ REAL_ARITHMETIC (frac_part, MINUS_EXPR, exp, whole_part);
13931
+ REAL_VALUE_TYPE ceil_whole = dconst0;
13932
+ REAL_VALUE_TYPE ceil_fract = dconst0;
13936
+ real_ceil (&ceil_whole, mode, &exp);
13937
+ REAL_ARITHMETIC (ceil_fract, MINUS_EXPR, ceil_whole, exp);
13940
+ if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
13941
+ return NULL_TREE;
13943
+ /* Check whether it's more profitable to not use 1.0 / ... */
13946
+ struct pow_synth_sqrt_info alt_synth_info;
13947
+ alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
13948
+ alt_synth_info.deepest = 0;
13949
+ alt_synth_info.num_mults = 0;
13951
+ if (representable_as_half_series_p (ceil_fract, max_depth,
13953
+ && alt_synth_info.deepest <= synth_info.deepest
13954
+ && alt_synth_info.num_mults < synth_info.num_mults)
13956
+ whole_part = ceil_whole;
13957
+ frac_part = ceil_fract;
13958
+ synth_info.deepest = alt_synth_info.deepest;
13959
+ synth_info.num_mults = alt_synth_info.num_mults;
13960
+ memcpy (synth_info.factors, alt_synth_info.factors,
13961
+ (max_depth + 1) * sizeof (bool));
13962
+ one_over = false;
13966
+ HOST_WIDE_INT n = real_to_integer (&whole_part);
13967
+ REAL_VALUE_TYPE cint;
13968
+ real_from_integer (&cint, VOIDmode, n, SIGNED);
13970
+ if (!real_identical (&whole_part, &cint))
13971
+ return NULL_TREE;
13973
+ if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
13974
+ return NULL_TREE;
13976
+ memset (cache, 0, (max_depth + 1) * sizeof (tree));
13978
+ tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
13980
+ /* Calculate the integer part of the exponent. */
13983
+ integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
13984
+ if (!integer_res)
13985
+ return NULL_TREE;
13992
+ real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
13993
+ fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
13999
+ fprintf (dump_file, "1.0 / (");
14000
+ dump_integer_part (dump_file, "x", n);
14002
+ fprintf (dump_file, " * ");
14003
+ dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
14004
+ fprintf (dump_file, ")");
14008
+ dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
14009
+ fprintf (dump_file, " / (");
14010
+ dump_integer_part (dump_file, "x", n);
14011
+ fprintf (dump_file, ")");
14016
+ dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
14018
+ fprintf (dump_file, " * ");
14019
+ dump_integer_part (dump_file, "x", n);
14022
+ fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
14026
+ tree fract_res = NULL_TREE;
14029
+ /* Calculate the fractional part of the exponent. */
14030
+ for (unsigned i = 0; i < synth_info.deepest; i++)
14032
+ if (synth_info.factors[i])
14034
+ tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
14037
+ fract_res = sqrt_chain;
14040
+ fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
14041
+ fract_res, sqrt_chain);
14045
+ tree res = NULL_TREE;
14052
+ res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
14053
+ fract_res, integer_res);
14057
+ res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
14058
+ build_real (type, dconst1), res);
14062
+ res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
14063
+ fract_res, integer_res);
14067
+ res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
14068
+ fract_res, integer_res);
14072
/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
14073
with location info LOC. If possible, create an equivalent and
14074
less expensive sequence of statements prior to GSI, and return an
14075
@@ -1157,13 +1509,17 @@ static tree
14076
gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
14077
tree arg0, tree arg1)
14079
- REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
14080
+ REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
14081
REAL_VALUE_TYPE c2, dconst3;
14083
- tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, result, cbrt_x, powi_cbrt_x;
14084
+ tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
14086
+ bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
14087
bool hw_sqrt_exists, c_is_int, c2_is_int;
14089
+ dconst1_4 = dconst1;
14090
+ SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
14092
/* If the exponent isn't a constant, there's nothing of interest
14094
if (TREE_CODE (arg1) != REAL_CST)
14095
@@ -1179,7 +1535,7 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
14097
&& ((n >= -1 && n <= 2)
14098
|| (flag_unsafe_math_optimizations
14099
- && optimize_bb_for_speed_p (gsi_bb (*gsi))
14101
&& powi_cost (n) <= POWI_MAX_MULTS)))
14102
return gimple_expand_builtin_powi (gsi, loc, arg0, n);
14104
@@ -1196,49 +1552,8 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
14105
&& !HONOR_SIGNED_ZEROS (mode))
14106
return build_and_insert_call (gsi, loc, sqrtfn, arg0);
14108
- /* Optimize pow(x,0.25) = sqrt(sqrt(x)). Assume on most machines that
14109
- a builtin sqrt instruction is smaller than a call to pow with 0.25,
14110
- so do this optimization even if -Os. Don't do this optimization
14111
- if we don't have a hardware sqrt insn. */
14112
- dconst1_4 = dconst1;
14113
- SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
14114
hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
14116
- if (flag_unsafe_math_optimizations
14118
- && REAL_VALUES_EQUAL (c, dconst1_4)
14119
- && hw_sqrt_exists)
14122
- sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
14124
- /* sqrt(sqrt(x)) */
14125
- return build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);
14128
- /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are
14129
- optimizing for space. Don't do this optimization if we don't have
14130
- a hardware sqrt insn. */
14131
- real_from_integer (&dconst3_4, VOIDmode, 3, SIGNED);
14132
- SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
14134
- if (flag_unsafe_math_optimizations
14136
- && optimize_function_for_speed_p (cfun)
14137
- && REAL_VALUES_EQUAL (c, dconst3_4)
14138
- && hw_sqrt_exists)
14141
- sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
14143
- /* sqrt(sqrt(x)) */
14144
- sqrt_sqrt = build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);
14146
- /* sqrt(x) * sqrt(sqrt(x)) */
14147
- return build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
14148
- sqrt_arg0, sqrt_sqrt);
14151
/* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
14152
optimizations since 1./3. is not exactly representable. If x
14153
is negative and finite, the correct value of pow(x,1./3.) is
14154
@@ -1263,7 +1578,7 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
14157
&& (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
14158
- && optimize_function_for_speed_p (cfun)
14161
&& REAL_VALUES_EQUAL (c, dconst1_6))
14163
@@ -1274,54 +1589,31 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
14164
return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
14167
- /* Optimize pow(x,c), where n = 2c for some nonzero integer n
14168
- and c not an integer, into
14170
- sqrt(x) * powi(x, n/2), n > 0;
14171
- 1.0 / (sqrt(x) * powi(x, abs(n/2))), n < 0.
14173
- Do not calculate the powi factor when n/2 = 0. */
14174
- real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
14175
- n = real_to_integer (&c2);
14176
- real_from_integer (&cint, VOIDmode, n, SIGNED);
14177
- c2_is_int = real_identical (&c2, &cint);
14179
+ /* Attempt to expand the POW as a product of square root chains.
14180
+ Expand the 0.25 case even when optimising for size. */
14181
if (flag_unsafe_math_optimizations
14185
- && optimize_function_for_speed_p (cfun))
14186
+ && hw_sqrt_exists
14187
+ && (speed_p || REAL_VALUES_EQUAL (c, dconst1_4))
14188
+ && !HONOR_SIGNED_ZEROS (mode))
14190
- tree powi_x_ndiv2 = NULL_TREE;
14192
- /* Attempt to fold powi(arg0, abs(n/2)) into multiplies. If not
14193
- possible or profitable, give up. Skip the degenerate case when
14194
- n is 1 or -1, where the result is always 1. */
14195
- if (absu_hwi (n) != 1)
14197
- powi_x_ndiv2 = gimple_expand_builtin_powi (gsi, loc, arg0,
14198
- abs_hwi (n / 2));
14199
- if (!powi_x_ndiv2)
14200
- return NULL_TREE;
14202
+ unsigned int max_depth = speed_p
14203
+ ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
14206
- /* Calculate sqrt(x). When n is not 1 or -1, multiply it by the
14207
- result of the optimal multiply sequence just calculated. */
14208
- sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
14209
+ tree expand_with_sqrts
14210
+ = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
14212
- if (absu_hwi (n) == 1)
14213
- result = sqrt_arg0;
14215
- result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
14216
- sqrt_arg0, powi_x_ndiv2);
14218
- /* If n is negative, reciprocate the result. */
14220
- result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
14221
- build_real (type, dconst1), result);
14223
+ if (expand_with_sqrts)
14224
+ return expand_with_sqrts;
14227
+ real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
14228
+ n = real_to_integer (&c2);
14229
+ real_from_integer (&cint, VOIDmode, n, SIGNED);
14230
+ c2_is_int = real_identical (&c2, &cint);
14232
/* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
14234
powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
14235
--- a/src/libgcc/config.host
14236
+++ b/src/libgcc/config.host
14237
@@ -377,14 +377,15 @@ arm*-*-netbsdelf*)
14238
tmake_file="$tmake_file arm/t-arm arm/t-netbsd t-slibgcc-gld-nover"
14240
arm*-*-linux*) # ARM GNU/Linux with ELF
14241
- tmake_file="${tmake_file} arm/t-arm t-fixedpoint-gnu-prefix"
14242
+ tmake_file="${tmake_file} arm/t-arm t-fixedpoint-gnu-prefix t-crtfm"
14243
tmake_file="${tmake_file} arm/t-elf arm/t-bpabi arm/t-linux-eabi t-slibgcc-libgcc"
14244
tm_file="$tm_file arm/bpabi-lib.h"
14245
unwind_header=config/arm/unwind-arm.h
14246
tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
14247
+ extra_parts="$extra_parts crtfastmath.o"
14249
arm*-*-uclinux*) # ARM ucLinux
14250
- tmake_file="${tmake_file} t-fixedpoint-gnu-prefix"
14251
+ tmake_file="${tmake_file} t-fixedpoint-gnu-prefix t-crtfm"
14252
tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
14253
tmake_file="${tmake_file} arm/t-bpabi"
14254
tm_file="$tm_file arm/bpabi-lib.h"
14255
@@ -396,7 +397,7 @@ arm*-*-eabi* | arm*-*-symbianelf* | arm*-*-rtems*)
14256
tm_file="$tm_file arm/bpabi-lib.h"
14258
arm*-*-eabi* | arm*-*-rtems*)
14259
- tmake_file="${tmake_file} arm/t-bpabi"
14260
+ tmake_file="${tmake_file} arm/t-bpabi t-crtfm"
14261
extra_parts="crtbegin.o crtend.o crti.o crtn.o"
14263
arm*-*-symbianelf*)
14264
--- a/src//dev/null
14265
+++ b/src/libgcc/config/arm/crtfastmath.c
14268
+ * Copyright (C) 2014 Free Software Foundation, Inc.
14270
+ * This file is free software; you can redistribute it and/or modify it
14271
+ * under the terms of the GNU General Public License as published by the
14272
+ * Free Software Foundation; either version 3, or (at your option) any
14275
+ * This file is distributed in the hope that it will be useful, but
14276
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
14277
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14278
+ * General Public License for more details.
14280
+ * Under Section 7 of GPL version 3, you are granted additional
14281
+ * permissions described in the GCC Runtime Library Exception, version
14282
+ * 3.1, as published by the Free Software Foundation.
14284
+ * You should have received a copy of the GNU General Public License and
14285
+ * a copy of the GCC Runtime Library Exception along with this program;
14286
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
14287
+ * <http://www.gnu.org/licenses/>.
14290
+/* Enable flush-to-zero support for -ffast-math on VFP targets. */
14291
+#ifndef __SOFTFP__
14293
+#define FPSCR_FZ (1 << 24)
14295
+static void __attribute__((constructor))
14296
+__arm_set_fast_math (void)
14298
+ unsigned int fpscr_save;
14300
+ /* Set the FZ (flush-to-zero) bit in FPSCR. */
14301
+ __asm__("vmrs %0, fpscr" : "=r" (fpscr_save));
14302
+ fpscr_save |= FPSCR_FZ;
14303
+ __asm__("vmsr fpscr, %0" : : "r" (fpscr_save));
14306
+#endif /* __SOFTFP__ */
14307
--- a/src/libgcc/config/arm/ieee754-df.S
14308
+++ b/src/libgcc/config/arm/ieee754-df.S
14310
* Only the default rounding mode is intended for best performances.
14311
* Exceptions aren't supported yet, but that can be added quite easily
14312
* if necessary without impacting performances.
14314
+ * In the CFI related comments, 'previousOffset' refers to the previous offset
14315
+ * from sp used to compute the CFA.
14318
+ .cfi_sections .debug_frame
14322
@@ -53,11 +57,13 @@
14324
ARM_FUNC_START negdf2
14325
ARM_FUNC_ALIAS aeabi_dneg negdf2
14326
+ CFI_START_FUNCTION
14329
eor xh, xh, #0x80000000
14333
FUNC_END aeabi_dneg
14336
@@ -66,6 +72,7 @@ ARM_FUNC_ALIAS aeabi_dneg negdf2
14337
#ifdef L_arm_addsubdf3
14339
ARM_FUNC_START aeabi_drsub
14340
+ CFI_START_FUNCTION
14342
eor xh, xh, #0x80000000 @ flip sign bit of first arg
14344
@@ -81,7 +88,11 @@ ARM_FUNC_ALIAS aeabi_dsub subdf3
14345
ARM_FUNC_START adddf3
14346
ARM_FUNC_ALIAS aeabi_dadd adddf3
14348
-1: do_push {r4, r5, lr}
14349
+1: do_push {r4, r5, lr} @ sp -= 12
14350
+ .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
14351
+ .cfi_rel_offset r4, 0 @ Registers are saved from sp to sp + 8
14352
+ .cfi_rel_offset r5, 4
14353
+ .cfi_rel_offset lr, 8
14355
@ Look for zeroes, equal values, INF, or NAN.
14356
shift1 lsl, r4, xh, #1
14357
@@ -148,6 +159,11 @@ ARM_FUNC_ALIAS aeabi_dadd adddf3
14358
@ Since this is not common case, rescale them off line.
14362
+@ CFI note: we're lucky that the branches to Lad_* that appear after this function
14363
+@ have a CFI state that's exactly the same as the one we're in at this
14364
+@ point. Otherwise the CFI would change to a different state after the branch,
14365
+@ which would be disastrous for backtracing.
14368
@ Compensate for the exponent overlapping the mantissa MSB added later
14369
@@ -413,6 +429,7 @@ LSYM(Lad_i):
14370
orrne xh, xh, #0x00080000 @ quiet NAN
14374
FUNC_END aeabi_dsub
14376
FUNC_END aeabi_dadd
14377
@@ -420,12 +437,19 @@ LSYM(Lad_i):
14379
ARM_FUNC_START floatunsidf
14380
ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
14381
+ CFI_START_FUNCTION
14387
- do_push {r4, r5, lr}
14389
+ do_push {r4, r5, lr} @ sp -= 12
14390
+ .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
14391
+ .cfi_rel_offset r4, 0 @ Registers are saved from sp + 0 to sp + 8.
14392
+ .cfi_rel_offset r5, 4
14393
+ .cfi_rel_offset lr, 8
14395
mov r4, #0x400 @ initial exponent
14396
add r4, r4, #(52-1 - 1)
14397
mov r5, #0 @ sign bit is 0
14398
@@ -435,17 +459,25 @@ ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
14403
FUNC_END aeabi_ui2d
14404
FUNC_END floatunsidf
14406
ARM_FUNC_START floatsidf
14407
ARM_FUNC_ALIAS aeabi_i2d floatsidf
14408
+ CFI_START_FUNCTION
14414
- do_push {r4, r5, lr}
14416
+ do_push {r4, r5, lr} @ sp -= 12
14417
+ .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
14418
+ .cfi_rel_offset r4, 0 @ Registers are saved from sp + 0 to sp + 8.
14419
+ .cfi_rel_offset r5, 4
14420
+ .cfi_rel_offset lr, 8
14422
mov r4, #0x400 @ initial exponent
14423
add r4, r4, #(52-1 - 1)
14424
ands r5, r0, #0x80000000 @ sign bit in r5
14425
@@ -457,11 +489,13 @@ ARM_FUNC_ALIAS aeabi_i2d floatsidf
14433
ARM_FUNC_START extendsfdf2
14434
ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
14435
+ CFI_START_FUNCTION
14437
movs r2, r0, lsl #1 @ toss sign bit
14438
mov xh, r2, asr #3 @ stretch exponent
14439
@@ -480,34 +514,54 @@ ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
14441
@ value was denormalized. We can normalize it now.
14442
do_push {r4, r5, lr}
14443
+ .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
14444
+ .cfi_rel_offset r4, 0 @ Registers are saved from sp + 0 to sp + 8.
14445
+ .cfi_rel_offset r5, 4
14446
+ .cfi_rel_offset lr, 8
14448
mov r4, #0x380 @ setup corresponding exponent
14449
and r5, xh, #0x80000000 @ move sign bit in r5
14450
bic xh, xh, #0x80000000
14455
FUNC_END extendsfdf2
14457
ARM_FUNC_START floatundidf
14458
ARM_FUNC_ALIAS aeabi_ul2d floatundidf
14459
+ CFI_START_FUNCTION
14460
+ .cfi_remember_state @ Save the current CFA state.
14466
- do_push {r4, r5, lr}
14467
+ do_push {r4, r5, lr} @ sp -= 12
14468
+ .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
14469
+ .cfi_rel_offset r4, 0 @ Registers are saved from sp + 0 to sp + 8
14470
+ .cfi_rel_offset r5, 4
14471
+ .cfi_rel_offset lr, 8
14476
ARM_FUNC_START floatdidf
14477
ARM_FUNC_ALIAS aeabi_l2d floatdidf
14478
+ .cfi_restore_state
14479
+ @ Restore the CFI state we saved above. If we didn't do this then the
14480
+ @ following instructions would have the CFI state that was set by the
14481
+ @ offset adjustments made in floatundidf.
14487
- do_push {r4, r5, lr}
14488
+ do_push {r4, r5, lr} @ sp -= 12
14489
+ .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
14490
+ .cfi_rel_offset r4, 0 @ Registers are saved from sp to sp + 8
14491
+ .cfi_rel_offset r5, 4
14492
+ .cfi_rel_offset lr, 8
14494
ands r5, ah, #0x80000000 @ sign bit in r5
14496
@@ -550,6 +604,7 @@ ARM_FUNC_ALIAS aeabi_l2d floatdidf
14503
FUNC_END floatundidf
14504
@@ -561,7 +616,14 @@ ARM_FUNC_ALIAS aeabi_l2d floatdidf
14506
ARM_FUNC_START muldf3
14507
ARM_FUNC_ALIAS aeabi_dmul muldf3
14508
- do_push {r4, r5, r6, lr}
14509
+ CFI_START_FUNCTION
14511
+ do_push {r4, r5, r6, lr} @ sp -= 16
14512
+ .cfi_adjust_cfa_offset 16 @ CFA is now sp + previousOffset + 16
14513
+ .cfi_rel_offset r4, 0 @ Registers are saved from sp to sp + 12.
14514
+ .cfi_rel_offset r5, 4
14515
+ .cfi_rel_offset r6, 8
14516
+ .cfi_rel_offset lr, 12
14518
@ Mask out exponents, trap any zero/denormal/INF/NAN.
14520
@@ -596,7 +658,16 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
14521
and r6, r6, #0x80000000
14523
@ Well, no way to make it shorter without the umull instruction.
14524
- stmfd sp!, {r6, r7, r8, r9, sl, fp}
14525
+ stmfd sp!, {r6, r7, r8, r9, sl, fp} @ sp -= 24
14526
+ .cfi_remember_state @ Save the current CFI state.
14527
+ .cfi_adjust_cfa_offset 24 @ CFA is now sp + previousOffset + 24.
14528
+ .cfi_rel_offset r6, 0 @ Registers are saved from sp to sp + 20.
14529
+ .cfi_rel_offset r7, 4
14530
+ .cfi_rel_offset r8, 8
14531
+ .cfi_rel_offset r9, 12
14532
+ .cfi_rel_offset sl, 16
14533
+ .cfi_rel_offset fp, 20
14535
mov r7, xl, lsr #16
14536
mov r8, yl, lsr #16
14537
mov r9, xh, lsr #16
14538
@@ -648,8 +719,8 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
14542
- ldmfd sp!, {yl, r7, r8, r9, sl, fp}
14544
+ ldmfd sp!, {yl, r7, r8, r9, sl, fp} @ sp += 24
14545
+ .cfi_restore_state @ Restore the previous CFI state.
14548
@ Here is the actual multiplication.
14549
@@ -715,7 +786,6 @@ LSYM(Lml_1):
14550
orr xh, xh, #0x00100000
14557
@@ -863,13 +933,20 @@ LSYM(Lml_n):
14558
orr xh, xh, #0x00f80000
14559
RETLDM "r4, r5, r6"
14562
FUNC_END aeabi_dmul
14565
ARM_FUNC_START divdf3
14566
ARM_FUNC_ALIAS aeabi_ddiv divdf3
14567
+ CFI_START_FUNCTION
14569
do_push {r4, r5, r6, lr}
14570
+ .cfi_adjust_cfa_offset 16
14571
+ .cfi_rel_offset r4, 0
14572
+ .cfi_rel_offset r5, 4
14573
+ .cfi_rel_offset r6, 8
14574
+ .cfi_rel_offset lr, 12
14576
@ Mask out exponents, trap any zero/denormal/INF/NAN.
14578
@@ -1052,6 +1129,7 @@ LSYM(Ldv_s):
14579
bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
14580
b LSYM(Lml_n) @ 0 / 0 -> NAN
14583
FUNC_END aeabi_ddiv
14586
@@ -1063,6 +1141,7 @@ LSYM(Ldv_s):
14588
ARM_FUNC_START gtdf2
14589
ARM_FUNC_ALIAS gedf2 gtdf2
14590
+ CFI_START_FUNCTION
14594
@@ -1077,6 +1156,10 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
14595
mov ip, #1 @ how should we specify unordered here?
14597
1: str ip, [sp, #-4]!
14598
+ .cfi_adjust_cfa_offset 4 @ CFA is now sp + previousOffset + 4.
14599
+ @ We're not adding CFI for ip as it's pushed into the stack
14600
+ @ only because it may be popped off later as a return value
14601
+ @ (i.e. we're not preserving it anyways).
14603
@ Trap any INF/NAN first.
14605
@@ -1085,10 +1168,18 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
14607
COND(mvn,s,ne) ip, ip, asr #21
14610
- @ Test for equality.
14611
- @ Note that 0.0 is equal to -0.0.
14612
+ .cfi_remember_state
14613
+ @ Save the current CFI state. This is done because the branch
14614
+ @ is conditional, and if we don't take it we'll issue a
14615
+ @ .cfi_adjust_cfa_offset and return. If we do take it,
14616
+ @ however, the .cfi_adjust_cfa_offset from the non-branch code
14617
+ @ will affect the branch code as well. To avoid this we'll
14618
+ @ restore the current state before executing the branch code.
14620
+ @ Test for equality. Note that 0.0 is equal to -0.0.
14622
+ .cfi_adjust_cfa_offset -4 @ CFA is now sp + previousOffset.
14624
orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0
14626
COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0
14627
@@ -1117,8 +1208,13 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
14631
- @ Look for a NAN.
14632
-3: mov ip, xh, lsl #1
14633
+3: @ Look for a NAN.
14635
+ @ Restore the previous CFI state (i.e. keep the CFI state as it was
14636
+ @ before the branch).
14637
+ .cfi_restore_state
14639
+ mov ip, xh, lsl #1
14640
mvns ip, ip, asr #21
14642
orrs ip, xl, xh, lsl #12
14643
@@ -1128,9 +1224,13 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
14645
orrs ip, yl, yh, lsl #12
14646
beq 2b @ y is not NAN
14648
5: ldr r0, [sp], #4 @ unordered return code
14649
+ .cfi_adjust_cfa_offset -4 @ CFA is now sp + previousOffset.
14657
@@ -1140,6 +1240,7 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
14660
ARM_FUNC_START aeabi_cdrcmple
14661
+ CFI_START_FUNCTION
14665
@@ -1148,13 +1249,17 @@ ARM_FUNC_START aeabi_cdrcmple
14671
ARM_FUNC_START aeabi_cdcmpeq
14672
ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
14674
@ The status-returning routines are required to preserve all
14675
@ registers except ip, lr, and cpsr.
14676
6: do_push {r0, lr}
14677
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8.
14678
+ .cfi_rel_offset r0, 0 @ Previous r0 is saved at sp.
14679
+ .cfi_rel_offset lr, 4 @ Previous lr is saved at sp + 4.
14682
@ Set the Z flag correctly, and the C flag unconditionally.
14684
@@ -1162,59 +1267,86 @@ ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
14685
@ that the first operand was smaller than the second.
14692
FUNC_END aeabi_cdcmple
14693
FUNC_END aeabi_cdcmpeq
14694
FUNC_END aeabi_cdrcmple
14696
ARM_FUNC_START aeabi_dcmpeq
14697
+ CFI_START_FUNCTION
14699
+ str lr, [sp, #-8]! @ sp -= 8
14700
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
14701
+ .cfi_rel_offset lr, 0 @ lr is at sp
14703
- str lr, [sp, #-8]!
14704
ARM_CALL aeabi_cdcmple
14706
moveq r0, #1 @ Equal to.
14707
movne r0, #0 @ Less than, greater than, or unordered.
14712
FUNC_END aeabi_dcmpeq
14714
ARM_FUNC_START aeabi_dcmplt
14715
+ CFI_START_FUNCTION
14717
+ str lr, [sp, #-8]! @ sp -= 8
14718
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
14719
+ .cfi_rel_offset lr, 0 @ lr is at sp
14721
- str lr, [sp, #-8]!
14722
ARM_CALL aeabi_cdcmple
14724
movcc r0, #1 @ Less than.
14725
movcs r0, #0 @ Equal to, greater than, or unordered.
14729
FUNC_END aeabi_dcmplt
14731
ARM_FUNC_START aeabi_dcmple
14732
+ CFI_START_FUNCTION
14734
+ str lr, [sp, #-8]! @ sp -= 8
14735
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
14736
+ .cfi_rel_offset lr, 0 @ lr is at sp
14738
- str lr, [sp, #-8]!
14739
ARM_CALL aeabi_cdcmple
14741
movls r0, #1 @ Less than or equal to.
14742
movhi r0, #0 @ Greater than or unordered.
14746
FUNC_END aeabi_dcmple
14748
ARM_FUNC_START aeabi_dcmpge
14749
+ CFI_START_FUNCTION
14751
+ str lr, [sp, #-8]! @ sp -= 8
14752
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
14753
+ .cfi_rel_offset lr, 0 @ lr is at sp
14755
- str lr, [sp, #-8]!
14756
ARM_CALL aeabi_cdrcmple
14758
movls r0, #1 @ Operand 2 is less than or equal to operand 1.
14759
movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
14763
FUNC_END aeabi_dcmpge
14765
ARM_FUNC_START aeabi_dcmpgt
14766
+ CFI_START_FUNCTION
14768
+ str lr, [sp, #-8]! @ sp -= 8
14769
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
14770
+ .cfi_rel_offset lr, 0 @ lr is at sp
14772
- str lr, [sp, #-8]!
14773
ARM_CALL aeabi_cdrcmple
14775
movcc r0, #1 @ Operand 2 is less than operand 1.
14776
@@ -1222,6 +1354,7 @@ ARM_FUNC_START aeabi_dcmpgt
14777
@ or they are unordered.
14781
FUNC_END aeabi_dcmpgt
14783
#endif /* L_cmpdf2 */
14784
@@ -1230,6 +1363,7 @@ ARM_FUNC_START aeabi_dcmpgt
14786
ARM_FUNC_START unorddf2
14787
ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
14791
mvns ip, ip, asr #21
14792
@@ -1247,6 +1381,7 @@ ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
14793
3: mov r0, #1 @ arguments are unordered.
14797
FUNC_END aeabi_dcmpun
14800
@@ -1256,6 +1391,7 @@ ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
14802
ARM_FUNC_START fixdfsi
14803
ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
14804
+ CFI_START_FUNCTION
14806
@ check exponent range.
14808
@@ -1289,6 +1425,7 @@ ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
14809
4: mov r0, #0 @ How should we convert NAN?
14813
FUNC_END aeabi_d2iz
14816
@@ -1298,6 +1435,7 @@ ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
14818
ARM_FUNC_START fixunsdfsi
14819
ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
14820
+ CFI_START_FUNCTION
14822
@ check exponent range.
14823
movs r2, xh, lsl #1
14824
@@ -1327,6 +1465,7 @@ ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
14825
4: mov r0, #0 @ How should we convert NAN?
14829
FUNC_END aeabi_d2uiz
14830
FUNC_END fixunsdfsi
14832
@@ -1336,6 +1475,7 @@ ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
14834
ARM_FUNC_START truncdfsf2
14835
ARM_FUNC_ALIAS aeabi_d2f truncdfsf2
14836
+ CFI_START_FUNCTION
14838
@ check exponent range.
14840
@@ -1400,6 +1540,7 @@ ARM_FUNC_ALIAS aeabi_d2f truncdfsf2
14841
orr r0, r0, #0x00800000
14846
FUNC_END truncdfsf2
14848
--- a/src/libgcc/config/arm/ieee754-sf.S
14849
+++ b/src/libgcc/config/arm/ieee754-sf.S
14850
@@ -31,16 +31,21 @@
14851
* Only the default rounding mode is intended for best performances.
14852
* Exceptions aren't supported yet, but that can be added quite easily
14853
* if necessary without impacting performances.
14855
+ * In the CFI related comments, 'previousOffset' refers to the previous offset
14856
+ * from sp used to compute the CFA.
14859
#ifdef L_arm_negsf2
14861
ARM_FUNC_START negsf2
14862
ARM_FUNC_ALIAS aeabi_fneg negsf2
14863
+ CFI_START_FUNCTION
14865
eor r0, r0, #0x80000000 @ flip sign bit
14869
FUNC_END aeabi_fneg
14872
@@ -49,6 +54,7 @@ ARM_FUNC_ALIAS aeabi_fneg negsf2
14873
#ifdef L_arm_addsubsf3
14875
ARM_FUNC_START aeabi_frsub
14876
+ CFI_START_FUNCTION
14878
eor r0, r0, #0x80000000 @ flip sign bit of first arg
14880
@@ -284,6 +290,7 @@ LSYM(Lad_i):
14881
orrne r0, r0, #0x00400000 @ quiet NAN
14885
FUNC_END aeabi_frsub
14886
FUNC_END aeabi_fadd
14888
@@ -292,6 +299,7 @@ LSYM(Lad_i):
14890
ARM_FUNC_START floatunsisf
14891
ARM_FUNC_ALIAS aeabi_ui2f floatunsisf
14892
+ CFI_START_FUNCTION
14896
@@ -316,6 +324,7 @@ ARM_FUNC_ALIAS aeabi_i2f floatsisf
14903
FUNC_END aeabi_ui2f
14904
@@ -323,6 +332,7 @@ ARM_FUNC_ALIAS aeabi_i2f floatsisf
14906
ARM_FUNC_START floatundisf
14907
ARM_FUNC_ALIAS aeabi_ul2f floatundisf
14908
+ CFI_START_FUNCTION
14912
@@ -409,6 +419,7 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf
14913
biceq r0, r0, ip, lsr #31
14919
FUNC_END floatundisf
14920
@@ -420,6 +431,7 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf
14922
ARM_FUNC_START mulsf3
14923
ARM_FUNC_ALIAS aeabi_fmul mulsf3
14924
+ CFI_START_FUNCTION
14926
@ Mask out exponents, trap any zero/denormal/INF/NAN.
14928
@@ -454,7 +466,13 @@ LSYM(Lml_x):
14929
and r3, ip, #0x80000000
14931
@ Well, no way to make it shorter without the umull instruction.
14932
- do_push {r3, r4, r5}
14933
+ do_push {r3, r4, r5} @ sp -= 12
14934
+ .cfi_remember_state @ Save the current CFI state
14935
+ .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
14936
+ .cfi_rel_offset r3, 0 @ Registers are saved from sp to sp + 8
14937
+ .cfi_rel_offset r4, 4
14938
+ .cfi_rel_offset r5, 8
14940
mov r4, r0, lsr #16
14941
mov r5, r1, lsr #16
14942
bic r0, r0, r4, lsl #16
14943
@@ -465,7 +483,8 @@ LSYM(Lml_x):
14945
adds r3, r3, r0, lsl #16
14946
adc r1, ip, r0, lsr #16
14947
- do_pop {r0, r4, r5}
14948
+ do_pop {r0, r4, r5} @ sp += 12
14949
+ .cfi_restore_state @ Restore the previous CFI state
14953
@@ -618,11 +637,13 @@ LSYM(Lml_n):
14954
orr r0, r0, #0x00c00000
14958
FUNC_END aeabi_fmul
14961
ARM_FUNC_START divsf3
14962
ARM_FUNC_ALIAS aeabi_fdiv divsf3
14963
+ CFI_START_FUNCTION
14965
@ Mask out exponents, trap any zero/denormal/INF/NAN.
14967
@@ -758,6 +779,7 @@ LSYM(Ldv_s):
14968
bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
14969
b LSYM(Lml_n) @ 0 / 0 -> NAN
14972
FUNC_END aeabi_fdiv
14975
@@ -782,6 +804,7 @@ LSYM(Ldv_s):
14977
ARM_FUNC_START gtsf2
14978
ARM_FUNC_ALIAS gesf2 gtsf2
14979
+ CFI_START_FUNCTION
14983
@@ -796,6 +819,10 @@ ARM_FUNC_ALIAS eqsf2 cmpsf2
14984
mov ip, #1 @ how should we specify unordered here?
14986
1: str ip, [sp, #-4]!
14987
+ .cfi_adjust_cfa_offset 4 @ CFA is now sp + previousOffset + 4.
14988
+ @ We're not adding CFI for ip as it's pushed into the stack only because
14989
+ @ it may be popped off later as a return value (i.e. we're not preserving
14992
@ Trap any INF/NAN first.
14994
@@ -804,10 +831,18 @@ ARM_FUNC_ALIAS eqsf2 cmpsf2
14996
COND(mvn,s,ne) ip, r3, asr #24
14998
+ .cfi_remember_state
14999
+ @ Save the current CFI state. This is done because the branch is conditional,
15000
+ @ and if we don't take it we'll issue a .cfi_adjust_cfa_offset and return.
15001
+ @ If we do take it, however, the .cfi_adjust_cfa_offset from the non-branch
15002
+ @ code will affect the branch code as well. To avoid this we'll restore
15003
+ @ the current state before executing the branch code.
15006
@ Note that 0.0 is equal to -0.0.
15008
+ .cfi_adjust_cfa_offset -4 @ CFA is now sp + previousOffset.
15010
orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag
15012
teqne r0, r1 @ if not 0 compare sign
15013
@@ -823,8 +858,13 @@ ARM_FUNC_ALIAS eqsf2 cmpsf2
15017
- @ Look for a NAN.
15018
-3: mvns ip, r2, asr #24
15019
+3: @ Look for a NAN.
15021
+ @ Restore the previous CFI state (i.e. keep the CFI state as it was
15022
+ @ before the branch).
15023
+ .cfi_restore_state
15025
+ mvns ip, r2, asr #24
15027
movs ip, r0, lsl #9
15029
@@ -832,9 +872,12 @@ ARM_FUNC_ALIAS eqsf2 cmpsf2
15031
movs ip, r1, lsl #9
15032
beq 2b @ r1 is not NAN
15034
5: ldr r0, [sp], #4 @ return unordered code.
15035
+ .cfi_adjust_cfa_offset -4 @ CFA is now sp + previousOffset.
15042
@@ -844,6 +887,7 @@ ARM_FUNC_ALIAS eqsf2 cmpsf2
15045
ARM_FUNC_START aeabi_cfrcmple
15046
+ CFI_START_FUNCTION
15050
@@ -856,6 +900,13 @@ ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
15051
@ The status-returning routines are required to preserve all
15052
@ registers except ip, lr, and cpsr.
15053
6: do_push {r0, r1, r2, r3, lr}
15054
+ .cfi_adjust_cfa_offset 20 @ CFA is at sp + previousOffset + 20
15055
+ .cfi_rel_offset r0, 0 @ Registers are saved from sp to sp + 16
15056
+ .cfi_rel_offset r1, 4
15057
+ .cfi_rel_offset r2, 8
15058
+ .cfi_rel_offset r3, 12
15059
+ .cfi_rel_offset lr, 16
15062
@ Set the Z flag correctly, and the C flag unconditionally.
15064
@@ -865,57 +916,82 @@ ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
15066
RETLDM "r0, r1, r2, r3"
15069
FUNC_END aeabi_cfcmple
15070
FUNC_END aeabi_cfcmpeq
15071
FUNC_END aeabi_cfrcmple
15073
ARM_FUNC_START aeabi_fcmpeq
15074
+ CFI_START_FUNCTION
15076
+ str lr, [sp, #-8]! @ sp -= 8
15077
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
15078
+ .cfi_rel_offset lr, 0 @ lr is at sp
15080
- str lr, [sp, #-8]!
15081
ARM_CALL aeabi_cfcmple
15083
moveq r0, #1 @ Equal to.
15084
movne r0, #0 @ Less than, greater than, or unordered.
15088
FUNC_END aeabi_fcmpeq
15090
ARM_FUNC_START aeabi_fcmplt
15091
+ CFI_START_FUNCTION
15093
+ str lr, [sp, #-8]! @ sp -= 8
15094
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
15095
+ .cfi_rel_offset lr, 0 @ lr is at sp
15097
- str lr, [sp, #-8]!
15098
ARM_CALL aeabi_cfcmple
15100
movcc r0, #1 @ Less than.
15101
movcs r0, #0 @ Equal to, greater than, or unordered.
15105
FUNC_END aeabi_fcmplt
15107
ARM_FUNC_START aeabi_fcmple
15108
+ CFI_START_FUNCTION
15110
+ str lr, [sp, #-8]! @ sp -= 8
15111
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
15112
+ .cfi_rel_offset lr, 0 @ lr is at sp
15114
- str lr, [sp, #-8]!
15115
ARM_CALL aeabi_cfcmple
15117
movls r0, #1 @ Less than or equal to.
15118
movhi r0, #0 @ Greater than or unordered.
15122
FUNC_END aeabi_fcmple
15124
ARM_FUNC_START aeabi_fcmpge
15125
+ CFI_START_FUNCTION
15127
+ str lr, [sp, #-8]! @ sp -= 8
15128
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
15129
+ .cfi_rel_offset lr, 0 @ lr is at sp
15131
- str lr, [sp, #-8]!
15132
ARM_CALL aeabi_cfrcmple
15134
movls r0, #1 @ Operand 2 is less than or equal to operand 1.
15135
movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
15139
FUNC_END aeabi_fcmpge
15141
ARM_FUNC_START aeabi_fcmpgt
15142
+ CFI_START_FUNCTION
15144
+ str lr, [sp, #-8]! @ sp -= 8
15145
+ .cfi_adjust_cfa_offset 8 @ CFA is now sp + previousOffset + 8
15146
+ .cfi_rel_offset lr, 0 @ lr is at sp
15148
- str lr, [sp, #-8]!
15149
ARM_CALL aeabi_cfrcmple
15151
movcc r0, #1 @ Operand 2 is less than operand 1.
15152
@@ -923,6 +999,7 @@ ARM_FUNC_START aeabi_fcmpgt
15153
@ or they are unordered.
15157
FUNC_END aeabi_fcmpgt
15159
#endif /* L_cmpsf2 */
15160
@@ -931,6 +1008,7 @@ ARM_FUNC_START aeabi_fcmpgt
15162
ARM_FUNC_START unordsf2
15163
ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
15164
+ CFI_START_FUNCTION
15168
@@ -947,6 +1025,7 @@ ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
15169
3: mov r0, #1 @ arguments are unordered.
15173
FUNC_END aeabi_fcmpun
15176
@@ -956,6 +1035,7 @@ ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
15178
ARM_FUNC_START fixsfsi
15179
ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
15180
+ CFI_START_FUNCTION
15182
@ check exponent range.
15184
@@ -989,6 +1069,7 @@ ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
15185
4: mov r0, #0 @ What should we convert NAN to?
15189
FUNC_END aeabi_f2iz
15192
@@ -998,6 +1079,7 @@ ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
15194
ARM_FUNC_START fixunssfsi
15195
ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
15196
+ CFI_START_FUNCTION
15198
@ check exponent range.
15199
movs r2, r0, lsl #1
15200
@@ -1027,6 +1109,7 @@ ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
15201
4: mov r0, #0 @ What should we convert NAN to?
15205
FUNC_END aeabi_f2uiz
15206
FUNC_END fixunssfsi
15208
--- a/src/libgcc/config/arm/lib1funcs.S
15209
+++ b/src/libgcc/config/arm/lib1funcs.S
15210
@@ -1965,6 +1965,16 @@ LSYM(Lchange_\register):
15212
#endif /* Arch supports thumb. */
15214
+.macro CFI_START_FUNCTION
15216
+ .cfi_remember_state
15219
+.macro CFI_END_FUNCTION
15220
+ .cfi_restore_state
15224
#ifndef __symbian__
15225
#ifndef __ARM_ARCH_6M__
15226
#include "ieee754-df.S"
15227
--- a/src/libgcc/unwind-dw2-fde-dip.c
15228
+++ b/src/libgcc/unwind-dw2-fde-dip.c
15231
#if !defined(inhibit_libc) && defined(HAVE_LD_EH_FRAME_HDR) \
15232
&& defined(TARGET_DL_ITERATE_PHDR) \
15233
+ && defined(__linux__)
15234
+# define USE_PT_GNU_EH_FRAME
15237
+#if !defined(inhibit_libc) && defined(HAVE_LD_EH_FRAME_HDR) \
15238
+ && defined(TARGET_DL_ITERATE_PHDR) \
15239
&& (defined(__DragonFly__) || defined(__FreeBSD__))
15240
# define ElfW __ElfN
15241
# define USE_PT_GNU_EH_FRAME
15242
--- a/src/libgfortran/acinclude.m4
15243
+++ b/src/libgfortran/acinclude.m4
15244
@@ -100,7 +100,7 @@ void foo (void);
15245
[Define to 1 if the target supports #pragma weak])
15248
- *-*-darwin* | *-*-hpux* | *-*-cygwin* | *-*-mingw* )
15249
+ *-*-darwin* | *-*-hpux* | *-*-cygwin* | *-*-mingw* | *-*-musl* )
15250
AC_DEFINE(GTHREAD_USE_WEAK, 0,
15251
[Define to 0 if the target shouldn't use #pragma weak])
15253
--- a/src/libgfortran/configure
15254
+++ b/src/libgfortran/configure
15255
@@ -26456,7 +26456,7 @@ $as_echo "#define SUPPORTS_WEAK 1" >>confdefs.h
15259
- *-*-darwin* | *-*-hpux* | *-*-cygwin* | *-*-mingw* )
15260
+ *-*-darwin* | *-*-hpux* | *-*-cygwin* | *-*-mingw* | *-*-musl* )
15262
$as_echo "#define GTHREAD_USE_WEAK 0" >>confdefs.h
15264
--- a/src/libitm/config/arm/hwcap.cc
15265
+++ b/src/libitm/config/arm/hwcap.cc
15266
@@ -40,7 +40,7 @@ int GTM_hwcap HIDDEN = 0
15269
#include <unistd.h>
15270
-#include <sys/fcntl.h>
15271
+#include <fcntl.h>
15274
static void __attribute__((constructor))
15275
--- a/src/libitm/config/linux/x86/tls.h
15276
+++ b/src/libitm/config/linux/x86/tls.h
15277
@@ -25,16 +25,19 @@
15278
#ifndef LIBITM_X86_TLS_H
15279
#define LIBITM_X86_TLS_H 1
15281
-#if defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 10)
15282
+#if defined(__GLIBC_PREREQ)
15283
+#if __GLIBC_PREREQ(2, 10)
15284
/* Use slots in the TCB head rather than __thread lookups.
15285
GLIBC has reserved words 10 through 13 for TM. */
15286
#define HAVE_ARCH_GTM_THREAD 1
15287
#define HAVE_ARCH_GTM_THREAD_DISP 1
15291
#include "config/generic/tls.h"
15293
-#if defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 10)
15294
+#if defined(__GLIBC_PREREQ)
15295
+#if __GLIBC_PREREQ(2, 10)
15296
namespace GTM HIDDEN {
15299
@@ -101,5 +104,6 @@ static inline void set_abi_disp(struct abi_dispatch *x)
15302
#endif /* >= GLIBC 2.10 */
15305
#endif // LIBITM_X86_TLS_H
15306
--- a/src//dev/null
15307
+++ b/src/libstdc++-v3/config/cpu/arm/cpu_defines.h
15309
+// Specific definitions for generic platforms -*- C++ -*-
15311
+// Copyright (C) 2015 Free Software Foundation, Inc.
15313
+// This file is part of the GNU ISO C++ Library. This library is free
15314
+// software; you can redistribute it and/or modify it under the
15315
+// terms of the GNU General Public License as published by the
15316
+// Free Software Foundation; either version 3, or (at your option)
15317
+// any later version.
15319
+// This library is distributed in the hope that it will be useful,
15320
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
15321
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15322
+// GNU General Public License for more details.
15324
+// Under Section 7 of GPL version 3, you are granted additional
15325
+// permissions described in the GCC Runtime Library Exception, version
15326
+// 3.1, as published by the Free Software Foundation.
15328
+// You should have received a copy of the GNU General Public License and
15329
+// a copy of the GCC Runtime Library Exception along with this program;
15330
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
15331
+// <http://www.gnu.org/licenses/>.
15333
+/** @file bits/cpu_defines.h
15334
+ * This is an internal header file, included by other library headers.
15335
+ * Do not attempt to use it directly. @headername{iosfwd}
15338
+#ifndef _GLIBCXX_CPU_DEFINES
15339
+#define _GLIBCXX_CPU_DEFINES 1
15341
+// Integer divide instructions don't trap on ARM.
15342
+#ifdef __ARM_ARCH_EXT_IDIV__
15343
+#define __glibcxx_integral_traps false
15345
+#define __glibcxx_integral_traps true
15349
--- a/src/libstdc++-v3/config/os/generic/os_defines.h
15350
+++ b/src/libstdc++-v3/config/os/generic/os_defines.h
15352
// System-specific #define, typedefs, corrections, etc, go here. This
15353
// file will come before all others.
15355
+// Disable the weak reference logic in gthr.h for os/generic because it
15356
+// is broken on every platform unless there is implementation specific
15357
+// workaround in gthr-posix.h and at link-time for static linking.
15358
+#define _GLIBCXX_GTHREAD_USE_WEAK 0
15361
--- a/src/libstdc++-v3/configure.host
15362
+++ b/src/libstdc++-v3/configure.host
15363
@@ -143,6 +143,9 @@ cpu_include_dir=cpu/${try_cpu}
15364
# Set specific CPU overrides for cpu_defines_dir. Most can just use generic.
15365
# THIS TABLE IS SORTED. KEEP IT THAT WAY.
15366
case "${host_cpu}" in
15368
+ cpu_defines_dir=cpu/arm
15371
cpu_defines_dir=cpu/powerpc
15373
@@ -273,6 +276,9 @@ case "${host_os}" in
15375
os_include_dir="os/bsd/freebsd"
15378
+ os_include_dir="os/generic"
15380
gnu* | linux* | kfreebsd*-gnu | knetbsd*-gnu)
15381
if [ "$uclibc" = "yes" ]; then
15382
os_include_dir="os/uclibc"