1
# DP: Revert AArch64 backport also found on the Linaro branch.
3
Index: gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
4
===================================================================
5
--- a/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c (revision 206133)
6
+++ a/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c (revision 206132)
8
/* { dg-do compile } */
9
/* { dg-options "-O2" } */
11
-#include <arm_neon.h>
12
+#include "../../../config/aarch64/arm_neon.h"
14
-/* Used to force a variable to a SIMD register. */
15
-#define force_simd(V1) asm volatile ("mov %d0, %1.d[0]" \
18
- : /* No clobbers */);
20
/* { dg-final { scan-assembler-times "\\tadd\\tx\[0-9\]+" 2 } } */
25
test_vceqd_s64 (int64x1_t a, int64x1_t b)
30
- res = vceqd_s64 (a, b);
33
+ return vceqd_s64 (a, b);
36
/* { dg-final { scan-assembler-times "\\tcmeq\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
39
test_vceqzd_s64 (int64x1_t a)
43
- res = vceqzd_s64 (a);
46
+ return vceqzd_s64 (a);
49
/* { dg-final { scan-assembler-times "\\tcmge\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
52
test_vcged_s64 (int64x1_t a, int64x1_t b)
57
- res = vcged_s64 (a, b);
60
+ return vcged_s64 (a, b);
64
test_vcled_s64 (int64x1_t a, int64x1_t b)
69
- res = vcled_s64 (a, b);
72
+ return vcled_s64 (a, b);
75
-/* Idiom recognition will cause this testcase not to generate
76
- the expected cmge instruction, so do not check for it. */
77
+/* { dg-final { scan-assembler-times "\\tcmge\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
80
test_vcgezd_s64 (int64x1_t a)
84
- res = vcgezd_s64 (a);
87
+ return vcgezd_s64 (a);
90
/* { dg-final { scan-assembler-times "\\tcmhs\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
93
test_vcged_u64 (uint64x1_t a, uint64x1_t b)
98
- res = vcged_u64 (a, b);
101
+ return vcged_u64 (a, b);
104
/* { dg-final { scan-assembler-times "\\tcmgt\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
107
test_vcgtd_s64 (int64x1_t a, int64x1_t b)
112
- res = vcgtd_s64 (a, b);
115
+ return vcgtd_s64 (a, b);
119
test_vcltd_s64 (int64x1_t a, int64x1_t b)
124
- res = vcltd_s64 (a, b);
127
+ return vcltd_s64 (a, b);
130
/* { dg-final { scan-assembler-times "\\tcmgt\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
133
test_vcgtzd_s64 (int64x1_t a)
137
- res = vcgtzd_s64 (a);
140
+ return vcgtzd_s64 (a);
143
/* { dg-final { scan-assembler-times "\\tcmhi\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
146
test_vcgtd_u64 (uint64x1_t a, uint64x1_t b)
151
- res = vcgtd_u64 (a, b);
154
+ return vcgtd_u64 (a, b);
157
/* { dg-final { scan-assembler-times "\\tcmle\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
158
@@ -161,24 +107,15 @@
160
test_vclezd_s64 (int64x1_t a)
164
- res = vclezd_s64 (a);
167
+ return vclezd_s64 (a);
170
-/* Idiom recognition will cause this testcase not to generate
171
- the expected cmlt instruction, so do not check for it. */
172
+/* { dg-final { scan-assembler-times "\\tcmlt\\td\[0-9\]+, d\[0-9\]+, #?0" 1 } } */
175
test_vcltzd_s64 (int64x1_t a)
179
- res = vcltzd_s64 (a);
182
+ return vcltzd_s64 (a);
185
/* { dg-final { scan-assembler-times "\\tdup\\tb\[0-9\]+, v\[0-9\]+\.b" 2 } } */
186
@@ -242,23 +179,13 @@
188
test_vtst_s64 (int64x1_t a, int64x1_t b)
193
- res = vtstd_s64 (a, b);
196
+ return vtstd_s64 (a, b);
200
test_vtst_u64 (uint64x1_t a, uint64x1_t b)
205
- res = vtstd_s64 (a, b);
208
+ return vtstd_u64 (a, b);
211
/* { dg-final { scan-assembler-times "\\taddp\\td\[0-9\]+, v\[0-9\]+\.2d" 1 } } */
213
return vrshld_u64 (a, b);
216
-/* Other intrinsics can generate an asr instruction (vcltzd, vcgezd),
217
- so we cannot check scan-assembler-times. */
218
+/* { dg-final { scan-assembler-times "\\tasr\\tx\[0-9\]+" 1 } } */
220
-/* { dg-final { scan-assembler "\\tasr\\tx\[0-9\]+" } } */
223
test_vshrd_n_s64 (int64x1_t a)
225
Index: gcc/config/aarch64/aarch64-simd.md
226
===================================================================
227
--- a/src/gcc/config/aarch64/aarch64-simd.md (revision 206133)
228
+++ a/src/gcc/config/aarch64/aarch64-simd.md (revision 206132)
231
; Main data types used by the insntructions
233
-(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,SF,HI,QI"
234
+(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,HI,QI"
235
(const_string "unknown"))
238
@@ -1548,12 +1548,12 @@
242
- emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
243
+ emit_insn (gen_aarch64_cmhs<mode> (mask, operands[4], operands[5]));
248
- emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
249
+ emit_insn (gen_aarch64_cmhi<mode> (mask, operands[4], operands[5]));
253
@@ -3034,181 +3034,48 @@
257
-;; cm(eq|ge|gt|lt|le)
258
-;; Note, we have constraints for Dz and Z as different expanders
259
-;; have different ideas of what should be passed to this pattern.
260
+;; cm(eq|ge|le|lt|gt)
262
-(define_insn "aarch64_cm<optab><mode>"
263
+(define_insn "aarch64_cm<cmp><mode>"
264
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
265
- (neg:<V_cmp_result>
266
- (COMPARISONS:<V_cmp_result>
267
- (match_operand:VDQ 1 "register_operand" "w,w")
268
- (match_operand:VDQ 2 "aarch64_simd_reg_or_zero" "w,ZDz")
270
+ (unspec:<V_cmp_result>
271
+ [(match_operand:VSDQ_I_DI 1 "register_operand" "w,w")
272
+ (match_operand:VSDQ_I_DI 2 "aarch64_simd_reg_or_zero" "w,Z")]
276
- cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
277
- cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
278
+ cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
279
+ cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
280
[(set_attr "simd_type" "simd_cmp")
281
(set_attr "simd_mode" "<MODE>")]
284
-(define_insn_and_split "aarch64_cm<optab>di"
285
- [(set (match_operand:DI 0 "register_operand" "=w,w,r")
288
- (match_operand:DI 1 "register_operand" "w,w,r")
289
- (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
293
- cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
294
- cm<optab>\t%d0, %d1, #0
297
- /* We need to prevent the split from
298
- happening in the 'w' constraint cases. */
299
- && GP_REGNUM_P (REGNO (operands[0]))
300
- && GP_REGNUM_P (REGNO (operands[1]))"
301
- [(set (reg:CC CC_REGNUM)
308
- (match_operand 3 "cc_register" "")
311
- enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
312
- rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
313
- rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
314
- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
317
- [(set_attr "simd_type" "simd_cmp")
318
- (set_attr "simd_mode" "DI")]
324
-(define_insn "aarch64_cm<optab><mode>"
325
+(define_insn "aarch64_cm<cmp><mode>"
326
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
327
- (neg:<V_cmp_result>
328
- (UCOMPARISONS:<V_cmp_result>
329
- (match_operand:VDQ 1 "register_operand" "w")
330
- (match_operand:VDQ 2 "register_operand" "w")
332
+ (unspec:<V_cmp_result>
333
+ [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
334
+ (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
337
- "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
338
+ "cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
339
[(set_attr "simd_type" "simd_cmp")
340
(set_attr "simd_mode" "<MODE>")]
343
-(define_insn_and_split "aarch64_cm<optab>di"
344
- [(set (match_operand:DI 0 "register_operand" "=w,r")
347
- (match_operand:DI 1 "register_operand" "w,r")
348
- (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
352
- cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
355
- /* We need to prevent the split from
356
- happening in the 'w' constraint cases. */
357
- && GP_REGNUM_P (REGNO (operands[0]))
358
- && GP_REGNUM_P (REGNO (operands[1]))"
359
- [(set (reg:CC CC_REGNUM)
366
- (match_operand 3 "cc_register" "")
369
- enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
370
- rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
371
- rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
372
- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
375
- [(set_attr "simd_type" "simd_cmp")
376
- (set_attr "simd_mode" "DI")]
378
+;; fcm(eq|ge|le|lt|gt)
382
-(define_insn "aarch64_cmtst<mode>"
383
- [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
384
- (neg:<V_cmp_result>
387
- (match_operand:VDQ 1 "register_operand" "w")
388
- (match_operand:VDQ 2 "register_operand" "w"))
389
- (vec_duplicate:<V_cmp_result> (const_int 0)))))]
391
- "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
392
- [(set_attr "simd_type" "simd_cmp")
393
- (set_attr "simd_mode" "<MODE>")]
396
-(define_insn_and_split "aarch64_cmtstdi"
397
- [(set (match_operand:DI 0 "register_operand" "=w,r")
401
- (match_operand:DI 1 "register_operand" "w,r")
402
- (match_operand:DI 2 "register_operand" "w,r"))
406
- cmtst\t%d0, %d1, %d2
409
- /* We need to prevent the split from
410
- happening in the 'w' constraint cases. */
411
- && GP_REGNUM_P (REGNO (operands[0]))
412
- && GP_REGNUM_P (REGNO (operands[1]))"
413
- [(set (reg:CC_NZ CC_REGNUM)
415
- (and:DI (match_dup 1)
421
- (match_operand 3 "cc_register" "")
424
- rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
425
- enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
426
- rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
427
- rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
428
- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
431
- [(set_attr "simd_type" "simd_cmp")
432
- (set_attr "simd_mode" "DI")]
435
-;; fcm(eq|ge|gt|le|lt)
437
-(define_insn "aarch64_cm<optab><mode>"
438
+(define_insn "aarch64_cm<cmp><mode>"
439
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
440
- (neg:<V_cmp_result>
441
- (COMPARISONS:<V_cmp_result>
442
- (match_operand:VALLF 1 "register_operand" "w,w")
443
- (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
445
+ (unspec:<V_cmp_result>
446
+ [(match_operand:VDQF 1 "register_operand" "w,w")
447
+ (match_operand:VDQF 2 "aarch64_simd_reg_or_zero" "w,Dz")]
451
- fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
452
- fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
453
+ fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
454
+ fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
455
[(set_attr "simd_type" "simd_fcmp")
456
(set_attr "simd_mode" "<MODE>")]
458
Index: gcc/config/aarch64/predicates.md
459
===================================================================
460
--- a/src/gcc/config/aarch64/predicates.md (revision 206133)
461
+++ a/src/gcc/config/aarch64/predicates.md (revision 206132)
463
(ior (match_operand 0 "register_operand")
464
(match_test "op == const0_rtx"))))
466
-(define_predicate "aarch64_reg_or_fp_zero"
467
- (and (match_code "reg,subreg,const_double")
468
- (ior (match_operand 0 "register_operand")
469
- (match_test "aarch64_float_const_zero_rtx_p (op)"))))
471
(define_predicate "aarch64_reg_zero_or_m1_or_1"
472
(and (match_code "reg,subreg,const_int")
473
(ior (match_operand 0 "register_operand")
474
Index: gcc/config/aarch64/arm_neon.h
475
===================================================================
476
--- a/src/gcc/config/aarch64/arm_neon.h (revision 206133)
477
+++ a/src/gcc/config/aarch64/arm_neon.h (revision 206132)
478
@@ -19551,28 +19551,28 @@
479
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
480
vcge_u8 (uint8x8_t __a, uint8x8_t __b)
482
- return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
483
+ return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __a,
487
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
488
vcge_u16 (uint16x4_t __a, uint16x4_t __b)
490
- return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
491
+ return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __a,
495
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
496
vcge_u32 (uint32x2_t __a, uint32x2_t __b)
498
- return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
499
+ return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __a,
503
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
504
vcge_u64 (uint64x1_t __a, uint64x1_t __b)
506
- return (uint64x1_t) __builtin_aarch64_cmgeudi ((int64x1_t) __a,
507
+ return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
511
@@ -19603,28 +19603,28 @@
512
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
513
vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
515
- return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
516
+ return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __a,
520
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
521
vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
523
- return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
524
+ return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __a,
528
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
529
vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
531
- return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
532
+ return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __a,
536
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
537
vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
539
- return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
540
+ return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __a,
544
@@ -19637,7 +19637,7 @@
545
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
546
vcged_u64 (uint64x1_t __a, uint64x1_t __b)
548
- return (uint64x1_t) __builtin_aarch64_cmgeudi ((int64x1_t) __a,
549
+ return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
553
@@ -19676,28 +19676,28 @@
554
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
555
vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
557
- return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
558
+ return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __a,
562
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
563
vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
565
- return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
566
+ return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __a,
570
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
571
vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
573
- return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
574
+ return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __a,
578
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
579
vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
581
- return (uint64x1_t) __builtin_aarch64_cmgtudi ((int64x1_t) __a,
582
+ return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
586
@@ -19728,28 +19728,28 @@
587
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
588
vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
590
- return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
591
+ return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __a,
595
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
596
vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
598
- return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
599
+ return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __a,
603
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
604
vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
606
- return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
607
+ return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __a,
611
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
612
vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
614
- return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
615
+ return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __a,
619
@@ -19762,7 +19762,7 @@
620
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
621
vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
623
- return (uint64x1_t) __builtin_aarch64_cmgtudi ((int64x1_t) __a,
624
+ return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
628
@@ -19801,28 +19801,28 @@
629
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
630
vcle_u8 (uint8x8_t __a, uint8x8_t __b)
632
- return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
633
+ return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __b,
637
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
638
vcle_u16 (uint16x4_t __a, uint16x4_t __b)
640
- return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
641
+ return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __b,
645
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
646
vcle_u32 (uint32x2_t __a, uint32x2_t __b)
648
- return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
649
+ return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __b,
653
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
654
vcle_u64 (uint64x1_t __a, uint64x1_t __b)
656
- return (uint64x1_t) __builtin_aarch64_cmgeudi ((int64x1_t) __b,
657
+ return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __b,
661
@@ -19853,28 +19853,28 @@
662
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
663
vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
665
- return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
666
+ return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __b,
670
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
671
vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
673
- return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
674
+ return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __b,
678
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
679
vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
681
- return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
682
+ return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __b,
686
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
687
vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
689
- return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
690
+ return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __b,
694
@@ -19919,28 +19919,28 @@
695
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
696
vclt_u8 (uint8x8_t __a, uint8x8_t __b)
698
- return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
699
+ return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __b,
703
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
704
vclt_u16 (uint16x4_t __a, uint16x4_t __b)
706
- return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
707
+ return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __b,
711
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
712
vclt_u32 (uint32x2_t __a, uint32x2_t __b)
714
- return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
715
+ return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __b,
719
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
720
vclt_u64 (uint64x1_t __a, uint64x1_t __b)
722
- return (uint64x1_t) __builtin_aarch64_cmgtudi ((int64x1_t) __b,
723
+ return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __b,
727
@@ -19971,28 +19971,28 @@
728
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
729
vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
731
- return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
732
+ return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __b,
736
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
737
vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
739
- return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
740
+ return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __b,
744
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
745
vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
747
- return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
748
+ return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __b,
752
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
753
vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
755
- return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
756
+ return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __b,
760
Index: gcc/config/aarch64/aarch64.md
761
===================================================================
762
--- a/src/gcc/config/aarch64/aarch64.md (revision 206133)
763
+++ a/src/gcc/config/aarch64/aarch64.md (revision 206132)
764
@@ -2211,7 +2211,7 @@
765
(set_attr "mode" "SI")]
768
-(define_insn "cstore<mode>_neg"
769
+(define_insn "*cstore<mode>_neg"
770
[(set (match_operand:ALLI 0 "register_operand" "=r")
771
(neg:ALLI (match_operator:ALLI 1 "aarch64_comparison_operator"
772
[(match_operand 2 "cc_register" "") (const_int 0)])))]
773
Index: gcc/config/aarch64/aarch64-simd-builtins.def
774
===================================================================
775
--- a/src/gcc/config/aarch64/aarch64-simd-builtins.def (revision 206133)
776
+++ a/src/gcc/config/aarch64/aarch64-simd-builtins.def (revision 206132)
778
BUILTIN_VSDQ_I_DI (BINOP, cmle)
779
BUILTIN_VSDQ_I_DI (BINOP, cmlt)
780
/* Implemented by aarch64_cm<cmp><mode>. */
781
- BUILTIN_VSDQ_I_DI (BINOP, cmgeu)
782
- BUILTIN_VSDQ_I_DI (BINOP, cmgtu)
783
+ BUILTIN_VSDQ_I_DI (BINOP, cmhs)
784
+ BUILTIN_VSDQ_I_DI (BINOP, cmhi)
785
BUILTIN_VSDQ_I_DI (BINOP, cmtst)
787
/* Implemented by aarch64_<fmaxmin><mode>. */
788
Index: gcc/config/aarch64/iterators.md
789
===================================================================
790
--- a/src/gcc/config/aarch64/iterators.md (revision 206133)
791
+++ a/src/gcc/config/aarch64/iterators.md (revision 206132)
793
;; Vector Float modes.
794
(define_mode_iterator VDQF [V2SF V4SF V2DF])
797
-(define_mode_iterator VALLF [V2SF V4SF V2DF SF DF])
799
;; Vector Float modes with 2 elements.
800
(define_mode_iterator V2F [V2SF V2DF])
803
UNSPEC_URSHL ; Used in aarch64-simd.md.
804
UNSPEC_SQRSHL ; Used in aarch64-simd.md.
805
UNSPEC_UQRSHL ; Used in aarch64-simd.md.
806
+ UNSPEC_CMEQ ; Used in aarch64-simd.md.
807
+ UNSPEC_CMLE ; Used in aarch64-simd.md.
808
+ UNSPEC_CMLT ; Used in aarch64-simd.md.
809
+ UNSPEC_CMGE ; Used in aarch64-simd.md.
810
+ UNSPEC_CMGT ; Used in aarch64-simd.md.
811
+ UNSPEC_CMHS ; Used in aarch64-simd.md.
812
+ UNSPEC_CMHI ; Used in aarch64-simd.md.
813
UNSPEC_SSLI ; Used in aarch64-simd.md.
814
UNSPEC_USLI ; Used in aarch64-simd.md.
815
UNSPEC_SSRI ; Used in aarch64-simd.md.
817
UNSPEC_SSHLL ; Used in aarch64-simd.md.
818
UNSPEC_USHLL ; Used in aarch64-simd.md.
819
UNSPEC_ADDP ; Used in aarch64-simd.md.
820
+ UNSPEC_CMTST ; Used in aarch64-simd.md.
821
UNSPEC_FMAX ; Used in aarch64-simd.md.
822
UNSPEC_FMIN ; Used in aarch64-simd.md.
823
UNSPEC_BSL ; Used in aarch64-simd.md.
826
;; For scalar usage of vector/FP registers
827
(define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d")
833
(V4SF ".4s") (V2DF ".2d")
840
;; Register suffix narrowed modes for VQN.
841
(define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h")
843
(V2SI "V2SI") (V4SI "V4SI")
844
(DI "DI") (V2DI "V2DI")
845
(V2SF "V2SI") (V4SF "V4SI")
846
- (V2DF "V2DI") (DF "DI")
850
;; Lower case mode of results of comparison operations.
851
(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
853
(V2SI "v2si") (V4SI "v4si")
854
(DI "di") (V2DI "v2di")
855
(V2SF "v2si") (V4SF "v4si")
856
- (V2DF "v2di") (DF "di")
860
;; Vm for lane instructions is restricted to FP_LO_REGS.
861
(define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
863
;; Code iterator for signed variants of vector saturating binary ops.
864
(define_code_iterator SBINQOPS [ss_plus ss_minus])
866
-;; Comparison operators for <F>CM.
867
-(define_code_iterator COMPARISONS [lt le eq ge gt])
869
-;; Unsigned comparison operators.
870
-(define_code_iterator UCOMPARISONS [ltu leu geu gtu])
872
;; -------------------------------------------------------------------
874
;; -------------------------------------------------------------------
888
-;; For comparison operators we use the FCM* and CM* instructions.
889
-;; As there are no CMLE or CMLT instructions which act on 3 vector
890
-;; operands, we must use CMGE or CMGT and swap the order of the
893
-(define_code_attr n_optab [(lt "gt") (le "ge") (eq "eq") (ge "ge") (gt "gt")
894
- (ltu "hi") (leu "hs") (geu "hs") (gtu "hi")])
895
-(define_code_attr cmp_1 [(lt "2") (le "2") (eq "1") (ge "1") (gt "1")
896
- (ltu "2") (leu "2") (geu "1") (gtu "1")])
897
-(define_code_attr cmp_2 [(lt "1") (le "1") (eq "2") (ge "2") (gt "2")
898
- (ltu "1") (leu "1") (geu "2") (gtu "2")])
900
-(define_code_attr CMP [(lt "LT") (le "LE") (eq "EQ") (ge "GE") (gt "GT")
901
- (ltu "LTU") (leu "LEU") (geu "GEU") (gtu "GTU")])
903
;; Optab prefix for sign/zero-extending operations
904
(define_code_attr su_optab [(sign_extend "") (zero_extend "u")
907
UNSPEC_SQSHRN UNSPEC_UQSHRN
908
UNSPEC_SQRSHRN UNSPEC_UQRSHRN])
910
+(define_int_iterator VCMP_S [UNSPEC_CMEQ UNSPEC_CMGE UNSPEC_CMGT
911
+ UNSPEC_CMLE UNSPEC_CMLT])
913
+(define_int_iterator VCMP_U [UNSPEC_CMHS UNSPEC_CMHI UNSPEC_CMTST])
915
(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
916
UNSPEC_TRN1 UNSPEC_TRN2
917
UNSPEC_UZP1 UNSPEC_UZP2])
919
(UNSPEC_RADDHN2 "add")
920
(UNSPEC_RSUBHN2 "sub")])
922
+(define_int_attr cmp [(UNSPEC_CMGE "ge") (UNSPEC_CMGT "gt")
923
+ (UNSPEC_CMLE "le") (UNSPEC_CMLT "lt")
925
+ (UNSPEC_CMHS "hs") (UNSPEC_CMHI "hi")
926
+ (UNSPEC_CMTST "tst")])
928
(define_int_attr offsetlr [(UNSPEC_SSLI "1") (UNSPEC_USLI "1")
929
(UNSPEC_SSRI "0") (UNSPEC_USRI "0")])