~ubuntu-branches/ubuntu/trusty/llvm-toolchain-snapshot/trusty-201310232150

« back to all changes in this revision

Viewing changes to lib/Target/X86/X86InstrSSE.td

  • Committer: Package Import Robot
  • Author(s): Sylvestre Ledru
  • Date: 2013-05-27 15:01:57 UTC
  • mfrom: (0.10.1) (0.9.1) (0.8.1) (0.7.1) (0.6.1) (0.5.2)
  • Revision ID: package-import@ubuntu.com-20130527150157-tdkrsjpuvht7v0qx
Tags: 1:3.4~svn182733-1~exp1
* New snapshot release (3.4 release)
* Add a symlink of libLLVM-3.4.so.1 to usr/lib/llvm-3.4/lib/libLLVM-3.4.so
    to make llvm-config-3.4 --libdir work (Closes: #708677)
  * Rename various packages to allow co-installation:
    * libclang1 => libclang1-3.4
    * libclang1-dbg => libclang1-3.4-dbg
    * libclang-dev => libclang-3.4-dev
    * libclang-common-dev => libclang-common-3.4-dev

Show diffs side-by-side

added added

removed removed

Lines of Context:
2342
2342
// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
2343
2343
multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
2344
2344
                            ValueType vt, X86MemOperand x86memop,
2345
 
                            PatFrag ld_frag, string OpcodeStr, Domain d> {
2346
 
  def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
 
2345
                            PatFrag ld_frag, string OpcodeStr> {
 
2346
  def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
2347
2347
                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2348
2348
                     [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
2349
 
                     IIC_SSE_COMIS_RR, d>,
 
2349
                     IIC_SSE_COMIS_RR>,
2350
2350
          Sched<[WriteFAdd]>;
2351
 
  def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
 
2351
  def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
2352
2352
                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2353
2353
                     [(set EFLAGS, (OpNode (vt RC:$src1),
2354
2354
                                           (ld_frag addr:$src2)))],
2355
 
                                           IIC_SSE_COMIS_RM, d>,
 
2355
                                           IIC_SSE_COMIS_RM>,
2356
2356
          Sched<[WriteFAddLd, ReadAfterLd]>;
2357
2357
}
2358
2358
 
2359
2359
let Defs = [EFLAGS] in {
2360
2360
  defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
2361
 
                                  "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG;
 
2361
                                  "ucomiss">, TB, VEX, VEX_LIG;
2362
2362
  defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
2363
 
                                  "ucomisd", SSEPackedDouble>, TB, OpSize, VEX,
2364
 
                                  VEX_LIG;
 
2363
                                  "ucomisd">, TB, OpSize, VEX, VEX_LIG;
2365
2364
  let Pattern = []<dag> in {
2366
2365
    defm VCOMISS  : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
2367
 
                                    "comiss", SSEPackedSingle>, TB, VEX,
2368
 
                                    VEX_LIG;
 
2366
                                    "comiss">, TB, VEX, VEX_LIG;
2369
2367
    defm VCOMISD  : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
2370
 
                                    "comisd", SSEPackedDouble>, TB, OpSize, VEX,
2371
 
                                    VEX_LIG;
 
2368
                                    "comisd">, TB, OpSize, VEX, VEX_LIG;
2372
2369
  }
2373
2370
 
2374
2371
  defm Int_VUCOMISS  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
2375
 
                            load, "ucomiss", SSEPackedSingle>, TB, VEX;
 
2372
                            load, "ucomiss">, TB, VEX;
2376
2373
  defm Int_VUCOMISD  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
2377
 
                            load, "ucomisd", SSEPackedDouble>, TB, OpSize, VEX;
 
2374
                            load, "ucomisd">, TB, OpSize, VEX;
2378
2375
 
2379
2376
  defm Int_VCOMISS  : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
2380
 
                            load, "comiss", SSEPackedSingle>, TB, VEX;
 
2377
                            load, "comiss">, TB, VEX;
2381
2378
  defm Int_VCOMISD  : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
2382
 
                            load, "comisd", SSEPackedDouble>, TB, OpSize, VEX;
 
2379
                            load, "comisd">, TB, OpSize, VEX;
2383
2380
  defm UCOMISS  : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
2384
 
                                  "ucomiss", SSEPackedSingle>, TB;
 
2381
                                  "ucomiss">, TB;
2385
2382
  defm UCOMISD  : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
2386
 
                                  "ucomisd", SSEPackedDouble>, TB, OpSize;
 
2383
                                  "ucomisd">, TB, OpSize;
2387
2384
 
2388
2385
  let Pattern = []<dag> in {
2389
2386
    defm COMISS  : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
2390
 
                                    "comiss", SSEPackedSingle>, TB;
 
2387
                                    "comiss">, TB;
2391
2388
    defm COMISD  : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
2392
 
                                    "comisd", SSEPackedDouble>, TB, OpSize;
 
2389
                                    "comisd">, TB, OpSize;
2393
2390
  }
2394
2391
 
2395
2392
  defm Int_UCOMISS  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
2396
 
                              load, "ucomiss", SSEPackedSingle>, TB;
 
2393
                              load, "ucomiss">, TB;
2397
2394
  defm Int_UCOMISD  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
2398
 
                              load, "ucomisd", SSEPackedDouble>, TB, OpSize;
 
2395
                              load, "ucomisd">, TB, OpSize;
2399
2396
 
2400
2397
  defm Int_COMISS  : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
2401
 
                                  "comiss", SSEPackedSingle>, TB;
 
2398
                                  "comiss">, TB;
2402
2399
  defm Int_COMISD  : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
2403
 
                                  "comisd", SSEPackedDouble>, TB, OpSize;
 
2400
                                  "comisd">, TB, OpSize;
2404
2401
} // Defs = [EFLAGS]
2405
2402
 
2406
2403
// sse12_cmp_packed - sse 1 & 2 compare packed instructions
3049
3046
/// We also have a special variant for the full-vector intrinsic form.
3050
3047
 
3051
3048
let Sched = WriteFSqrt in {
3052
 
def SSE_SQRTP : OpndItins<
3053
 
  IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM
3054
 
>;
3055
 
 
3056
 
def SSE_SQRTS : OpndItins<
3057
 
  IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM
 
3049
def SSE_SQRTPS : OpndItins<
 
3050
  IIC_SSE_SQRTPS_RR, IIC_SSE_SQRTPS_RM
 
3051
>;
 
3052
 
 
3053
def SSE_SQRTSS : OpndItins<
 
3054
  IIC_SSE_SQRTSS_RR, IIC_SSE_SQRTSS_RM
 
3055
>;
 
3056
 
 
3057
def SSE_SQRTPD : OpndItins<
 
3058
  IIC_SSE_SQRTPD_RR, IIC_SSE_SQRTPD_RM
 
3059
>;
 
3060
 
 
3061
def SSE_SQRTSD : OpndItins<
 
3062
  IIC_SSE_SQRTSD_RR, IIC_SSE_SQRTSD_RM
3058
3063
>;
3059
3064
}
3060
3065
 
3319
3324
 
3320
3325
// Square root.
3321
3326
defm SQRT  : sse1_fp_unop_s<0x51, "sqrt",  fsqrt, int_x86_sse_sqrt_ss,
3322
 
                            SSE_SQRTS>,
3323
 
             sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>,
 
3327
                            SSE_SQRTSS>,
 
3328
             sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
3324
3329
             sse2_fp_unop_s<0x51, "sqrt",  fsqrt, int_x86_sse2_sqrt_sd,
3325
 
                            SSE_SQRTS>,
3326
 
             sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>;
 
3330
                            SSE_SQRTSD>,
 
3331
             sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;
3327
3332
 
3328
3333
// Reciprocal approximations. Note that these typically require refinement
3329
3334
// in order to obtain suitable precision.
3330
 
defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
3331
 
             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>,
 
3335
defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTSS>,
 
3336
             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTPS>,
3332
3337
             sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
3333
 
                                int_x86_avx_rsqrt_ps_256, SSE_SQRTP>;
 
3338
                                int_x86_avx_rsqrt_ps_256, SSE_SQRTPS>;
3334
3339
defm RCP   : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>,
3335
3340
             sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
3336
3341
             sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
4462
4467
// Move Packed Doubleword Int first element to Doubleword Int
4463
4468
//
4464
4469
let SchedRW = [WriteMove] in {
4465
 
def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
4466
 
                          "vmov{d|q}\t{$src, $dst|$dst, $src}",
 
4470
def VMOVPQIto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
 
4471
                          "mov{d|q}\t{$src, $dst|$dst, $src}",
4467
4472
                          [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
4468
4473
                                                           (iPTR 0)))],
4469
4474
                                                           IIC_SSE_MOVD_ToGP>,
4470
 
                      TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>;
 
4475
                      VEX;
4471
4476
 
4472
4477
def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
4473
4478
                        "mov{d|q}\t{$src, $dst|$dst, $src}",
5094
5099
                    Sched<[WriteVecALULd]>;
5095
5100
}
5096
5101
 
 
5102
// Helper fragments to match sext vXi1 to vXiY.
 
5103
def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
 
5104
                                               VR128:$src))>;
 
5105
def v8i1sextv8i16  : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i32 15)))>;
 
5106
def v4i1sextv4i32  : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i32 31)))>;
 
5107
def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
 
5108
                                               VR256:$src))>;
 
5109
def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i32 15)))>;
 
5110
def v8i1sextv8i32  : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i32 31)))>;
 
5111
 
5097
5112
let Predicates = [HasAVX] in {
5098
5113
  defm VPABSB  : SS3I_unop_rm_int<0x1C, "vpabsb",
5099
5114
                                  int_x86_ssse3_pabs_b_128>, VEX;
5101
5116
                                  int_x86_ssse3_pabs_w_128>, VEX;
5102
5117
  defm VPABSD  : SS3I_unop_rm_int<0x1E, "vpabsd",
5103
5118
                                  int_x86_ssse3_pabs_d_128>, VEX;
 
5119
 
 
5120
  def : Pat<(xor
 
5121
            (bc_v2i64 (v16i1sextv16i8)),
 
5122
            (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
 
5123
            (VPABSBrr128 VR128:$src)>;
 
5124
  def : Pat<(xor
 
5125
            (bc_v2i64 (v8i1sextv8i16)),
 
5126
            (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
 
5127
            (VPABSWrr128 VR128:$src)>;
 
5128
  def : Pat<(xor
 
5129
            (bc_v2i64 (v4i1sextv4i32)),
 
5130
            (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
 
5131
            (VPABSDrr128 VR128:$src)>;
5104
5132
}
5105
5133
 
5106
5134
let Predicates = [HasAVX2] in {
5110
5138
                                    int_x86_avx2_pabs_w>, VEX, VEX_L;
5111
5139
  defm VPABSD  : SS3I_unop_rm_int_y<0x1E, "vpabsd",
5112
5140
                                    int_x86_avx2_pabs_d>, VEX, VEX_L;
 
5141
 
 
5142
  def : Pat<(xor
 
5143
            (bc_v4i64 (v32i1sextv32i8)),
 
5144
            (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))),
 
5145
            (VPABSBrr256 VR256:$src)>;
 
5146
  def : Pat<(xor
 
5147
            (bc_v4i64 (v16i1sextv16i16)),
 
5148
            (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))),
 
5149
            (VPABSWrr256 VR256:$src)>;
 
5150
  def : Pat<(xor
 
5151
            (bc_v4i64 (v8i1sextv8i32)),
 
5152
            (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))),
 
5153
            (VPABSDrr256 VR256:$src)>;
5113
5154
}
5114
5155
 
5115
5156
defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
5119
5160
defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
5120
5161
                              int_x86_ssse3_pabs_d_128>;
5121
5162
 
 
5163
let Predicates = [HasSSSE3] in {
 
5164
  def : Pat<(xor
 
5165
            (bc_v2i64 (v16i1sextv16i8)),
 
5166
            (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
 
5167
            (PABSBrr128 VR128:$src)>;
 
5168
  def : Pat<(xor
 
5169
            (bc_v2i64 (v8i1sextv8i16)),
 
5170
            (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
 
5171
            (PABSWrr128 VR128:$src)>;
 
5172
  def : Pat<(xor
 
5173
            (bc_v2i64 (v4i1sextv4i32)),
 
5174
            (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
 
5175
            (PABSDrr128 VR128:$src)>;
 
5176
}
 
5177
 
5122
5178
//===---------------------------------------------------------------------===//
5123
5179
// SSSE3 - Packed Binary Operator Instructions
5124
5180
//===---------------------------------------------------------------------===//