2342
2342
// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
2343
2343
multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
2344
2344
ValueType vt, X86MemOperand x86memop,
2345
PatFrag ld_frag, string OpcodeStr, Domain d> {
2346
def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
2345
PatFrag ld_frag, string OpcodeStr> {
2346
def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
2347
2347
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2348
2348
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
2349
IIC_SSE_COMIS_RR, d>,
2350
2350
Sched<[WriteFAdd]>;
2351
def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
2351
def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
2352
2352
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2353
2353
[(set EFLAGS, (OpNode (vt RC:$src1),
2354
2354
(ld_frag addr:$src2)))],
2355
IIC_SSE_COMIS_RM, d>,
2356
2356
Sched<[WriteFAddLd, ReadAfterLd]>;
2359
2359
let Defs = [EFLAGS] in {
2360
2360
defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
2361
"ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG;
2361
"ucomiss">, TB, VEX, VEX_LIG;
2362
2362
defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
2363
"ucomisd", SSEPackedDouble>, TB, OpSize, VEX,
2363
"ucomisd">, TB, OpSize, VEX, VEX_LIG;
2365
2364
let Pattern = []<dag> in {
2366
2365
defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
2367
"comiss", SSEPackedSingle>, TB, VEX,
2366
"comiss">, TB, VEX, VEX_LIG;
2369
2367
defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
2370
"comisd", SSEPackedDouble>, TB, OpSize, VEX,
2368
"comisd">, TB, OpSize, VEX, VEX_LIG;
2374
2371
defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
2375
load, "ucomiss", SSEPackedSingle>, TB, VEX;
2372
load, "ucomiss">, TB, VEX;
2376
2373
defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
2377
load, "ucomisd", SSEPackedDouble>, TB, OpSize, VEX;
2374
load, "ucomisd">, TB, OpSize, VEX;
2379
2376
defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
2380
load, "comiss", SSEPackedSingle>, TB, VEX;
2377
load, "comiss">, TB, VEX;
2381
2378
defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
2382
load, "comisd", SSEPackedDouble>, TB, OpSize, VEX;
2379
load, "comisd">, TB, OpSize, VEX;
2383
2380
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
2384
"ucomiss", SSEPackedSingle>, TB;
2385
2382
defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
2386
"ucomisd", SSEPackedDouble>, TB, OpSize;
2383
"ucomisd">, TB, OpSize;
2388
2385
let Pattern = []<dag> in {
2389
2386
defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
2390
"comiss", SSEPackedSingle>, TB;
2391
2388
defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
2392
"comisd", SSEPackedDouble>, TB, OpSize;
2389
"comisd">, TB, OpSize;
2395
2392
defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
2396
load, "ucomiss", SSEPackedSingle>, TB;
2393
load, "ucomiss">, TB;
2397
2394
defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
2398
load, "ucomisd", SSEPackedDouble>, TB, OpSize;
2395
load, "ucomisd">, TB, OpSize;
2400
2397
defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
2401
"comiss", SSEPackedSingle>, TB;
2402
2399
defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
2403
"comisd", SSEPackedDouble>, TB, OpSize;
2400
"comisd">, TB, OpSize;
2404
2401
} // Defs = [EFLAGS]
2406
2403
// sse12_cmp_packed - sse 1 & 2 compare packed instructions
3320
3325
// Square root.
3321
3326
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
3323
sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>,
3328
sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
3324
3329
sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
3326
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>;
3331
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;
3328
3333
// Reciprocal approximations. Note that these typically require refinement
3329
3334
// in order to obtain suitable precision.
3330
defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
3331
sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>,
3335
defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTSS>,
3336
sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTPS>,
3332
3337
sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
3333
int_x86_avx_rsqrt_ps_256, SSE_SQRTP>;
3338
int_x86_avx_rsqrt_ps_256, SSE_SQRTPS>;
3334
3339
defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>,
3335
3340
sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
3336
3341
sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,