1
//===-- SPUMathInst.td - Cell SPU math operations ---------*- tablegen -*--===//
3
// Cell SPU math operations
5
// This target description file contains instruction sequences for various
6
// math operations, such as vector multiplies, i32 multiply, etc., for the
7
// SPU's i32, i16, i8 and corresponding vector types.
9
// Any resemblance to libsimdmath or the Cell SDK simdmath library is
10
// purely and completely coincidental.
11
//===----------------------------------------------------------------------===//
13
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
14
// v16i8 multiply instruction sequence:
15
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
17
// v16i8 multiply, built from the 16-bit multiplier (MPYv8i16).
//
// Shape of the expansion:
//   * Each SELBv4i32 merges two MPYv8i16 products under the FSMBIv8i16 0x2222
//     mask: the product of the operands as given, and the product of the
//     operands shifted right by 8, moved back up with SHLHIv8i16 ..., 8.
//   * The first SELB uses the operands unshifted; its result is ANDed with
//     0x0000ffff.
//   * The second SELB uses the operands shifted right by 16/8 with
//     ROTMAIv4i32_i32; its result is shifted left by 16.
//   * ORv4i32 merges the two halves into the final vector.
//
// NOTE(review): the ORv4i32 / ANDv4i32 / SHLIv4i32 wrappers and the first
// SELB's FSMBIv8i16 0x2222 mask were reconstructed from the parenthesis
// structure and from the parallel second SELB; confirm that these record
// names exist in the SPU instruction definitions.
def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
          (ORv4i32
           (ANDv4i32
            (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
                       (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
                                             (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
                       (FSMBIv8i16 0x2222)),
            (ILAv4i32 0x0000ffff)),
           (SHLIv4i32
            (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
                                 (ROTMAIv4i32_i32 VECREG:$rB, 16)),
                       (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
                                             (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
                       (FSMBIv8i16 0x2222)), 16))>;
32
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
33
// v8i16 multiply instruction sequence:
34
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
36
// v8i16 multiply: SELBv8i16 chooses, under the FSMBIv8i16 0xcccc mask,
// between the MPYv8i16 product and the MPYHHv8i16 product shifted left by 16.
def : Pat<
  (mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
  (SELBv8i16
    (MPYv8i16 VECREG:$rA, VECREG:$rB),                    // first select input
    (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),  // second select input
    (FSMBIv8i16 0xcccc))>;                                // select mask
41
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
42
// v4i32, i32 multiply instruction sequence:
43
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
46
// v4i32 multiply: the sum of three partial products,
//   MPYH(rA, rB) + MPYH(rB, rA) + MPYU(rA, rB).
// NOTE(review): presumably MPYH yields the high-half cross terms and MPYU the
// unsigned low-half product, per the SPU ISA; confirm.
//
// The record is left anonymous, like the other multiply patterns in this
// file. The outer Av4i32 is required to combine the two result operands;
// without it the result dag has no operator and the parentheses do not
// balance.
def : Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
          (Av4i32
            (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
                           (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
            (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
53
// Scalar i32 multiply: the sum of three partial products,
//   MPYH(rA, rB) + MPYH(rB, rA) + MPYU(rA, rB).
// NOTE(review): presumably MPYH yields the high-half cross terms and MPYU the
// unsigned low-half product, per the SPU ISA; confirm.
//
// The record is left anonymous, like the other multiply patterns in this
// file. The outer Ar32 is required to combine the two result operands;
// without it the result dag has no operator and the parentheses do not
// balance.
def : Pat<(mul R32C:$rA, R32C:$rB),
          (Ar32
            (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
                  (MPYHr32 R32C:$rB, R32C:$rA)),
            (MPYUr32 R32C:$rA, R32C:$rB))>;
59
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
60
// f32, v4f32 divide instruction sequence:
61
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
63
// Reciprocal estimate of $rB (FRESTf32) passed through the interpolation
// step (FIf32) together with $rB itself.
def Interpf32 : CodeFrag<
  (FIf32 R32FP:$rB,
         (FRESTf32 R32FP:$rB))>;
66
// Division estimate: $rA multiplied (FMf32) by the interpolated reciprocal
// of $rB.
def DivEstf32 : CodeFrag<
  (FMf32 R32FP:$rA, Interpf32.Fragment)>;
67
// Newton-Raphson iteration:
//   residual = FNMS(DivEst, rB, rA)
//   result   = FMA(residual, Interp, DivEst)
// NOTE(review): presumably FNMS(a, b, c) = c - a*b and FMA(a, b, c) = a*b + c
// per the SPU ISA, i.e. result = (rA - est*rB) * (1/rB) + est; confirm.
//
// The FMAf32 dag needs all three operands and its closing parenthesis.
// The tail matches the vector form (NRaphv4f32), which ends in
// DivEstv4f32.Fragment.
def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
                               Interpf32.Fragment,
                               DivEstf32.Fragment)>;
72
// The Newton-Raphson result with the immediate 1 added by AIf32.
// NOTE(review): presumably this bumps the float's bit pattern by one unit in
// the last place; confirm.
def Epsilonf32 : CodeFrag<
  (AIf32 NRaphf32.Fragment, 1)>;
74
// f32 divide: choose between the Newton-Raphson quotient and its
// epsilon-adjusted variant.
//   operand 1: NRaphf32   - the Newton-Raphson quotient
//   operand 2: Epsilonf32 - the same quotient after AIf32 ..., 1
//   operand 3: condition  - CGTIf32(FNMS(rB, Epsilon, rA), -1)
// NOTE(review): presumably the condition tests whether rA - rB*Epsilon is
// non-negative; confirm against the SPU ISA.
//
// SELBf32_cond takes three operands, as SELBv4f32_cond does in the v4f32
// divide pattern. Epsilonf32.Fragment is the second one.
def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
          (SELBf32_cond NRaphf32.Fragment,
                        Epsilonf32.Fragment,
                        (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
79
// Vector form: reciprocal estimate of $rB (FRESTv4f32) passed through the
// interpolation step (FIv4f32) together with $rB itself.
def Interpv4f32 : CodeFrag<
  (FIv4f32 (v4f32 VECREG:$rB),
           (FRESTv4f32 (v4f32 VECREG:$rB)))>;
82
// Vector division estimate: $rA multiplied (FMv4f32) by the interpolated
// reciprocal of $rB.
def DivEstv4f32 : CodeFrag<
  (FMv4f32 (v4f32 VECREG:$rA),
           Interpv4f32.Fragment)>;
83
// Newton-Raphson iteration (vector form):
//   residual = FNMS(DivEst, rB, rA)
//   result   = FMA(residual, Interp, DivEst)
// NOTE(review): presumably FNMS(a, b, c) = c - a*b and FMA(a, b, c) = a*b + c
// per the SPU ISA; confirm.
//
// FNMSv4f32 needs its $rB and $rA operands, and FMAv4f32 needs the Interp
// multiplier. The operands mirror the scalar FNMSf32 use
// (DivEst, $rB, $rA).
def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
                                              (v4f32 VECREG:$rB),
                                              (v4f32 VECREG:$rA)),
                                   Interpv4f32.Fragment,
                                   DivEstv4f32.Fragment)>;
90
// The vector Newton-Raphson result with the immediate 1 added by AIv4f32.
// NOTE(review): presumably this bumps each element's bit pattern by one unit
// in the last place; confirm.
def Epsilonv4f32 : CodeFrag<
  (AIv4f32 NRaphv4f32.Fragment, 1)>;
92
// v4f32 divide: SELBv4f32_cond picks between the Newton-Raphson quotient and
// its epsilon-adjusted variant. The condition is
// CGTIv4f32(FNMSv4f32(rB, Epsilon, rA), -1).
def : Pat<
  (fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
  (SELBv4f32_cond
    NRaphv4f32.Fragment,                        // Newton-Raphson quotient
    Epsilonv4f32.Fragment,                      // quotient after AIv4f32 ..., 1
    (CGTIv4f32                                  // selection condition
      (FNMSv4f32 (v4f32 VECREG:$rB),
                 Epsilonv4f32.Fragment,
                 (v4f32 VECREG:$rA)),
      -1))>;