~pali/+junk/llvm-toolchain-3.7

%5 = shufflevector <16 x i8> %tmp2006.3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>

%tmp2004.3 = bitcast <16 x i8> %5 to <4 x i32>

br i1 undef, label %bb2, label %bb1

bb2:

%result = phi <4 x i32> [ undef, %entry ], [ %tmp2004.3, %bb1 ]

ret <4 x i32> %result

}

; Test trying to do a ShiftCombine on illegal types.

; The vector should be split first.

define void @lshrIllegalType(<8 x i32>* %A) nounwind {
  ; Read the full <8 x i32> vector stored at %A.
  %loaded = load <8 x i32>, <8 x i32>* %A
  ; Logical right shift of every lane by the constant 3.
  %shifted = lshr <8 x i32> %loaded, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ; Write the shifted vector back to the same address.
  store <8 x i32> %shifted, <8 x i32>* %A
  ret void
}

; Test folding a binary vector operation with constant BUILD_VECTOR

; operands with i16 elements.

define void @test_i16_constant_fold() nounwind optsize {
entry:
  ; Every operand in this chain is a constant (zeroinitializer, undef or a
  ; splat of 1), so the whole computation is a candidate for folding.
  %ext = sext <4 x i1> zeroinitializer to <4 x i16>
  %sum = add <4 x i16> %ext, zeroinitializer
  ; Widen from 4 to 8 lanes; mask indices 4-7 select from the undef operand.
  %wide = shufflevector <4 x i16> %sum, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %inc = add <8 x i16> %wide, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %narrow = trunc <8 x i16> %inc to <8 x i8>
  ; The store goes through an undef pointer and the block ends in
  ; unreachable; there are no output checks attached to this function.
  tail call void @llvm.arm.neon.vst1.v8i8(i8* undef, <8 x i8> %narrow, i32 1)
  unreachable
}

; Declaration of the NEON vst1 intrinsic called by @test_i16_constant_fold.
; NOTE(review): the trailing i32 operand is presumably the alignment - confirm
; against the intrinsic's definition.
declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind

; Test that loads and stores of i64 vector elements are handled as f64 values

; so they are not split up into i32 values. Radar 8755338.

define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind {
; CHECK: i64_buildvector
; CHECK: vldr
  ; Scalar i64 load (4-byte aligned) that becomes lane 0 of a fresh vector.
  %scalar = load i64, i64* %ptr, align 4
  ; Lane 1 is left undef.
  %built = insertelement <2 x i64> undef, i64 %scalar, i32 0
  store <2 x i64> %built, <2 x i64>* %vp
  ret void
}

; Loads a scalar i64 and inserts it into lane 0 of a <2 x i64> that is itself
; loaded from %vp, then stores the updated vector back. The checks below
; expect a vldr in the generated code.
define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind {
; CHECK: i64_insertelement
; CHECK: vldr
  %t0 = load i64, i64* %ptr, align 4
  %vec = load <2 x i64>, <2 x i64>* %vp
  %t1 = insertelement <2 x i64> %vec, i64 %t0, i32 0
  store <2 x i64> %t1, <2 x i64>* %vp
  ret void
}

; Loads a <2 x i64> from %vp, extracts lane 0 and stores that scalar to %ptr.
; The checks below expect a vstr in the generated code.
define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind {
; CHECK: i64_extractelement
; CHECK: vstr
  %vec = load <2 x i64>, <2 x i64>* %vp
  %t1 = extractelement <2 x i64> %vec, i32 0
  store i64 %t1, i64* %ptr
  ret void
}

; Test trying to do an AND Combine on illegal types.
; The function loads a <3 x i8>, masks every lane with 7 and stores the result
; back; it carries no output checks.
define void @andVec(<3 x i8>* %A) nounwind {
  %tmp = load <3 x i8>, <3 x i8>* %A, align 4
  %and = and <3 x i8> %tmp, <i8 7, i8 7, i8 7>
  store <3 x i8> %and, <3 x i8>* %A
  ret void
}

; Test trying to do an OR Combine on illegal types.
; The function loads a <3 x i8>, ORs every lane with 7 and stores the result
; back; it carries no output checks.
define void @orVec(<3 x i8>* %A) nounwind {
  %tmp = load <3 x i8>, <3 x i8>* %A, align 4
  %or = or <3 x i8> %tmp, <i8 7, i8 7, i8 7>
  store <3 x i8> %or, <3 x i8>* %A
  ret void
}

; The following test was hitting an assertion in the DAG combiner when
; constant folding the multiply because the "sext undef" was translated to
; a BUILD_VECTOR with i32 0 operands, which did not match the i16 operands
; of the other BUILD_VECTOR.
define i16 @foldBuildVectors() {
  ; Sign-extend an undef <8 x i8>, multiply by a splat of 255, return lane 0.
  %1 = sext <8 x i8> undef to <8 x i16>
  %2 = mul <8 x i16> %1, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %3 = extractelement <8 x i16> %2, i32 0
  ret i16 %3
}

; Test that we are generating vrev and vext for reverse shuffles of v8i16
; vectors.
; CHECK-LABEL: reverse_v8i16:
define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) {
  %v0 = load <8 x i16>, <8 x i16>* %loadaddr
  ; CHECK: vrev64.16
  ; CHECK: vext.16
  ; The mask lists lanes 7 down to 0, i.e. a full reversal of %v0.
  %v1 = shufflevector <8 x i16> %v0, <8 x i16> undef,
              <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  store <8 x i16> %v1, <8 x i16>* %storeaddr
  ret void
}

; Test that we are generating vrev and vext for reverse shuffles of v16i8
; vectors.
; CHECK-LABEL: reverse_v16i8:
define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) {
  %v0 = load <16 x i8>, <16 x i8>* %loadaddr
  ; CHECK: vrev64.8
  ; CHECK: vext.8
  ; The mask lists lanes 15 down to 0, i.e. a full reversal of %v0.
  %v1 = shufflevector <16 x i8> %v0, <16 x i8> undef,
       <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8,
                   i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  store <16 x i8> %v1, <16 x i8>* %storeaddr
  ret void
}

; <rdar://problem/14170854>.
; vldr cannot handle unaligned loads.
; Fall back to vld1.32, which can, instead of using the general purpose loads
; followed by a costly sequence of instructions to build the vector register.
; CHECK-LABEL: t3:
; CHECK: vld1.32 {[[REG:d[0-9]+]][0]}
; CHECK: vld1.32 {[[REG]][1]}
; CHECK: vmull.u8 q{{[0-9]+}}, [[REG]], [[REG]]
define <8 x i16> @t3(i8 zeroext %xf, i8* nocapture %sp0, i8* nocapture %sp1, i32* nocapture %outp) {
entry:
  ; %xf and %outp are not referenced in the body.
  ; Both i32 loads are only 1-byte aligned (align 1).
  %pix_sp0.0.cast = bitcast i8* %sp0 to i32*
  %pix_sp0.0.copyload = load i32, i32* %pix_sp0.0.cast, align 1
  %pix_sp1.0.cast = bitcast i8* %sp1 to i32*
  %pix_sp1.0.copyload = load i32, i32* %pix_sp1.0.cast, align 1
  ; Assemble a <2 x i32> from the two loads: lane 0 from %sp0, lane 1 from %sp1.
  %vecinit = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
  %vecinit1 = insertelement <2 x i32> %vecinit, i32 %pix_sp1.0.copyload, i32 1
  ; Reinterpret as <8 x i8> and pass the same value as both vmullu operands.
  %0 = bitcast <2 x i32> %vecinit1 to <8 x i8>
  %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %0, <8 x i8> %0)
  ret <8 x i16> %vmull.i
}

; Declaration of the NEON vmullu intrinsic called by @t3, @t4 and @t5.
; Function Attrs: nounwind readnone
declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>)

; Check that (insert_vector_elt (load)) => (vector_load).
; Thus, check that scalar_to_vector does not interfere with that.
define <8 x i16> @t4(i8* nocapture %sp0) {
; CHECK-LABEL: t4:
; CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r0]
entry:
  ; A single 1-byte-aligned i32 load feeds lane 0; lane 1 stays undef.
  %pix_sp0.0.cast = bitcast i8* %sp0 to i32*
  %pix_sp0.0.copyload = load i32, i32* %pix_sp0.0.cast, align 1
  %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
  ; Reinterpret as <8 x i8> and pass the same value as both vmullu operands.
  %0 = bitcast <2 x i32> %vec to <8 x i8>
  %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %0, <8 x i8> %0)
  ret <8 x i16> %vmull.i
}

; Make sure vector load is used for all three loads.
; Lowering to build vector was breaking the single use property of the load of
; %pix_sp0.0.copyload.
; CHECK-LABEL: t5:
; CHECK: vld1.32 {[[REG1:d[0-9]+]][1]}, [r0]
; CHECK: vorr [[REG2:d[0-9]+]], [[REG1]], [[REG1]]
; CHECK: vld1.32 {[[REG1]][0]}, [r1]
; CHECK: vld1.32 {[[REG2]][0]}, [r2]
; CHECK: vmull.u8 q{{[0-9]+}}, [[REG1]], [[REG2]]
define <8 x i16> @t5(i8* nocapture %sp0, i8* nocapture %sp1, i8* nocapture %sp2) {
entry:
  ; Three i32 loads, each only 1-byte aligned (align 1).
  %pix_sp0.0.cast = bitcast i8* %sp0 to i32*
  %pix_sp0.0.copyload = load i32, i32* %pix_sp0.0.cast, align 1
  %pix_sp1.0.cast = bitcast i8* %sp1 to i32*
  %pix_sp1.0.copyload = load i32, i32* %pix_sp1.0.cast, align 1
  %pix_sp2.0.cast = bitcast i8* %sp2 to i32*
  %pix_sp2.0.copyload = load i32, i32* %pix_sp2.0.cast, align 1
  ; %vec holds the %sp0 value in lane 1 and is the base of BOTH vectors below:
  ; %vecinit1 and %vecinit2 each insert into %vec (not into one another), so
  ; they share lane 1 and differ only in lane 0.
  %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 1
  %vecinit1 = insertelement <2 x i32> %vec, i32 %pix_sp1.0.copyload, i32 0
  %vecinit2 = insertelement <2 x i32> %vec, i32 %pix_sp2.0.copyload, i32 0
  %0 = bitcast <2 x i32> %vecinit1 to <8 x i8>
  %1 = bitcast <2 x i32> %vecinit2 to <8 x i8>
  %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %0, <8 x i8> %1)
  ret <8 x i16> %vmull.i
}

; <rdar://problem/14989896> Make sure we manage to truncate a vector from an
; illegal type to a legal type.
define <2 x i8> @test_truncate(<2 x i128> %in) {
; CHECK-LABEL: test_truncate:
; CHECK: mov [[BASE:r[0-9]+]], sp
; CHECK-NEXT: vld1.32 {[[REG1:d[0-9]+]][0]}, {{\[}}[[BASE]]:32]
; CHECK-NEXT: add [[BASE2:r[0-9]+]], [[BASE]], #4
; CHECK-NEXT: vld1.32 {[[REG1]][1]}, {{\[}}[[BASE2]]:32]
; REG2 Should map on the same Q register as REG1, i.e., REG2 = REG1 - 1, but we
; cannot express that.
; CHECK-NEXT: vmov.32 [[REG2:d[0-9]+]][0], r0
; CHECK-NEXT: vmov.32 [[REG2]][1], r1
; The Q register used here should match floor(REG1/2), but we cannot express that.
; CHECK-NEXT: vmovn.i64 [[RES:d[0-9]+]], q{{[0-9]+}}
; CHECK-NEXT: vmov r0, r1, [[RES]]
entry:
  ; Single truncation from <2 x i128> down to <2 x i8>.
  %res = trunc <2 x i128> %in to <2 x i8>
  ret <2 x i8> %res
}

Older »