~pali/+junk/llvm-toolchain-3.7

; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}

; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1

; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32

; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33

; @sum_of_array2 is very similar to @sum_of_array. The only difference is in

; the order of "sext" and "add" when computing the array indices. @sum_of_array

; computes add before sext, e.g., array[sext(x + 1)][sext(y + 1)], while

; @sum_of_array2 computes sext before add,

; e.g., array[sext(x) + 1][sext(y) + 1]. SeparateConstOffsetFromGEP should be

; able to extract constant offsets from both forms.

; Adds up four floats read from @array at [x][y], [x][y + 1], [x + 1][y] and
; [x + 1][y + 1] and stores the total through %output. Both indices are
; sign-extended to i64 first; the "+ 1" is then applied to the i64 value.
define void @sum_of_array2(i32 %x, i32 %y, float* nocapture %output) {

.preheader:

; Widen both i32 indices to i64 once; every GEP below is built from %0 and %1.
%0 = sext i32 %y to i64

%1 = sext i32 %x to i64

; Element [x][y]: the address the other three accesses are offsets from.
%2 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0

; The addrspace(3) pointer is cast to a plain float* before it is loaded.
%3 = addrspacecast float addrspace(3)* %2 to float*

%4 = load float, float* %3, align 4

; Start of the running sum.
%5 = fadd float %4, 0.000000e+00

; Element [x][y + 1]: the add happens after the sext, directly on the i64 index.
%6 = add i64 %0, 1

%7 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %6

%8 = addrspacecast float addrspace(3)* %7 to float*

%9 = load float, float* %8, align 4

%10 = fadd float %5, %9

; Element [x + 1][y].
%11 = add i64 %1, 1

%12 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %0

%13 = addrspacecast float addrspace(3)* %12 to float*

%14 = load float, float* %13, align 4

%15 = fadd float %10, %14

; Element [x + 1][y + 1]: reuses both incremented indices computed above.
%16 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %6

%17 = addrspacecast float addrspace(3)* %16 to float*

%18 = load float, float* %17, align 4

%19 = fadd float %15, %18

; Write the four-element total to the caller-supplied pointer.
store float %19, float* %output, align 4

ret void

}

; PTX-LABEL: sum_of_array2(

; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}

; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}

; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}

; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}

; IR-LABEL: @sum_of_array2(

; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}

; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1

; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32

100

; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33

101

102

103

; This function loads

104

; array[zext(x)][zext(y)]

105

; array[zext(x)][zext(y +nuw 1)]

106

; array[zext(x +nuw 1)][zext(y)]

107

; array[zext(x +nuw 1)][zext(y +nuw 1)].

108

;

109

; This function is similar to @sum_of_array, but it

110

; 1) extends array indices using zext instead of sext;

111

; 2) annotates the addition with "nuw"; otherwise, zext(x + 1) => zext(x) + 1

112

; may be invalid.

113

; Adds up four floats read from @array at [x][y], [x][y + 1], [x + 1][y] and
; [x + 1][y + 1] and stores the total through %output. Each "+ 1" is an
; "add nuw" on the i32 value, and the result is then zero-extended to i64.
define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) {
.preheader:
  ; Zero-extend the unmodified indices; used for the [x][y] base element.
  %0 = zext i32 %y to i64
  %1 = zext i32 %x to i64

  ; Element [x][y]. The addrspace(3) pointer is cast to float* before the load.
  %2 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
  %3 = addrspacecast float addrspace(3)* %2 to float*
  %4 = load float, float* %3, align 4
  ; Start of the running sum.
  %5 = fadd float %4, 0.000000e+00

  ; Element [x][y + 1]: increment in i32 with nuw, then zext the result.
  %6 = add nuw i32 %y, 1
  %7 = zext i32 %6 to i64
  %8 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %7
  %9 = addrspacecast float addrspace(3)* %8 to float*
  %10 = load float, float* %9, align 4
  %11 = fadd float %5, %10

  ; Element [x + 1][y]: same nuw-add-then-zext shape on the row index.
  %12 = add nuw i32 %x, 1
  %13 = zext i32 %12 to i64
  %14 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %0
  %15 = addrspacecast float addrspace(3)* %14 to float*
  %16 = load float, float* %15, align 4
  %17 = fadd float %11, %16

  ; Element [x + 1][y + 1]: reuses both extended, incremented indices.
  %18 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %7
  %19 = addrspacecast float addrspace(3)* %18 to float*
  %20 = load float, float* %19, align 4
  %21 = fadd float %17, %20

  ; Write the four-element total to the caller-supplied pointer.
  store float %21, float* %output, align 4
  ret void
}

140

; PTX-LABEL: sum_of_array3(

141

; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}

142

; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}

143

; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}

144

; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}

145

146

; IR-LABEL: @sum_of_array3(

147

; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}

148

; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1

149

; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32

150

; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33

151

152

153

; This function loads

154

; array[zext(x)][zext(y)]

155

; array[zext(x)][zext(y) + 1]

156

; array[zext(x) + 1][zext(y)]

157

; array[zext(x) + 1][zext(y) + 1].

158

;

159

; We expect the generated code to reuse the computation of

160

; &array[zext(x)][zext(y)]. See the expected IR and PTX for details.

161

; Adds up four floats read from @array at [x][y], [x][y + 1], [x + 1][y] and
; [x + 1][y + 1] and stores the total through %output. Both indices are
; zero-extended to i64 first; the "+ 1" is then a plain i64 add on the
; extended value.
define void @sum_of_array4(i32 %x, i32 %y, float* nocapture %output) {
.preheader:
  ; Widen both i32 indices to i64 once; every GEP below is built from %0 and %1.
  %0 = zext i32 %y to i64
  %1 = zext i32 %x to i64

  ; Element [x][y]. The addrspace(3) pointer is cast to float* before the load.
  %2 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
  %3 = addrspacecast float addrspace(3)* %2 to float*
  %4 = load float, float* %3, align 4
  ; Start of the running sum.
  %5 = fadd float %4, 0.000000e+00

  ; Element [x][y + 1]: the add happens after the zext, on the i64 index.
  %6 = add i64 %0, 1
  %7 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %6
  %8 = addrspacecast float addrspace(3)* %7 to float*
  %9 = load float, float* %8, align 4
  %10 = fadd float %5, %9

  ; Element [x + 1][y].
  %11 = add i64 %1, 1
  %12 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %0
  %13 = addrspacecast float addrspace(3)* %12 to float*
  %14 = load float, float* %13, align 4
  %15 = fadd float %10, %14

  ; Element [x + 1][y + 1]: reuses both incremented indices computed above.
  %16 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %6
  %17 = addrspacecast float addrspace(3)* %16 to float*
  %18 = load float, float* %17, align 4
  %19 = fadd float %15, %18

  ; Write the four-element total to the caller-supplied pointer.
  store float %19, float* %output, align 4
  ret void
}

186

; PTX-LABEL: sum_of_array4(

187

; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}

188

; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}

189

; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}

190

; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}

191

192

; IR-LABEL: @sum_of_array4(

193

; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}

194

; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1

195

; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32

196

; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33

Older »