; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s

; Exercises how the loop vectorizer treats llvm.mem.parallel_loop_access
; metadata: the same loop body appears without the metadata, with consistent
; metadata, and with metadata that names a different loop.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; The scalar loop under test, written in C:
;
; void loop(int *a, int *b) {
;   for (int i = 0; i < 512; ++i) {
;     a[a[i]] = b[i];
;     a[i] = b[i+1];
;   }
; }

; Baseline without any parallel-loop metadata. The first store goes through an
; index that is itself loaded from %a, so the test expects the loop to stay
; scalar (no <4 x i32> operations in the output).
;CHECK-LABEL: @loop(
;CHECK-NOT: <4 x i32>
define void @loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; %0 = b[i]
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  ; %1 = a[i], then used as an index back into a
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx2, align 4
  %idxprom3 = sext i32 %1 to i64
  %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
  ; a[a[i]] = b[i]
  store i32 %0, i32* %arrayidx4, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; a[i] = b[i+1]
  %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
  %2 = load i32, i32* %arrayidx6, align 4
  store i32 %2, i32* %arrayidx2, align 4
  ; Leave the loop once the truncated induction variable reaches 512.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 512
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; The same loop with parallel loop metadata added to the loop branch
; and the memory instructions.

; Every memory access is tagged with the identifier carried by the loop's
; back-edge branch (!3), so the test expects vector code at the forced
; width of 4.
;CHECK-LABEL: @parallel_loop(
;CHECK: <4 x i32>
define void @parallel_loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; %0 = b[i]
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
  ; %1 = a[i], then used as an index back into a
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
  %idxprom3 = sext i32 %1 to i64
  %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
  ; This store might have originated from inlining a function with a parallel
  ; loop. Refers to a list with the "original loop reference" (!4) also included.
  store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !5
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; a[i] = b[i+1]
  %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
  %2 = load i32, i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3
  store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
  ; Leave the loop once the truncated induction variable reaches 512; the
  ; back-edge carries the loop identifier the accesses above refer to.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 512
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3

for.end:                                          ; preds = %for.body
  ret void
}

; The same loop with illegal parallel loop metadata: one of the memory
; accesses refers to a different loop's identifier.

; The back-edge and most accesses use !6, but one store names !7 instead.
; That mismatch must keep the loop from being treated as parallel, so no
; <4 x i32> operations are expected.
;CHECK-LABEL: @mixed_metadata(
;CHECK-NOT: <4 x i32>
define void @mixed_metadata(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; %0 = b[i]
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !6
  ; %1 = a[i], then used as an index back into a
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6
  %idxprom3 = sext i32 %1 to i64
  %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
  ; This refers to the loop marked with !7 which we are not in at the moment.
  ; It should prevent detecting as a parallel loop.
  store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !7
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; a[i] = b[i+1]
  %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
  %2 = load i32, i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !6
  store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6
  ; Leave the loop once the truncated induction variable reaches 512.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 512
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6

for.end: ; preds = %for.body