2
// Copyright 2019 The ANGLE Project Authors. All rights reserved.
3
// Use of this source code is governed by a BSD-style license that can be
4
// found in the LICENSE file.
6
// OverlayCull.comp: Cull overlay widgets. A maximum of 32 text widgets and 32 graph widgets is
7
// supported simultaneously.
11
// Extensions. Samplerless texture functions are required first. The subgroup ballot and subgroup
// arithmetic extensions are required on separate lines, with an elif on SupportsArithmetic
// between them.
// NOTE(review): the opening and closing conditional directives around these lines are not visible
// in this text - confirm against the full file.
#extension GL_EXT_samplerless_texture_functions : require
13
#extension GL_KHR_shader_subgroup_ballot: require
14
#elif SupportsArithmetic
15
#extension GL_KHR_shader_subgroup_arithmetic: require
19
// Two alternative configurations follow: WORKGROUP_WIDTH 32 with BLOCK_HEIGHT 4, and
// WORKGROUP_WIDTH 64 with BLOCK_HEIGHT 8, followed by an error directive for any other case.
// NOTE(review): the directives selecting between the alternatives are not visible here, and no
// BLOCK_WIDTH definition is visible although BLOCK_WIDTH is used further down - confirm against
// the full file.
#define WORKGROUP_WIDTH 32
21
#define BLOCK_HEIGHT 4
23
#define WORKGROUP_WIDTH 64
25
#define BLOCK_HEIGHT 8
27
#error "Not all subgroup sizes are accounted for"
31
// Widget limits. Their sum sizes the coordinates array below, and MAX_TEXT_WIDGETS is also used
// as the offset of the first graph widget when widgets are culled in two passes.
#define MAX_TEXT_WIDGETS 32
32
#define MAX_GRAPH_WIDGETS 32
34
// One-dimensional workgroup of WORKGROUP_WIDTH invocations.
layout (local_size_x = WORKGROUP_WIDTH, local_size_y = 1, local_size_z = 1) in;
36
// Write-only rgba32ui image that receives the cull result via imageStore.
layout(set = 0, binding = 0, rgba32ui) uniform writeonly uimage2D culledWidgetsOut;
38
// Uniform block holding one uvec4 per widget. cullWidgets compares .xy against the high corner
// of a block and .zw against the low corner of a block.
layout (set = 0, binding = 1) uniform WidgetCoordinates
40
uvec4 coordinates[MAX_TEXT_WIDGETS + MAX_GRAPH_WIDGETS];
44
// Shared scratch array of 32 masks, OR-reduced by accumulateWidgets in the code path that does
// not use subgroup operations.
shared uint intersectingWidgets[32];
46
// ORs entries of the shared intersectingWidgets array together, folding the entry at
// localId + stride into the entry at localId for strides 16, 8, 4, 2 and 1 in that order.
// Callers read intersectingWidgets[0] afterwards to obtain the combined mask.
//
// localId: index into intersectingWidgets handled by the calling invocation.
//
// NOTE(review): intersectingWidgets has 32 entries, so localId + 16 is only in range for
// localId < 16 (and likewise for the smaller strides). The braces and any guards restricting
// each step are not visible in this text - confirm against the full file.
void accumulateWidgets(const uint localId)
48
// Note: no barriers needed as the workgroup size is the same as hardware subgroup size.
// NOTE(review): the statement above cannot be verified from this file alone - confirm that the
// configuration using this path guarantees it.
51
intersectingWidgets[localId] |= intersectingWidgets[localId + 16];
54
intersectingWidgets[localId] |= intersectingWidgets[localId + 8];
57
intersectingWidgets[localId] |= intersectingWidgets[localId + 4];
60
intersectingWidgets[localId] |= intersectingWidgets[localId + 2];
63
intersectingWidgets[localId] |= intersectingWidgets[localId + 1];
72
// Tests the widget at coordinates[offset + gl_LocalInvocationID.x] against one block and returns
// a pair of bitmasks combining the per-invocation results.
//
// offset:         index of the first widget in the coordinates array for this call.
// blockCoordLow:  low corner of the block being tested.
// blockCoordHigh: high corner of the block being tested (compared with strict less-than).
//
// Returns a uvec2 of masks. Several alternative implementations follow (subgroupBallot,
// subgroupOr, and a shared-memory reduction), each ending in its own return or ballot assignment.
// NOTE(review): most of the preprocessor directives selecting between the alternatives, the
// braces, and the final return of the shared-memory variant are not visible in this text -
// confirm against the full file.
uvec2 cullWidgets(const uint offset, const uvec2 blockCoordLow, const uvec2 blockCoordHigh)
74
const uint localId = gl_LocalInvocationID.x;
75
const uvec4 widgetCoords = coordinates[offset + localId];
77
// A widget intersects the block when it is non-empty in x (x < z), its .xy is below the block
// high corner in both dimensions, and its .zw is at or above the block low corner in both
// dimensions.
const bool intersects = widgetCoords.x < widgetCoords.z &&
78
all(lessThan(widgetCoords.xy, blockCoordHigh)) &&
79
all(greaterThanEqual(widgetCoords.zw, blockCoordLow));
83
// Ballot variant: the first two components of the ballot are returned directly.
return subgroupBallot(intersects).xy;
85
#elif SupportsArithmetic
88
// Arithmetic variant, first alternative: invocations below MAX_TEXT_WIDGETS contribute bit
// localId to the x mask; the remaining invocations contribute bit
// (localId - MAX_TEXT_WIDGETS) to the y mask.
const uint textWidgetBit =
89
localId < MAX_TEXT_WIDGETS ? uint(intersects) << localId : 0;
90
const uint graphWidgetBit =
91
localId >= MAX_TEXT_WIDGETS ? uint(intersects) << (localId - MAX_TEXT_WIDGETS) : 0;
92
return uvec2(subgroupOr(textWidgetBit), subgroupOr(graphWidgetBit));
94
// Arithmetic variant, second alternative: every invocation contributes bit localId to the x
// mask and the y mask is 0.
return uvec2(subgroupOr(uint(intersects) << localId), 0);
96
#error "Not all subgroup sizes are accounted for"
101
// Shared-memory variant: each invocation writes its bit to intersectingWidgets, then
// accumulateWidgets ORs the entries and the combined mask is read from entry 0.
uvec2 ballot = uvec2(0, 0);
103
// Text widgets: invocations below MAX_TEXT_WIDGETS produce ballot.x.
if (localId < MAX_TEXT_WIDGETS)
105
intersectingWidgets[localId] = uint(intersects) << localId;
106
accumulateWidgets(localId);
109
ballot.x = intersectingWidgets[0];
114
// Graph widgets: the index is rebased by MAX_TEXT_WIDGETS and the result goes to ballot.y.
const uint graphLocalId = localId - MAX_TEXT_WIDGETS;
115
intersectingWidgets[graphLocalId] = uint(intersects) << graphLocalId;
116
accumulateWidgets(graphLocalId);
120
ballot.y = intersectingWidgets[0];
123
// Alternative in which all invocations write using localId directly and only ballot.x is set.
intersectingWidgets[localId] = uint(intersects) << localId;
124
accumulateWidgets(localId);
127
ballot.x = intersectingWidgets[0];
130
#error "Not all subgroup sizes are accounted for"
136
#error "Not all subgroup operations are accounted for"
142
// There is one workgroup per pixel in culledWidgetsOut (outCoord is gl_WorkGroupID.xy).
142
// Depending on the subgroup
143
// size, either all widgets and graphs are processed simultaneously (subgroup size 64) or
144
// separately (subgroup size 32).
// NOTE(review): the enclosing function header and the declaration of culledWidgets are not
// visible in this text - confirm against the full file.
145
const uvec2 outCoord = gl_WorkGroupID.xy;
146
// Bounds of the block covered by this output pixel, in units of BLOCK_WIDTH x BLOCK_HEIGHT.
const uvec2 blockCoordLow = outCoord * uvec2(BLOCK_WIDTH, BLOCK_HEIGHT);
147
const uvec2 blockCoordHigh = blockCoordLow + uvec2(BLOCK_WIDTH, BLOCK_HEIGHT);
152
// Single-pass alternative: one call starting at offset 0 supplies both mask components.
culledWidgets = cullWidgets(0, blockCoordLow, blockCoordHigh);
154
// Two-pass alternative: one call at offset 0 supplies .x and a second call at offset
// MAX_TEXT_WIDGETS supplies .y; only the x component of each result is used.
culledWidgets.x = cullWidgets(0, blockCoordLow, blockCoordHigh).x;
155
culledWidgets.y = cullWidgets(MAX_TEXT_WIDGETS, blockCoordLow, blockCoordHigh).x;
157
#error "Not all subgroup sizes are accounted for"
160
// Only invocation 0 of the workgroup writes the result; the last two channels are written as 0.
if (gl_LocalInvocationID.x == 0)
162
imageStore(culledWidgetsOut, ivec2(outCoord), uvec4(culledWidgets, 0, 0));