33
33
#include "brw_state.h"
34
34
#include "brw_defines.h"
36
struct brw_clip_unit_key {
37
unsigned int total_grf;
38
unsigned int urb_entry_read_length;
39
unsigned int curb_entry_read_length;
40
unsigned int clip_mode;
42
unsigned int curbe_offset;
44
unsigned int nr_urb_entries, urb_size;
46
GLboolean depth_clamp;
50
clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
37
brw_prepare_clip_unit(struct brw_context *brw)
52
struct gl_context *ctx = &brw->intel.ctx;
53
memset(key, 0, sizeof(*key));
39
struct intel_context *intel = &brw->intel;
40
struct gl_context *ctx = &intel->ctx;
41
struct brw_clip_unit_state *clip;
43
clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset);
44
memset(clip, 0, sizeof(*clip));
55
46
/* CACHE_NEW_CLIP_PROG */
56
key->total_grf = brw->clip.prog_data->total_grf;
57
key->urb_entry_read_length = brw->clip.prog_data->urb_read_length;
58
key->curb_entry_read_length = brw->clip.prog_data->curb_read_length;
59
key->clip_mode = brw->clip.prog_data->clip_mode;
47
clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) /
50
clip->thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
52
clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
53
clip->thread1.single_program_flow = 1;
55
clip->thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
56
clip->thread3.const_urb_entry_read_length =
57
brw->clip.prog_data->curb_read_length;
61
59
/* BRW_NEW_CURBE_OFFSETS */
62
key->curbe_offset = brw->curbe.clip_start;
60
clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
61
clip->thread3.dispatch_grf_start_reg = 1;
62
clip->thread3.urb_entry_read_offset = 0;
64
64
/* BRW_NEW_URB_FENCE */
65
key->nr_urb_entries = brw->urb.nr_clip_entries;
66
key->urb_size = brw->urb.vsize;
69
key->depth_clamp = ctx->Transform.DepthClamp;
73
clip_unit_create_from_key(struct brw_context *brw,
74
struct brw_clip_unit_key *key)
76
struct intel_context *intel = &brw->intel;
77
struct brw_clip_unit_state clip;
80
memset(&clip, 0, sizeof(clip));
82
clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
84
clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
86
clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
87
clip.thread1.single_program_flow = 1;
89
clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
90
clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
91
clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
92
clip.thread3.dispatch_grf_start_reg = 1;
93
clip.thread3.urb_entry_read_offset = 0;
95
clip.thread4.nr_urb_entries = key->nr_urb_entries;
96
clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
65
clip->thread4.nr_urb_entries = brw->urb.nr_clip_entries;
66
clip->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
97
67
/* If we have enough clip URB entries to run two threads, do so.
99
if (key->nr_urb_entries >= 10) {
69
if (brw->urb.nr_clip_entries >= 10) {
100
70
/* Half of the URB entries go to each thread, and it has to be an
103
assert(key->nr_urb_entries % 2 == 0);
73
assert(brw->urb.nr_clip_entries % 2 == 0);
105
75
/* Although up to 16 concurrent Clip threads are allowed on Ironlake,
106
76
* only 2 threads can output VUEs at a time.
108
78
if (intel->gen == 5)
109
clip.thread4.max_threads = 16 - 1;
79
clip->thread4.max_threads = 16 - 1;
111
clip.thread4.max_threads = 2 - 1;
81
clip->thread4.max_threads = 2 - 1;
113
assert(key->nr_urb_entries >= 5);
114
clip.thread4.max_threads = 1 - 1;
83
assert(brw->urb.nr_clip_entries >= 5);
84
clip->thread4.max_threads = 1 - 1;
117
87
if (unlikely(INTEL_DEBUG & DEBUG_SINGLE_THREAD))
118
clip.thread4.max_threads = 0;
88
clip->thread4.max_threads = 0;
120
90
if (unlikely(INTEL_DEBUG & DEBUG_STATS))
121
clip.thread4.stats_enable = 1;
91
clip->thread4.stats_enable = 1;
123
clip.clip5.userclip_enable_flags = 0x7f;
124
clip.clip5.userclip_must_clip = 1;
125
clip.clip5.guard_band_enable = 0;
126
if (!key->depth_clamp)
127
clip.clip5.viewport_z_clip_enable = 1;
128
clip.clip5.viewport_xy_clip_enable = 1;
129
clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
130
clip.clip5.api_mode = BRW_CLIP_API_OGL;
131
clip.clip5.clip_mode = key->clip_mode;
93
clip->clip5.userclip_enable_flags = 0x7f;
94
clip->clip5.userclip_must_clip = 1;
95
clip->clip5.guard_band_enable = 0;
97
if (!ctx->Transform.DepthClamp)
98
clip->clip5.viewport_z_clip_enable = 1;
99
clip->clip5.viewport_xy_clip_enable = 1;
100
clip->clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
101
clip->clip5.api_mode = BRW_CLIP_API_OGL;
102
clip->clip5.clip_mode = brw->clip.prog_data->clip_mode;
133
104
if (intel->is_g4x)
134
clip.clip5.negative_w_clip_test = 1;
136
clip.clip6.clipper_viewport_state_ptr = 0;
137
clip.viewport_xmin = -1;
138
clip.viewport_xmax = 1;
139
clip.viewport_ymin = -1;
140
clip.viewport_ymax = 1;
142
bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
144
&brw->clip.prog_bo, 1,
145
&clip, sizeof(clip));
105
clip->clip5.negative_w_clip_test = 1;
107
clip->clip6.clipper_viewport_state_ptr = 0;
108
clip->viewport_xmin = -1;
109
clip->viewport_xmax = 1;
110
clip->viewport_ymin = -1;
111
clip->viewport_ymax = 1;
147
113
/* Emit clip program relocation */
148
114
assert(brw->clip.prog_bo);
149
drm_intel_bo_emit_reloc(bo, offsetof(struct brw_clip_unit_state, thread0),
150
brw->clip.prog_bo, clip.thread0.grf_reg_count << 1,
115
drm_intel_bo_emit_reloc(intel->batch.bo,
116
(brw->clip.state_offset +
117
offsetof(struct brw_clip_unit_state, thread0)),
118
brw->clip.prog_bo, clip->thread0.grf_reg_count << 1,
151
119
I915_GEM_DOMAIN_INSTRUCTION, 0);
156
static void upload_clip_unit( struct brw_context *brw )
158
struct brw_clip_unit_key key;
160
clip_unit_populate_key(brw, &key);
162
drm_intel_bo_unreference(brw->clip.state_bo);
163
brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
165
&brw->clip.prog_bo, 1,
167
if (brw->clip.state_bo == NULL) {
168
brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
121
brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT;
172
124
const struct brw_tracked_state brw_clip_unit = {
174
126
.mesa = _NEW_TRANSFORM,
175
.brw = (BRW_NEW_CURBE_OFFSETS |
127
.brw = (BRW_NEW_BATCH |
128
BRW_NEW_CURBE_OFFSETS |
176
129
BRW_NEW_URB_FENCE),
177
130
.cache = CACHE_NEW_CLIP_PROG
179
.prepare = upload_clip_unit,
132
.prepare = brw_prepare_clip_unit,