103
103
struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
104
104
unsigned nr_buffers;
106
/* Multiple buffer varients can map to a single buffer. */
107
struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS];
108
unsigned nr_buffer_varients;
106
/* Multiple buffer variants can map to a single buffer. */
107
struct translate_buffer_variant buffer_variant[PIPE_MAX_ATTRIBS];
108
unsigned nr_buffer_variants;
110
/* Multiple elements can map to a single buffer varient. */
111
unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS];
110
/* Multiple elements can map to a single buffer variant. */
111
unsigned element_to_buffer_variant[PIPE_MAX_ATTRIBS];
113
113
boolean use_instancing;
114
114
unsigned instance_id;
1062
1062
struct x86_reg instance_id = x86_make_disp(p->machine_EDI,
1063
1063
get_offset(p, &p->instance_id));
1065
for (i = 0; i < p->nr_buffer_varients; i++) {
1066
struct translate_buffer_varient *varient = &p->buffer_varient[i];
1067
struct translate_buffer *buffer = &p->buffer[varient->buffer_index];
1065
for (i = 0; i < p->nr_buffer_variants; i++) {
1066
struct translate_buffer_variant *variant = &p->buffer_variant[i];
1067
struct translate_buffer *buffer = &p->buffer[variant->buffer_index];
1069
if (!index_size || varient->instance_divisor) {
1069
if (!index_size || variant->instance_divisor) {
1070
struct x86_reg buf_max_index = x86_make_disp(p->machine_EDI,
1071
get_offset(p, &buffer->max_index));
1070
1072
struct x86_reg buf_stride = x86_make_disp(p->machine_EDI,
1071
1073
get_offset(p, &buffer->stride));
1072
1074
struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI,
1073
get_offset(p, &varient->ptr));
1075
get_offset(p, &variant->ptr));
1074
1076
struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDI,
1075
1077
get_offset(p, &buffer->base_ptr));
1076
1078
struct x86_reg elt = p->idx_ESI;
1079
1081
/* Calculate pointer to first attrib:
1080
1082
* base_ptr + stride * index, where index depends on instance divisor
1082
if (varient->instance_divisor) {
1084
if (variant->instance_divisor) {
1083
1085
/* Our index is instance ID divided by instance divisor.
1085
1087
x86_mov(p->func, tmp_EAX, instance_id);
1087
if (varient->instance_divisor != 1) {
1089
if (variant->instance_divisor != 1) {
1088
1090
struct x86_reg tmp_EDX = p->tmp2_EDX;
1089
1091
struct x86_reg tmp_ECX = p->src_ECX;
1095
1097
x86_xor(p->func, tmp_EDX, tmp_EDX);
1096
x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor);
1098
x86_mov_reg_imm(p->func, tmp_ECX, variant->instance_divisor);
1097
1099
x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */
1102
/* XXX we need to clamp the index here too, but to a
1103
* per-array max value, not the draw->pt.max_index value
1104
* that's being given to us via translate->set_buffer().
1100
1107
x86_mov(p->func, tmp_EAX, elt);
1109
/* Clamp to max_index
1111
x86_cmp(p->func, tmp_EAX, buf_max_index);
1112
x86_cmovcc(p->func, tmp_EAX, buf_max_index, cc_AE);
1104
* TODO: Respect translate_buffer::max_index.
1107
1115
x86_imul(p->func, tmp_EAX, buf_stride);
1108
1116
x64_rexw(p->func);
1109
1117
x86_add(p->func, tmp_EAX, buf_base_ptr);
1119
x86_cmp(p->func, p->count_EBP, p->tmp_EAX);
1112
1121
/* In the linear case, keep the buffer pointer instead of the
1113
1122
* index number.
1115
if (!index_size && p->nr_buffer_varients == 1)
1124
if (!index_size && p->nr_buffer_variants == 1)
1117
1126
x64_rexw(p->func);
1118
1127
x86_mov(p->func, elt, tmp_EAX);
1138
1147
return x86_make_disp(p->machine_EDI,
1139
1148
get_offset(p, &p->instance_id));
1141
if (!index_size && p->nr_buffer_varients == 1) {
1150
if (!index_size && p->nr_buffer_variants == 1) {
1142
1151
return p->idx_ESI;
1144
else if (!index_size || p->buffer_varient[var_idx].instance_divisor) {
1153
else if (!index_size || p->buffer_variant[var_idx].instance_divisor) {
1145
1154
struct x86_reg ptr = p->src_ECX;
1146
1155
struct x86_reg buf_ptr =
1147
1156
x86_make_disp(p->machine_EDI,
1148
get_offset(p, &p->buffer_varient[var_idx].ptr));
1157
get_offset(p, &p->buffer_variant[var_idx].ptr));
1150
1159
x64_rexw(p->func);
1151
1160
x86_mov(p->func, ptr, buf_ptr);
1155
1164
struct x86_reg ptr = p->src_ECX;
1156
const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx];
1165
const struct translate_buffer_variant *variant = &p->buffer_variant[var_idx];
1158
1167
struct x86_reg buf_stride =
1159
1168
x86_make_disp(p->machine_EDI,
1160
get_offset(p, &p->buffer[varient->buffer_index].stride));
1169
get_offset(p, &p->buffer[variant->buffer_index].stride));
1162
1171
struct x86_reg buf_base_ptr =
1163
1172
x86_make_disp(p->machine_EDI,
1164
get_offset(p, &p->buffer[varient->buffer_index].base_ptr));
1173
get_offset(p, &p->buffer[variant->buffer_index].base_ptr));
1175
struct x86_reg buf_max_index =
1176
x86_make_disp(p->machine_EDI,
1177
get_offset(p, &p->buffer[variant->buffer_index].max_index));
1179
1192
x86_mov(p->func, ptr, elt);
1196
/* Clamp to max_index
1198
x86_cmp(p->func, ptr, buf_max_index);
1199
x86_cmovcc(p->func, ptr, buf_max_index, cc_AE);
1182
1201
x86_imul(p->func, ptr, buf_stride);
1183
1202
x64_rexw(p->func);
1184
1203
x86_add(p->func, ptr, buf_base_ptr);
1191
1210
static boolean incr_inputs( struct translate_sse *p,
1192
1211
unsigned index_size )
1194
if (!index_size && p->nr_buffer_varients == 1) {
1213
if (!index_size && p->nr_buffer_variants == 1) {
1195
1214
struct x86_reg stride = x86_make_disp(p->machine_EDI,
1196
1215
get_offset(p, &p->buffer[0].stride));
1198
if (p->buffer_varient[0].instance_divisor == 0) {
1217
if (p->buffer_variant[0].instance_divisor == 0) {
1199
1218
x64_rexw(p->func);
1200
1219
x86_add(p->func, p->idx_ESI, stride);
1201
1220
sse_prefetchnta(p->func, x86_make_disp(p->idx_ESI, 192));
1207
1226
/* Is this worthwhile??
1209
for (i = 0; i < p->nr_buffer_varients; i++) {
1210
struct translate_buffer_varient *varient = &p->buffer_varient[i];
1228
for (i = 0; i < p->nr_buffer_variants; i++) {
1229
struct translate_buffer_variant *variant = &p->buffer_variant[i];
1211
1230
struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI,
1212
get_offset(p, &varient->ptr));
1231
get_offset(p, &variant->ptr));
1213
1232
struct x86_reg buf_stride = x86_make_disp(p->machine_EDI,
1214
get_offset(p, &p->buffer[varient->buffer_index].stride));
1233
get_offset(p, &p->buffer[variant->buffer_index].stride));
1216
if (varient->instance_divisor == 0) {
1235
if (variant->instance_divisor == 0) {
1217
1236
x86_mov(p->func, p->tmp_EAX, buf_stride);
1218
1237
x64_rexw(p->func);
1219
1238
x86_add(p->func, p->tmp_EAX, buf_ptr);
1323
1342
label = x86_get_label(p->func);
1325
1344
struct x86_reg elt = !index_size ? p->idx_ESI : x86_deref(p->idx_ESI);
1326
int last_varient = -1;
1345
int last_variant = -1;
1327
1346
struct x86_reg vb;
1329
1348
for (j = 0; j < p->translate.key.nr_elements; j++) {
1330
1349
const struct translate_element *a = &p->translate.key.element[j];
1331
unsigned varient = p->element_to_buffer_varient[j];
1350
unsigned variant = p->element_to_buffer_variant[j];
1333
1352
/* Figure out source pointer address:
1335
if (varient != last_varient) {
1336
last_varient = varient;
1337
vb = get_buffer_ptr(p, index_size, varient, elt);
1354
if (variant != last_variant) {
1355
last_variant = variant;
1356
vb = get_buffer_ptr(p, index_size, variant, elt);
1340
1359
if (!translate_attr( p, a,
1463
* Map vertex element to vertex buffer varient.
1482
* Map vertex element to vertex buffer variant.
1465
for (j = 0; j < p->nr_buffer_varients; j++) {
1466
if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer &&
1467
p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) {
1484
for (j = 0; j < p->nr_buffer_variants; j++) {
1485
if (p->buffer_variant[j].buffer_index == key->element[i].input_buffer &&
1486
p->buffer_variant[j].instance_divisor == key->element[i].instance_divisor) {
1471
if (j == p->nr_buffer_varients) {
1472
p->buffer_varient[j].buffer_index = key->element[i].input_buffer;
1473
p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor;
1474
p->nr_buffer_varients++;
1490
if (j == p->nr_buffer_variants) {
1491
p->buffer_variant[j].buffer_index = key->element[i].input_buffer;
1492
p->buffer_variant[j].instance_divisor = key->element[i].instance_divisor;
1493
p->nr_buffer_variants++;
1476
p->element_to_buffer_varient[i] = j;
1495
p->element_to_buffer_variant[i] = j;
1478
1497
assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID);
1480
p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID;
1499
p->element_to_buffer_variant[i] = ELEMENT_BUFFER_INSTANCE_ID;