~ubuntu-branches/ubuntu/natty/mesa/natty-proposed

« back to all changes in this revision

Viewing changes to src/gallium/drivers/llvmpipe/lp_tile_soa.py

  • Committer: Bazaar Package Importer
  • Author(s): Robert Hooker, Robert Hooker, Christopher James Halse Rogers
  • Date: 2010-09-14 08:55:40 UTC
  • mfrom: (1.2.28 upstream)
  • Revision ID: james.westby@ubuntu.com-20100914085540-m4fpl0hdjlfd4jgz
Tags: 7.9~git20100909-0ubuntu1
[ Robert Hooker ]
* New upstream git snapshot up to commit 94118fe2d4b1e5 (LP: #631413)
* New features include ATI HD5xxx series support in r600, and a vastly
  improved glsl compiler.
* Remove pre-generated .pc files; use the ones generated at build time
  instead.
* Remove all references to mesa-utils now that it's no longer shipped
  with the mesa source.
* Disable the experimental ARB_fragment_shader option by default on
  i915; it exposes incomplete functionality that breaks KDE compositing,
  among other things. It can still be enabled via driconf (LP: #628930).

[ Christopher James Halse Rogers ]
* debian/patches/04_osmesa_version.diff:
  - Refresh for new upstream
* Bugs fixed in this release:
  - Fixes severe rendering corruption in Unity on radeon (LP: #628727,
    LP: #596292, LP: #599741, LP: #630315, LP: #613694, LP: #599741).
  - Also fixes rendering in gnome-shell (LP: #578619).
  - Flickering in OpenGL apps on radeon (LP: #626943, LP: #610541).
  - Provides preliminary support for new intel chips (LP: #601052).
* debian/rules:
  - Update configure flags to match upstream reshuffling.
  - Explicitly remove gallium DRI drivers that we don't want to ship.
* Update debian/gbp.conf for this Maverick-specific packaging
* libegl1-mesa-dri-x11,kms: There are no longer separate kms or x11 drivers
  for EGL; libegl1-mesa-drivers now contains a single driver that provides
  both backends.

Show diffs side-by-side

added added

removed removed

Lines of Context:
42
42
 
43
43
sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '../../auxiliary/util'))
44
44
 
45
 
from u_format_access import *
 
45
from u_format_pack import *
 
46
 
 
47
 
 
48
def is_format_supported(format):
 
49
    '''Determines whether we actually have the plumbing necessary to generate the code
 
50
    to read/write to/from this format.'''
 
51
 
 
52
    # FIXME: Ideally we would support any format combination here.
 
53
 
 
54
    if format.layout != PLAIN:
 
55
        return False
 
56
 
 
57
    for i in range(4):
 
58
        channel = format.channels[i]
 
59
        if channel.type not in (VOID, UNSIGNED, SIGNED, FLOAT):
 
60
            return False
 
61
        if channel.type == FLOAT and channel.size not in (16, 32 ,64):
 
62
            return False
 
63
 
 
64
    if format.colorspace not in ('rgb', 'srgb'):
 
65
        return False
 
66
 
 
67
    return True
46
68
 
47
69
 
48
70
def generate_format_read(format, dst_channel, dst_native_type, dst_suffix):
53
75
    src_native_type = native_type(format)
54
76
 
55
77
    print 'static void'
56
 
    print 'lp_tile_%s_read_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type)
 
78
    print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)' % (name, dst_suffix, dst_native_type)
57
79
    print '{'
58
80
    print '   unsigned x, y;'
59
81
    print '   const uint8_t *src_row = src + y0*src_stride;'
60
 
    print '   for (y = 0; y < h; ++y) {'
 
82
    print '   for (y = 0; y < TILE_SIZE; ++y) {'
61
83
    print '      const %s *src_pixel = (const %s *)(src_row + x0*%u);' % (src_native_type, src_native_type, format.stride())
62
 
    print '      for (x = 0; x < w; ++x) {'
 
84
    print '      for (x = 0; x < TILE_SIZE; ++x) {'
63
85
 
64
86
    names = ['']*4
65
 
    if format.colorspace == 'rgb':
 
87
    if format.colorspace in ('rgb', 'srgb'):
66
88
        for i in range(4):
67
89
            swizzle = format.swizzles[i]
68
90
            if swizzle < 4:
95
117
                shift += width
96
118
        else:
97
119
            for i in range(4):
 
120
                if names[i]:
 
121
                    print '         %s %s;' % (dst_native_type, names[i])
 
122
            for i in range(4):
98
123
                src_channel = format.channels[i]
99
124
                if names[i]:
100
125
                    value = '(*src_pixel++)'
101
126
                    value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False)
102
 
                    print '         %s %s = %s;' % (dst_native_type, names[i], value)
 
127
                    print '         %s = %s;' % (names[i], value)
 
128
                elif src_channel.size:
 
129
                    print '         ++src_pixel;'
103
130
    else:
104
131
        assert False
105
132
 
106
133
    for i in range(4):
107
 
        if format.colorspace == 'rgb':
 
134
        if format.colorspace in ('rgb', 'srgb'):
108
135
            swizzle = format.swizzles[i]
109
136
            if swizzle < 4:
110
137
                value = names[swizzle]
134
161
    """Return an expression for packing r, g, b, a into a pixel of the
135
162
    given format.  Ex: '(b << 24) | (g << 16) | (r << 8) | (a << 0)'
136
163
    """
137
 
    assert format.colorspace == 'rgb'
 
164
    assert format.colorspace in ('rgb', 'srgb')
138
165
    inv_swizzle = format.inv_swizzles()
139
166
    shift = 0
140
167
    expr = None
166
193
    return expr
167
194
 
168
195
 
169
 
def emit_unrolled_write_code(format, src_channel):
 
196
def emit_unrolled_unswizzle_code(format, src_channel):
170
197
    '''Emit code for writing a block based on unrolled loops.
171
198
    This is considerably faster than the TILE_PIXEL-based code below.
172
199
    '''
175
202
    print '   %s *dstpix = (%s *) dst;' % (dst_native_type, dst_native_type)
176
203
    print '   unsigned int qx, qy, i;'
177
204
    print
178
 
    print '   for (qy = 0; qy < h; qy += TILE_VECTOR_HEIGHT) {'
 
205
    print '   for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) {'
179
206
    print '      const unsigned py = y0 + qy;'
180
 
    print '      for (qx = 0; qx < w; qx += TILE_VECTOR_WIDTH) {'
 
207
    print '      for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) {'
181
208
    print '         const unsigned px = x0 + qx;'
182
209
    print '         const uint8_t *r = src + 0 * TILE_C_STRIDE;'
183
210
    print '         const uint8_t *g = src + 1 * TILE_C_STRIDE;'
196
223
    print '   }'
197
224
 
198
225
 
199
 
def emit_tile_pixel_write_code(format, src_channel):
 
226
def emit_tile_pixel_unswizzle_code(format, src_channel):
200
227
    '''Emit code for writing a block based on the TILE_PIXEL macro.'''
201
228
    dst_native_type = native_type(format)
202
229
 
204
231
 
205
232
    print '   unsigned x, y;'
206
233
    print '   uint8_t *dst_row = dst + y0*dst_stride;'
207
 
    print '   for (y = 0; y < h; ++y) {'
 
234
    print '   for (y = 0; y < TILE_SIZE; ++y) {'
208
235
    print '      %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride())
209
 
    print '      for (x = 0; x < w; ++x) {'
 
236
    print '      for (x = 0; x < TILE_SIZE; ++x) {'
210
237
 
211
238
    if format.layout == PLAIN:
212
239
        if not format.is_array():
230
257
                    value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i]
231
258
                    value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False)
232
259
                    print '         *dst_pixel++ = %s;' % value
 
260
                elif dst_channel.size:
 
261
                    print '         ++dst_pixel;'
233
262
    else:
234
263
        assert False
235
264
 
244
273
    name = format.short_name()
245
274
 
246
275
    print 'static void'
247
 
    print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type)
 
276
    print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)' % (name, src_suffix, src_native_type)
248
277
    print '{'
249
278
    if format.layout == PLAIN \
250
279
        and format.colorspace == 'rgb' \
251
280
        and format.block_size() <= 32 \
252
281
        and format.is_pot() \
253
282
        and not format.is_mixed() \
254
 
        and format.channels[0].type == UNSIGNED:
255
 
        emit_unrolled_write_code(format, src_channel)
 
283
        and (format.channels[0].type == UNSIGNED \
 
284
             or format.channels[1].type == UNSIGNED):
 
285
        emit_unrolled_unswizzle_code(format, src_channel)
256
286
    else:
257
 
        emit_tile_pixel_write_code(format, src_channel)
 
287
        emit_tile_pixel_unswizzle_code(format, src_channel)
258
288
    print '}'
259
289
    print
260
290
    
261
291
 
262
 
def generate_read(formats, dst_channel, dst_native_type, dst_suffix):
 
292
def generate_ssse3():
 
293
    print '''
 
294
#if defined(PIPE_ARCH_SSE)
 
295
 
 
296
#include "util/u_sse.h"
 
297
 
 
298
static void
 
299
lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst,
 
300
                                         const uint8_t *src, unsigned src_stride,
 
301
                                         unsigned x0, unsigned y0)
 
302
{
 
303
 
 
304
   unsigned x, y;
 
305
   __m128i *pdst = (__m128i*) dst;
 
306
   const uint8_t *ysrc0 = src + y0*src_stride + x0*sizeof(uint32_t);
 
307
   unsigned int tile_stridex = src_stride*(TILE_VECTOR_HEIGHT - 1) - sizeof(uint32_t)*TILE_VECTOR_WIDTH;
 
308
   unsigned int tile_stridey = src_stride*TILE_VECTOR_HEIGHT;
 
309
 
 
310
   const __m128i shuffle00 = _mm_setr_epi8(0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
 
311
   const __m128i shuffle01 = _mm_setr_epi8(0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
 
312
   const __m128i shuffle02 = _mm_setr_epi8(0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
 
313
   const __m128i shuffle03 = _mm_setr_epi8(0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
 
314
 
 
315
   const __m128i shuffle10 = _mm_setr_epi8(0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
 
316
   const __m128i shuffle11 = _mm_setr_epi8(0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
 
317
   const __m128i shuffle12 = _mm_setr_epi8(0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
 
318
   const __m128i shuffle13 = _mm_setr_epi8(0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
 
319
 
 
320
   const __m128i shuffle20 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff);
 
321
   const __m128i shuffle21 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff);
 
322
   const __m128i shuffle22 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff);
 
323
   const __m128i shuffle23 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff);
 
324
 
 
325
   const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e);
 
326
   const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d);
 
327
   const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c);
 
328
   const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f);
 
329
 
 
330
   for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) {
 
331
      __m128i line0 = *(__m128i*)ysrc0;
 
332
      const uint8_t *ysrc = ysrc0 + src_stride;
 
333
      ysrc0 += tile_stridey;
 
334
 
 
335
      for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) {
 
336
         __m128i r, g, b, a, line1;
 
337
         line1 = *(__m128i*)ysrc;
 
338
         PIPE_READ_WRITE_BARRIER();
 
339
         ysrc += src_stride;
 
340
         r = _mm_shuffle_epi8(line0, shuffle00);
 
341
         g = _mm_shuffle_epi8(line0, shuffle01);
 
342
         b = _mm_shuffle_epi8(line0, shuffle02);
 
343
         a = _mm_shuffle_epi8(line0, shuffle03);
 
344
 
 
345
         line0 = *(__m128i*)ysrc;
 
346
         PIPE_READ_WRITE_BARRIER();
 
347
         ysrc += src_stride;
 
348
         r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle10));
 
349
         g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle11));
 
350
         b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle12));
 
351
         a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle13));
 
352
 
 
353
         line1 = *(__m128i*)ysrc;
 
354
         PIPE_READ_WRITE_BARRIER();
 
355
         ysrc -= tile_stridex;
 
356
         r = _mm_or_si128(r, _mm_shuffle_epi8(line0, shuffle20));
 
357
         g = _mm_or_si128(g, _mm_shuffle_epi8(line0, shuffle21));
 
358
         b = _mm_or_si128(b, _mm_shuffle_epi8(line0, shuffle22));
 
359
         a = _mm_or_si128(a, _mm_shuffle_epi8(line0, shuffle23));
 
360
 
 
361
         if (x + 1 < TILE_SIZE) {
 
362
            line0 = *(__m128i*)ysrc;
 
363
            ysrc += src_stride;
 
364
         }
 
365
 
 
366
         PIPE_READ_WRITE_BARRIER();
 
367
         r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle30));
 
368
         g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle31));
 
369
         b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle32));
 
370
         a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle33));
 
371
 
 
372
         *pdst++ = r;
 
373
         *pdst++ = g;
 
374
         *pdst++ = b;
 
375
         *pdst++ = a;
 
376
      }
 
377
   }
 
378
 
 
379
}
 
380
 
 
381
static void
 
382
lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t *src,
 
383
                                          uint8_t *dst, unsigned dst_stride,
 
384
                                          unsigned x0, unsigned y0)
 
385
{
 
386
   unsigned int x, y;
 
387
   const __m128i *psrc = (__m128i*) src;
 
388
   const __m128i *end = (__m128i*) (src + (y0 + TILE_SIZE - 1)*dst_stride + (x0 + TILE_SIZE - 1)*sizeof(uint32_t));
 
389
   uint8_t *pdst = dst + y0 * dst_stride + x0 * sizeof(uint32_t);
 
390
   __m128i c0 = *psrc++;
 
391
   __m128i c1;
 
392
 
 
393
   const __m128i shuffle00 = _mm_setr_epi8(0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff);
 
394
   const __m128i shuffle01 = _mm_setr_epi8(0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff);
 
395
   const __m128i shuffle02 = _mm_setr_epi8(0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff);
 
396
   const __m128i shuffle03 = _mm_setr_epi8(0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff);
 
397
 
 
398
   const __m128i shuffle10 = _mm_setr_epi8(0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff);
 
399
   const __m128i shuffle11 = _mm_setr_epi8(0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff);
 
400
   const __m128i shuffle12 = _mm_setr_epi8(0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff);
 
401
   const __m128i shuffle13 = _mm_setr_epi8(0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff);
 
402
 
 
403
   const __m128i shuffle20 = _mm_setr_epi8(0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff,0xff);
 
404
   const __m128i shuffle21 = _mm_setr_epi8(0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff,0xff);
 
405
   const __m128i shuffle22 = _mm_setr_epi8(0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff,0xff);
 
406
   const __m128i shuffle23 = _mm_setr_epi8(0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff,0xff);
 
407
 
 
408
   const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05);
 
409
   const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07);
 
410
   const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d);
 
411
   const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f);
 
412
 
 
413
   for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) {
 
414
      __m128i *tile = (__m128i*) pdst;
 
415
      pdst += dst_stride * TILE_VECTOR_HEIGHT;
 
416
      for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) {
 
417
         uint8_t *linep = (uint8_t*) (tile++);
 
418
         __m128i line0, line1, line2, line3;
 
419
 
 
420
         c1 = *psrc++; /* r */
 
421
         PIPE_READ_WRITE_BARRIER();
 
422
         line0 = _mm_shuffle_epi8(c0, shuffle00);
 
423
         line1 = _mm_shuffle_epi8(c0, shuffle01);
 
424
         line2 = _mm_shuffle_epi8(c0, shuffle02);
 
425
         line3 = _mm_shuffle_epi8(c0, shuffle03);
 
426
 
 
427
         c0 = *psrc++; /* g */
 
428
         PIPE_READ_WRITE_BARRIER();
 
429
         line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle10));
 
430
         line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle11));
 
431
         line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle12));
 
432
         line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle13));
 
433
 
 
434
         c1 = *psrc++; /* b */
 
435
         PIPE_READ_WRITE_BARRIER();
 
436
         line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c0, shuffle20));
 
437
         line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c0, shuffle21));
 
438
         line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c0, shuffle22));
 
439
         line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c0, shuffle23));
 
440
 
 
441
         if (psrc != end)
 
442
                 c0 = *psrc++; /* a */
 
443
         PIPE_READ_WRITE_BARRIER();
 
444
         line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle30));
 
445
         line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle31));
 
446
         line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle32));
 
447
         line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle33));
 
448
 
 
449
         *(__m128i*) (linep) = line0;
 
450
         *(__m128i*) (((char*)linep) + dst_stride) = line1;
 
451
         *(__m128i*) (((char*)linep) + 2 * dst_stride) = line2;
 
452
         *(__m128i*) (((char*)linep) + 3 * dst_stride) = line3;
 
453
      }
 
454
   }
 
455
}
 
456
 
 
457
#endif /* PIPE_ARCH_SSSE3 */
 
458
'''
 
459
 
 
460
 
 
461
def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix):
263
462
    '''Generate the dispatch function to read pixels from any format'''
264
463
 
265
464
    for format in formats:
267
466
            generate_format_read(format, dst_channel, dst_native_type, dst_suffix)
268
467
 
269
468
    print 'void'
270
 
    print 'lp_tile_read_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type)
 
469
    print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y)' % (dst_suffix, dst_native_type)
271
470
    print '{'
272
 
    print '   void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type
 
471
    print '   void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0);' % dst_native_type
 
472
    print '#ifdef DEBUG'
 
473
    print '   lp_tile_swizzle_count += 1;'
 
474
    print '#endif'
273
475
    print '   switch(format) {'
274
476
    for format in formats:
275
477
        if is_format_supported(format):
276
478
            print '   case %s:' % format.name
277
 
            print '      func = &lp_tile_%s_read_%s;' % (format.short_name(), dst_suffix)
 
479
            func_name = 'lp_tile_%s_swizzle_%s' % (format.short_name(), dst_suffix)
 
480
            if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM':
 
481
                print '#ifdef PIPE_ARCH_SSE'
 
482
                print '      func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name)
 
483
                print '#else'
 
484
                print '      func = %s;' % (func_name,)
 
485
                print '#endif'
 
486
            else:
 
487
                print '      func = %s;' % (func_name,)
278
488
            print '      break;'
279
489
    print '   default:'
280
 
    print '      debug_printf("unsupported format\\n");'
 
490
    print '      debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));'
281
491
    print '      return;'
282
492
    print '   }'
283
 
    print '   func(dst, (const uint8_t *)src, src_stride, x, y, w, h);'
 
493
    print '   func(dst, (const uint8_t *)src, src_stride, x, y);'
284
494
    print '}'
285
495
    print
286
496
 
287
497
 
288
 
def generate_write(formats, src_channel, src_native_type, src_suffix):
 
498
def generate_unswizzle(formats, src_channel, src_native_type, src_suffix):
289
499
    '''Generate the dispatch function to write pixels to any format'''
290
500
 
291
501
    for format in formats:
293
503
            generate_format_write(format, src_channel, src_native_type, src_suffix)
294
504
 
295
505
    print 'void'
296
 
    print 'lp_tile_write_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type)
 
506
    print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y)' % (src_suffix, src_native_type)
297
507
    
298
508
    print '{'
299
 
    print '   void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type
 
509
    print '   void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0);' % src_native_type
 
510
    print '#ifdef DEBUG'
 
511
    print '   lp_tile_unswizzle_count += 1;'
 
512
    print '#endif'
300
513
    print '   switch(format) {'
301
514
    for format in formats:
302
515
        if is_format_supported(format):
303
516
            print '   case %s:' % format.name
304
 
            print '      func = &lp_tile_%s_write_%s;' % (format.short_name(), src_suffix)
 
517
            func_name = 'lp_tile_%s_unswizzle_%s' % (format.short_name(), src_suffix)
 
518
            if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM':
 
519
                print '#ifdef PIPE_ARCH_SSE'
 
520
                print '      func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name)
 
521
                print '#else'
 
522
                print '      func = %s;' % (func_name,)
 
523
                print '#endif'
 
524
            else:
 
525
                print '      func = %s;' % (func_name,)
305
526
            print '      break;'
306
527
    print '   default:'
307
 
    print '      debug_printf("unsupported format\\n");'
 
528
    print '      debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));'
308
529
    print '      return;'
309
530
    print '   }'
310
 
    print '   func(src, (uint8_t *)dst, dst_stride, x, y, w, h);'
 
531
    print '   func(src, (uint8_t *)dst, dst_stride, x, y);'
311
532
    print '}'
312
533
    print
313
534
 
325
546
    print '#include "pipe/p_compiler.h"'
326
547
    print '#include "util/u_format.h"'
327
548
    print '#include "util/u_math.h"'
 
549
    print '#include "util/u_half.h"'
 
550
    print '#include "util/u_cpu_detect.h"'
328
551
    print '#include "lp_tile_soa.h"'
329
552
    print
 
553
    print '#ifdef DEBUG'
 
554
    print 'unsigned lp_tile_unswizzle_count = 0;'
 
555
    print 'unsigned lp_tile_swizzle_count = 0;'
 
556
    print '#endif'
 
557
    print
330
558
    print 'const unsigned char'
331
559
    print 'tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {'
332
560
    print '   {  0,  1,  4,  5},'
349
577
    print '};'
350
578
    print
351
579
 
352
 
    generate_clamp()
 
580
    generate_ssse3()
353
581
 
354
582
    channel = Channel(UNSIGNED, True, 8)
355
583
    native_type = 'uint8_t'
356
584
    suffix = '4ub'
357
585
 
358
 
    generate_read(formats, channel, native_type, suffix)
359
 
    generate_write(formats, channel, native_type, suffix)
 
586
    generate_swizzle(formats, channel, native_type, suffix)
 
587
    generate_unswizzle(formats, channel, native_type, suffix)
360
588
 
361
589
 
362
590
if __name__ == '__main__':