3
/* Due to strange behaviour of as.exe we use these macros */
4
/* For all OS/2 coders - please use PGCC to compile this code */
5
#define PR_(foo) ___##foo
6
#define PT_(foo,func) ___##foo,##func
9
.size ___##sym,.___end_##sym-___##sym; \
11
#elif defined(__APPLE__)
12
#define PR_(foo) ___##foo
13
#define PT_(foo,func) ___##foo,##func
18
#define PR_(foo) __##foo
19
#define PT_(foo,func) __##foo,##func
22
.size __##sym,.__end_##sym-__##sym; \
26
#if defined(__i386__) && ( defined(__GNUC__) || defined(__INTEL_COMPILER) )
30
|*| MMX assembly scaling routine for Imlib2
31
|*| Written by Willem Monsuwe <willem@stack.nl>
36
/*\ Export the routine's symbol; PR_() prepends the platform's underscore prefix \*/
.globl PR_(qimageScale_mmx_AARGBA)
37
/* .type PT_(qimageScale_mmx_AARGBA,@function) */
40
/*\ Prototype: __qimageScale_mmx_AARGBA(ImlibScaleInfo *isi, DATA32 *dest,
41
|*| int dxx, int dyy, int dx, int dy, int dw, int dh, int dow, int sow)
55
/*\ Local variables that didn't fit in registers \*/
65
/*\ sow_4 is addressed at offset -40 from %ebp; the routine's comments describe it as sow * 4 \*/
#define sow_4 -40(%ebp)
67
/*\ When %edx points to ImlibScaleInfo, these are the members \*/
68
/*\ The members below are addressed at consecutive 4-byte offsets (0, 4, 8, 12, 16) from %edx \*/
#define xpoints (%edx)
69
#define ypoints 4(%edx)
70
#define xapoints 8(%edx)
71
#define yapoints 12(%edx)
72
#define xup_yup 16(%edx)
74
/*\ Entry point of the scaler. NOTE(review): this listing is an excerpt with most instructions missing; the added notes cover only the lines shown \*/
PR_(qimageScale_mmx_AARGBA):
85
/*\ Check (dw > 0) && (dh > 0) \*/
91
/*\ X-based array pointers point to the end; we're looping up to 0 \*/
92
/*\ %edi = dest + dow * dy + dx + dw \*/
98
/*\ %edi += %eax * 4: the scale factor of 4 turns the index into a byte offset \*/
leal (%edi, %eax, 4), %edi
99
/*\ xp = xpoints + dxx + dw \*/
103
/*\ Same pattern for each of the four table pointers below: %eax += %ebx * 4 \*/
leal (%eax, %ebx, 4), %eax
105
/*\ xap = xapoints + dxx + dw \*/
107
leal (%eax, %ebx, 4), %eax
112
/*\ yp = ypoints + dyy \*/
115
leal (%eax, %ebx, 4), %eax
117
/*\ yap = yapoints + dyy \*/
119
leal (%eax, %ebx, 4), %eax
132
/*\ Carry clear: continue at .scale_x_up_y_down, otherwise fall through. The instruction that sets the carry flag is not visible in this excerpt \*/
jnc .scale_x_up_y_down
135
/*\ Scaling up both ways \*/
146
/*\ %eax = *yap << 4 \*/
156
/*\ %esi = *yp + xp[x] \*/
160
/*\ Load the 32-bit entry at %eax + %ecx * 4, then add it, scaled by 4, to %esi \*/
movl (%eax, %ecx, 4), %eax
161
leal (%esi, %eax, 4), %esi
163
/*\ %eax = xap[x] << 4 \*/
165
movl (%eax, %ecx, 4), %eax
169
/*\ %mm0 = xap[x] << 4 \*/
174
/*\ Load and unpack four pixels in parallel
175
|*| %mm2 = ptr[0], %mm3 = ptr[1]
176
|*| %mm4 = ptr[sow], %mm5 = ptr[sow + 1]
179
movq (%esi, %ebx, 4), %mm4
187
/*\ X interpolation: r = l + (r - l) * xap \*/
196
/*\ Now %mm3 = I(ptr[0], ptr[1]), %mm5 = I(ptr[sow], ptr[sow + 1]) \*/
199
/*\ Load and unpack two pixels
200
|*| %mm3 = ptr[0], %mm5 = ptr[sow]
203
movd (%esi, %ebx, 4), %mm5
207
/*\ Y interpolation: d = u + (d - u) * yap \*/
213
/*\ Store the low 32 bits of %mm5 at %edi + %ecx * 4 \*/
movd %mm5, (%edi, %ecx, 4)
222
/*\ %esi = *yp + xp[x] \*/
226
movl (%eax, %ecx, 4), %eax
227
leal (%esi, %eax, 4), %esi
229
/*\ %eax = xap[x] << 4 \*/
231
movl (%eax, %ecx, 4), %eax
235
/*\ %mm0 = xap[x] << 4 \*/
240
/*\ Load and unpack two pixels in parallel
241
|*| %mm2 = ptr[0], %mm3 = ptr[1]
248
/*\ X interpolation: r = l + (r - l) * xap \*/
254
/*\ Store the low 32 bits of %mm3 at %edi + %ecx * 4 \*/
movd %mm3, (%edi, %ecx, 4)
257
/*\ dptr[x] = *sptr \*/
259
/*\ Plain 32-bit store of %eax at %edi + %ecx * 4; no MMX register is involved here \*/
movl %eax, (%edi, %ecx, 4)
267
/*\ %edi += %eax * 4; presumably steps the destination pointer to the next row - confirm against the full source \*/
leal (%edi, %eax, 4), %edi
278
/*\ Scaling down vertically \*/
281
/*\ sow_4 = sow * 4 \*/
288
/*\ Setup My and Cy \*/
308
/*\ %esi = *yp + xp[x] \*/
312
movl (%eax, %ecx, 4), %eax
313
leal (%esi, %eax, 4), %esi
316
/*\ v = (*p * My) >> 10 \*/
322
/*\ i = 0x4000 - My \*/
328
/*\ p += sow; v += (*p * Cy) >> 10 \*/
336
/*\ i -= Cy; while (i > Cy) \*/
347
/*\ p += sow; v += (*p * i) >> 10 \*/
355
/*\ %eax = xap[x] << 5 \*/
357
movl (%eax, %ecx, 4), %eax
360
/*\ mm3 = xap[x] << 5 \*/
368
/*\ vv = (*p * My) >> 10 \*/
374
/*\ i = 0x4000 - My \*/
380
/*\ p += sow; vv += (*p * Cy) >> 10 \*/
388
/*\ i -= Cy; while (i > Cy) \*/
394
/*\ NOTE(review): the two steps above accumulate into vv, so the "v" in the next comment presumably should read "vv" - confirm against the instructions \*/
/*\ p += sow; v += (*p * i) >> 10 \*/
402
/*\ v = v + (vv - v) * xap \*/
408
/*\ dest[x] = v >> 4 \*/
411
/*\ Store the low 32 bits of %mm0 at %edi + %ecx * 4 \*/
movd %mm0, (%edi, %ecx, 4)
419
leal (%edi, %eax, 4), %edi
432
/*\ Carry clear: continue at .scale_x_down_y_down, otherwise fall through. The instruction that sets the carry flag is not visible in this excerpt \*/
jnc .scale_x_down_y_down
435
/*\ Scaling down horizontally \*/
438
/*\ sow_4 = sow * 4 \*/
445
/*\ %eax = *yap << 5 \*/
449
/*\ mm3 = *yap << 5 \*/
458
/*\ %esi = *yp + xp[x] \*/
462
movl (%eax, %ecx, 4), %eax
463
leal (%esi, %eax, 4), %esi
465
/*\ Setup Mx and Cx \*/
467
/*\ Two zero-extending 16-bit loads: the low half (offset 0) and the high half (offset 2) of the 32-bit entry at %eax + %ecx * 4 \*/
movzwl (%eax, %ecx, 4), %ebx
469
movzwl 2(%eax, %ecx, 4), %eax
482
/*\ v = (*p * Mx) >> 10 \*/
488
/*\ i = 0x4000 - Mx \*/
494
/*\ NOTE(review): the comments in this horizontal section step with "p += sow" like the vertical section, while the both-ways section steps horizontally with "*++p" - confirm against the instructions \*/
/*\ p += sow; v += (*p * Cx) >> 10 \*/
502
/*\ i -= Cx; while (i > Cx) \*/
513
/*\ p += sow; v += (*p * i) >> 10 \*/
527
/*\ vv = (*p * Mx) >> 10 \*/
533
/*\ i = 0x4000 - Mx \*/
539
/*\ p += sow; vv += (*p * Cx) >> 10 \*/
547
/*\ i -= Cx; while (i > Cx) \*/
553
/*\ NOTE(review): the two steps above accumulate into vv, so the "v" in the next comment presumably should read "vv" - confirm against the instructions \*/
/*\ p += sow; v += (*p * i) >> 10 \*/
561
/*\ v = v + (vv - v) * yap \*/
567
/*\ dest[x] = v >> 4 \*/
570
/*\ Store the low 32 bits of %mm0 at %edi + %ecx * 4 \*/
movd %mm0, (%edi, %ecx, 4)
578
leal (%edi, %eax, 4), %edi
589
/*\ Scaling down both ways \*/
591
/*\ Target of the "jnc .scale_x_down_y_down" branch above \*/
.scale_x_down_y_down:
592
/*\ sow_4 = sow * 4 \*/
599
/*\ Setup My and Cy \*/
610
/*\ %esi = *yp + xp[x] \*/
614
movl (%eax, %ecx, 4), %eax
615
leal (%esi, %eax, 4), %esi
617
/*\ Setup Mx and Cx \*/
619
/*\ Two zero-extending 16-bit loads: the low half (offset 0) and the high half (offset 2) of the 32-bit entry at %eax + %ecx * 4 \*/
movzwl (%eax, %ecx, 4), %ebx
621
movzwl 2(%eax, %ecx, 4), %eax
633
/*\ p = sptr; v = (*p * Mx) >> 9 \*/
640
/*\ i = 0x4000 - Mx \*/
646
/*\ v += (*++p * Cx) >> 9 \*/
654
/*\ i -= Cx; while (i > Cx) \*/
665
/*\ v += (*++p * i) >> 9 \*/
680
/*\ j = 0x4000 - My \*/
686
/*\ sptr += sow; p = sptr \*/
689
/*\ vx = (*p * Mx) >> 9 \*/
695
/*\ i = 0x4000 - Mx \*/
701
/*\ vx += (*++p * Cx) >> 9 \*/
709
/*\ i -= Cx; while (i > Cx) \*/
715
/*\ vx += (*++p * i) >> 9 \*/
723
/*\ v += (vx * Cy) >> 14 \*/
731
/*\ j -= Cy; while (j > Cy) \*/
737
/*\ sptr += sow; p = sptr \*/
740
/*\ vx = (*p * Mx) >> 9 \*/
746
/*\ i = 0x4000 - Mx \*/
752
/*\ vx += (*++p * Cx) >> 9 \*/
760
/*\ i -= Cx; while (i > Cx) \*/
766
/*\ vx += (*++p * i) >> 9 \*/
774
/*\ v += (vx * j) >> 14 \*/
782
/*\ dptr[x] = mm0 >> 5 \*/
785
/*\ Store the low 32 bits of %mm0 at %edi + %ecx * 4 \*/
movd %mm0, (%edi, %ecx, 4)
789
/*\ Repeat the per-pixel loop while the zero flag is clear; the instruction that sets it is not visible in this excerpt \*/
jnz .down_down_loop_x
793
leal (%edi, %eax, 4), %edi
799
/*\ Repeat the per-row loop while the zero flag is clear; the instruction that sets it is not visible in this excerpt \*/
jnz .down_down_loop_y
814
/*\ SIZE() is defined outside this excerpt; the visible ".size" continuation lines suggest it records the symbol's size - confirm \*/
SIZE(qimageScale_mmx_AARGBA)