1
/*****************************************************************
5
Copyright (c) 2001-2002 by Burkhard Plaum - plaum@ipf.uni-stuttgart.de
7
http://gmerlin.sourceforge.net
9
This program is distributed in the hope that it will be useful,
10
but WITHOUT ANY WARRANTY; without even the implied warranty of
11
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
GNU General Public License for more details.
14
You should have received a copy of the GNU General Public License
15
along with this program; if not, write to the Free Software
16
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
18
*****************************************************************/
24
#include <colorspace.h>
25
#include <attributes.h>
29
/* MOVQ_R2M(reg, mem), first variant: forwards to movntq_r2m(reg, mem).
 * NOTE(review): a second definition of the same macro follows below; the
 * preprocessor conditional that has to select between the two is not visible
 * in this excerpt -- confirm it exists, otherwise this is a redefinition. */
#define MOVQ_R2M(reg,mem) movntq_r2m(reg, mem)
31
/* MOVQ_R2M(reg, mem), second variant: forwards to movq_r2m(reg, mem). */
#define MOVQ_R2M(reg,mem) movq_r2m(reg, mem)
35
#include "interpolate.h"
37
/* SCALE_FUNC_HEAD(a): starts a loop that runs i from 0 up to (but excluding)
 * s->table[plane].num_coeffs_h, advancing i by `a` per iteration.
 * Expects i, s and plane to be in scope where the macro is expanded.
 * The rest of the macro body is not visible in this excerpt. */
#define SCALE_FUNC_HEAD(a) \
38
for(i = 0; i < s->table[plane].num_coeffs_h; i+=a) \
41
#define SCALE_FUNC_TAIL \
46
/* Bilinear in x direction.
 *
 * LOAD_FACTORS_BILINEAR_X: takes the integer factor of the first horizontal
 * coefficient for index i (coeffs_h[i].factor[0].fac_i), shifts it right by
 * one bit and replicates the result into tmp.uw[0..3].
 * Expects s, plane, i and tmp to be in scope where the macro is expanded.
 * The macro continues past the last line shown here (trailing backslash);
 * the remainder is not visible in this excerpt. */
48
#define LOAD_FACTORS_BILINEAR_X \
49
tmp.uw[0] = (s->table[plane].coeffs_h[i].factor[0].fac_i >> 1);\
50
tmp.uw[1] = tmp.uw[0]; \
51
tmp.uw[2] = tmp.uw[0]; \
52
tmp.uw[3] = tmp.uw[0]; \
55
/* LOAD_FACTORS_BILINEAR_Y: vertical counterpart of the X factor loader.
 * Takes the integer factor of the first vertical coefficient for the current
 * scanline (coeffs_v[scanline].factor[0].fac_i), shifts it right by one bit
 * and replicates the result into tmp.uw[0..3].
 * Expects s, plane, scanline and tmp to be in scope where the macro is
 * expanded. The macro continues past the last line shown here (trailing
 * backslash); the remainder is not visible in this excerpt. */
#define LOAD_FACTORS_BILINEAR_Y \
56
tmp.uw[0] = (s->table[plane].coeffs_v[scanline].factor[0].fac_i >> 1);\
57
tmp.uw[1] = tmp.uw[0]; \
58
tmp.uw[2] = tmp.uw[0]; \
59
tmp.uw[3] = tmp.uw[0]; \
64
/* Horizontal bilinear scaling using the *_15 source-load macros.
 * Only part of the body is visible in this excerpt (parameter list, local
 * declarations, loop head/tail and the write-back are missing). Visible steps:
 *   - view the input row (_src + scanline * src_stride) and _dst as uint16_t,
 *   - for index i, point src_1 at the source element selected by
 *     coeffs_h[i].index,
 *   - load the horizontal factor and the two source operands. */
static void scale_x_15_16_bilinear_mmx(gavl_video_scaler_t * s,
79
src = (uint16_t*)(_src + scanline * src_stride);
80
dst = (uint16_t*)_dst;
83
/* Source offset is in uint16_t elements because src is a uint16_t pointer. */
src_1 = src + s->table[plane].coeffs_h[i].index;
86
LOAD_FACTORS_BILINEAR_X
87
INTERPOLATE_1D_LOAD_SRC_1_15
88
INTERPOLATE_1D_LOAD_SRC_2_15
97
/* Horizontal bilinear scaling using the *_16 source-load macros.
 * Only part of the body is visible in this excerpt (parameter list, local
 * declarations, loop head/tail and the write-back are missing). Visible steps:
 *   - INTERPOLATE_INIT_TEMP (defined outside this excerpt),
 *   - view the input row (_src + scanline * src_stride) and _dst as uint16_t,
 *   - for index i, point src_1 at the source element selected by
 *     coeffs_h[i].index,
 *   - load the horizontal factor and the two source operands. */
static void scale_x_16_16_bilinear_mmx(gavl_video_scaler_t * s,
110
INTERPOLATE_INIT_TEMP
112
src = (uint16_t*)(_src + scanline * src_stride);
113
dst = (uint16_t*)_dst;
116
/* Source offset is in uint16_t elements because src is a uint16_t pointer. */
src_1 = src + s->table[plane].coeffs_h[i].index;
119
LOAD_FACTORS_BILINEAR_X
120
INTERPOLATE_1D_LOAD_SRC_1_16
121
INTERPOLATE_1D_LOAD_SRC_2_16
130
/* Horizontal bilinear scaling using the *_24 load macros and the RGB24 writer.
 * Only part of the body is visible in this excerpt (parameter list, local
 * declarations and loop head/tail are missing). Visible steps:
 *   - src is the start of the input row (_src + scanline * src_stride),
 *   - for index i, src_1 is placed 3 * coeffs_h[i].index past src,
 *   - load the horizontal factor and the two source operands, then write
 *     the result with INTERPOLATE_WRITE_RGB24. */
static void scale_x_24_24_bilinear_mmx(gavl_video_scaler_t * s,
142
INTERPOLATE_INIT_TEMP
144
src = _src + scanline * src_stride;
147
/* Factor 3: presumably 3 bytes per source pixel -- TODO confirm src type. */
src_1 = src + 3 * s->table[plane].coeffs_h[i].index;
150
LOAD_FACTORS_BILINEAR_X
151
INTERPOLATE_1D_LOAD_SRC_1_24
152
INTERPOLATE_1D_LOAD_SRC_2_24
154
INTERPOLATE_WRITE_RGB24
161
#endif // SLOW_FUNCTIONS
163
/* Horizontal bilinear scaling using the *_32 load macros and the RGB32 writer.
 * Only part of the body is visible in this excerpt (parameter list, local
 * declarations and loop head/tail are missing). Visible steps:
 *   - src is the start of the input row (_src + scanline * src_stride),
 *   - for index i, src_1 is placed 4 * coeffs_h[i].index past src,
 *   - load the horizontal factor and the two source operands, then write
 *     the result with INTERPOLATE_WRITE_RGB32. */
static void scale_x_24_32_bilinear_mmx(gavl_video_scaler_t * s,
175
INTERPOLATE_INIT_TEMP
177
src = _src + scanline * src_stride;
180
/* Factor 4: presumably 4 bytes per source pixel -- TODO confirm src type. */
src_1 = src + 4 * s->table[plane].coeffs_h[i].index;
183
LOAD_FACTORS_BILINEAR_X
184
INTERPOLATE_1D_LOAD_SRC_1_32
185
INTERPOLATE_1D_LOAD_SRC_2_32
187
INTERPOLATE_WRITE_RGB32
195
/* Horizontal bilinear scaling using the *_32 load macros and the RGBA32
 * writer. Only part of the body is visible in this excerpt (parameter list,
 * local declarations and loop head/tail are missing). Visible steps:
 *   - src is the start of the input row (_src + scanline * src_stride),
 *   - for index i, src_1 is placed 4 * coeffs_h[i].index past src,
 *   - load the horizontal factor and the two source operands, then write
 *     the result with INTERPOLATE_WRITE_RGBA32. */
static void scale_x_32_32_bilinear_mmx(gavl_video_scaler_t * s,
207
INTERPOLATE_INIT_TEMP
209
src = _src + scanline * src_stride;
212
/* Factor 4: presumably 4 bytes per source pixel -- TODO confirm src type. */
src_1 = src + 4 * s->table[plane].coeffs_h[i].index;
215
LOAD_FACTORS_BILINEAR_X
216
INTERPOLATE_1D_LOAD_SRC_1_32
217
INTERPOLATE_1D_LOAD_SRC_2_32
219
INTERPOLATE_WRITE_RGBA32
227
#ifdef SLOW_FUNCTIONS
229
/* Horizontal bilinear scaling of single-component samples, handling the four
 * consecutive indices i..i+3 per step. Only part of the body is visible in
 * this excerpt (parameter list, local declarations, loop head/tail and the
 * statements that consume tmp between the three gather groups are missing).
 * NOTE(review): this function appears to sit inside an
 * `#ifdef SLOW_FUNCTIONS` region -- confirm whether it is ever compiled. */
static void scale_x_8_bilinear_mmx(gavl_video_scaler_t * s,
238
INTERPOLATE_INIT_TEMP
239
src = _src + scanline * src_stride;
242
/* Gather, for each of the four indices, the source sample at
 * coeffs_h[].index. */
tmp.uw[0] = *(src + s->table[plane].coeffs_h[i].index);
243
tmp.uw[1] = *(src + s->table[plane].coeffs_h[i+1].index);
244
tmp.uw[2] = *(src + s->table[plane].coeffs_h[i+2].index);
245
tmp.uw[3] = *(src + s->table[plane].coeffs_h[i+3].index);
248
/* Gather the neighbouring samples at coeffs_h[].index + 1. */
tmp.uw[0] = *(src + s->table[plane].coeffs_h[i].index+1);
249
tmp.uw[1] = *(src + s->table[plane].coeffs_h[i+1].index+1);
250
tmp.uw[2] = *(src + s->table[plane].coeffs_h[i+2].index+1);
251
tmp.uw[3] = *(src + s->table[plane].coeffs_h[i+3].index+1);
254
/* NOTE(review): the next four lines add factor[0] to src and dereference
 * the result. LOAD_FACTORS_BILINEAR_X reads the weight as
 * factor[0].fac_i >> 1, so this looks like a copy/paste of the sample
 * gather above rather than a factor load -- likely intended
 * coeffs_h[i+k].factor[0].fac_i >> 1. Confirm before enabling this code. */
tmp.uw[0] = *(src + s->table[plane].coeffs_h[i].factor[0]);
255
tmp.uw[1] = *(src + s->table[plane].coeffs_h[i+1].factor[0]);
256
tmp.uw[2] = *(src + s->table[plane].coeffs_h[i+2].factor[0]);
257
tmp.uw[3] = *(src + s->table[plane].coeffs_h[i+3].factor[0]);
261
INTERPOLATE_WRITE_RGBA32
269
/* Horizontal bilinear scaling of single-component samples that are spaced
 * `advance` elements apart in both source and destination, handling the four
 * consecutive indices i..i+3 per step. Only part of the body is visible in
 * this excerpt (parameter list, local declarations, loop head/tail and the
 * statements that consume tmp between the gather groups are missing).
 * NOTE(review): this function appears to sit inside an
 * `#ifdef SLOW_FUNCTIONS` region -- confirm whether it is ever compiled. */
static void scale_x_8_bilinear_advance(gavl_video_scaler_t * s,
279
INTERPOLATE_INIT_TEMP
280
src = _src + scanline * src_stride;
283
/* Gather, for each of the four indices, the source sample at
 * advance * coeffs_h[].index. */
tmp.uw[0] = *(src + advance * s->table[plane].coeffs_h[i].index);
284
tmp.uw[1] = *(src + advance * s->table[plane].coeffs_h[i+1].index);
285
tmp.uw[2] = *(src + advance * s->table[plane].coeffs_h[i+2].index);
286
tmp.uw[3] = *(src + advance * s->table[plane].coeffs_h[i+3].index);
289
/* Gather the neighbouring samples, one `advance` step further. */
tmp.uw[0] = *(src + advance * (s->table[plane].coeffs_h[i].index + 1));
290
tmp.uw[1] = *(src + advance * (s->table[plane].coeffs_h[i+1].index + 1));
291
tmp.uw[2] = *(src + advance * (s->table[plane].coeffs_h[i+2].index + 1));
292
tmp.uw[3] = *(src + advance * (s->table[plane].coeffs_h[i+3].index + 1));
295
/* NOTE(review): factor[0] is assigned directly here, whereas
 * LOAD_FACTORS_BILINEAR_X reads the weight as factor[0].fac_i >> 1.
 * Likely intended coeffs_h[i+k].factor[0].fac_i >> 1 -- confirm. */
tmp.uw[0] = s->table[plane].coeffs_h[i].factor[0];
296
tmp.uw[1] = s->table[plane].coeffs_h[i+1].factor[0];
297
tmp.uw[2] = s->table[plane].coeffs_h[i+2].factor[0];
298
tmp.uw[3] = s->table[plane].coeffs_h[i+3].factor[0];
304
/* Scatter bytes 2, 4 and 6 of tmp to the destination, `advance` elements
 * apart (the store of the first result is not visible in this excerpt). */
dst[advance] = tmp.ub[2];
305
dst[2*advance] = tmp.ub[4];
306
dst[3*advance] = tmp.ub[6];
313
/* Horizontal bilinear scaling for YUY2: delegates to
 * scale_x_8_bilinear_advance() three times. The arguments of the three calls
 * are not visible in this excerpt; presumably one call per component with a
 * matching offset/advance -- TODO confirm against the full source. */
static void scale_x_yuy2_bilinear_mmx(gavl_video_scaler_t * s,
320
scale_x_8_bilinear_advance(s,
327
scale_x_8_bilinear_advance(s,
334
scale_x_8_bilinear_advance(s,
343
/* Horizontal bilinear scaling for UYVY: delegates to
 * scale_x_8_bilinear_advance() three times. The arguments of the three calls
 * are not visible in this excerpt; presumably one call per component with a
 * matching offset/advance -- TODO confirm against the full source. */
static void scale_x_uyvy_bilinear_mmx(gavl_video_scaler_t * s,
350
scale_x_8_bilinear_advance(s,
357
scale_x_8_bilinear_advance(s,
364
scale_x_8_bilinear_advance(s,
378
/* Vertical bilinear scaling using the *_15 source-load macros.
 * Only part of the body is visible in this excerpt (parameter list, remaining
 * declarations, loop head/tail and the write-back are missing). Visible steps:
 *   - src_start_1 is the input row selected by coeffs_v[scanline].index,
 *     src_start_2 the row src_stride bytes after it,
 *   - the vertical factor is loaded once,
 *   - for index i, src_1/src_2 address element i of the two rows and the two
 *     source operands are loaded. */
static void scale_y_15_16_bilinear_mmx(gavl_video_scaler_t * s,
389
uint16_t * src_start_1;
390
uint16_t * src_start_2;
392
INTERPOLATE_INIT_TEMP
394
src_start_1 = (uint16_t*)(src + s->table[plane].coeffs_v[scanline].index * src_stride);
395
/* Cast back to bytes so that src_stride is applied as a byte offset. */
src_start_2 = (uint16_t*)((uint8_t*)src_start_1 + src_stride);
396
dst = (uint16_t*)_dst;
398
LOAD_FACTORS_BILINEAR_Y
401
src_1 = src_start_1 + i;
402
src_2 = src_start_2 + i;
404
INTERPOLATE_1D_LOAD_SRC_1_15
405
INTERPOLATE_1D_LOAD_SRC_2_15
414
/* Vertical bilinear scaling using the *_16 source-load macros.
 * Only part of the body is visible in this excerpt (parameter list, remaining
 * declarations, loop head/tail and the write-back are missing). Visible steps:
 *   - src_start_1 is the input row selected by coeffs_v[scanline].index,
 *     src_start_2 the row src_stride bytes after it,
 *   - the vertical factor is loaded once,
 *   - for index i, src_1/src_2 address element i of the two rows and the two
 *     source operands are loaded. */
static void scale_y_16_16_bilinear_mmx(gavl_video_scaler_t * s,
425
uint16_t * src_start_1;
426
uint16_t * src_start_2;
428
INTERPOLATE_INIT_TEMP
429
src_start_1 = (uint16_t*)(src + s->table[plane].coeffs_v[scanline].index * src_stride);
430
/* Cast back to bytes so that src_stride is applied as a byte offset. */
src_start_2 = (uint16_t*)((uint8_t*)src_start_1 + src_stride);
432
dst = (uint16_t*)_dst;
434
LOAD_FACTORS_BILINEAR_Y
437
src_1 = src_start_1 + i;
438
src_2 = src_start_2 + i;
440
INTERPOLATE_1D_LOAD_SRC_1_16
441
INTERPOLATE_1D_LOAD_SRC_2_16
450
/* Vertical bilinear scaling using the *_24 load macros and the RGB24 writer.
 * Only part of the body is visible in this excerpt (parameter list, remaining
 * declarations and loop head/tail are missing). Visible steps:
 *   - src_start_1 is the input row selected by coeffs_v[scanline].index,
 *     src_start_2 the row src_stride bytes after it,
 *   - the vertical factor is loaded once,
 *   - for index i, src_1/src_2 are placed 3 * i bytes into the two rows, the
 *     two source operands are loaded and the result is written with
 *     INTERPOLATE_WRITE_RGB24. */
static void scale_y_24_24_bilinear_mmx(gavl_video_scaler_t * s,
458
uint8_t * src_start_1;
459
uint8_t * src_start_2;
463
INTERPOLATE_INIT_TEMP
465
src_start_1 = src + s->table[plane].coeffs_v[scanline].index * src_stride;
466
src_start_2 = src_start_1 + src_stride;
468
LOAD_FACTORS_BILINEAR_Y
471
src_1 = src_start_1 + 3 * i;
472
src_2 = src_start_2 + 3 * i;
474
INTERPOLATE_1D_LOAD_SRC_1_24
475
INTERPOLATE_1D_LOAD_SRC_2_24
477
INTERPOLATE_WRITE_RGB24
483
#endif // SLOW_FUNCTIONS
485
/* Vertical bilinear scaling using the *_32 load macros and the RGB32 writer.
 * Only part of the body is visible in this excerpt (parameter list, remaining
 * declarations and loop head/tail are missing). Visible steps:
 *   - src_start_1 is the input row selected by coeffs_v[scanline].index,
 *     src_start_2 the row src_stride bytes after it,
 *   - the vertical factor is loaded once,
 *   - for index i, src_1/src_2 are placed 4 * i bytes into the two rows, the
 *     two source operands are loaded and the result is written with
 *     INTERPOLATE_WRITE_RGB32. */
static void scale_y_24_32_bilinear_mmx(gavl_video_scaler_t * s,
493
uint8_t * src_start_1;
494
uint8_t * src_start_2;
498
INTERPOLATE_INIT_TEMP
500
src_start_1 = src + s->table[plane].coeffs_v[scanline].index * src_stride;
501
src_start_2 = src_start_1 + src_stride;
503
LOAD_FACTORS_BILINEAR_Y
506
src_1 = src_start_1 + 4 * i;
507
src_2 = src_start_2 + 4 * i;
509
INTERPOLATE_1D_LOAD_SRC_1_32
510
INTERPOLATE_1D_LOAD_SRC_2_32
512
INTERPOLATE_WRITE_RGB32
519
/* Vertical bilinear scaling using the *_32 load macros and the RGBA32 writer.
 * Only part of the body is visible in this excerpt (parameter list, remaining
 * declarations and loop head/tail are missing). Visible steps:
 *   - src_start_1 is the input row selected by coeffs_v[scanline].index,
 *     src_start_2 the row src_stride bytes after it,
 *   - the vertical factor is loaded once,
 *   - for index i, src_1/src_2 are placed 4 * i bytes into the two rows, the
 *     two source operands are loaded and the result is written with
 *     INTERPOLATE_WRITE_RGBA32. */
static void scale_y_32_32_bilinear_mmx(gavl_video_scaler_t * s,
527
uint8_t * src_start_1;
528
uint8_t * src_start_2;
532
INTERPOLATE_INIT_TEMP
534
src_start_1 = src + s->table[plane].coeffs_v[scanline].index * src_stride;
535
src_start_2 = src_start_1 + src_stride;
537
LOAD_FACTORS_BILINEAR_Y
540
src_1 = src_start_1 + 4 * i;
541
src_2 = src_start_2 + 4 * i;
543
INTERPOLATE_1D_LOAD_SRC_1_32
544
INTERPOLATE_1D_LOAD_SRC_2_32
546
INTERPOLATE_WRITE_RGBA32
553
/* Vertical bilinear scaling of single-component samples, reusing the *_32
 * load macros and the RGBA32 writer. Only part of the body is visible in this
 * excerpt (parameter list, local declarations, loop head/tail and pointer
 * advancing are missing). Visible steps:
 *   - src_1 is the input row selected by coeffs_v[scanline].index, src_2 the
 *     row src_stride bytes after it,
 *   - the vertical factor (factor[0].fac_i >> 1) is replicated into
 *     tmp.uw[0..3], i.e. the same assignments as LOAD_FACTORS_BILINEAR_Y,
 *   - the two source operands are loaded and the result is written.
 * NOTE(review): using the 32-bit load/write macros suggests several samples
 * are handled per step -- presumably four; confirm against the macro
 * definitions in interpolate.h. */
static void scale_y_8_bilinear_mmx(gavl_video_scaler_t * s,
564
INTERPOLATE_INIT_TEMP
566
src_1 = src + s->table[plane].coeffs_v[scanline].index * src_stride;
567
src_2 = src_1 + src_stride;
569
tmp.uw[0] = (s->table[plane].coeffs_v[scanline].factor[0].fac_i >> 1);
570
tmp.uw[1] = tmp.uw[0];
571
tmp.uw[2] = tmp.uw[0];
572
tmp.uw[3] = tmp.uw[0];
578
INTERPOLATE_1D_LOAD_SRC_1_32
579
INTERPOLATE_1D_LOAD_SRC_2_32
581
INTERPOLATE_WRITE_RGBA32
591
/* Vertical bilinear scaling for packed YUV rows, reusing the *_32 load macros
 * and the RGBA32 writer. Only part of the body is visible in this excerpt
 * (parameter list, local declarations, loop head/tail and pointer advancing
 * are missing). Visible steps:
 *   - src_1 is the input row selected by coeffs_v[scanline].index, src_2 the
 *     row src_stride bytes after it,
 *   - the vertical factor (factor[0].fac_i >> 1) is replicated into
 *     tmp.uw[0..3], i.e. the same assignments as LOAD_FACTORS_BILINEAR_Y,
 *   - the two source operands are loaded and the result is written. */
static void scale_y_yuv_packed_bilinear_mmx(gavl_video_scaler_t * s,
602
INTERPOLATE_INIT_TEMP
604
src_1 = src + s->table[plane].coeffs_v[scanline].index * src_stride;
605
src_2 = src_1 + src_stride;
607
tmp.uw[0] = (s->table[plane].coeffs_v[scanline].factor[0].fac_i >> 1);
608
tmp.uw[1] = tmp.uw[0];
609
tmp.uw[2] = tmp.uw[0];
610
tmp.uw[3] = tmp.uw[0];
617
INTERPOLATE_1D_LOAD_SRC_1_32
618
INTERPOLATE_1D_LOAD_SRC_2_32
620
INTERPOLATE_WRITE_RGBA32
633
void gavl_init_scale_funcs_mmxext(gavl_scale_funcs_t * tab,
634
gavl_scale_mode_t scale_mode,
635
int scale_x, int scale_y, int min_scanline_width)
637
void gavl_init_scale_funcs_mmx(gavl_scale_funcs_t * tab,
638
gavl_scale_mode_t scale_mode,
639
int scale_x, int scale_y, int min_scanline_width)
645
// fprintf(stderr, "gavl_init_scale_funcs_mmx %d %d\n", scale_x, scale_y);
648
case GAVL_SCALE_AUTO:
649
case GAVL_SCALE_NEAREST:
651
case GAVL_SCALE_BILINEAR:
652
if(scale_x && scale_y)
655
tab->scale_15_16 = scale_xy_15_16_bilinear_mmx;
656
tab->scale_16_16 = scale_xy_16_16_bilinear_mmx;
657
tab->scale_24_24 = scale_xy_24_24_bilinear_mmx;
658
tab->scale_24_32 = scale_xy_24_32_bilinear_mmx;
659
tab->scale_32_32 = scale_xy_32_32_bilinear_mmx;
661
tab->scale_8 = scale_xy_8_bilinear_mmx;
662
tab->scale_yuy2 = scale_xy_yuy2_bilinear_mmx;
663
tab->scale_uyvy = scale_xy_uyvy_bilinear_mmx;
668
#ifdef SLOW_FUNCTIONS
669
tab->scale_15_16 = scale_x_15_16_bilinear_mmx;
670
tab->scale_16_16 = scale_x_16_16_bilinear_mmx;
671
tab->scale_24_24 = scale_x_24_24_bilinear_mmx;
672
tab->scale_yuy2 = scale_x_yuy2_bilinear_mmx;
673
tab->scale_uyvy = scale_x_uyvy_bilinear_mmx;
674
if(!(min_scanline_width % 4))
676
tab->scale_8 = scale_x_8_bilinear_mmx;
679
tab->scale_32_32 = scale_x_32_32_bilinear_mmx;
680
tab->scale_24_32 = scale_x_24_32_bilinear_mmx;
684
#ifdef SLOW_FUNCTIONS
685
tab->scale_15_16 = scale_y_15_16_bilinear_mmx;
686
tab->scale_16_16 = scale_y_16_16_bilinear_mmx;
687
tab->scale_24_24 = scale_y_24_24_bilinear_mmx;
689
tab->scale_24_32 = scale_y_24_32_bilinear_mmx;
690
tab->scale_32_32 = scale_y_32_32_bilinear_mmx;
691
if(!(min_scanline_width % 4))
693
tab->scale_8 = scale_y_8_bilinear_mmx;
696
tab->scale_yuy2 = scale_y_yuv_packed_bilinear_mmx;
697
tab->scale_uyvy = scale_y_yuv_packed_bilinear_mmx;
700
case GAVL_SCALE_NONE: