1
/*****************************************************************
2
* gavl - a general purpose audio/video processing library
4
* Copyright (c) 2001 - 2008 Members of the Gmerlin project
5
* gmerlin-general@lists.sourceforge.net
6
* http://gmerlin.sourceforge.net
8
* This program is free software: you can redistribute it and/or modify
9
* it under the terms of the GNU General Public License as published by
10
* the Free Software Foundation, either version 2 of the License, or
11
* (at your option) any later version.
13
* This program is distributed in the hope that it will be useful,
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
* GNU General Public License for more details.
18
* You should have received a copy of the GNU General Public License
19
* along with this program. If not, see <http://www.gnu.org/licenses/>.
20
* *****************************************************************/
23
#include <attributes.h>
26
#include <gavl/gavl.h>
30
#include "../mmx/mmx.h"
31
#include "../sse/sse.h"
33
/*
 * Constant operands used by the SSE2 macros in this file.
 *
 * factor_mask has only its first .uw entry set; LOAD_FACTOR_8/16 AND it
 * into a freshly loaded factor so that only the lowest word survives.
 * NOTE(review): sse_t is declared elsewhere; .uw is presumably a view of
 * eight unsigned 16-bit lanes - confirm against sse.h.
 */
static const sse_t factor_mask = { .uw = { 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 } };
35
/*
 * Lower (0x0000) and upper (0x1FFF) per-lane bounds applied with
 * pmaxsw/pminsw to xmm3 and xmm4 before the clipped 16-bit store.
 */
static const sse_t min_13 = { .uw = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 } };
36
static const sse_t max_13 = { .uw = { 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF } };
40
/*
 * Debug helper: stores a register into mm_tmp through MOVQ_R2M and prints
 * mm_tmp.q to stderr as a 16-digit hex value, prefixed with name.
 * NOTE(review): mm_tmp is not declared in the visible part of the file.
 */
#define DUMP_MM(name, reg) MOVQ_R2M(reg, mm_tmp);\
41
fprintf(stderr, "%s: %016llx\n", name, mm_tmp.q);
45
/*
 * Two alternative definitions of MOVQ_R2M follow (movntq vs. movq).
 * NOTE(review): they are presumably the two arms of a preprocessor
 * conditional whose #if/#else/#endif lines are not visible here.
 */
#define MOVQ_R2M(reg,mem) movntq_r2m(reg, mem)
47
#define MOVQ_R2M(reg,mem) movq_r2m(reg, mem)
61
/*
 * Global setup for the 8-bit routines: xmm6 is XORed with itself (all
 * zero) and factor_mask is loaded into xmm7. ACCUM_8 later uses xmm6 as
 * the zero operand of the byte unpacks, LOAD_FACTOR_8 uses xmm7 as the
 * AND mask.
 */
#define INIT_8_GLOBAL \
63
pxor_r2r(xmm6, xmm6);\
64
movaps_m2r(factor_mask, xmm7);
67
/*
 * Clears the xmm3 accumulator.
 * NOTE(review): the #define line this statement belongs to is not visible
 * in this excerpt (presumably a per-pixel init macro).
 */
pxor_r2r(xmm3, xmm3);\
70
/*
 * Global setup for the 16-bit routines; the visible body is identical to
 * INIT_8_GLOBAL (zero xmm6, factor_mask into xmm7).
 */
#define INIT_16_GLOBAL \
72
pxor_r2r(xmm6, xmm6);\
73
movaps_m2r(factor_mask, xmm7);
76
/*
 * Clears the xmm3 accumulator.
 * NOTE(review): as above, the owning #define line is not visible here.
 */
pxor_r2r(xmm3, xmm3);\
79
/*
 * Broadcast vertical filter coefficient number num of the current
 * scanline into xmm5:
 *   - movd loads ctx->table_v.pixels[ctx->scanline].factor_i[num] into xmm2,
 *   - pand with xmm7 (factor_mask) keeps only the lowest 16-bit word,
 *   - pshuflw with selector 0x00 replicates word 0 across the low four words,
 *   - pshufd with selector 0x00 replicates dword 0 across all four dwords.
 * xmm2 is used as scratch. Expects a variable named ctx in scope and xmm7
 * to hold factor_mask (see INIT_8_GLOBAL).
 * NOTE(review): operand order is assumed to be (source, destination) as
 * the _m2r/_r2r names suggest - confirm against sse.h.
 */
#define LOAD_FACTOR_8(num) \
81
movd_m2r(ctx->table_v.pixels[ctx->scanline].factor_i[num], xmm2);\
82
pand_r2r(xmm7, xmm2);\
83
pshuflw_r2ri(xmm2,xmm5,0x00);\
84
pshufd_r2ri(xmm5,xmm5,0x00);
86
/*
 * 16-bit counterpart of LOAD_FACTOR_8; the visible instruction sequence
 * is the same (factor broadcast into xmm5, xmm2 as scratch).
 */
#define LOAD_FACTOR_16(num) \
88
movd_m2r(ctx->table_v.pixels[ctx->scanline].factor_i[num], xmm2);\
89
pand_r2r(xmm7, xmm2);\
90
pshuflw_r2ri(xmm2,xmm5,0x00);\
91
pshufd_r2ri(xmm5,xmm5,0x00);
94
/*
 * Accumulate one source row of 8-bit samples, weighted by coefficient num.
 *   - movaps loads 16 bytes from *src into xmm0 (aligned load) and copies
 *     them to xmm1,
 *   - punpcklbw/punpckhbw interleave with xmm6 (zeroed by INIT_8_GLOBAL),
 *     widening the low and high 8 bytes to 16-bit words,
 *   - LOAD_FACTOR_8 broadcasts the coefficient into xmm5,
 *   - pmulhw keeps the upper 16 bits of each signed 16x16 product,
 *   - paddsw adds with signed saturation into xmm3 (low half) and
 *     xmm4 (high half).
 * Expects src and ctx in scope.
 * NOTE(review): the original line numbering shows lines missing between
 * the unpacks and LOAD_FACTOR_8; any instructions there are not visible.
 * Operand order assumed to be (source, destination) - confirm in sse.h.
 */
#define ACCUM_8(num) \
96
movaps_m2r(*src,xmm0);\
97
movaps_r2r(xmm0,xmm1);\
98
punpcklbw_r2r(xmm6, xmm0); \
99
punpckhbw_r2r(xmm6, xmm1); \
102
LOAD_FACTOR_8(num); \
103
/* Accumulate xmm0 */ \
104
pmulhw_r2r(xmm5, xmm0);\
105
paddsw_r2r(xmm0, xmm3);\
106
/* Accumulate xmm1 */ \
107
pmulhw_r2r(xmm5, xmm1);\
108
paddsw_r2r(xmm1, xmm4)
110
/*
 * Accumulate one source row of 16-bit samples, weighted by coefficient num.
 * Two aligned 16-byte loads (*src and *(src+16)) fill xmm0 and xmm1; both
 * are multiplied by the broadcast coefficient in xmm5 (pmulhw, upper 16
 * bits of the signed product) and added with signed saturation into xmm3
 * and xmm4 respectively.
 * NOTE(review): the original line numbering shows lines missing between
 * the loads and LOAD_FACTOR_16; any instructions there are not visible.
 */
#define ACCUM_16(num) \
112
movaps_m2r(*src,xmm0);\
113
movaps_m2r(*(src+16),xmm1);\
116
LOAD_FACTOR_16(num); \
117
/* Accumulate xmm0 */ \
118
pmulhw_r2r(xmm5, xmm0);\
119
paddsw_r2r(xmm0, xmm3);\
120
/* Accumulate xmm1 */ \
121
pmulhw_r2r(xmm5, xmm1);\
122
paddsw_r2r(xmm1, xmm4)
127
/*
 * Pack the 16-bit accumulators xmm3/xmm4 to bytes with unsigned
 * saturation and store the 16 result bytes to *dst (unaligned store).
 * NOTE(review): the #define line that owns these two statements is not
 * visible in this excerpt (presumably OUTPUT_8), and lines preceding the
 * pack may be missing as well.
 */
packuswb_r2r(xmm4, xmm3);\
128
movups_r2m(xmm3, *dst)
130
/*
 * 16-bit output without range clamping: xmm3 is stored to *dst and xmm4
 * to *(dst+16), both as unaligned stores.
 * NOTE(review): the original line numbering shows lines missing inside
 * this macro; any instructions there are not visible.
 */
#define OUTPUT_16_NOCLIP \
133
movups_r2m(xmm3, *dst);\
134
movups_r2m(xmm4, *(dst+16));\
137
/*
 * 16-bit output with clamping: each signed word lane of xmm3/xmm4 is
 * limited to at most max_13 (0x1FFF) by pminsw and at least min_13 (0)
 * by pmaxsw before the two unaligned stores.
 * NOTE(review): the owning #define line is not visible (presumably
 * OUTPUT_16), and lines between the clamp and the stores are missing.
 */
pminsw_m2r(max_13, xmm3);\
138
pminsw_m2r(max_13, xmm4);\
139
pmaxsw_m2r(min_13, xmm3);\
140
pmaxsw_m2r(min_13, xmm4);\
143
movups_r2m(xmm3, *dst);\
144
movups_r2m(xmm4, *(dst+16));\
152
/*
 * Scalar (plain C) accumulation for one 8-bit sample: adds
 * factor_i[num] of the current scanline times *src to tmp.
 * Expects tmp, src and ctx in scope.
 */
#define ACCUM_C_8(num) \
153
tmp += ctx->table_v.pixels[ctx->scanline].factor_i[num] * *src
155
/* Scalar accumulation for one 16-bit sample; src is read through a uint16_t pointer. */
#define ACCUM_C_16(num) \
156
tmp += ctx->table_v.pixels[ctx->scanline].factor_i[num] * *(uint16_t*)src
160
/*
 * Branch-light clamp of tmp to 0..0xFF before the byte store: when any
 * bit above the low 8 is set, (-tmp) >> 31 is used instead of tmp.
 * NOTE(review): this yields 0 for negative tmp and 0xFF for too-large tmp
 * only if tmp is a 32-bit signed int and >> is an arithmetic shift -
 * confirm the declaration of tmp. The owning #define line is not visible
 * in this excerpt (presumably OUTPUT_C_8).
 */
*dst = (uint8_t)((tmp & ~0xFF)?((-tmp) >> 31) : tmp);
162
/*
 * 16-bit variant of the clamp-and-store above: same scheme with a 0xFFFF
 * range check and a store through a uint16_t pointer.
 * NOTE(review): the original line numbering shows a line missing between
 * the #define and the store.
 */
#define OUTPUT_C_16 \
164
*(uint16_t*)dst = (uint16_t)((tmp & ~0xFFFF)?((-tmp) >> 31) : tmp);
166
/* scale_uint8_x_1_y_bicubic_sse2 */
168
/*
 * Template parameters for this routine: FUNC_NAME names the function,
 * INIT_GLOBAL/ACCUM/OUTPUT select the 8-bit SSE2 macros and
 * INIT_C/ACCUM_C/OUTPUT_C select the 8-bit scalar C macros.
 * NOTE(review): the original line numbering shows further lines around
 * these defines (presumably more parameters, the template #include and
 * the matching #undefs); they are not visible in this excerpt.
 */
#define FUNC_NAME scale_uint8_x_1_y_bicubic_sse2
173
#define INIT_GLOBAL INIT_8_GLOBAL
175
#define ACCUM ACCUM_8
176
#define OUTPUT OUTPUT_8
177
#define INIT_C INIT_C_8
178
#define ACCUM_C ACCUM_C_8
179
#define OUTPUT_C OUTPUT_C_8
183
/* scale_uint8_x_2_y_bicubic_sse2 */
185
/*
 * Template parameters for the x_2, x_3 and x_4 8-bit bicubic routines.
 * All three groups bind the same 8-bit SSE2 macros (INIT_8_GLOBAL,
 * ACCUM_8, OUTPUT_8) and 8-bit scalar macros (INIT_C_8, ACCUM_C_8,
 * OUTPUT_C_8); only FUNC_NAME differs in the visible lines.
 * NOTE(review): the lines that distinguish the variants and the template
 * #include are not visible in this excerpt.
 */
#define FUNC_NAME scale_uint8_x_2_y_bicubic_sse2
190
#define INIT_GLOBAL INIT_8_GLOBAL
192
#define ACCUM ACCUM_8
193
#define OUTPUT OUTPUT_8
194
#define INIT_C INIT_C_8
195
#define ACCUM_C ACCUM_C_8
196
#define OUTPUT_C OUTPUT_C_8
200
/* scale_uint8_x_3_y_bicubic_sse2 */
202
#define FUNC_NAME scale_uint8_x_3_y_bicubic_sse2
207
#define INIT_GLOBAL INIT_8_GLOBAL
209
#define ACCUM ACCUM_8
210
#define OUTPUT OUTPUT_8
211
#define INIT_C INIT_C_8
212
#define ACCUM_C ACCUM_C_8
213
#define OUTPUT_C OUTPUT_C_8
217
/* scale_uint8_x_4_y_bicubic_sse2 */
219
#define FUNC_NAME scale_uint8_x_4_y_bicubic_sse2
224
#define INIT_GLOBAL INIT_8_GLOBAL
226
#define ACCUM ACCUM_8
227
#define OUTPUT OUTPUT_8
228
#define INIT_C INIT_C_8
229
#define ACCUM_C ACCUM_C_8
230
#define OUTPUT_C OUTPUT_C_8
235
/*
 * Template parameters for the four 8-bit quadratic routines (x_1..x_4).
 * Every group binds the 8-bit SSE2 macros (INIT_8_GLOBAL, ACCUM_8,
 * OUTPUT_8) and the 8-bit scalar macros (INIT_C_8, ACCUM_C_8, OUTPUT_C_8);
 * only FUNC_NAME differs in the visible lines.
 * NOTE(review): the template #include and any further per-variant
 * parameters are not visible in this excerpt.
 */
/* scale_uint8_x_1_y_quadratic_sse2 */
237
#define FUNC_NAME scale_uint8_x_1_y_quadratic_sse2
242
#define INIT_GLOBAL INIT_8_GLOBAL
244
#define ACCUM ACCUM_8
245
#define OUTPUT OUTPUT_8
246
#define INIT_C INIT_C_8
247
#define ACCUM_C ACCUM_C_8
248
#define OUTPUT_C OUTPUT_C_8
252
/* scale_uint8_x_2_y_quadratic_sse2 */
254
#define FUNC_NAME scale_uint8_x_2_y_quadratic_sse2
259
#define INIT_GLOBAL INIT_8_GLOBAL
261
#define ACCUM ACCUM_8
262
#define OUTPUT OUTPUT_8
263
#define INIT_C INIT_C_8
264
#define ACCUM_C ACCUM_C_8
265
#define OUTPUT_C OUTPUT_C_8
269
/* scale_uint8_x_3_y_quadratic_sse2 */
271
#define FUNC_NAME scale_uint8_x_3_y_quadratic_sse2
276
#define INIT_GLOBAL INIT_8_GLOBAL
278
#define ACCUM ACCUM_8
279
#define OUTPUT OUTPUT_8
280
#define INIT_C INIT_C_8
281
#define ACCUM_C ACCUM_C_8
282
#define OUTPUT_C OUTPUT_C_8
286
/* scale_uint8_x_4_y_quadratic_sse2 */
288
#define FUNC_NAME scale_uint8_x_4_y_quadratic_sse2
293
#define INIT_GLOBAL INIT_8_GLOBAL
295
#define ACCUM ACCUM_8
296
#define OUTPUT OUTPUT_8
297
#define INIT_C INIT_C_8
298
#define ACCUM_C ACCUM_C_8
299
#define OUTPUT_C OUTPUT_C_8
303
/*
 * Template parameters for the four 8-bit generic routines. The groups
 * appear in the order x_1, x_2, x_4, x_3. Every group binds the 8-bit
 * SSE2 macros (INIT_8_GLOBAL, ACCUM_8, OUTPUT_8) and the 8-bit scalar
 * macros (INIT_C_8, ACCUM_C_8, OUTPUT_C_8).
 * NOTE(review): the template #include and any further per-variant
 * parameters are not visible in this excerpt.
 */
/* scale_uint8_x_1_y_generic_sse2 */
305
#define FUNC_NAME scale_uint8_x_1_y_generic_sse2
310
#define INIT_GLOBAL INIT_8_GLOBAL
312
#define ACCUM ACCUM_8
313
#define OUTPUT OUTPUT_8
314
#define INIT_C INIT_C_8
315
#define ACCUM_C ACCUM_C_8
316
#define OUTPUT_C OUTPUT_C_8
320
/* scale_uint8_x_2_y_generic_sse2 */
322
#define FUNC_NAME scale_uint8_x_2_y_generic_sse2
327
#define INIT_GLOBAL INIT_8_GLOBAL
329
#define ACCUM ACCUM_8
330
#define OUTPUT OUTPUT_8
331
#define INIT_C INIT_C_8
332
#define ACCUM_C ACCUM_C_8
333
#define OUTPUT_C OUTPUT_C_8
337
/* scale_uint8_x_4_y_generic_sse2 */
339
#define FUNC_NAME scale_uint8_x_4_y_generic_sse2
344
#define INIT_GLOBAL INIT_8_GLOBAL
346
#define ACCUM ACCUM_8
347
#define OUTPUT OUTPUT_8
348
#define INIT_C INIT_C_8
349
#define ACCUM_C ACCUM_C_8
350
#define OUTPUT_C OUTPUT_C_8
354
/* scale_uint8_x_3_y_generic_sse2 */
356
#define FUNC_NAME scale_uint8_x_3_y_generic_sse2
361
#define INIT_GLOBAL INIT_8_GLOBAL
363
#define ACCUM ACCUM_8
364
#define OUTPUT OUTPUT_8
365
#define INIT_C INIT_C_8
366
#define ACCUM_C ACCUM_C_8
367
#define OUTPUT_C OUTPUT_C_8
374
/*
 * Template parameters for the 16-bit bicubic routines, x_1..x_4, each in
 * two flavours. The plain flavour binds OUTPUT to OUTPUT_16; the
 * "_noclip" flavour binds OUTPUT to OUTPUT_16_NOCLIP. All groups share
 * INIT_16_GLOBAL, ACCUM_16 and the 16-bit scalar macros (INIT_C_16,
 * ACCUM_C_16, OUTPUT_C_16).
 * NOTE(review): the template #include and any further per-variant
 * parameters are not visible in this excerpt.
 */
/* scale_uint16_x_1_y_bicubic_sse2 */
376
#define FUNC_NAME scale_uint16_x_1_y_bicubic_sse2
381
#define INIT_GLOBAL INIT_16_GLOBAL
383
#define ACCUM ACCUM_16
384
#define OUTPUT OUTPUT_16
385
#define INIT_C INIT_C_16
386
#define ACCUM_C ACCUM_C_16
387
#define OUTPUT_C OUTPUT_C_16
391
/* scale_uint16_x_1_y_bicubic_noclip_sse2 */
393
#define FUNC_NAME scale_uint16_x_1_y_bicubic_noclip_sse2
398
#define INIT_GLOBAL INIT_16_GLOBAL
400
#define ACCUM ACCUM_16
401
#define OUTPUT OUTPUT_16_NOCLIP
402
#define INIT_C INIT_C_16
403
#define ACCUM_C ACCUM_C_16
404
#define OUTPUT_C OUTPUT_C_16
408
/* scale_uint16_x_2_y_bicubic_sse2 */
410
#define FUNC_NAME scale_uint16_x_2_y_bicubic_sse2
415
#define INIT_GLOBAL INIT_16_GLOBAL
417
#define ACCUM ACCUM_16
418
#define OUTPUT OUTPUT_16
419
#define INIT_C INIT_C_16
420
#define ACCUM_C ACCUM_C_16
421
#define OUTPUT_C OUTPUT_C_16
425
/* scale_uint16_x_2_y_bicubic_noclip_sse2 */
427
#define FUNC_NAME scale_uint16_x_2_y_bicubic_noclip_sse2
432
#define INIT_GLOBAL INIT_16_GLOBAL
434
#define ACCUM ACCUM_16
435
#define OUTPUT OUTPUT_16_NOCLIP
436
#define INIT_C INIT_C_16
437
#define ACCUM_C ACCUM_C_16
438
#define OUTPUT_C OUTPUT_C_16
442
/* scale_uint16_x_3_y_bicubic_sse2 */
444
#define FUNC_NAME scale_uint16_x_3_y_bicubic_sse2
449
#define INIT_GLOBAL INIT_16_GLOBAL
451
#define ACCUM ACCUM_16
452
#define OUTPUT OUTPUT_16
453
#define INIT_C INIT_C_16
454
#define ACCUM_C ACCUM_C_16
455
#define OUTPUT_C OUTPUT_C_16
459
/* scale_uint16_x_3_y_bicubic_noclip_sse2 */
461
#define FUNC_NAME scale_uint16_x_3_y_bicubic_noclip_sse2
466
#define INIT_GLOBAL INIT_16_GLOBAL
468
#define ACCUM ACCUM_16
469
#define OUTPUT OUTPUT_16_NOCLIP
470
#define INIT_C INIT_C_16
471
#define ACCUM_C ACCUM_C_16
472
#define OUTPUT_C OUTPUT_C_16
476
/* scale_uint16_x_4_y_bicubic_sse2 */
478
#define FUNC_NAME scale_uint16_x_4_y_bicubic_sse2
483
#define INIT_GLOBAL INIT_16_GLOBAL
485
#define ACCUM ACCUM_16
486
#define OUTPUT OUTPUT_16
487
#define INIT_C INIT_C_16
488
#define ACCUM_C ACCUM_C_16
489
#define OUTPUT_C OUTPUT_C_16
493
/* scale_uint16_x_4_y_bicubic_noclip_sse2 */
495
#define FUNC_NAME scale_uint16_x_4_y_bicubic_noclip_sse2
500
#define INIT_GLOBAL INIT_16_GLOBAL
502
#define ACCUM ACCUM_16
503
#define OUTPUT OUTPUT_16_NOCLIP
504
#define INIT_C INIT_C_16
505
#define ACCUM_C ACCUM_C_16
506
#define OUTPUT_C OUTPUT_C_16
512
/*
 * Template parameters for the four 16-bit quadratic routines (x_1..x_4).
 * All of them bind OUTPUT to OUTPUT_16_NOCLIP, i.e. the SSE2 store path
 * without the min_13/max_13 clamp, together with INIT_16_GLOBAL,
 * ACCUM_16 and the 16-bit scalar macros.
 * NOTE(review): presumably the quadratic coefficients cannot overshoot
 * the input range, which would make clamping unnecessary - confirm
 * against the coefficient generation code.
 */
/* scale_uint16_x_1_y_quadratic_sse2 */
514
#define FUNC_NAME scale_uint16_x_1_y_quadratic_sse2
519
#define INIT_GLOBAL INIT_16_GLOBAL
521
#define ACCUM ACCUM_16
522
#define OUTPUT OUTPUT_16_NOCLIP
523
#define INIT_C INIT_C_16
524
#define ACCUM_C ACCUM_C_16
525
#define OUTPUT_C OUTPUT_C_16
529
/* scale_uint16_x_2_y_quadratic_sse2 */
531
#define FUNC_NAME scale_uint16_x_2_y_quadratic_sse2
536
#define INIT_GLOBAL INIT_16_GLOBAL
538
#define ACCUM ACCUM_16
539
#define OUTPUT OUTPUT_16_NOCLIP
540
#define INIT_C INIT_C_16
541
#define ACCUM_C ACCUM_C_16
542
#define OUTPUT_C OUTPUT_C_16
546
/* scale_uint16_x_3_y_quadratic_sse2 */
548
#define FUNC_NAME scale_uint16_x_3_y_quadratic_sse2
553
#define INIT_GLOBAL INIT_16_GLOBAL
555
#define ACCUM ACCUM_16
556
#define OUTPUT OUTPUT_16_NOCLIP
557
#define INIT_C INIT_C_16
558
#define ACCUM_C ACCUM_C_16
559
#define OUTPUT_C OUTPUT_C_16
563
/* scale_uint16_x_4_y_quadratic_sse2 */
565
#define FUNC_NAME scale_uint16_x_4_y_quadratic_sse2
570
#define INIT_GLOBAL INIT_16_GLOBAL
572
#define ACCUM ACCUM_16
573
#define OUTPUT OUTPUT_16_NOCLIP
574
#define INIT_C INIT_C_16
575
#define ACCUM_C ACCUM_C_16
576
#define OUTPUT_C OUTPUT_C_16
581
/*
 * Template parameters for the four 16-bit generic routines (x_1..x_4).
 * All of them bind the clamping output macro OUTPUT_16 together with
 * INIT_16_GLOBAL, ACCUM_16 and the 16-bit scalar macros (INIT_C_16,
 * ACCUM_C_16, OUTPUT_C_16).
 * NOTE(review): the template #include and any further per-variant
 * parameters are not visible in this excerpt.
 */
/* scale_uint16_x_1_y_generic_sse2 */
583
#define FUNC_NAME scale_uint16_x_1_y_generic_sse2
588
#define INIT_GLOBAL INIT_16_GLOBAL
590
#define ACCUM ACCUM_16
591
#define OUTPUT OUTPUT_16
592
#define INIT_C INIT_C_16
593
#define ACCUM_C ACCUM_C_16
594
#define OUTPUT_C OUTPUT_C_16
598
/* scale_uint16_x_2_y_generic_sse2 */
600
#define FUNC_NAME scale_uint16_x_2_y_generic_sse2
605
#define INIT_GLOBAL INIT_16_GLOBAL
607
#define ACCUM ACCUM_16
608
#define OUTPUT OUTPUT_16
609
#define INIT_C INIT_C_16
610
#define ACCUM_C ACCUM_C_16
611
#define OUTPUT_C OUTPUT_C_16
615
/* scale_uint16_x_3_y_generic_sse2 */
617
#define FUNC_NAME scale_uint16_x_3_y_generic_sse2
622
#define INIT_GLOBAL INIT_16_GLOBAL
624
#define ACCUM ACCUM_16
625
#define OUTPUT OUTPUT_16
626
#define INIT_C INIT_C_16
627
#define ACCUM_C ACCUM_C_16
628
#define OUTPUT_C OUTPUT_C_16
632
/* scale_uint16_x_4_y_generic_sse2 */
634
#define FUNC_NAME scale_uint16_x_4_y_generic_sse2
639
#define INIT_GLOBAL INIT_16_GLOBAL
641
#define ACCUM ACCUM_16
642
#define OUTPUT OUTPUT_16
643
#define INIT_C INIT_C_16
644
#define ACCUM_C ACCUM_C_16
645
#define OUTPUT_C OUTPUT_C_16
654
/*
 * Install the SSE2 quadratic vertical scalers into tab->funcs_y.
 *
 * tab          function table to fill in
 * src_advance  together with dst_advance selects the 8-bit routine; a
 *              routine is chosen only when both are equal and in 1..4
 * dst_advance  see src_advance
 *
 * Every 8-bit branch also sets bits_uint8_noadvance to 14.
 * NOTE(review): brace and preprocessor lines are not visible in this
 * excerpt, so it cannot be confirmed from here whether the 16-bit
 * assignments at the end are unconditional or even compiled in.
 */
void gavl_init_scale_funcs_quadratic_y_sse2(gavl_scale_funcs_t * tab,
655
int src_advance, int dst_advance)
657
if((src_advance == 1) && (dst_advance == 1))
659
tab->funcs_y.scale_uint8_x_1_noadvance = scale_uint8_x_1_y_quadratic_sse2;
660
tab->funcs_y.bits_uint8_noadvance = 14;
662
else if((src_advance == 3) && (dst_advance == 3))
664
tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_3_y_quadratic_sse2;
665
tab->funcs_y.bits_uint8_noadvance = 14;
667
else if((src_advance == 4) && (dst_advance == 4))
669
/* Both the x_3 and the x_4 slot receive the x_4 routine here.
 * NOTE(review): presumably for 3-component pixels stored with an
 * advance of 4 - confirm with the callers. */
tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_4_y_quadratic_sse2;
670
tab->funcs_y.scale_uint8_x_4 = scale_uint8_x_4_y_quadratic_sse2;
671
tab->funcs_y.bits_uint8_noadvance = 14;
673
else if((src_advance == 2) && (dst_advance == 2))
675
tab->funcs_y.scale_uint8_x_2 = scale_uint8_x_2_y_quadratic_sse2;
676
tab->funcs_y.bits_uint8_noadvance = 14;
678
/* 16-bit routines for x_1..x_4, also with 14 bits. */
tab->funcs_y.scale_uint16_x_1 = scale_uint16_x_1_y_quadratic_sse2;
679
tab->funcs_y.scale_uint16_x_2 = scale_uint16_x_2_y_quadratic_sse2;
680
tab->funcs_y.scale_uint16_x_3 = scale_uint16_x_3_y_quadratic_sse2;
681
tab->funcs_y.scale_uint16_x_4 = scale_uint16_x_4_y_quadratic_sse2;
682
tab->funcs_y.bits_uint16 = 14;
685
/*
 * Install the SSE2 bicubic vertical scalers into tab->funcs_y.
 *
 * tab          function table to fill in
 * src_advance  together with dst_advance selects the 8-bit routine; a
 *              routine is chosen only when both are equal and in 1..4
 * dst_advance  see src_advance
 *
 * Every 8-bit branch also sets bits_uint8_noadvance to 14. The 16-bit
 * slots receive the clipping bicubic routines.
 * NOTE(review): brace and preprocessor lines are not visible in this
 * excerpt, so it cannot be confirmed from here whether the 16-bit
 * assignments at the end are unconditional or even compiled in.
 */
void gavl_init_scale_funcs_bicubic_y_sse2(gavl_scale_funcs_t * tab,
686
int src_advance, int dst_advance)
688
if((src_advance == 1) && (dst_advance == 1))
690
tab->funcs_y.scale_uint8_x_1_noadvance = scale_uint8_x_1_y_bicubic_sse2;
691
tab->funcs_y.bits_uint8_noadvance = 14;
693
else if((src_advance == 3) && (dst_advance == 3))
695
tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_3_y_bicubic_sse2;
696
tab->funcs_y.bits_uint8_noadvance = 14;
698
else if((src_advance == 4) && (dst_advance == 4))
700
/* Both the x_3 and the x_4 slot receive the x_4 routine here.
 * NOTE(review): presumably for 3-component pixels stored with an
 * advance of 4 - confirm with the callers. */
tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_4_y_bicubic_sse2;
701
tab->funcs_y.scale_uint8_x_4 = scale_uint8_x_4_y_bicubic_sse2;
702
tab->funcs_y.bits_uint8_noadvance = 14;
704
else if((src_advance == 2) && (dst_advance == 2))
706
tab->funcs_y.scale_uint8_x_2 = scale_uint8_x_2_y_bicubic_sse2;
707
tab->funcs_y.bits_uint8_noadvance = 14;
710
/* 16-bit routines for x_1..x_4, also with 14 bits. */
tab->funcs_y.scale_uint16_x_1 = scale_uint16_x_1_y_bicubic_sse2;
711
tab->funcs_y.scale_uint16_x_2 = scale_uint16_x_2_y_bicubic_sse2;
712
tab->funcs_y.scale_uint16_x_3 = scale_uint16_x_3_y_bicubic_sse2;
713
tab->funcs_y.scale_uint16_x_4 = scale_uint16_x_4_y_bicubic_sse2;
714
tab->funcs_y.bits_uint16 = 14;
717
/*
 * Install the SSE2 bicubic vertical scalers into tab->funcs_y, using the
 * non-clipping variants for 16-bit data.
 *
 * tab          function table to fill in
 * src_advance  together with dst_advance selects the 8-bit routine; a
 *              routine is chosen only when both are equal and in 1..4
 * dst_advance  see src_advance
 *
 * The 8-bit slots get the same routines as in the clipping initializer
 * (there is no separate 8-bit noclip routine in this file); only the
 * 16-bit slots point at the "_noclip" functions.
 * NOTE(review): brace and preprocessor lines are not visible in this
 * excerpt, so it cannot be confirmed from here whether the 16-bit
 * assignments at the end are unconditional or even compiled in.
 */
void gavl_init_scale_funcs_bicubic_y_noclip_sse2(gavl_scale_funcs_t * tab,
718
int src_advance, int dst_advance)
720
if((src_advance == 1) && (dst_advance == 1))
722
tab->funcs_y.scale_uint8_x_1_noadvance = scale_uint8_x_1_y_bicubic_sse2;
723
tab->funcs_y.bits_uint8_noadvance = 14;
725
else if((src_advance == 3) && (dst_advance == 3))
727
tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_3_y_bicubic_sse2;
728
tab->funcs_y.bits_uint8_noadvance = 14;
730
else if((src_advance == 4) && (dst_advance == 4))
732
/* Both the x_3 and the x_4 slot receive the x_4 routine here.
 * NOTE(review): presumably for 3-component pixels stored with an
 * advance of 4 - confirm with the callers. */
tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_4_y_bicubic_sse2;
733
tab->funcs_y.scale_uint8_x_4 = scale_uint8_x_4_y_bicubic_sse2;
734
tab->funcs_y.bits_uint8_noadvance = 14;
736
else if((src_advance == 2) && (dst_advance == 2))
738
tab->funcs_y.scale_uint8_x_2 = scale_uint8_x_2_y_bicubic_sse2;
739
tab->funcs_y.bits_uint8_noadvance = 14;
742
/* 16-bit routines for x_1..x_4: the non-clipping variants, 14 bits. */
tab->funcs_y.scale_uint16_x_1 = scale_uint16_x_1_y_bicubic_noclip_sse2;
743
tab->funcs_y.scale_uint16_x_2 = scale_uint16_x_2_y_bicubic_noclip_sse2;
744
tab->funcs_y.scale_uint16_x_3 = scale_uint16_x_3_y_bicubic_noclip_sse2;
745
tab->funcs_y.scale_uint16_x_4 = scale_uint16_x_4_y_bicubic_noclip_sse2;
746
tab->funcs_y.bits_uint16 = 14;
750
/*
 * Install the SSE2 generic vertical scalers into tab->funcs_y.
 *
 * tab          function table to fill in
 * src_advance  together with dst_advance selects the 8-bit routine; a
 *              routine is chosen only when both are equal and in 1..4
 * dst_advance  see src_advance
 *
 * NOTE(review): two function heads appear back to back (…_mmxext and
 * …_sse2). They are presumably the two arms of a preprocessor conditional
 * whose #if/#else/#endif lines are not visible in this excerpt; the body
 * below references only the _sse2 routines. Brace lines are likewise not
 * visible, so it cannot be confirmed from here whether the 16-bit
 * assignments at the end are unconditional.
 */
void gavl_init_scale_funcs_generic_y_mmxext(gavl_scale_funcs_t * tab,
751
int src_advance, int dst_advance)
753
void gavl_init_scale_funcs_generic_y_sse2(gavl_scale_funcs_t * tab,
754
int src_advance, int dst_advance)
757
if((src_advance == 1) && (dst_advance == 1))
759
tab->funcs_y.scale_uint8_x_1_noadvance = scale_uint8_x_1_y_generic_sse2;
760
tab->funcs_y.bits_uint8_noadvance = 14;
762
else if((src_advance == 3) && (dst_advance == 3))
764
tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_3_y_generic_sse2;
765
tab->funcs_y.bits_uint8_noadvance = 14;
767
else if((src_advance == 2) && (dst_advance == 2))
769
tab->funcs_y.scale_uint8_x_2 = scale_uint8_x_2_y_generic_sse2;
770
tab->funcs_y.bits_uint8_noadvance = 14;
772
else if((src_advance == 4) && (dst_advance == 4))
774
/* Both the x_3 and the x_4 slot receive the x_4 routine here.
 * NOTE(review): presumably for 3-component pixels stored with an
 * advance of 4 - confirm with the callers. */
tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_4_y_generic_sse2;
775
tab->funcs_y.scale_uint8_x_4 = scale_uint8_x_4_y_generic_sse2;
776
tab->funcs_y.bits_uint8_noadvance = 14;
778
/* 16-bit routines for x_1..x_4, also with 14 bits. */
tab->funcs_y.scale_uint16_x_1 = scale_uint16_x_1_y_generic_sse2;
779
tab->funcs_y.scale_uint16_x_2 = scale_uint16_x_2_y_generic_sse2;
780
tab->funcs_y.scale_uint16_x_3 = scale_uint16_x_3_y_generic_sse2;
781
tab->funcs_y.scale_uint16_x_4 = scale_uint16_x_4_y_generic_sse2;
782
tab->funcs_y.bits_uint16 = 14;
788
/*
 * Bilinear 8-bit routines. Each group defines FUNC_NAME and then includes
 * scale_y_linear_8.h, which is included once per routine. The groups
 * appear in the order x_1, x_2, x_4, x_3.
 * NOTE(review): the original line numbering shows further lines between
 * each FUNC_NAME and its #include (presumably more template parameters);
 * they are not visible in this excerpt.
 */
/* scale_uint8_x_1_y_bilinear_sse2 */
790
#define FUNC_NAME scale_uint8_x_1_y_bilinear_sse2
795
#include "scale_y_linear_8.h"
797
/* scale_uint8_x_2_y_bilinear_sse2 */
799
#define FUNC_NAME scale_uint8_x_2_y_bilinear_sse2
804
#include "scale_y_linear_8.h"
806
/* scale_uint8_x_4_y_bilinear_sse2 */
808
#define FUNC_NAME scale_uint8_x_4_y_bilinear_sse2
813
#include "scale_y_linear_8.h"
815
/* scale_uint8_x_3_y_bilinear_sse2 */
817
#define FUNC_NAME scale_uint8_x_3_y_bilinear_sse2
822
#include "scale_y_linear_8.h"
826
void gavl_init_scale_funcs_bilinear_y_sse2(gavl_scale_funcs_t * tab,
827
int src_advance, int dst_advance)
830
if((src_advance == 1) && (dst_advance == 1))
832
tab->funcs_y.scale_uint8_x_1_noadvance = scale_uint8_x_1_y_bilinear_sse2;
833
tab->funcs_y.bits_uint8_noadvance = 14;
835
else if((src_advance == 3) && (dst_advance == 3))
837
tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_3_y_bilinear_sse2;
838
tab->funcs_y.bits_uint8_noadvance = 14;
840
else if((src_advance == 2) && (dst_advance == 2))
842
tab->funcs_y.scale_uint8_x_2 = scale_uint8_x_2_y_bilinear_sse2;
843
tab->funcs_y.bits_uint8_noadvance = 14;
845
else if((src_advance == 4) && (dst_advance == 4))
847
tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_4_y_bilinear_sse2;
848
tab->funcs_y.scale_uint8_x_4 = scale_uint8_x_4_y_bilinear_sse2;
849
tab->funcs_y.bits_uint8_noadvance = 14;