/*****************************************************************
 * gavl - a general purpose audio/video processing library
 *
 * Copyright (c) 2001 - 2008 Members of the Gmerlin project
 * gmerlin-general@lists.sourceforge.net
 * http://gmerlin.sourceforge.net
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * *****************************************************************/
23
#include <attributes.h>
26
#include <gavl/gavl.h>
30
// #include "../mmx/mmx.h"
31
#include "../sse/sse.h"
35
/*
 * Debug helper: stores `reg` into the scratch variable mm_tmp via MOVQ_R2M
 * and prints mm_tmp.q to stderr as 16 hex digits, prefixed by `name`.
 *
 * Expands to two statements, so do not use it as the body of an unbraced
 * if/else.
 * NOTE(review): MOVQ_R2M and mm_tmp are not defined in this file and the
 * "../mmx/mmx.h" include is commented out -- confirm they are still provided
 * somewhere before enabling this macro. fprintf/stderr need <stdio.h>.
 */
#define DUMP_MM(name, reg) \
  MOVQ_R2M(reg, mm_tmp); \
  fprintf(stderr, "%s: %016llx\n", name, mm_tmp.q);
39
/*
 * One-time setup of the clipping bounds used by OUTPUT_FLOAT and
 * OUTPUT_C_FLOAT:
 *   xmm5 <- 16 bytes starting at ctx->min_values_f[0]
 *   xmm6 <- 16 bytes starting at ctx->max_values_f[0]
 * Unaligned loads (movups) are used, so the arrays need no special alignment.
 * All four lanes are kept as loaded.
 */
#define INIT_FLOAT_GLOBAL_4 \
  movups_m2r(ctx->min_values_f[0], xmm5); \
  movups_m2r(ctx->max_values_f[0], xmm6);
43
/*
 * One-time setup of the clipping bounds used by OUTPUT_FLOAT and
 * OUTPUT_C_FLOAT:
 *   xmm5 <- min bounds, xmm6 <- max bounds.
 * After the load, shuffle mask 0x44 copies lanes 0 and 1 into lanes 2 and 3,
 * so each register holds { v0, v1, v0, v1 }.
 * NOTE(review): movups still reads 16 bytes from min_values_f/max_values_f
 * even though only the first two values are kept -- confirm the arrays are
 * at least that large.
 */
#define INIT_FLOAT_GLOBAL_2 \
  movups_m2r(ctx->min_values_f[0], xmm5); \
  movups_m2r(ctx->max_values_f[0], xmm6); \
  shufps_r2ri(xmm5, xmm5, 0x44); \
  shufps_r2ri(xmm6, xmm6, 0x44);
49
/*
 * One-time setup of the clipping bounds used by OUTPUT_FLOAT and
 * OUTPUT_C_FLOAT:
 *   xmm5 <- min bound, xmm6 <- max bound.
 * After the load, shuffle mask 0x00 broadcasts lane 0 to all four lanes,
 * so each register holds { v0, v0, v0, v0 }.
 * NOTE(review): movups still reads 16 bytes from min_values_f/max_values_f
 * even though only the first value is kept -- confirm the arrays are at
 * least that large.
 */
#define INIT_FLOAT_GLOBAL_1 \
  movups_m2r(ctx->min_values_f[0], xmm5); \
  movups_m2r(ctx->max_values_f[0], xmm6); \
  shufps_r2ri(xmm5, xmm5, 0x00); \
  shufps_r2ri(xmm6, xmm6, 0x00);
56
/*
 * Clears the two vector accumulators (xmm3 and xmm4) that ACCUM_FLOAT adds
 * into and OUTPUT_FLOAT / OUTPUT_FLOAT_NOCLIP store.
 * Every "#define INIT INIT_FLOAT" in this file refers to this macro.
 */
#define INIT_FLOAT \
  xorps_r2r(xmm3, xmm3); \
  xorps_r2r(xmm4, xmm4);
59
/*
 * Adds one weighted filter tap to the vector accumulators.
 *   num: index into ctx->table_v.pixels[ctx->scanline].factor_f[].
 *
 * Result: xmm3 += factor * (16 bytes at src)
 *         xmm4 += factor * (16 bytes at src+16)
 * Uses xmm0, xmm1 and xmm2 as scratch.
 * The source is read with movaps, i.e. aligned loads, so both addresses
 * must be 16-byte aligned.
 * NOTE(review): "src+16" is presumably a byte offset (src being a byte
 * pointer) -- confirm against the declaration in scale_y.h.
 */
#define ACCUM_FLOAT(num) \
  /* Load two source vectors */ \
  movaps_m2r(*src, xmm1); \
  movaps_m2r(*(src+16), xmm2); \
  /* Load the scalar factor and broadcast it to all four lanes of xmm0 */ \
  movss_m2r(ctx->table_v.pixels[ctx->scanline].factor_f[num], xmm0); \
  shufps_r2ri(xmm0, xmm0, 0x00); \
  /* xmm3 += factor * xmm1 */ \
  mulps_r2r(xmm0, xmm1); \
  addps_r2r(xmm1, xmm3); \
  /* xmm4 += factor * xmm2 */ \
  mulps_r2r(xmm0, xmm2); \
  addps_r2r(xmm2, xmm4);
74
/*
 * Stores the two vector accumulators without clamping:
 *   xmm3 -> *dst, xmm4 -> *(dst+16).
 * Unaligned stores (movups) are used, so dst needs no special alignment.
 */
#define OUTPUT_FLOAT_NOCLIP \
  movups_r2m(xmm3, *dst); \
  movups_r2m(xmm4, *(dst+16));
79
/*
 * Clamps the two vector accumulators and stores them:
 *   xmm3 -> *dst, xmm4 -> *(dst+16).
 * Each accumulator is first limited from above by xmm6 (minps), then from
 * below by xmm5 (maxps); these registers are loaded from ctx->max_values_f
 * and ctx->min_values_f by the INIT_FLOAT_GLOBAL_* macros.
 * Unaligned stores (movups) are used, so dst needs no special alignment.
 */
#define OUTPUT_FLOAT \
  /* Upper bound */ \
  minps_r2r(xmm6, xmm3); \
  minps_r2r(xmm6, xmm4); \
  /* Lower bound */ \
  maxps_r2r(xmm5, xmm3); \
  maxps_r2r(xmm5, xmm4); \
  /* Store */ \
  movups_r2m(xmm3, *dst); \
  movups_r2m(xmm4, *(dst+16));
87
/*
 * Clears xmm2, the accumulator of the single-float path
 * (ACCUM_C_FLOAT / OUTPUT_C_FLOAT*).
 */
#define INIT_C_FLOAT \
  xorps_r2r(xmm2, xmm2);
90
/*
 * Single-float counterpart of ACCUM_FLOAT: adds one weighted tap to the
 * scalar accumulator in lane 0 of xmm2.
 *   num: index into ctx->table_v.pixels[ctx->scanline].factor_f[].
 * Uses xmm1 as scratch.
 */
#define ACCUM_C_FLOAT(num) \
  /* xmm1 = *src * factor */ \
  movss_m2r(*src, xmm1); \
  mulss_m2r(ctx->table_v.pixels[ctx->scanline].factor_f[num], xmm1); \
  /* xmm2 += xmm1 */ \
  addss_r2r(xmm1, xmm2);
96
/*
 * Clamps the scalar accumulator (lane 0 of xmm2) to the range given by
 * lane 0 of xmm5 (lower bound) and lane 0 of xmm6 (upper bound), then
 * stores that single float to *dst.
 */
#define OUTPUT_C_FLOAT \
  minss_r2r(xmm6, xmm2); \
  maxss_r2r(xmm5, xmm2); \
  movss_r2m(xmm2, *dst);
101
/* Stores the scalar accumulator (lane 0 of xmm2) to *dst without clamping. */
#define OUTPUT_C_FLOAT_NOCLIP \
  movss_r2m(xmm2, *dst);
107
/*
 * Bicubic vertical scalers.
 *
 * Each group below selects a function name and the INIT / ACCUM / OUTPUT
 * building blocks, then includes "../sse/scale_y.h" once with that
 * configuration. The "noclip" variants use the OUTPUT*_NOCLIP macros,
 * the others clamp the result.
 * NOTE(review): the configuration macros are redefined for every group
 * without an #undef in this file -- presumably scale_y.h undefines them
 * at its end; confirm.
 */

/* scale_float_x_1_y_bicubic_sse */

#define FUNC_NAME scale_float_x_1_y_bicubic_sse
#define INIT_GLOBAL INIT_FLOAT_GLOBAL_1
#define INIT INIT_FLOAT
#define ACCUM ACCUM_FLOAT
#define OUTPUT OUTPUT_FLOAT
#define INIT_C INIT_C_FLOAT
#define ACCUM_C ACCUM_C_FLOAT
#define OUTPUT_C OUTPUT_C_FLOAT

#include "../sse/scale_y.h"

/* scale_float_x_1_y_bicubic_noclip_sse */

#define FUNC_NAME scale_float_x_1_y_bicubic_noclip_sse
#define INIT_GLOBAL INIT_FLOAT_GLOBAL_1
#define INIT INIT_FLOAT
#define ACCUM ACCUM_FLOAT
#define OUTPUT OUTPUT_FLOAT_NOCLIP
#define INIT_C INIT_C_FLOAT
#define ACCUM_C ACCUM_C_FLOAT
#define OUTPUT_C OUTPUT_C_FLOAT_NOCLIP

#include "../sse/scale_y.h"

/* scale_float_x_2_y_bicubic_sse */

#define FUNC_NAME scale_float_x_2_y_bicubic_sse
#define INIT_GLOBAL INIT_FLOAT_GLOBAL_2
#define INIT INIT_FLOAT
#define ACCUM ACCUM_FLOAT
#define OUTPUT OUTPUT_FLOAT
#define INIT_C INIT_C_FLOAT
#define ACCUM_C ACCUM_C_FLOAT
#define OUTPUT_C OUTPUT_C_FLOAT

#include "../sse/scale_y.h"

/* scale_float_x_2_y_bicubic_noclip_sse */

#define FUNC_NAME scale_float_x_2_y_bicubic_noclip_sse
#define INIT_GLOBAL INIT_FLOAT_GLOBAL_2
#define INIT INIT_FLOAT
#define ACCUM ACCUM_FLOAT
#define OUTPUT OUTPUT_FLOAT_NOCLIP
#define INIT_C INIT_C_FLOAT
#define ACCUM_C ACCUM_C_FLOAT
#define OUTPUT_C OUTPUT_C_FLOAT_NOCLIP

#include "../sse/scale_y.h"

/* scale_float_x_4_y_bicubic_sse */

#define FUNC_NAME scale_float_x_4_y_bicubic_sse
#define INIT_GLOBAL INIT_FLOAT_GLOBAL_4
#define INIT INIT_FLOAT
#define ACCUM ACCUM_FLOAT
#define OUTPUT OUTPUT_FLOAT
#define INIT_C INIT_C_FLOAT
#define ACCUM_C ACCUM_C_FLOAT
#define OUTPUT_C OUTPUT_C_FLOAT

#include "../sse/scale_y.h"
188
/* scale_float_x_4_y_bicubic_noclip_sse */

/*
 * Non-clipping bicubic variant: both the vector path (OUTPUT) and the
 * single-float path (OUTPUT_C) must use the *_NOCLIP store, matching the
 * x_1 and x_2 noclip variants.
 */
#define FUNC_NAME scale_float_x_4_y_bicubic_noclip_sse
#define INIT_GLOBAL INIT_FLOAT_GLOBAL_4
#define INIT INIT_FLOAT
#define ACCUM ACCUM_FLOAT
#define OUTPUT OUTPUT_FLOAT_NOCLIP
#define INIT_C INIT_C_FLOAT
#define ACCUM_C ACCUM_C_FLOAT
#define OUTPUT_C OUTPUT_C_FLOAT_NOCLIP

#include "../sse/scale_y.h"
206
/*
 * Quadratic vertical scalers.
 *
 * Each group configures and includes "../sse/scale_y.h" once. All three
 * use the non-clamping OUTPUT*_NOCLIP stores.
 * NOTE(review): the configuration macros are redefined for every group
 * without an #undef in this file -- presumably scale_y.h undefines them
 * at its end; confirm.
 */

/* scale_float_x_1_y_quadratic_sse */

#define FUNC_NAME scale_float_x_1_y_quadratic_sse
#define INIT_GLOBAL INIT_FLOAT_GLOBAL_1
#define INIT INIT_FLOAT
#define ACCUM ACCUM_FLOAT
#define OUTPUT OUTPUT_FLOAT_NOCLIP
#define INIT_C INIT_C_FLOAT
#define ACCUM_C ACCUM_C_FLOAT
#define OUTPUT_C OUTPUT_C_FLOAT_NOCLIP

#include "../sse/scale_y.h"

/* scale_float_x_2_y_quadratic_sse */

#define FUNC_NAME scale_float_x_2_y_quadratic_sse
#define INIT_GLOBAL INIT_FLOAT_GLOBAL_2
#define INIT INIT_FLOAT
#define ACCUM ACCUM_FLOAT
#define OUTPUT OUTPUT_FLOAT_NOCLIP
#define INIT_C INIT_C_FLOAT
#define ACCUM_C ACCUM_C_FLOAT
#define OUTPUT_C OUTPUT_C_FLOAT_NOCLIP

#include "../sse/scale_y.h"

/* scale_float_x_4_y_quadratic_sse */

#define FUNC_NAME scale_float_x_4_y_quadratic_sse
#define INIT_GLOBAL INIT_FLOAT_GLOBAL_4
#define INIT INIT_FLOAT
#define ACCUM ACCUM_FLOAT
#define OUTPUT OUTPUT_FLOAT_NOCLIP
#define INIT_C INIT_C_FLOAT
#define ACCUM_C ACCUM_C_FLOAT
#define OUTPUT_C OUTPUT_C_FLOAT_NOCLIP

#include "../sse/scale_y.h"
256
/*
 * Generic vertical scalers.
 *
 * Each group configures and includes "../sse/scale_y.h" once. All three
 * use the clamping OUTPUT_FLOAT / OUTPUT_C_FLOAT stores.
 * NOTE(review): the configuration macros are redefined for every group
 * without an #undef in this file -- presumably scale_y.h undefines them
 * at its end; confirm.
 */

/* scale_float_x_1_y_generic_sse */

#define FUNC_NAME scale_float_x_1_y_generic_sse
#define INIT_GLOBAL INIT_FLOAT_GLOBAL_1
#define INIT INIT_FLOAT
#define ACCUM ACCUM_FLOAT
#define OUTPUT OUTPUT_FLOAT
#define INIT_C INIT_C_FLOAT
#define ACCUM_C ACCUM_C_FLOAT
#define OUTPUT_C OUTPUT_C_FLOAT

#include "../sse/scale_y.h"

/* scale_float_x_2_y_generic_sse */

#define FUNC_NAME scale_float_x_2_y_generic_sse
#define INIT_GLOBAL INIT_FLOAT_GLOBAL_2
#define INIT INIT_FLOAT
#define ACCUM ACCUM_FLOAT
#define OUTPUT OUTPUT_FLOAT
#define INIT_C INIT_C_FLOAT
#define ACCUM_C ACCUM_C_FLOAT
#define OUTPUT_C OUTPUT_C_FLOAT

#include "../sse/scale_y.h"

/* scale_float_x_4_y_generic_sse */

#define FUNC_NAME scale_float_x_4_y_generic_sse
#define INIT_GLOBAL INIT_FLOAT_GLOBAL_4
#define INIT INIT_FLOAT
#define ACCUM ACCUM_FLOAT
#define OUTPUT OUTPUT_FLOAT
#define INIT_C INIT_C_FLOAT
#define ACCUM_C ACCUM_C_FLOAT
#define OUTPUT_C OUTPUT_C_FLOAT

#include "../sse/scale_y.h"
305
/*
 * Installs the SSE quadratic vertical scalers into the scale_float_x_1,
 * scale_float_x_2 and scale_float_x_4 slots of tab->funcs_y.
 *
 *   tab:         function table to fill in.
 *   src_advance: not used by this function.
 *   dst_advance: not used by this function.
 */
void gavl_init_scale_funcs_quadratic_y_sse(gavl_scale_funcs_t * tab,
                                           int src_advance, int dst_advance)
{
  tab->funcs_y.scale_float_x_1 = scale_float_x_1_y_quadratic_sse;
  tab->funcs_y.scale_float_x_2 = scale_float_x_2_y_quadratic_sse;
  tab->funcs_y.scale_float_x_4 = scale_float_x_4_y_quadratic_sse;
}
313
/*
 * Installs the clamping SSE bicubic vertical scalers into the
 * scale_float_x_1, scale_float_x_2 and scale_float_x_4 slots of
 * tab->funcs_y.
 *
 *   tab:         function table to fill in.
 *   src_advance: not used by this function.
 *   dst_advance: not used by this function.
 */
void gavl_init_scale_funcs_bicubic_y_sse(gavl_scale_funcs_t * tab,
                                         int src_advance, int dst_advance)
{
  tab->funcs_y.scale_float_x_1 = scale_float_x_1_y_bicubic_sse;
  tab->funcs_y.scale_float_x_2 = scale_float_x_2_y_bicubic_sse;
  tab->funcs_y.scale_float_x_4 = scale_float_x_4_y_bicubic_sse;
}
321
/*
 * Installs the non-clamping ("noclip") SSE bicubic vertical scalers into
 * the scale_float_x_1, scale_float_x_2 and scale_float_x_4 slots of
 * tab->funcs_y.
 *
 *   tab:         function table to fill in.
 *   src_advance: not used by this function.
 *   dst_advance: not used by this function.
 */
void gavl_init_scale_funcs_bicubic_y_noclip_sse(gavl_scale_funcs_t * tab,
                                                int src_advance, int dst_advance)
{
  tab->funcs_y.scale_float_x_1 = scale_float_x_1_y_bicubic_noclip_sse;
  tab->funcs_y.scale_float_x_2 = scale_float_x_2_y_bicubic_noclip_sse;
  tab->funcs_y.scale_float_x_4 = scale_float_x_4_y_bicubic_noclip_sse;
}
329
/*
 * Installs the SSE generic vertical scalers into the scale_float_x_1,
 * scale_float_x_2 and scale_float_x_4 slots of tab->funcs_y.
 *
 *   tab:         function table to fill in.
 *   src_advance: not used by this function.
 *   dst_advance: not used by this function.
 */
void gavl_init_scale_funcs_generic_y_sse(gavl_scale_funcs_t * tab,
                                         int src_advance, int dst_advance)
{
  tab->funcs_y.scale_float_x_1 = scale_float_x_1_y_generic_sse;
  tab->funcs_y.scale_float_x_2 = scale_float_x_2_y_generic_sse;
  tab->funcs_y.scale_float_x_4 = scale_float_x_4_y_generic_sse;
}
337
/*
 * Bilinear vertical scalers.
 *
 * Each group sets FUNC_NAME and includes "scale_y_linear.h" once.
 * NOTE(review): only FUNC_NAME is defined here -- any further configuration
 * the header needs, and the #undef of FUNC_NAME between groups, is assumed
 * to be handled elsewhere; confirm against scale_y_linear.h.
 */

/* scale_float_x_1_y_bilinear_sse */

#define FUNC_NAME scale_float_x_1_y_bilinear_sse

#include "scale_y_linear.h"

/* scale_float_x_2_y_bilinear_sse */

#define FUNC_NAME scale_float_x_2_y_bilinear_sse

#include "scale_y_linear.h"

/* scale_float_x_4_y_bilinear_sse */

#define FUNC_NAME scale_float_x_4_y_bilinear_sse

#include "scale_y_linear.h"

/* scale_float_x_3_y_bilinear_sse */

#define FUNC_NAME scale_float_x_3_y_bilinear_sse

#include "scale_y_linear.h"
365
void gavl_init_scale_funcs_bilinear_y_sse(gavl_scale_funcs_t * tab,
366
int src_advance, int dst_advance)
369
tab->funcs_y.scale_float_x_1 = scale_float_x_1_y_bilinear_sse;
370
tab->funcs_y.scale_float_x_2 = scale_float_x_2_y_bilinear_sse;
371
tab->funcs_y.scale_float_x_3 = scale_float_x_3_y_bilinear_sse;
372
tab->funcs_y.scale_float_x_4 = scale_float_x_4_y_bilinear_sse;