/* Element type shared by all forward/inverse DCT routines below. */
typedef short DCTELEM;

/* Fast integer forward DCTs; the *248 variants presumably operate on
 * 2x(4x8) interlaced blocks — confirm against the implementation. */
void ff_fdct_ifast(DCTELEM *data);
void ff_fdct_ifast248(DCTELEM *data);

/* Accurate ("islow") forward DCTs for 8- and 10-bit depths. */
void ff_jpeg_fdct_islow_8(DCTELEM *data);
void ff_jpeg_fdct_islow_10(DCTELEM *data);
void ff_fdct248_islow_8(DCTELEM *data);
void ff_fdct248_islow_10(DCTELEM *data);
48
void j_rev_dct (DCTELEM *data);
49
void j_rev_dct4 (DCTELEM *data);
50
void j_rev_dct2 (DCTELEM *data);
51
void j_rev_dct1 (DCTELEM *data);
48
void ff_j_rev_dct (DCTELEM *data);
52
49
void ff_wmv2_idct_c(DCTELEM *data);
54
51
void ff_fdct_mmx(DCTELEM *block);
55
void ff_fdct_mmx2(DCTELEM *block);
52
void ff_fdct_mmxext(DCTELEM *block);
56
53
void ff_fdct_sse2(DCTELEM *block);
58
55
#define H264_IDCT(depth) \
104
101
#define ff_put_pixels16x16_c ff_put_pixels16x16_8_c
105
102
#define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c
107
/* VP3 DSP functions */
108
void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
109
void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
110
void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
111
void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/);
113
void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
114
void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
117
void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
/* RV40 functions */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride);
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride);
197
182
const uint8_t *scantable;
198
183
uint8_t permutated[64];
199
184
uint8_t raster_end[64];
201
/** Used by dct_quantize_altivec to find last-non-zero */
202
DECLARE_ALIGNED(16, uint8_t, inverse)[64];
206
187
void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
207
188
void ff_init_scantable_permutation(uint8_t *idct_permutation,
208
189
int idct_permutation_type);
/* Declares the bit-depth-specific emulated_edge_mc prototype. */
#define EMULATED_EDGE(depth) \
void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\
                         int block_w, int block_h,\
                         int src_x, int src_y, int w, int h);
219
void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
220
void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
221
void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
239
207
void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size);
240
208
int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/);
242
* Motion estimation with emulated edge values.
243
* @param buf pointer to destination buffer (unaligned)
244
* @param src pointer to pixel source (unaligned)
245
* @param linesize width (in pixels) for src/buf
246
* @param block_w number of pixels (per row) to copy to buf
247
* @param block_h nummber of pixel rows to copy to buf
248
* @param src_x offset of src to start of row - this may be negative
249
* @param src_y offset of src to top of image - this may be negative
250
* @param w width of src in pixels
251
* @param h height of src in pixels
253
void (*emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize,
254
int block_w, int block_h,
255
int src_x, int src_y, int w, int h);
257
210
* translational global motion compensation.
259
212
void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
388
340
void (*add_hfyu_median_prediction)(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top);
389
341
int (*add_hfyu_left_prediction)(uint8_t *dst, const uint8_t *src, int w, int left);
390
342
void (*add_hfyu_left_prediction_bgr32)(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha);
391
/* this might write to dst[w] */
392
void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
393
343
void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
394
344
void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len);
399
349
void (*h261_loop_filter)(uint8_t *src, int stride);
401
void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale);
402
void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale);
404
void (*vp3_idct_dc_add)(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/);
405
void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
406
void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
408
351
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
409
352
void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
410
void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
411
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
412
void (*vector_fmul)(float *dst, const float *src0, const float *src1, int len);
353
/* assume len is a multiple of 16, and arrays are 32-byte aligned */
413
354
void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
414
355
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
415
356
void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
418
359
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
419
360
void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
/**
 * Multiply a vector of floats by a scalar float. Source and
 * destination vectors must overlap exactly or not at all.
 * @param dst result vector, 16-byte aligned
 * @param src input vector, 16-byte aligned
 * @param mul scalar value
 * @param len length of vector, multiple of 4
 */
void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
                           int len);
/**
 * Multiply a vector of floats by a scalar float and add to
 * destination vector. Source and destination vectors must
 * overlap exactly or not at all.
 * @param dst result vector, 16-byte aligned
 * @param src input vector, 16-byte aligned
 * @param mul scalar value
 * @param len length of vector, multiple of 4
 */
void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
                           int len);
/**
 * Calculate the scalar product of two vectors of floats.
 * @param v1 first vector, 16-byte aligned
 * @param v2 second vector, 16-byte aligned
 */
/* NOTE(review): the member this comment documents was lost in the chunk;
 * reconstructed from ff_scalarproduct_float_c below — verify signature. */
float (*scalarproduct_float)(const float *v1, const float *v2, int len);
#define EDGE_TOP    1
#define EDGE_BOTTOM 2

void (*prefetch)(void *mem, int stride, int h);

void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);

/* mlp/truehd functions */
void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff,
                           int firorder, int iirorder,
                           unsigned int filter_shift, int32_t mask, int blocksize,
                           int32_t *sample_buffer);

/* intrax8 functions */
void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
                                      int * range, int * sum, int edges);
540
447
* Calculate scalar product of two vectors.
541
448
* @param len length of vectors, should be multiple of 16
542
* @param shift number of bits to discard from product
544
int32_t (*scalarproduct_int16)(const int16_t *v1, const int16_t *v2/*align 16*/, int len, int shift);
450
int32_t (*scalarproduct_int16)(const int16_t *v1, const int16_t *v2/*align 16*/, int len);
545
451
/* ape functions */
547
453
* Calculate scalar product of v1 and v2,
583
489
op_fill_func fill_block_tab[2];
586
void dsputil_static_init(void);
587
void dsputil_init(DSPContext* p, AVCodecContext *avctx);
492
void ff_dsputil_static_init(void);
493
void ff_dsputil_init(DSPContext* p, AVCodecContext *avctx);
589
495
int ff_check_alignment(void);
498
* Return the scalar product of two vectors.
500
* @param v1 first input vector
501
* @param v2 first input vector
502
* @param len number of elements
504
* @return sum of elementwise products
506
float ff_scalarproduct_float_c(const float *v1, const float *v2, int len);
592
509
* permute block according to permuatation.
593
510
* @param last last non zero element in scantable order
646
void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
647
void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
648
void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
649
void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
650
void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
651
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
652
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
653
void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
654
void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
563
void ff_dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
564
void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
565
void ff_dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
566
void ff_dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
567
void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
568
void ff_dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
569
void ff_dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
656
571
void ff_dsputil_init_dwt(DSPContext *c);
657
void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
658
void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
659
void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx);
573
/* Preferred byte alignment for image-buffer strides: 16 on SIMD-capable
 * targets, 8 elsewhere. */
#if (ARCH_ARM && HAVE_NEON) || ARCH_PPC || HAVE_MMX
#   define STRIDE_ALIGN 16
#else
#   define STRIDE_ALIGN 8
#endif
// Some broken preprocessors need a second expansion
// to be forced to tokenize __VA_ARGS__
#define E(x) x

/* Declare an aligned local array `v` of element type `t`: the _A flavor
 * over-allocates a byte buffer and rounds the pointer up with FFALIGN;
 * the _D flavor relies on the compiler's DECLARE_ALIGNED attribute. */
#define LOCAL_ALIGNED_A(a, t, v, s, o, ...)             \
    uint8_t la_##v[sizeof(t s o) + (a)];                \
    t (*v) o = (void *)FFALIGN((uintptr_t)la_##v, a)

#define LOCAL_ALIGNED_D(a, t, v, s, o, ...)             \
    DECLARE_ALIGNED(a, t, la_##v) s o;                  \
    t (*v) o = la_##v

#define LOCAL_ALIGNED(a, t, v, ...) E(LOCAL_ALIGNED_A(a, t, v, __VA_ARGS__,,))

#if HAVE_LOCAL_ALIGNED_8
#   define LOCAL_ALIGNED_8(t, v, ...) E(LOCAL_ALIGNED_D(8, t, v, __VA_ARGS__,,))
#else
#   define LOCAL_ALIGNED_8(t, v, ...) LOCAL_ALIGNED(8, t, v, __VA_ARGS__)
#endif

#if HAVE_LOCAL_ALIGNED_16
#   define LOCAL_ALIGNED_16(t, v, ...) E(LOCAL_ALIGNED_D(16, t, v, __VA_ARGS__,,))
#else
#   define LOCAL_ALIGNED_16(t, v, ...) LOCAL_ALIGNED(16, t, v, __VA_ARGS__)
#endif
/* Builds a 16-wide comparison function from an 8-wide one by summing the
 * results on the left and right 8-pixel halves. */
#define WRAPPER8_16(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    return name8(s, dst           , src           , stride, h)\
          +name8(s, dst+8         , src+8         , stride, h);\
}
707
605
#define WRAPPER8_16_SQ(name8, name16)\
708
606
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\