25
25
* Started from sample code by Juan J. Sierralta P.
28
29
#include <stdlib.h>
30
31
#include <string.h>
32
33
#include <unistd.h>
35
37
#include "libavutil/cpu.h"
36
38
#include "libavutil/common.h"
37
39
#include "libavutil/lfg.h"
40
#include "libavutil/time.h"
39
42
#include "simple_idct.h"
40
43
#include "aandcttab.h"
48
void ff_mmx_idct(DCTELEM *data);
49
void ff_mmxext_idct(DCTELEM *data);
51
void odivx_idct_c(short *block);
54
52
void ff_bfin_idct(DCTELEM *block);
55
53
void ff_bfin_fdct(DCTELEM *block);
58
void fdct_altivec(DCTELEM *block);
59
//void idct_altivec(DCTELEM *block);?? no routine
56
void ff_fdct_altivec(DCTELEM *block);
62
59
void ff_j_rev_dct_arm(DCTELEM *data);
79
#ifndef FAAN_POSTSCALE
80
#define FAAN_SCALE SCALE_PERM
82
#define FAAN_SCALE NO_PERM
85
76
static int cpu_flags;
87
78
static const struct algo fdct_tab[] = {
88
79
{ "REF-DBL", ff_ref_fdct, NO_PERM },
89
{ "FAAN", ff_faandct, FAAN_SCALE },
90
{ "IJG-AAN-INT", fdct_ifast, SCALE_PERM },
80
{ "FAAN", ff_faandct, NO_PERM },
81
{ "IJG-AAN-INT", ff_fdct_ifast, SCALE_PERM },
91
82
{ "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM },
94
85
{ "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },
95
{ "MMX2", ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMX2 },
86
{ "MMXEXT", ff_fdct_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT },
96
87
{ "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 },
100
{ "altivecfdct", fdct_altivec, NO_PERM, AV_CPU_FLAG_ALTIVEC },
91
{ "altivecfdct", ff_fdct_altivec, NO_PERM, AV_CPU_FLAG_ALTIVEC },
110
101
static const struct algo idct_tab[] = {
111
102
{ "FAANI", ff_faanidct, NO_PERM },
112
103
{ "REF-DBL", ff_ref_idct, NO_PERM },
113
{ "INT", j_rev_dct, MMX_PERM },
104
{ "INT", ff_j_rev_dct, MMX_PERM },
114
105
{ "SIMPLE-C", ff_simple_idct_8, NO_PERM },
118
{ "LIBMPEG2-MMX", ff_mmx_idct, MMX_PERM, AV_CPU_FLAG_MMX, 1 },
119
{ "LIBMPEG2-MMX2", ff_mmxext_idct, MMX_PERM, AV_CPU_FLAG_MMX2, 1 },
121
108
{ "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
122
109
{ "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 },
123
{ "XVID-MMX2", ff_idct_xvid_mmx2, NO_PERM, AV_CPU_FLAG_MMX2, 1 },
110
{ "XVID-MMXEXT", ff_idct_xvid_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 },
124
111
{ "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
133
120
{ "INT-ARM", ff_j_rev_dct_arm, MMX_PERM },
136
{ "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM },
123
{ "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM, AV_CPU_FLAG_ARMV5TE },
139
{ "SIMPLE-ARMV6", ff_simple_idct_armv6, MMX_PERM },
126
{ "SIMPLE-ARMV6", ff_simple_idct_armv6, MMX_PERM, AV_CPU_FLAG_ARMV6 },
142
{ "SIMPLE-NEON", ff_simple_idct_neon, PARTTRANS_PERM },
129
{ "SIMPLE-NEON", ff_simple_idct_neon, PARTTRANS_PERM, AV_CPU_FLAG_NEON },
189
169
DECLARE_ALIGNED(16, static DCTELEM, block)[64];
190
170
DECLARE_ALIGNED(8, static DCTELEM, block1)[64];
192
static inline void mmx_emms(void)
195
if (cpu_flags & AV_CPU_FLAG_MMX)
196
__asm__ volatile ("emms\n\t");
200
172
static void init_block(DCTELEM block[64], int test, int is_idct, AVLFG *prng)
328
300
init_block(block, test, is_idct, &prng);
329
301
permute(block1, block, dct->format);
334
306
for (it = 0; it < NB_ITS_SPEED; it++) {
478
450
idct248_put(img_dest, 8, block);
480
452
it1 += NB_ITS_SPEED;
481
ti1 = gettime() - ti;
453
ti1 = av_gettime() - ti;
482
454
} while (ti1 < 1000000);
485
457
printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
486
458
(double) it1 * 1000.0 / (double) ti1);