45
static int quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] )
48
for( i = 0; i < 64; i++ )
49
QUANT_ONE( dct[i], mf[i], bias[i] );
53
static int quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] )
56
for( i = 0; i < 16; i++ )
57
QUANT_ONE( dct[i], mf[i], bias[i] );
61
static int quant_4x4_dc( int16_t dct[16], int mf, int bias )
64
for( i = 0; i < 16; i++ )
45
static int quant_8x8( dctcoef dct[64], uint16_t mf[64], uint16_t bias[64] )
48
for( int i = 0; i < 64; i++ )
49
QUANT_ONE( dct[i], mf[i], bias[i] );
53
static int quant_4x4( dctcoef dct[16], uint16_t mf[16], uint16_t bias[16] )
56
for( int i = 0; i < 16; i++ )
57
QUANT_ONE( dct[i], mf[i], bias[i] );
61
static int quant_4x4_dc( dctcoef dct[16], int mf, int bias )
64
for( int i = 0; i < 16; i++ )
65
65
QUANT_ONE( dct[i], mf, bias );
69
static int quant_2x2_dc( int16_t dct[4], int mf, int bias )
69
static int quant_2x2_dc( dctcoef dct[4], int mf, int bias )
72
72
QUANT_ONE( dct[0], mf, bias );
82
82
#define DEQUANT_SHR( x ) \
83
83
dct[x] = ( dct[x] * dequant_mf[i_mf][x] + f ) >> (-i_qbits)
85
static void dequant_4x4( int16_t dct[16], int dequant_mf[6][16], int i_qp )
85
static void dequant_4x4( dctcoef dct[16], int dequant_mf[6][16], int i_qp )
87
87
const int i_mf = i_qp%6;
88
88
const int i_qbits = i_qp/6 - 4;
93
for( i = 0; i < 16; i++ )
92
for( int i = 0; i < 16; i++ )
98
97
const int f = 1 << (-i_qbits-1);
99
for( i = 0; i < 16; i++ )
98
for( int i = 0; i < 16; i++ )
104
static void dequant_8x8( int16_t dct[64], int dequant_mf[6][64], int i_qp )
103
static void dequant_8x8( dctcoef dct[64], int dequant_mf[6][64], int i_qp )
106
105
const int i_mf = i_qp%6;
107
106
const int i_qbits = i_qp/6 - 6;
110
108
if( i_qbits >= 0 )
112
for( i = 0; i < 64; i++ )
110
for( int i = 0; i < 64; i++ )
113
111
DEQUANT_SHL( i );
117
115
const int f = 1 << (-i_qbits-1);
118
for( i = 0; i < 64; i++ )
116
for( int i = 0; i < 64; i++ )
119
117
DEQUANT_SHR( i );
123
static void dequant_4x4_dc( int16_t dct[16], int dequant_mf[6][16], int i_qp )
121
static void dequant_4x4_dc( dctcoef dct[16], int dequant_mf[6][16], int i_qp )
125
123
const int i_qbits = i_qp/6 - 6;
128
125
if( i_qbits >= 0 )
130
127
const int i_dmf = dequant_mf[i_qp%6][0] << i_qbits;
131
for( i = 0; i < 16; i++ )
128
for( int i = 0; i < 16; i++ )
136
133
const int i_dmf = dequant_mf[i_qp%6][0];
137
134
const int f = 1 << (-i_qbits-1);
138
for( i = 0; i < 16; i++ )
135
for( int i = 0; i < 16; i++ )
139
136
dct[i] = ( dct[i] * i_dmf + f ) >> (-i_qbits);
143
static void x264_denoise_dct( int16_t *dct, uint32_t *sum, uint16_t *offset, int size )
140
static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, uint16_t *offset, int size )
146
for( i=1; i<size; i++ )
142
for( int i = 1; i < size; i++ )
148
144
int level = dct[i];
149
145
int sign = level>>15;
163
159
* chroma: for the complete mb: if score < 7 -> null
166
const uint8_t x264_decimate_table4[16] = {
167
3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 };
168
const uint8_t x264_decimate_table8[64] = {
162
const uint8_t x264_decimate_table4[16] =
164
3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0
166
const uint8_t x264_decimate_table8[64] =
169
168
3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
170
169
1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
171
170
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
172
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
171
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
174
static int ALWAYS_INLINE x264_decimate_score_internal( int16_t *dct, int i_max )
174
static int ALWAYS_INLINE x264_decimate_score_internal( dctcoef *dct, int i_max )
176
176
const uint8_t *ds_table = (i_max == 64) ? x264_decimate_table8 : x264_decimate_table4;
178
178
int idx = i_max - 1;
180
180
/* Yes, dct[idx-1] is guaranteed to be 32-bit aligned. idx>=0 instead of 1 works correctly for the same reason */
181
while( idx >= 0 && M32( &dct[idx-1] ) == 0 )
181
while( idx >= 0 && MDCT_X2( &dct[idx-1] ) == 0 )
183
183
if( idx >= 0 && dct[idx] == 0 )
204
static int x264_decimate_score15( int16_t *dct )
204
static int x264_decimate_score15( dctcoef *dct )
206
206
return x264_decimate_score_internal( dct+1, 15 );
208
static int x264_decimate_score16( int16_t *dct )
208
static int x264_decimate_score16( dctcoef *dct )
210
210
return x264_decimate_score_internal( dct, 16 );
212
static int x264_decimate_score64( int16_t *dct )
212
static int x264_decimate_score64( dctcoef *dct )
214
214
return x264_decimate_score_internal( dct, 64 );
217
static int ALWAYS_INLINE x264_coeff_last_internal( int16_t *l, int i_count )
217
static int ALWAYS_INLINE x264_coeff_last_internal( dctcoef *l, int i_count )
220
220
for( i_last = i_count-1; i_last >= 3; i_last -= 4 )
228
static int x264_coeff_last4( int16_t *l )
228
static int x264_coeff_last4( dctcoef *l )
230
230
return x264_coeff_last_internal( l, 4 );
232
static int x264_coeff_last15( int16_t *l )
232
static int x264_coeff_last15( dctcoef *l )
234
234
return x264_coeff_last_internal( l, 15 );
236
static int x264_coeff_last16( int16_t *l )
236
static int x264_coeff_last16( dctcoef *l )
238
238
return x264_coeff_last_internal( l, 16 );
240
static int x264_coeff_last64( int16_t *l )
240
static int x264_coeff_last64( dctcoef *l )
242
242
return x264_coeff_last_internal( l, 64 );
245
245
#define level_run(num)\
246
static int x264_coeff_level_run##num( int16_t *dct, x264_run_level_t *runlevel )\
246
static int x264_coeff_level_run##num( dctcoef *dct, x264_run_level_t *runlevel )\
248
248
int i_last = runlevel->last = x264_coeff_last##num(dct);\
249
249
int i_total = 0;\
308
308
if( cpu&X264_CPU_MMXEXT )
310
310
pf->quant_2x2_dc = x264_quant_2x2_dc_mmxext;
312
312
pf->quant_4x4_dc = x264_quant_4x4_dc_mmxext;
313
313
pf->decimate_score15 = x264_decimate_score15_mmxext;
314
314
pf->decimate_score16 = x264_decimate_score16_mmxext;
315
if( cpu&X264_CPU_SLOW_CTZ )
317
pf->decimate_score15 = x264_decimate_score15_mmxext_slowctz;
318
pf->decimate_score16 = x264_decimate_score16_mmxext_slowctz;
315
320
pf->decimate_score64 = x264_decimate_score64_mmxext;
316
321
pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_mmxext;
317
322
pf->coeff_last[ DCT_LUMA_4x4] = x264_coeff_last16_mmxext;