4
* Copyright (C) 1994-1996, Thomas G. Lane.
5
* This file is part of the Independent JPEG Group's software.
6
* For conditions of distribution and use, see the accompanying README file.
8
* This file contains the forward-DCT management logic.
9
* This code selects a particular DCT implementation to be used,
10
* and it performs related housekeeping chores including coefficient
14
#define JPEG_INTERNALS
17
#include "jdct.h" /* Private declarations for DCT subsystem */
20
/* Private subobject for this module */
23
struct jpeg_forward_dct pub; /* public fields */
25
/* Pointer to the DCT routine actually in use */
26
forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
28
/* The actual post-DCT divisors --- not identical to the quant table
29
* entries, because of scaling (especially for an unnormalized DCT).
30
* Each table is given in normal array order.
32
DCTELEM * divisors[NUM_QUANT_TBLS];
34
#ifdef DCT_FLOAT_SUPPORTED
35
/* Same as above for the floating-point case. */
36
float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
37
FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
41
typedef my_fdct_controller * my_fdct_ptr;
44
/* The current scaled-DCT routines require ISLOW-style divisor tables,
45
* so be sure to compile that code if either ISLOW or SCALING is requested.
47
#ifdef DCT_ISLOW_SUPPORTED
48
#define PROVIDE_ISLOW_TABLES
50
#ifdef DCT_SCALING_SUPPORTED
51
#define PROVIDE_ISLOW_TABLES
57
* Perform forward DCT on one or more blocks of a component.
59
* The input samples are taken from the sample_data[] array starting at
60
* position start_row/start_col, and moving to the right for any additional
61
* blocks. The quantized coefficients are returned in coef_blocks[].
65
forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
66
JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
67
JDIMENSION start_row, JDIMENSION start_col,
68
JDIMENSION num_blocks)
69
/* This version is used for integer DCT implementations. */
71
/* This routine is heavily used, so it's worth coding it tightly. */
72
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
73
forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
74
DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
75
DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
78
sample_data += start_row; /* fold in the vertical offset once */
80
for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
82
(*do_dct) (workspace, sample_data, start_col);
84
/* Quantize/descale the coefficients, and store into coef_blocks[] */
85
{ register DCTELEM temp, qval;
87
register JCOEFPTR output_ptr = coef_blocks[bi];
89
for (i = 0; i < DCTSIZE2; i++) {
92
/* Divide the coefficient value by qval, ensuring proper rounding.
93
* Since C does not specify the direction of rounding for negative
94
* quotients, we have to force the dividend positive for portability.
96
* In most files, at least half of the output values will be zero
97
* (at default quantization settings, more like three-quarters...)
98
* so we should ensure that this case is fast. On many machines,
99
* a comparison is enough cheaper than a divide to make a special test
100
* a win. Since both inputs will be nonnegative, we need only test
101
* for a < b to discover whether a/b is 0.
102
* If your machine's division is fast enough, define FAST_DIVIDE.
105
#define DIVIDE_BY(a,b) a /= b
107
#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
111
temp += qval>>1; /* for rounding */
112
DIVIDE_BY(temp, qval);
115
temp += qval>>1; /* for rounding */
116
DIVIDE_BY(temp, qval);
118
output_ptr[i] = (JCOEF) temp;
125
#ifdef DCT_FLOAT_SUPPORTED
128
forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
129
JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
130
JDIMENSION start_row, JDIMENSION start_col,
131
JDIMENSION num_blocks)
132
/* This version is used for floating-point DCT implementations. */
134
/* This routine is heavily used, so it's worth coding it tightly. */
135
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
136
float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
137
FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
138
FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
141
sample_data += start_row; /* fold in the vertical offset once */
143
for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
144
/* Perform the DCT */
145
(*do_dct) (workspace, sample_data, start_col);
147
/* Quantize/descale the coefficients, and store into coef_blocks[] */
148
{ register FAST_FLOAT temp;
150
register JCOEFPTR output_ptr = coef_blocks[bi];
152
for (i = 0; i < DCTSIZE2; i++) {
153
/* Apply the quantization and scaling factor */
154
temp = workspace[i] * divisors[i];
155
/* Round to nearest integer.
156
* Since C does not specify the direction of rounding for negative
157
* quotients, we have to force the dividend positive for portability.
158
* The maximum coefficient size is +-16K (for 12-bit data), so this
159
* code should work for either 16-bit or 32-bit ints.
161
output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
167
#endif /* DCT_FLOAT_SUPPORTED */
171
* Initialize for a processing pass.
172
* Verify that all referenced Q-tables are present, and set up
173
* the divisor table for each one.
174
* In the current implementation, DCT of all components is done during
175
* the first pass, even if only some components will be output in the
176
* first scan. Hence all components should be examined here.
180
start_pass_fdctmgr (j_compress_ptr cinfo)
182
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
184
jpeg_component_info *compptr;
189
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
191
/* Select the proper DCT routine for this component's scaling */
192
switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
193
#ifdef DCT_SCALING_SUPPORTED
195
fdct->do_dct[ci] = jpeg_fdct_1x1;
196
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
199
fdct->do_dct[ci] = jpeg_fdct_2x2;
200
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
203
fdct->do_dct[ci] = jpeg_fdct_3x3;
204
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
207
fdct->do_dct[ci] = jpeg_fdct_4x4;
208
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
211
fdct->do_dct[ci] = jpeg_fdct_5x5;
212
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
215
fdct->do_dct[ci] = jpeg_fdct_6x6;
216
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
219
fdct->do_dct[ci] = jpeg_fdct_7x7;
220
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
223
fdct->do_dct[ci] = jpeg_fdct_9x9;
224
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
226
case ((10 << 8) + 10):
227
fdct->do_dct[ci] = jpeg_fdct_10x10;
228
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
230
case ((11 << 8) + 11):
231
fdct->do_dct[ci] = jpeg_fdct_11x11;
232
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
234
case ((12 << 8) + 12):
235
fdct->do_dct[ci] = jpeg_fdct_12x12;
236
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
238
case ((13 << 8) + 13):
239
fdct->do_dct[ci] = jpeg_fdct_13x13;
240
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
242
case ((14 << 8) + 14):
243
fdct->do_dct[ci] = jpeg_fdct_14x14;
244
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
246
case ((15 << 8) + 15):
247
fdct->do_dct[ci] = jpeg_fdct_15x15;
248
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
250
case ((16 << 8) + 16):
251
fdct->do_dct[ci] = jpeg_fdct_16x16;
252
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
254
case ((16 << 8) + 8):
255
fdct->do_dct[ci] = jpeg_fdct_16x8;
256
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
258
case ((14 << 8) + 7):
259
fdct->do_dct[ci] = jpeg_fdct_14x7;
260
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
262
case ((12 << 8) + 6):
263
fdct->do_dct[ci] = jpeg_fdct_12x6;
264
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
266
case ((10 << 8) + 5):
267
fdct->do_dct[ci] = jpeg_fdct_10x5;
268
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
271
fdct->do_dct[ci] = jpeg_fdct_8x4;
272
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
275
fdct->do_dct[ci] = jpeg_fdct_6x3;
276
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
279
fdct->do_dct[ci] = jpeg_fdct_4x2;
280
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
283
fdct->do_dct[ci] = jpeg_fdct_2x1;
284
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
286
case ((8 << 8) + 16):
287
fdct->do_dct[ci] = jpeg_fdct_8x16;
288
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
290
case ((7 << 8) + 14):
291
fdct->do_dct[ci] = jpeg_fdct_7x14;
292
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
294
case ((6 << 8) + 12):
295
fdct->do_dct[ci] = jpeg_fdct_6x12;
296
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
298
case ((5 << 8) + 10):
299
fdct->do_dct[ci] = jpeg_fdct_5x10;
300
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
303
fdct->do_dct[ci] = jpeg_fdct_4x8;
304
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
307
fdct->do_dct[ci] = jpeg_fdct_3x6;
308
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
311
fdct->do_dct[ci] = jpeg_fdct_2x4;
312
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
315
fdct->do_dct[ci] = jpeg_fdct_1x2;
316
method = JDCT_ISLOW; /* jfdctint uses islow-style table */
319
case ((DCTSIZE << 8) + DCTSIZE):
320
switch (cinfo->dct_method) {
321
#ifdef DCT_ISLOW_SUPPORTED
323
fdct->do_dct[ci] = jpeg_fdct_islow;
327
#ifdef DCT_IFAST_SUPPORTED
329
fdct->do_dct[ci] = jpeg_fdct_ifast;
333
#ifdef DCT_FLOAT_SUPPORTED
335
fdct->do_float_dct[ci] = jpeg_fdct_float;
340
ERREXIT(cinfo, JERR_NOT_COMPILED);
345
ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
346
compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
349
qtblno = compptr->quant_tbl_no;
350
/* Make sure specified quantization table is present */
351
if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
352
cinfo->quant_tbl_ptrs[qtblno] == NULL)
353
ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
354
qtbl = cinfo->quant_tbl_ptrs[qtblno];
355
/* Compute divisors for this quant table */
356
/* We may do this more than once for same table, but it's not a big deal */
358
#ifdef PROVIDE_ISLOW_TABLES
360
/* For LL&M IDCT method, divisors are equal to raw quantization
361
* coefficients multiplied by 8 (to counteract scaling).
363
if (fdct->divisors[qtblno] == NULL) {
364
fdct->divisors[qtblno] = (DCTELEM *)
365
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
366
DCTSIZE2 * SIZEOF(DCTELEM));
368
dtbl = fdct->divisors[qtblno];
369
for (i = 0; i < DCTSIZE2; i++) {
370
dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
372
fdct->pub.forward_DCT[ci] = forward_DCT;
375
#ifdef DCT_IFAST_SUPPORTED
378
/* For AA&N IDCT method, divisors are equal to quantization
379
* coefficients scaled by scalefactor[row]*scalefactor[col], where
381
* scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
382
* We apply a further scale factor of 8.
384
#define CONST_BITS 14
385
static const INT16 aanscales[DCTSIZE2] = {
386
/* precomputed values scaled up by 14 bits */
387
16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
388
22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
389
21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
390
19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
391
16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
392
12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
393
8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
394
4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
398
if (fdct->divisors[qtblno] == NULL) {
399
fdct->divisors[qtblno] = (DCTELEM *)
400
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
401
DCTSIZE2 * SIZEOF(DCTELEM));
403
dtbl = fdct->divisors[qtblno];
404
for (i = 0; i < DCTSIZE2; i++) {
406
DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
407
(INT32) aanscales[i]),
411
fdct->pub.forward_DCT[ci] = forward_DCT;
414
#ifdef DCT_FLOAT_SUPPORTED
417
/* For float AA&N IDCT method, divisors are equal to quantization
418
* coefficients scaled by scalefactor[row]*scalefactor[col], where
420
* scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
421
* We apply a further scale factor of 8.
422
* What's actually stored is 1/divisor so that the inner loop can
423
* use a multiplication rather than a division.
427
static const double aanscalefactor[DCTSIZE] = {
428
1.0, 1.387039845, 1.306562965, 1.175875602,
429
1.0, 0.785694958, 0.541196100, 0.275899379
432
if (fdct->float_divisors[qtblno] == NULL) {
433
fdct->float_divisors[qtblno] = (FAST_FLOAT *)
434
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
435
DCTSIZE2 * SIZEOF(FAST_FLOAT));
437
fdtbl = fdct->float_divisors[qtblno];
439
for (row = 0; row < DCTSIZE; row++) {
440
for (col = 0; col < DCTSIZE; col++) {
441
fdtbl[i] = (FAST_FLOAT)
442
(1.0 / (((double) qtbl->quantval[i] *
443
aanscalefactor[row] * aanscalefactor[col] * 8.0)));
448
fdct->pub.forward_DCT[ci] = forward_DCT_float;
452
ERREXIT(cinfo, JERR_NOT_COMPILED);
460
* Initialize FDCT manager.
464
jinit_forward_dct (j_compress_ptr cinfo)
470
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
471
SIZEOF(my_fdct_controller));
472
cinfo->fdct = (struct jpeg_forward_dct *) fdct;
473
fdct->pub.start_pass = start_pass_fdctmgr;
475
/* Mark divisor tables unallocated */
476
for (i = 0; i < NUM_QUANT_TBLS; i++) {
477
fdct->divisors[i] = NULL;
478
#ifdef DCT_FLOAT_SUPPORTED
479
fdct->float_divisors[i] = NULL;