940
940
dataptr++; /* advance pointer to next column */
948
/*
 * j_rev_dct4: two-pass, in-place, fixed-point inverse DCT that writes four
 * outputs per row (pass 1) and four per column (pass 2). Rows are
 * DCTSTRIDE elements apart.
 *
 * Pass 1 stores DESCALE(x, CONST_BITS-PASS1_BITS) into dataptr[0..3].
 * Pass 2 stores x >> (CONST_BITS+PASS1_BITS+3) into dataptr[DCTSTRIDE*0..3].
 *
 * NOTE(review): this excerpt does not show the declarations of rowctr and
 * z1, the initialisation of dataptr (presumably from data), the pass 1 loads
 * of d0/d2/d4/d6, the tests that select among the four even-part variants,
 * the stores of the all-zero-AC shortcut, or any closing braces. The two
 * multi-line block comments inside the loops also have no visible
 * terminator. Confirm all of these against the full file.
 */
void j_rev_dct4(DCTBLOCK data)
950
int32_t tmp0, tmp1, tmp2, tmp3;
951
int32_t tmp10, tmp11, tmp12, tmp13;
953
int32_t d0, d2, d4, d6;
954
register DCTELEM *dataptr;
957
/* Pass 1: process rows. */
958
/* Note results are scaled up by sqrt(8) compared to a true IDCT; */
959
/* furthermore, we scale the results by 2**PASS1_BITS. */
965
for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
966
/* Due to quantization, we will usually find that many of the input
967
* coefficients are zero, especially the AC terms. We can exploit this
968
* by short-circuiting the IDCT calculation for any row in which all
969
* the AC terms are zero. In that case each output is equal to the
970
* DC coefficient (with scale factor as needed).
971
* With typical images and quantization tables, half or more of the
972
* row DCT calculations can be simplified this way.
975
register int *idataptr = (int*)dataptr;
982
if ((d2 | d4 | d6) == 0) {
983
/* AC terms all zero */
985
/* Compute a 32 bit value to assign. */
986
DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
987
register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
/* v carries the low 16 bits of dcval in both halves of a 32-bit word. The
 * stores that consume v are not visible in this excerpt.
 * NOTE(review): d0 << PASS1_BITS and dcval << 16 left-shift a signed value;
 * ISO C leaves that undefined when the value is negative. Confirm the
 * signedness of DCTELEM and whether negative inputs reach this path. */
993
dataptr += DCTSTRIDE; /* advance pointer to next row */
997
/* Even part: reverse the even part of the forward DCT. */
998
/* The rotator is sqrt(2)*c(-6). */
/* Four variants of the even part follow, one per zero/non-zero combination
 * of d2 and d6 (see the case comments). Each variant ends with the same
 * butterfly: tmp10/tmp13 = tmp0 +/- tmp3 and tmp11/tmp12 = tmp1 +/- tmp2.
 * NOTE(review): (d0 + d4) << CONST_BITS and (d0 - d4) << CONST_BITS shift
 * int32_t values that may be negative, which ISO C leaves undefined. */
1001
/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
1002
z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1003
tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1004
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1006
tmp0 = (d0 + d4) << CONST_BITS;
1007
tmp1 = (d0 - d4) << CONST_BITS;
1009
tmp10 = tmp0 + tmp3;
1010
tmp13 = tmp0 - tmp3;
1011
tmp11 = tmp1 + tmp2;
1012
tmp12 = tmp1 - tmp2;
1014
/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
/* General case with d2 = 0 folded in. Going by the constant names,
 * 0.541196100 - 1.847759065 = -1.306562965, hence the negated d6. */
1015
tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1016
tmp3 = MULTIPLY(d6, FIX_0_541196100);
1018
tmp0 = (d0 + d4) << CONST_BITS;
1019
tmp1 = (d0 - d4) << CONST_BITS;
1021
tmp10 = tmp0 + tmp3;
1022
tmp13 = tmp0 - tmp3;
1023
tmp11 = tmp1 + tmp2;
1024
tmp12 = tmp1 - tmp2;
1028
/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
/* General case with d6 = 0 folded in. Going by the constant names,
 * 0.541196100 + 0.765366865 = 1.306562965. */
1029
tmp2 = MULTIPLY(d2, FIX_0_541196100);
1030
tmp3 = MULTIPLY(d2, FIX_1_306562965);
1032
tmp0 = (d0 + d4) << CONST_BITS;
1033
tmp1 = (d0 - d4) << CONST_BITS;
1035
tmp10 = tmp0 + tmp3;
1036
tmp13 = tmp0 - tmp3;
1037
tmp11 = tmp1 + tmp2;
1038
tmp12 = tmp1 - tmp2;
1040
/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
/* With d2 and d6 both zero the rotator terms vanish, so the butterfly
 * collapses to the two shifted sums below. */
1041
tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
1042
tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
1046
/* Final output stage: only tmp10..tmp13 are consumed below. */
/* DESCALE is defined outside this excerpt; presumably a rounding right
 * shift - confirm. The shift count leaves the PASS1_BITS scaling in place
 * for pass 2. */
1048
dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
1049
dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
1050
dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
1051
dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
1053
dataptr += DCTSTRIDE; /* advance pointer to next row */
1056
/* Pass 2: process columns. */
1057
/* Note that we must descale the results by a factor of 8 == 2**3, */
1058
/* and also undo the PASS1_BITS scaling. */
/* rowctr is reused as the column counter in this loop: each iteration ends
 * with dataptr++ instead of a row advance.
 * NOTE(review): the reset of dataptr between the two passes is not visible
 * in this excerpt. */
1061
for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
1062
/* Columns of zeroes can be exploited in the same way as we did with rows.
1063
* However, the row calculation has created many nonzero AC terms, so the
1064
* simplification applies less often (typically 5% to 10% of the time).
1065
* On machines with very fast multiplication, it's possible that the
1066
* test takes more time than it's worth. In that case this section
1067
* may be commented out.
1070
d0 = dataptr[DCTSTRIDE*0];
1071
d2 = dataptr[DCTSTRIDE*1];
1072
d4 = dataptr[DCTSTRIDE*2];
1073
d6 = dataptr[DCTSTRIDE*3];
1075
/* Even part: reverse the even part of the forward DCT. */
1076
/* The rotator is sqrt(2)*c(-6). */
/* Same four even-part variants as in pass 1, now applied to the four
 * values of one column. The tests that pick a variant are not visible in
 * this excerpt. */
1079
/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
1080
z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1081
tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1082
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1084
tmp0 = (d0 + d4) << CONST_BITS;
1085
tmp1 = (d0 - d4) << CONST_BITS;
1087
tmp10 = tmp0 + tmp3;
1088
tmp13 = tmp0 - tmp3;
1089
tmp11 = tmp1 + tmp2;
1090
tmp12 = tmp1 - tmp2;
1092
/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
/* General case with d2 = 0 folded in. */
1093
tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1094
tmp3 = MULTIPLY(d6, FIX_0_541196100);
1096
tmp0 = (d0 + d4) << CONST_BITS;
1097
tmp1 = (d0 - d4) << CONST_BITS;
1099
tmp10 = tmp0 + tmp3;
1100
tmp13 = tmp0 - tmp3;
1101
tmp11 = tmp1 + tmp2;
1102
tmp12 = tmp1 - tmp2;
1106
/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
/* General case with d6 = 0 folded in. */
1107
tmp2 = MULTIPLY(d2, FIX_0_541196100);
1108
tmp3 = MULTIPLY(d2, FIX_1_306562965);
1110
tmp0 = (d0 + d4) << CONST_BITS;
1111
tmp1 = (d0 - d4) << CONST_BITS;
1113
tmp10 = tmp0 + tmp3;
1114
tmp13 = tmp0 - tmp3;
1115
tmp11 = tmp1 + tmp2;
1116
tmp12 = tmp1 - tmp2;
1118
/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
/* No rotator terms: the butterfly collapses to the two shifted sums. */
1119
tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
1120
tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
1124
/* Final output stage: only tmp10..tmp13 are consumed below. */
/* Unlike pass 1 this is a plain right shift: no rounding constant is added
 * on these lines.
 * NOTE(review): >> on a negative signed value is implementation-defined in
 * ISO C. */
1126
dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3);
1127
dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3);
1128
dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3);
1129
dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3);
1131
dataptr++; /* advance pointer to next column */
1135
/*
 * j_rev_dct2: in-place 2x2 transform of data[0], data[1], data[DCTSTRIDE]
 * and data[1+DCTSTRIDE]. A sum/difference is taken along each row, then a
 * sum/difference of the two rows, and every result is shifted right by 3
 * before being written back to the same four positions.
 *
 * NOTE(review): no rounding offset is added on the lines visible in this
 * excerpt, and the closing brace is not shown - confirm against the full
 * file.
 */
void j_rev_dct2(DCTBLOCK data){
1136
int d00, d01, d10, d11;
1139
/* Horizontal step: sum and difference of the two entries of each row. */
d00 = data[0+0*DCTSTRIDE] + data[1+0*DCTSTRIDE];
1140
d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE];
1141
d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE];
1142
d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE];
1144
/* Vertical step: sum and difference of the two rows, then descale by 8.
 * NOTE(review): >> on a negative int is implementation-defined in ISO C. */
data[0+0*DCTSTRIDE]= (d00 + d10)>>3;
1145
data[1+0*DCTSTRIDE]= (d01 + d11)>>3;
1146
data[0+1*DCTSTRIDE]= (d00 - d10)>>3;
1147
data[1+1*DCTSTRIDE]= (d01 - d11)>>3;
1150
/*
 * j_rev_dct1: single-coefficient case. Only data[0] is read and written on
 * the lines visible here; it becomes (data[0] + 4) >> 3, i.e. a divide by 8
 * where the +4 bias rounds to nearest (assuming >> is an arithmetic shift
 * for negative values, which ISO C leaves implementation-defined).
 *
 * NOTE(review): the closing brace is not visible in this excerpt.
 */
void j_rev_dct1(DCTBLOCK data){
1151
data[0] = (data[0] + 4)>>3;