411
#ifdef IDCT_SCALING_SUPPORTED
415
* Perform dequantization and inverse DCT on one block of coefficients,
416
* producing a 7x7 output block.
418
* Optimized algorithm with 12 multiplications in the 1-D kernel.
419
* cK represents sqrt(2) * cos(K*pi/14).
423
/*
 * jpeg_idct_7x7 -- two-pass routine: pass 1 walks 7 input columns and fills
 * workspace[]; pass 2 walks 7 workspace rows and writes 7 samples per row.
 *
 *   cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr    - compptr->dct_table is used as the ISLOW_MULT_TYPE table that
 *                DEQUANTIZE() pairs with each input coefficient.
 *   output_buf - array of row pointers; rows 0..6 are written.
 *   output_col - offset added to each row pointer before writing.
 *
 * NOTE(review): this listing interleaves embedded source line numbers with
 * the code and skips some of them.  inptr, wsptr, outptr, ctr and z1..z3 are
 * used below with no visible declaration or initialisation, and some
 * arithmetic steps appear to be absent.  Confirm every remark here against
 * the complete source.
 */
jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
425
JSAMPARRAY output_buf, JDIMENSION output_col)
427
INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
430
ISLOW_MULT_TYPE * quantptr;
433
/* Table indexed by the descaled pass-2 values when samples are stored. */
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
435
int workspace[7*7]; /* buffers data between passes */
438
/* Pass 1: process columns from input, store into work array. */
441
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
443
/* One iteration per column; inptr, quantptr and wsptr advance in step. */
for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
446
/* Entries DCTSIZE*0, *2, *4, *6 of the current column. */
tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
447
/* NOTE(review): if INT32 is signed and tmp13 can be negative, this left */
/* shift is undefined in ISO C -- confirm the supported compilers cope. */
tmp13 <<= CONST_BITS;
448
/* Add fudge factor here for final descale. */
449
tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
451
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
452
z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
453
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
455
tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
456
tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
457
tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
460
/* NOTE(review): tmp0 is read on the right-hand side here but no earlier */
/* assignment is visible; the embedded numbering jumps from 457 to 460. */
tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
461
tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
462
tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
463
tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
467
/* Entries DCTSIZE*1, *3, *5 of the current column. */
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
468
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
469
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
471
tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
472
tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
475
/* NOTE(review): tmp2 is overwritten here with no visible use of the value */
/* computed just above (numbering jumps 472 -> 475), and tmp0 is not */
/* visibly recomputed before the output stage. */
tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
477
z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
479
tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
481
/* Final output stage */
/* Rows k and 6-k are the sum and difference of one pair of terms; row 3 */
/* uses tmp13 alone.  Each value is descaled by CONST_BITS-PASS1_BITS. */
483
wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
484
wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
485
wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
486
wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
487
wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
488
wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
489
wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
492
/* Pass 2: process 7 rows from work array, store into output array. */
/* NOTE(review): no reset of wsptr to the start of workspace[] is visible */
/* between the two passes. */
495
for (ctr = 0; ctr < 7; ctr++) {
496
outptr = output_buf[ctr] + output_col;
500
/* Add fudge factor here for final descale. */
501
tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
502
tmp13 <<= CONST_BITS;
504
/* Same arithmetic as pass 1, now on workspace entries 0..6 of this row. */
z1 = (INT32) wsptr[2];
505
z2 = (INT32) wsptr[4];
506
z3 = (INT32) wsptr[6];
508
tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
509
tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
510
tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
513
/* NOTE(review): as in pass 1, the value of tmp0 read here has no visible */
/* assignment in this pass (numbering jumps 510 -> 513). */
tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
514
tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
515
tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
516
tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
520
z1 = (INT32) wsptr[1];
521
z2 = (INT32) wsptr[3];
522
z3 = (INT32) wsptr[5];
524
tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
525
tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
528
tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
530
z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
532
tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
534
/* Final output stage */
/* Values are descaled by CONST_BITS+PASS1_BITS+3 and used as an index */
/* into range_limit.  NOTE(review): each index expression below is left */
/* unclosed in this listing; one numbered line is skipped after each. */
536
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
537
CONST_BITS+PASS1_BITS+3)
539
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
540
CONST_BITS+PASS1_BITS+3)
542
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
543
CONST_BITS+PASS1_BITS+3)
545
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
546
CONST_BITS+PASS1_BITS+3)
548
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
549
CONST_BITS+PASS1_BITS+3)
551
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
552
CONST_BITS+PASS1_BITS+3)
554
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
555
CONST_BITS+PASS1_BITS+3)
558
wsptr += 7; /* advance pointer to next row */
564
* Perform dequantization and inverse DCT on one block of coefficients,
565
* producing a reduced-size 6x6 output block.
567
* Optimized algorithm with 3 multiplications in the 1-D kernel.
568
* cK represents sqrt(2) * cos(K*pi/12).
572
/*
 * jpeg_idct_6x6 -- two-pass routine: pass 1 walks 6 input columns and fills
 * workspace[]; pass 2 walks 6 workspace rows and writes 6 samples per row.
 *
 *   cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr    - compptr->dct_table is used as the ISLOW_MULT_TYPE table that
 *                DEQUANTIZE() pairs with each input coefficient.
 *   output_buf - array of row pointers; rows 0..5 are written.
 *   output_col - offset added to each row pointer before writing.
 *
 * NOTE(review): this listing interleaves embedded source line numbers with
 * the code and skips some of them.  inptr, wsptr, outptr, ctr and z1..z3 are
 * used below with no visible declaration or initialisation, and some
 * arithmetic steps appear to be absent.  Confirm every remark here against
 * the complete source.
 */
jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
574
JSAMPARRAY output_buf, JDIMENSION output_col)
576
INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
579
ISLOW_MULT_TYPE * quantptr;
582
/* Table indexed by the descaled pass-2 values when samples are stored. */
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
584
int workspace[6*6]; /* buffers data between passes */
587
/* Pass 1: process columns from input, store into work array. */
590
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
592
/* One iteration per column; inptr, quantptr and wsptr advance in step. */
for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
595
/* Entries DCTSIZE*0, *4, *2 of the current column. */
/* NOTE(review): numbering skips 596 after this statement; no scaling of */
/* tmp0 by CONST_BITS is visible before the fudge factor is added. */
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
597
/* Add fudge factor here for final descale. */
598
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
599
tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
600
tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
602
/* tmp11 is descaled right here, so rows 1 and 4 below are stored without */
/* a further RIGHT_SHIFT. */
tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
603
tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
604
tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
610
/* NOTE(review): numbering jumps 604 -> 610.  tmp12 is read in the output */
/* stage with no visible assignment, and tmp10 still holds the raw */
/* DCTSIZE*2 coefficient as far as the visible statements show. */
/* Entries DCTSIZE*1, *3, *5 of the current column. */
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
611
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
612
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
613
tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
614
/* NOTE(review): if INT32 is signed and these sums can be negative, the */
/* left shifts are undefined in ISO C -- confirm the compilers cope. */
tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
615
tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
616
/* Shifted by PASS1_BITS only, matching the already-descaled tmp11. */
tmp1 = (z1 - z2 - z3) << PASS1_BITS;
618
/* Final output stage */
/* Rows k and 5-k are the sum and difference of one pair of terms. */
620
wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
621
wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
622
wsptr[6*1] = (int) (tmp11 + tmp1);
623
wsptr[6*4] = (int) (tmp11 - tmp1);
624
wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
625
wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
628
/* Pass 2: process 6 rows from work array, store into output array. */
/* NOTE(review): no reset of wsptr to the start of workspace[] is visible */
/* between the two passes. */
631
for (ctr = 0; ctr < 6; ctr++) {
632
outptr = output_buf[ctr] + output_col;
636
/* Add fudge factor here for final descale. */
637
tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
639
tmp2 = (INT32) wsptr[4];
640
tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
642
/* Unlike pass 1, tmp11 is not descaled here; every output below goes */
/* through the same RIGHT_SHIFT. */
tmp11 = tmp0 - tmp10 - tmp10;
643
tmp10 = (INT32) wsptr[2];
644
tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
650
/* NOTE(review): numbering jumps 644 -> 650; tmp12 again has no visible */
/* assignment before it is read in the output stage. */
z1 = (INT32) wsptr[1];
651
z2 = (INT32) wsptr[3];
652
z3 = (INT32) wsptr[5];
653
tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
654
tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
655
tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
656
tmp1 = (z1 - z2 - z3) << CONST_BITS;
658
/* Final output stage */
/* Values are descaled by CONST_BITS+PASS1_BITS+3 and used as an index */
/* into range_limit.  NOTE(review): each index expression below is left */
/* unclosed in this listing; one numbered line is skipped after each. */
660
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
661
CONST_BITS+PASS1_BITS+3)
663
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
664
CONST_BITS+PASS1_BITS+3)
666
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
667
CONST_BITS+PASS1_BITS+3)
669
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
670
CONST_BITS+PASS1_BITS+3)
672
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
673
CONST_BITS+PASS1_BITS+3)
675
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
676
CONST_BITS+PASS1_BITS+3)
679
wsptr += 6; /* advance pointer to next row */
685
* Perform dequantization and inverse DCT on one block of coefficients,
686
* producing a reduced-size 5x5 output block.
688
* Optimized algorithm with 5 multiplications in the 1-D kernel.
689
* cK represents sqrt(2) * cos(K*pi/10).
693
/*
 * jpeg_idct_5x5 -- two-pass routine: pass 1 walks 5 input columns and fills
 * workspace[]; pass 2 walks 5 workspace rows and writes 5 samples per row.
 *
 *   cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr    - compptr->dct_table is used as the ISLOW_MULT_TYPE table that
 *                DEQUANTIZE() pairs with each input coefficient.
 *   output_buf - array of row pointers; rows 0..4 are written.
 *   output_col - offset added to each row pointer before writing.
 *
 * NOTE(review): this listing interleaves embedded source line numbers with
 * the code and skips some of them.  inptr, wsptr, outptr, ctr and z1..z3 are
 * used below with no visible declaration or initialisation, and some
 * arithmetic steps appear to be absent.  Confirm every remark here against
 * the complete source.
 */
jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
695
JSAMPARRAY output_buf, JDIMENSION output_col)
697
INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
700
ISLOW_MULT_TYPE * quantptr;
703
/* Table indexed by the descaled pass-2 values when samples are stored. */
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
705
int workspace[5*5]; /* buffers data between passes */
708
/* Pass 1: process columns from input, store into work array. */
711
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
713
/* One iteration per column; inptr, quantptr and wsptr advance in step. */
for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
716
/* Entries DCTSIZE*0, *2, *4 of the current column. */
tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
717
/* NOTE(review): if INT32 is signed and tmp12 can be negative, this left */
/* shift is undefined in ISO C -- confirm the supported compilers cope. */
tmp12 <<= CONST_BITS;
718
/* Add fudge factor here for final descale. */
719
tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
720
tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
721
tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
722
z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
723
z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
731
/* NOTE(review): numbering jumps 723 -> 731.  z1 and z2 computed above are */
/* overwritten below with no visible use, and tmp10/tmp11 are read in the */
/* output stage with no visible assignment. */
/* Entries DCTSIZE*1, *3 of the current column. */
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
732
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
734
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
735
tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
736
tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
738
/* Final output stage */
/* Rows k and 4-k are the sum and difference of one pair of terms; row 2 */
/* uses tmp12 alone.  Each value is descaled by CONST_BITS-PASS1_BITS. */
740
wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
741
wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
742
wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
743
wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
744
wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
747
/* Pass 2: process 5 rows from work array, store into output array. */
/* NOTE(review): no reset of wsptr to the start of workspace[] is visible */
/* between the two passes. */
750
for (ctr = 0; ctr < 5; ctr++) {
751
outptr = output_buf[ctr] + output_col;
755
/* Add fudge factor here for final descale. */
756
tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
757
tmp12 <<= CONST_BITS;
758
/* Same arithmetic as pass 1, now on workspace entries 0..4 of this row. */
tmp0 = (INT32) wsptr[2];
759
tmp1 = (INT32) wsptr[4];
760
z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
761
z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
769
/* NOTE(review): numbering jumps 761 -> 769; tmp10/tmp11 again have no */
/* visible assignment before the output stage reads them. */
z2 = (INT32) wsptr[1];
770
z3 = (INT32) wsptr[3];
772
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
773
tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
774
tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
776
/* Final output stage */
/* Values are descaled by CONST_BITS+PASS1_BITS+3 and used as an index */
/* into range_limit.  NOTE(review): each index expression below is left */
/* unclosed in this listing; one numbered line is skipped after each. */
778
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
779
CONST_BITS+PASS1_BITS+3)
781
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
782
CONST_BITS+PASS1_BITS+3)
784
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
785
CONST_BITS+PASS1_BITS+3)
787
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
788
CONST_BITS+PASS1_BITS+3)
790
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
791
CONST_BITS+PASS1_BITS+3)
794
wsptr += 5; /* advance pointer to next row */
800
* Perform dequantization and inverse DCT on one block of coefficients,
801
* producing a reduced-size 3x3 output block.
803
* Optimized algorithm with 2 multiplications in the 1-D kernel.
804
* cK represents sqrt(2) * cos(K*pi/6).
808
/*
 * jpeg_idct_3x3 -- two-pass routine: pass 1 walks 3 input columns and fills
 * workspace[]; pass 2 walks 3 workspace rows and writes 3 samples per row.
 *
 *   cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr    - compptr->dct_table is used as the ISLOW_MULT_TYPE table that
 *                DEQUANTIZE() pairs with each input coefficient.
 *   output_buf - array of row pointers; rows 0..2 are written.
 *   output_col - offset added to each row pointer before writing.
 *
 * NOTE(review): this listing interleaves embedded source line numbers with
 * the code and skips some of them.  inptr, wsptr, outptr and ctr are used
 * below with no visible declaration or initialisation.  Confirm every remark
 * here against the complete source.
 */
jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
810
JSAMPARRAY output_buf, JDIMENSION output_col)
812
INT32 tmp0, tmp2, tmp10, tmp12;
814
ISLOW_MULT_TYPE * quantptr;
817
/* Table indexed by the descaled pass-2 values when samples are stored. */
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
819
int workspace[3*3]; /* buffers data between passes */
822
/* Pass 1: process columns from input, store into work array. */
825
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
827
/* One iteration per column; inptr, quantptr and wsptr advance in step. */
for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
830
/* Entries DCTSIZE*0 and DCTSIZE*2 of the current column. */
/* NOTE(review): numbering skips 831 after this statement; no scaling of */
/* tmp0 by CONST_BITS is visible before it is combined with the */
/* MULTIPLY() product below. */
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
832
/* Add fudge factor here for final descale. */
833
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
834
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
835
tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
836
/* tmp10 feeds rows 0 and 2; tmp2 becomes the middle row on its own. */
tmp10 = tmp0 + tmp12;
837
tmp2 = tmp0 - tmp12 - tmp12;
841
/* Entry DCTSIZE*1 of the current column. */
tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
842
tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
844
/* Final output stage */
/* Rows 0 and 2 are tmp10 plus/minus tmp0; each value is descaled by */
/* CONST_BITS-PASS1_BITS. */
846
wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
847
wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
848
wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
851
/* Pass 2: process 3 rows from work array, store into output array. */
/* NOTE(review): no reset of wsptr to the start of workspace[] is visible */
/* between the two passes. */
854
for (ctr = 0; ctr < 3; ctr++) {
855
outptr = output_buf[ctr] + output_col;
859
/* Add fudge factor here for final descale. */
860
/* NOTE(review): numbering skips 861 after this statement. */
tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
862
/* Same arithmetic as pass 1, now on workspace entries 0..2 of this row. */
tmp2 = (INT32) wsptr[2];
863
tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
864
tmp10 = tmp0 + tmp12;
865
tmp2 = tmp0 - tmp12 - tmp12;
869
tmp12 = (INT32) wsptr[1];
870
tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
872
/* Final output stage */
/* Values are descaled by CONST_BITS+PASS1_BITS+3 and used as an index */
/* into range_limit.  NOTE(review): each index expression below is left */
/* unclosed in this listing; one numbered line is skipped after each. */
874
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
875
CONST_BITS+PASS1_BITS+3)
877
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
878
CONST_BITS+PASS1_BITS+3)
880
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
881
CONST_BITS+PASS1_BITS+3)
884
wsptr += 3; /* advance pointer to next row */
890
* Perform dequantization and inverse DCT on one block of coefficients,
891
* producing a 9x9 output block.
893
* Optimized algorithm with 10 multiplications in the 1-D kernel.
894
* cK represents sqrt(2) * cos(K*pi/18).
898
/*
 * jpeg_idct_9x9 -- two-pass routine: pass 1 walks 8 input columns and writes
 * 9 values per column into workspace[] (row stride 8); pass 2 walks 9
 * workspace rows and writes 9 samples per row.
 *
 *   cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr    - compptr->dct_table is used as the ISLOW_MULT_TYPE table that
 *                DEQUANTIZE() pairs with each input coefficient.
 *   output_buf - array of row pointers; rows 0..8 are written.
 *   output_col - offset added to each row pointer before writing.
 *
 * NOTE(review): this listing interleaves embedded source line numbers with
 * the code and skips some of them.  inptr, wsptr, outptr and ctr are used
 * below with no visible declaration or initialisation, and some arithmetic
 * steps appear to be absent.  Confirm every remark here against the complete
 * source.
 */
jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
900
JSAMPARRAY output_buf, JDIMENSION output_col)
902
INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
903
INT32 z1, z2, z3, z4;
905
ISLOW_MULT_TYPE * quantptr;
908
/* Table indexed by the descaled pass-2 values when samples are stored. */
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
910
/* 8 columns by 9 rows: pass 1 stores at wsptr[8*0] .. wsptr[8*8]. */
int workspace[8*9]; /* buffers data between passes */
913
/* Pass 1: process columns from input, store into work array. */
916
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
918
/* One iteration per column; inptr, quantptr and wsptr advance in step. */
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
921
/* Entries DCTSIZE*0, *2, *4, *6 of the current column. */
/* NOTE(review): numbering skips 922 after this statement; no scaling of */
/* tmp0 by CONST_BITS is visible before the fudge factor is added. */
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
923
/* Add fudge factor here for final descale. */
924
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
926
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
927
z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
928
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
930
tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
932
tmp2 = tmp0 - tmp3 - tmp3;
934
tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
936
/* tmp14 becomes the middle row (wsptr[8*4]) with no odd-row term added. */
tmp14 = tmp2 - tmp0 - tmp0;
938
tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
939
tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
940
tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
942
/* NOTE(review): tmp1 is read here, and tmp11 in the output stage, but no */
/* assignment to either is visible; the numbering skips 931 and 935. */
tmp10 = tmp1 + tmp0 - tmp3;
943
tmp12 = tmp1 - tmp0 + tmp2;
944
tmp13 = tmp1 - tmp2 + tmp3;
948
/* Entries DCTSIZE*1, *3, *5, *7 of the current column. */
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
949
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
950
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
951
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
953
z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
955
tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
956
tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
957
tmp0 = tmp2 + tmp3 - z2;
958
tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
961
/* NOTE(review): tmp1 is overwritten here with no visible use of the value */
/* computed just above (numbering jumps 958 -> 961). */
tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
963
/* Final output stage */
/* Rows k and 8-k are the sum and difference of one pair of terms; row 4 */
/* uses tmp14 alone.  Each value is descaled by CONST_BITS-PASS1_BITS. */
965
wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
966
wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
967
wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
968
wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
969
wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
970
wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
971
wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
972
wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
973
wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
976
/* Pass 2: process 9 rows from work array, store into output array. */
/* NOTE(review): no reset of wsptr to the start of workspace[] is visible */
/* between the two passes. */
979
for (ctr = 0; ctr < 9; ctr++) {
980
outptr = output_buf[ctr] + output_col;
984
/* Add fudge factor here for final descale. */
985
tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
988
/* Same arithmetic as pass 1, now on workspace entries 0..7 of this row. */
z1 = (INT32) wsptr[2];
989
z2 = (INT32) wsptr[4];
990
z3 = (INT32) wsptr[6];
992
tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
994
tmp2 = tmp0 - tmp3 - tmp3;
996
tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
998
tmp14 = tmp2 - tmp0 - tmp0;
1000
tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1001
tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1002
tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1004
/* NOTE(review): as in pass 1, tmp1 (and tmp11 further down) are read with */
/* no visible assignment in this pass; the numbering skips 993 and 997. */
tmp10 = tmp1 + tmp0 - tmp3;
1005
tmp12 = tmp1 - tmp0 + tmp2;
1006
tmp13 = tmp1 - tmp2 + tmp3;
1010
z1 = (INT32) wsptr[1];
1011
z2 = (INT32) wsptr[3];
1012
z3 = (INT32) wsptr[5];
1013
z4 = (INT32) wsptr[7];
1015
z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1017
tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1018
tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1019
tmp0 = tmp2 + tmp3 - z2;
1020
tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1023
tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1025
/* Final output stage */
/* Values are descaled by CONST_BITS+PASS1_BITS+3 and used as an index */
/* into range_limit.  NOTE(review): each index expression below is left */
/* unclosed in this listing; one numbered line is skipped after each. */
1027
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1028
CONST_BITS+PASS1_BITS+3)
1030
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1031
CONST_BITS+PASS1_BITS+3)
1033
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
1034
CONST_BITS+PASS1_BITS+3)
1036
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
1037
CONST_BITS+PASS1_BITS+3)
1039
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
1040
CONST_BITS+PASS1_BITS+3)
1042
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
1043
CONST_BITS+PASS1_BITS+3)
1045
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
1046
CONST_BITS+PASS1_BITS+3)
1048
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
1049
CONST_BITS+PASS1_BITS+3)
1051
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
1052
CONST_BITS+PASS1_BITS+3)
1055
wsptr += 8; /* advance pointer to next row */
1061
* Perform dequantization and inverse DCT on one block of coefficients,
1062
* producing a 10x10 output block.
1064
* Optimized algorithm with 12 multiplications in the 1-D kernel.
1065
* cK represents sqrt(2) * cos(K*pi/20).
1069
/*
 * jpeg_idct_10x10 -- two-pass routine: pass 1 walks 8 input columns and
 * writes 10 values per column into workspace[] (row stride 8); pass 2 walks
 * 10 workspace rows and writes 10 samples per row.
 *
 *   cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr    - compptr->dct_table is used as the ISLOW_MULT_TYPE table that
 *                DEQUANTIZE() pairs with each input coefficient.
 *   coef_block - input coefficients; presumably what inptr walks, but that
 *                assignment is not visible in this listing -- TODO confirm.
 *   output_buf - array of row pointers; rows 0..9 are written.
 *   output_col - offset added to each row pointer before writing.
 *
 * NOTE(review): this listing interleaves embedded source line numbers with
 * the code and skips some of them.  inptr, wsptr, outptr and ctr are used
 * below with no visible declaration or initialisation, and some arithmetic
 * steps appear to be absent.  Confirm every remark here against the complete
 * source.
 */
jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1070
JCOEFPTR coef_block,
1071
JSAMPARRAY output_buf, JDIMENSION output_col)
1073
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1074
INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
1075
INT32 z1, z2, z3, z4, z5;
1077
ISLOW_MULT_TYPE * quantptr;
1080
/* Table indexed by the descaled pass-2 values when samples are stored. */
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1082
/* 8 columns by 10 rows: pass 1 stores at wsptr[8*0] .. wsptr[8*9]. */
int workspace[8*10]; /* buffers data between passes */
1085
/* Pass 1: process columns from input, store into work array. */
1088
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1090
/* One iteration per column; inptr, quantptr and wsptr advance in step. */
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1093
/* Entries DCTSIZE*0, *4, *2, *6 of the current column. */
/* NOTE(review): numbering skips 1094 after this statement; no scaling of */
/* z3 by CONST_BITS is visible before the fudge factor is added. */
z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1095
/* Add fudge factor here for final descale. */
1096
z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1097
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1098
z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1099
z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1103
/* tmp22 is descaled right here, so rows 2 and 7 below are stored without */
/* a further RIGHT_SHIFT. */
/* NOTE(review): if INT32 is signed and (z1 - z2) can be negative, this */
/* left shift is undefined in ISO C -- confirm the compilers cope. */
tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
1104
CONST_BITS-PASS1_BITS);
1106
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1107
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1109
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1110
tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1111
tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1113
/* NOTE(review): tmp10 and tmp11 are read here with no visible assignment; */
/* the numbering jumps from 1099 to 1103 above. */
tmp20 = tmp10 + tmp12;
1114
tmp24 = tmp10 - tmp12;
1115
tmp21 = tmp11 + tmp13;
1116
tmp23 = tmp11 - tmp13;
1120
/* Entries DCTSIZE*1, *3, *5, *7 of the current column. */
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1121
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1122
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1123
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1128
/* NOTE(review): numbering jumps 1123 -> 1128.  As far as the visible */
/* statements show, tmp13 and tmp11 used below still hold the values left */
/* over from the rows-0/2/4/6 section. */
tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1129
z5 = z3 << CONST_BITS;
1131
z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1134
tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1135
tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1137
z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1138
z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
1140
/* Shifted by PASS1_BITS only, matching the already-descaled tmp22. */
tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
1142
tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1143
tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1145
/* Final output stage */
/* Rows k and 9-k are the sum and difference of one tmp2x/tmp1x pair. */
1147
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1148
wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1149
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1150
wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1151
wsptr[8*2] = (int) (tmp22 + tmp12);
1152
wsptr[8*7] = (int) (tmp22 - tmp12);
1153
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1154
wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1155
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1156
wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1159
/* Pass 2: process 10 rows from work array, store into output array. */
/* NOTE(review): no reset of wsptr to the start of workspace[] is visible */
/* between the two passes. */
1162
for (ctr = 0; ctr < 10; ctr++) {
1163
outptr = output_buf[ctr] + output_col;
1167
/* Add fudge factor here for final descale. */
1168
z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1170
/* Same arithmetic as pass 1, now on workspace entries 0..7 of this row. */
z4 = (INT32) wsptr[4];
1171
z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1172
z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1176
/* Unlike pass 1, tmp22 is not descaled here; every output below goes */
/* through the same RIGHT_SHIFT. */
tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
1178
z2 = (INT32) wsptr[2];
1179
z3 = (INT32) wsptr[6];
1181
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1182
tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1183
tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1185
/* NOTE(review): tmp10 and tmp11 again have no visible assignment in this */
/* pass before being read (numbering jumps 1172 -> 1176 above). */
tmp20 = tmp10 + tmp12;
1186
tmp24 = tmp10 - tmp12;
1187
tmp21 = tmp11 + tmp13;
1188
tmp23 = tmp11 - tmp13;
1192
z1 = (INT32) wsptr[1];
1193
z2 = (INT32) wsptr[3];
1194
z3 = (INT32) wsptr[5];
1196
/* NOTE(review): numbering skips 1195 here and 1197-1200 after the next */
/* statement; pass 1 scales the entry-5 value by CONST_BITS (into z5) */
/* whereas no such scaling of z3 is visible in this pass. */
z4 = (INT32) wsptr[7];
1201
tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1203
z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1206
tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1207
tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1209
z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1210
z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
1212
tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
1214
tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1215
tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1217
/* Final output stage */
/* Values are descaled by CONST_BITS+PASS1_BITS+3 and used as an index */
/* into range_limit.  NOTE(review): each index expression below is left */
/* unclosed in this listing; one numbered line is skipped after each. */
1219
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1220
CONST_BITS+PASS1_BITS+3)
1222
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1223
CONST_BITS+PASS1_BITS+3)
1225
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1226
CONST_BITS+PASS1_BITS+3)
1228
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1229
CONST_BITS+PASS1_BITS+3)
1231
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1232
CONST_BITS+PASS1_BITS+3)
1234
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1235
CONST_BITS+PASS1_BITS+3)
1237
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1238
CONST_BITS+PASS1_BITS+3)
1240
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1241
CONST_BITS+PASS1_BITS+3)
1243
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1244
CONST_BITS+PASS1_BITS+3)
1246
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1247
CONST_BITS+PASS1_BITS+3)
1250
wsptr += 8; /* advance pointer to next row */
1256
* Perform dequantization and inverse DCT on one block of coefficients,
1257
* producing a 11x11 output block.
1259
* Optimized algorithm with 24 multiplications in the 1-D kernel.
1260
* cK represents sqrt(2) * cos(K*pi/22).
1264
/*
 * jpeg_idct_11x11 -- two-pass routine: pass 1 walks 8 input columns and
 * writes 11 values per column into workspace[] (row stride 8); pass 2 walks
 * 11 workspace rows and writes 11 samples per row.
 *
 *   cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr    - compptr->dct_table is used as the ISLOW_MULT_TYPE table that
 *                DEQUANTIZE() pairs with each input coefficient.
 *   coef_block - input coefficients; presumably what inptr walks, but that
 *                assignment is not visible in this listing -- TODO confirm.
 *   output_buf - array of row pointers; rows 0..10 are written.
 *   output_col - offset added to each row pointer before writing.
 *
 * NOTE(review): this listing interleaves embedded source line numbers with
 * the code and skips some of them.  inptr, wsptr, outptr and ctr are used
 * below with no visible declaration or initialisation, and some arithmetic
 * steps appear to be absent.  Confirm every remark here against the complete
 * source.
 */
jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1265
JCOEFPTR coef_block,
1266
JSAMPARRAY output_buf, JDIMENSION output_col)
1268
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1269
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1270
INT32 z1, z2, z3, z4;
1272
ISLOW_MULT_TYPE * quantptr;
1275
/* Table indexed by the descaled pass-2 values when samples are stored. */
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1277
/* 8 columns by 11 rows: pass 1 stores at wsptr[8*0] .. wsptr[8*10]. */
int workspace[8*11]; /* buffers data between passes */
1280
/* Pass 1: process columns from input, store into work array. */
1283
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1285
/* One iteration per column; inptr, quantptr and wsptr advance in step. */
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1288
/* Entries DCTSIZE*0, *2, *4, *6 of the current column. */
tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1289
/* NOTE(review): if INT32 is signed and tmp10 can be negative, this left */
/* shift is undefined in ISO C -- confirm the supported compilers cope. */
tmp10 <<= CONST_BITS;
1290
/* Add fudge factor here for final descale. */
1291
tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
1293
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1294
z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1295
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1297
tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1298
tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1300
/* NOTE(review): z4 is read in this section with no visible assignment; */
/* the numbering skips 1299, 1301 and 1307. */
tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1302
tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1303
tmp21 = tmp20 + tmp23 + tmp25 -
1304
MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1305
tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1306
tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1308
tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1309
tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1310
MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1311
/* tmp25 becomes the middle row (wsptr[8*5]) with no odd-row term added. */
tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1315
/* Entries DCTSIZE*1, *3, *5, *7 of the current column. */
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1316
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1317
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1318
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1321
/* NOTE(review): tmp11 is read here with no visible assignment; the */
/* numbering jumps from 1318 to 1321. */
tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1322
tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1323
tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1324
tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1325
tmp10 = tmp11 + tmp12 + tmp13 -
1326
MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1327
/* z1 is reused as a scratch term from here on. */
z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1328
tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1329
tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1330
z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1332
tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1333
tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1334
MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1335
MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1337
/* Final output stage */
/* Rows k and 10-k are the sum and difference of one tmp2x/tmp1x pair; */
/* row 5 uses tmp25 alone.  Each value is descaled by CONST_BITS-PASS1_BITS. */
1339
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1340
wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1341
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1342
wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1343
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1344
wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1345
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1346
wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1347
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1348
wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1349
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
1352
/* Pass 2: process 11 rows from work array, store into output array. */
/* NOTE(review): no reset of wsptr to the start of workspace[] is visible */
/* between the two passes. */
1355
for (ctr = 0; ctr < 11; ctr++) {
1356
outptr = output_buf[ctr] + output_col;
1360
/* Add fudge factor here for final descale. */
1361
tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1362
tmp10 <<= CONST_BITS;
1364
/* Same arithmetic as pass 1, now on workspace entries 0..7 of this row. */
z1 = (INT32) wsptr[2];
1365
z2 = (INT32) wsptr[4];
1366
z3 = (INT32) wsptr[6];
1368
tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1369
tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1371
/* NOTE(review): as in pass 1, z4 is read in this section with no visible */
/* assignment in this pass; the numbering skips 1370, 1372 and 1378. */
tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1373
tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1374
tmp21 = tmp20 + tmp23 + tmp25 -
1375
MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1376
tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1377
tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1379
tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1380
tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1381
MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1382
tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1386
z1 = (INT32) wsptr[1];
1387
z2 = (INT32) wsptr[3];
1388
z3 = (INT32) wsptr[5];
1389
z4 = (INT32) wsptr[7];
1392
/* NOTE(review): tmp11 is read here with no visible assignment in this */
/* pass; the numbering jumps from 1389 to 1392. */
tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1393
tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1394
tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1395
tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1396
tmp10 = tmp11 + tmp12 + tmp13 -
1397
MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1398
z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1399
tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1400
tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1401
z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1403
tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1404
tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1405
MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1406
MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1408
/* Final output stage */
/* Values are descaled by CONST_BITS+PASS1_BITS+3 and used as an index */
/* into range_limit.  NOTE(review): each index expression below is left */
/* unclosed in this listing; one numbered line is skipped after each. */
1410
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1411
CONST_BITS+PASS1_BITS+3)
1413
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1414
CONST_BITS+PASS1_BITS+3)
1416
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1417
CONST_BITS+PASS1_BITS+3)
1419
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1420
CONST_BITS+PASS1_BITS+3)
1422
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1423
CONST_BITS+PASS1_BITS+3)
1425
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1426
CONST_BITS+PASS1_BITS+3)
1428
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1429
CONST_BITS+PASS1_BITS+3)
1431
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1432
CONST_BITS+PASS1_BITS+3)
1434
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1435
CONST_BITS+PASS1_BITS+3)
1437
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1438
CONST_BITS+PASS1_BITS+3)
1440
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25,
1441
CONST_BITS+PASS1_BITS+3)
1444
wsptr += 8; /* advance pointer to next row */
1450
* Perform dequantization and inverse DCT on one block of coefficients,
1451
* producing a 12x12 output block.
1453
* Optimized algorithm with 15 multiplications in the 1-D kernel.
1454
* cK represents sqrt(2) * cos(K*pi/24).
1458
jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1459
JCOEFPTR coef_block,
1460
JSAMPARRAY output_buf, JDIMENSION output_col)
/*
 * Overview of what is visible here: pass 1 runs 8 times (one per input
 * column), dequantizes coefficients read through inptr with the table taken
 * from compptr->dct_table, and stores 12 values per column into workspace
 * at stride 8.  Pass 2 runs 12 times (one per workspace row) and stores 12
 * samples per row starting at output_buf[ctr] + output_col.
 * NOTE(review): several statements are absent from this listing (for
 * example, nothing visible assigns tmp21 or tmp24, or declares inptr,
 * wsptr, outptr or ctr) -- confirm against the complete source.
 */
1462
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1463
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1464
INT32 z1, z2, z3, z4;
1466
ISLOW_MULT_TYPE * quantptr;
1469
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1471
int workspace[8*12]; /* buffers data between passes */
1474
/* Pass 1: process columns from input, store into work array. */
1477
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
/* ctr counts 8 columns; inptr, quantptr and wsptr each step by one element per column. */
1479
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
/* Even-indexed inputs: offsets DCTSIZE*0, DCTSIZE*4, DCTSIZE*2 and DCTSIZE*6. */
1482
z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1484
/* Add fudge factor here for final descale. */
1485
z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1487
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1488
z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
/* NOTE(review): z4 is overwritten below; the statements that consume the c4 product (and define tmp10/tmp11) are not shown in this listing. */
1493
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1494
z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1496
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1506
tmp20 = tmp10 + tmp12;
1507
tmp25 = tmp10 - tmp12;
1509
tmp12 = z4 - z1 - z2;
1511
tmp22 = tmp11 + tmp12;
1512
tmp23 = tmp11 - tmp12;
/* Odd-indexed inputs: offsets DCTSIZE*1, DCTSIZE*3, DCTSIZE*5 and DCTSIZE*7. */
1516
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1517
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1518
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1519
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1521
tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1522
tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1525
tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1526
tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1527
tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1528
tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1529
tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1530
tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1531
tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1532
MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1536
z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1537
tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1538
tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
/* Each tmp2x/tmp1x sum and difference fills mirrored workspace rows k and 11-k, descaled by CONST_BITS-PASS1_BITS. */
1540
/* Final output stage */
1542
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1543
wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1544
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1545
wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1546
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1547
wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1548
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1549
wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1550
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1551
wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1552
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1553
wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1556
/* Pass 2: process 12 rows from work array, store into output array. */
/* Each iteration reads wsptr[0..7] and ends by advancing wsptr by 8. */
1559
for (ctr = 0; ctr < 12; ctr++) {
1560
outptr = output_buf[ctr] + output_col;
1564
/* Add fudge factor here for final descale. */
1565
z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1568
z4 = (INT32) wsptr[4];
1569
z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
/* NOTE(review): as in pass 1, the statements between the c4 product and the reassignment of z4 are not shown here. */
1574
z1 = (INT32) wsptr[2];
1575
z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1577
z2 = (INT32) wsptr[6];
1587
tmp20 = tmp10 + tmp12;
1588
tmp25 = tmp10 - tmp12;
1590
tmp12 = z4 - z1 - z2;
1592
tmp22 = tmp11 + tmp12;
1593
tmp23 = tmp11 - tmp12;
/* Odd-indexed workspace entries of this row. */
1597
z1 = (INT32) wsptr[1];
1598
z2 = (INT32) wsptr[3];
1599
z3 = (INT32) wsptr[5];
1600
z4 = (INT32) wsptr[7];
1602
tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1603
tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1606
tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1607
tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1608
tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1609
tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1610
tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1611
tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1612
tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1613
MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1617
z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1618
tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1619
tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
/*
 * Mirrored outputs k and 11-k: each value is shifted right by
 * CONST_BITS+PASS1_BITS+3 and used to index range_limit.
 * NOTE(review): the tail of every statement below (whatever follows the
 * shift count and closes the subscript) is not part of this listing.
 */
1621
/* Final output stage */
1623
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1624
CONST_BITS+PASS1_BITS+3)
1626
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1627
CONST_BITS+PASS1_BITS+3)
1629
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1630
CONST_BITS+PASS1_BITS+3)
1632
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1633
CONST_BITS+PASS1_BITS+3)
1635
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1636
CONST_BITS+PASS1_BITS+3)
1638
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1639
CONST_BITS+PASS1_BITS+3)
1641
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1642
CONST_BITS+PASS1_BITS+3)
1644
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1645
CONST_BITS+PASS1_BITS+3)
1647
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1648
CONST_BITS+PASS1_BITS+3)
1650
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1651
CONST_BITS+PASS1_BITS+3)
1653
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
1654
CONST_BITS+PASS1_BITS+3)
1656
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
1657
CONST_BITS+PASS1_BITS+3)
1660
wsptr += 8; /* advance pointer to next row */
1666
* Perform dequantization and inverse DCT on one block of coefficients,
1667
* producing a 13x13 output block.
1669
* Optimized algorithm with 29 multiplications in the 1-D kernel.
1670
* cK represents sqrt(2) * cos(K*pi/26).
1674
jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1675
JCOEFPTR coef_block,
1676
JSAMPARRAY output_buf, JDIMENSION output_col)
/*
 * Overview of what is visible here: pass 1 runs 8 times (one per input
 * column), dequantizes coefficients read through inptr with the table taken
 * from compptr->dct_table, and stores 13 values per column into workspace
 * at stride 8.  Pass 2 runs 13 times (one per workspace row) and stores 13
 * samples per row starting at output_buf[ctr] + output_col.
 * NOTE(review): several statements are absent from this listing (for
 * example, tmp10, tmp11 and tmp15 are read before any visible assignment,
 * and inptr, wsptr, outptr and ctr have no visible declaration) -- confirm
 * against the complete source.
 */
1678
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1679
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
1680
INT32 z1, z2, z3, z4;
1682
ISLOW_MULT_TYPE * quantptr;
1685
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1687
int workspace[8*13]; /* buffers data between passes */
1690
/* Pass 1: process columns from input, store into work array. */
1693
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
/* ctr counts 8 columns; inptr, quantptr and wsptr each step by one element per column. */
1695
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
/* Even-indexed inputs: offsets DCTSIZE*0, DCTSIZE*2, DCTSIZE*4 and DCTSIZE*6. */
1698
z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1700
/* Add fudge factor here for final descale. */
1701
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
1703
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1704
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1705
z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
/* NOTE(review): tmp10 and tmp11 are used from here on, but the statements that set them are not shown in this listing. */
1710
tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
1711
tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
1713
tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
1714
tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
1716
tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
1717
tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
1719
tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
1720
tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
1722
tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
1723
tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
1725
tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
1726
tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
1728
tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
/* Odd-indexed inputs: offsets DCTSIZE*1, DCTSIZE*3, DCTSIZE*5 and DCTSIZE*7. */
1732
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1733
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1734
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1735
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1737
tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
1738
tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
/* NOTE(review): tmp15 is read on the next statement; its assignment is not shown in this listing. */
1740
tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
1741
tmp10 = tmp11 + tmp12 + tmp13 -
1742
MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
1743
tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
1744
tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
1745
tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
1746
tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
1748
tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
1749
tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
1752
tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
1753
tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
1754
MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
1755
z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
1757
tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
1758
MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
/* Sums and differences fill mirrored workspace rows k and 12-k; the middle row 6 comes from tmp26 alone. */
1760
/* Final output stage */
1762
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1763
wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1764
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1765
wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1766
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1767
wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1768
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1769
wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1770
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1771
wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1772
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1773
wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1774
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
1777
/* Pass 2: process 13 rows from work array, store into output array. */
/* Each iteration reads wsptr[0..7] and ends by advancing wsptr by 8. */
1780
for (ctr = 0; ctr < 13; ctr++) {
1781
outptr = output_buf[ctr] + output_col;
1785
/* Add fudge factor here for final descale. */
1786
z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1789
z2 = (INT32) wsptr[2];
1790
z3 = (INT32) wsptr[4];
1791
z4 = (INT32) wsptr[6];
/* NOTE(review): as in pass 1, the statements that set tmp10 and tmp11 are not shown in this listing. */
1796
tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
1797
tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
1799
tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
1800
tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
1802
tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
1803
tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
1805
tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
1806
tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
1808
tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
1809
tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
1811
tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
1812
tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
1814
tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
/* Odd-indexed workspace entries of this row. */
1818
z1 = (INT32) wsptr[1];
1819
z2 = (INT32) wsptr[3];
1820
z3 = (INT32) wsptr[5];
1821
z4 = (INT32) wsptr[7];
1823
tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
1824
tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
1826
tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
1827
tmp10 = tmp11 + tmp12 + tmp13 -
1828
MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
1829
tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
1830
tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
1831
tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
1832
tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
1834
tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
1835
tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
1838
tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
1839
tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
1840
MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
1841
z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
1843
tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
1844
MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
/*
 * Mirrored outputs k and 12-k, plus the middle output 6 from tmp26: each
 * value is shifted right by CONST_BITS+PASS1_BITS+3 and used to index
 * range_limit.
 * NOTE(review): the tail of every statement below (whatever follows the
 * shift count and closes the subscript) is not part of this listing.
 */
1846
/* Final output stage */
1848
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1849
CONST_BITS+PASS1_BITS+3)
1851
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1852
CONST_BITS+PASS1_BITS+3)
1854
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1855
CONST_BITS+PASS1_BITS+3)
1857
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1858
CONST_BITS+PASS1_BITS+3)
1860
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1861
CONST_BITS+PASS1_BITS+3)
1863
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1864
CONST_BITS+PASS1_BITS+3)
1866
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1867
CONST_BITS+PASS1_BITS+3)
1869
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1870
CONST_BITS+PASS1_BITS+3)
1872
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1873
CONST_BITS+PASS1_BITS+3)
1875
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1876
CONST_BITS+PASS1_BITS+3)
1878
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
1879
CONST_BITS+PASS1_BITS+3)
1881
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
1882
CONST_BITS+PASS1_BITS+3)
1884
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26,
1885
CONST_BITS+PASS1_BITS+3)
1888
wsptr += 8; /* advance pointer to next row */
1894
* Perform dequantization and inverse DCT on one block of coefficients,
1895
* producing a 14x14 output block.
1897
* Optimized algorithm with 20 multiplications in the 1-D kernel.
1898
* cK represents sqrt(2) * cos(K*pi/28).
1902
jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1903
JCOEFPTR coef_block,
1904
JSAMPARRAY output_buf, JDIMENSION output_col)
/*
 * Overview of what is visible here: pass 1 runs 8 times (one per input
 * column), dequantizes coefficients read through inptr with the table taken
 * from compptr->dct_table, and stores 14 values per column into workspace
 * at stride 8.  Pass 2 runs 14 times (one per workspace row) and stores 14
 * samples per row starting at output_buf[ctr] + output_col.
 * NOTE(review): several statements are absent from this listing (for
 * example, tmp10, tmp11, tmp12 and tmp14 are read before any visible
 * assignment, and inptr, wsptr, outptr and ctr have no visible
 * declaration) -- confirm against the complete source.
 */
1906
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
1907
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
1908
INT32 z1, z2, z3, z4;
1910
ISLOW_MULT_TYPE * quantptr;
1913
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1915
int workspace[8*14]; /* buffers data between passes */
1918
/* Pass 1: process columns from input, store into work array. */
1921
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
/* ctr counts 8 columns; inptr, quantptr and wsptr each step by one element per column. */
1923
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
/* Even-indexed inputs: offsets DCTSIZE*0 and DCTSIZE*4 first, then DCTSIZE*2 and DCTSIZE*6. */
1926
z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1928
/* Add fudge factor here for final descale. */
1929
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
1930
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1931
z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
1932
z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
1933
z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
/* tmp23 is descaled right here, which is why rows 3 and 10 are stored below without a further RIGHT_SHIFT. */
1939
tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
1940
CONST_BITS-PASS1_BITS);
1942
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1943
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1945
z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
1947
tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
1948
tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
1949
tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
1950
MULTIPLY(z2, FIX(1.378756276)); /* c2 */
/* NOTE(review): tmp10, tmp11 and tmp12 are combined below, but the statements that set them are not shown in this listing. */
1952
tmp20 = tmp10 + tmp13;
1953
tmp26 = tmp10 - tmp13;
1954
tmp21 = tmp11 + tmp14;
1955
tmp25 = tmp11 - tmp14;
1956
tmp22 = tmp12 + tmp15;
1957
tmp24 = tmp12 - tmp15;
/* Odd-indexed inputs: offsets DCTSIZE*1, DCTSIZE*3, DCTSIZE*5 and DCTSIZE*7. */
1961
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1962
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1963
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1964
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1965
tmp13 = z4 << CONST_BITS;
/* NOTE(review): tmp14 is read two statements below; the assignment it relies on at that point is not shown in this listing. */
1968
tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
1969
tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
1970
tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
1971
tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
1972
tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
1974
tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
1977
z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
1978
tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
1979
tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
1980
z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
1981
tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
1982
tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
/* tmp13 is rebuilt at PASS1_BITS scale so it can be added to the already-descaled tmp23. */
1984
tmp13 = (z1 - z3) << PASS1_BITS;
/* Sums and differences fill mirrored workspace rows k and 13-k; rows 3 and 10 are stored without a shift here. */
1986
/* Final output stage */
1988
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1989
wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1990
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1991
wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1992
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1993
wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1994
wsptr[8*3] = (int) (tmp23 + tmp13);
1995
wsptr[8*10] = (int) (tmp23 - tmp13);
1996
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1997
wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1998
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1999
wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2000
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2001
wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2004
/* Pass 2: process 14 rows from work array, store into output array. */
/* Each iteration reads wsptr[0..7] and ends by advancing wsptr by 8. */
2007
for (ctr = 0; ctr < 14; ctr++) {
2008
outptr = output_buf[ctr] + output_col;
2012
/* Add fudge factor here for final descale. */
2013
z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
2015
z4 = (INT32) wsptr[4];
2016
z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2017
z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2018
z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
/* Unlike pass 1, tmp23 is not shifted here; it goes through the same RIGHT_SHIFT as the other outputs below. */
2024
tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
2026
z1 = (INT32) wsptr[2];
2027
z2 = (INT32) wsptr[6];
2029
z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2031
tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2032
tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2033
tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2034
MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2036
tmp20 = tmp10 + tmp13;
2037
tmp26 = tmp10 - tmp13;
2038
tmp21 = tmp11 + tmp14;
2039
tmp25 = tmp11 - tmp14;
2040
tmp22 = tmp12 + tmp15;
2041
tmp24 = tmp12 - tmp15;
/* Odd-indexed workspace entries of this row. */
2045
z1 = (INT32) wsptr[1];
2046
z2 = (INT32) wsptr[3];
2047
z3 = (INT32) wsptr[5];
2048
z4 = (INT32) wsptr[7];
/* NOTE(review): statements between the loads above and the products below are not shown in this listing; z4 and tmp14 may be modified there -- confirm. */
2052
tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2053
tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2054
tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2055
tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2056
tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2058
tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
2060
tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
2061
tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2062
tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2063
tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2064
tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2065
tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2067
tmp13 = ((z1 - z3) << CONST_BITS) + z4;
/*
 * Mirrored outputs k and 13-k: each value is shifted right by
 * CONST_BITS+PASS1_BITS+3 and used to index range_limit.
 * NOTE(review): the tail of every statement below (whatever follows the
 * shift count and closes the subscript) is not part of this listing.
 */
2069
/* Final output stage */
2071
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2072
CONST_BITS+PASS1_BITS+3)
2074
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2075
CONST_BITS+PASS1_BITS+3)
2077
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2078
CONST_BITS+PASS1_BITS+3)
2080
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2081
CONST_BITS+PASS1_BITS+3)
2083
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2084
CONST_BITS+PASS1_BITS+3)
2086
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2087
CONST_BITS+PASS1_BITS+3)
2089
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2090
CONST_BITS+PASS1_BITS+3)
2092
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2093
CONST_BITS+PASS1_BITS+3)
2095
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2096
CONST_BITS+PASS1_BITS+3)
2098
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2099
CONST_BITS+PASS1_BITS+3)
2101
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2102
CONST_BITS+PASS1_BITS+3)
2104
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2105
CONST_BITS+PASS1_BITS+3)
2107
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2108
CONST_BITS+PASS1_BITS+3)
2110
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2111
CONST_BITS+PASS1_BITS+3)
2114
wsptr += 8; /* advance pointer to next row */
2120
* Perform dequantization and inverse DCT on one block of coefficients,
2121
* producing a 15x15 output block.
2123
* Optimized algorithm with 22 multiplications in the 1-D kernel.
2124
* cK represents sqrt(2) * cos(K*pi/30).
2128
jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2129
JCOEFPTR coef_block,
2130
JSAMPARRAY output_buf, JDIMENSION output_col)
/*
 * Overview of what is visible here: pass 1 runs 8 times (one per input
 * column), dequantizes coefficients read through inptr with the table taken
 * from compptr->dct_table, and stores 15 values per column into workspace
 * at stride 8.  Pass 2 runs 15 times (one per workspace row) and stores 15
 * samples per row starting at output_buf[ctr] + output_col.
 * NOTE(review): several statements are absent from this listing (for
 * example, tmp12 and tmp13 are read before any visible assignment, and
 * inptr, wsptr, outptr and ctr have no visible declaration) -- confirm
 * against the complete source.
 */
2132
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2133
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2134
INT32 z1, z2, z3, z4;
2136
ISLOW_MULT_TYPE * quantptr;
2139
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2141
int workspace[8*15]; /* buffers data between passes */
2144
/* Pass 1: process columns from input, store into work array. */
2147
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
/* ctr counts 8 columns; inptr, quantptr and wsptr each step by one element per column. */
2149
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
/* Even-indexed inputs: offsets DCTSIZE*0, DCTSIZE*2, DCTSIZE*4 and DCTSIZE*6. */
2152
z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2154
/* Add fudge factor here for final descale. */
2155
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2157
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2158
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2159
z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2161
tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2162
tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
/* NOTE(review): statements between the products above and the update of z1 below are not shown in this listing; they presumably define tmp12 and tmp13 -- confirm. */
2166
z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2170
tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2171
tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2172
z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2174
tmp20 = tmp13 + tmp10 + tmp11;
2175
tmp23 = tmp12 - tmp10 + tmp11 + z2;
2177
tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2178
tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2180
tmp25 = tmp13 - tmp10 - tmp11;
2181
tmp26 = tmp12 + tmp10 - tmp11 - z2;
2183
tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2184
tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2186
tmp21 = tmp12 + tmp10 + tmp11;
2187
tmp24 = tmp13 - tmp10 + tmp11;
2189
tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2190
tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
/* Odd-indexed inputs: z4 first holds the DCTSIZE*5 input (scaled into z3), then is reloaded from DCTSIZE*7. */
2194
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2195
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2196
z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2197
z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2198
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
/* NOTE(review): tmp13 is read on the next statement; the assignment it relies on at this point is not shown in this listing. */
2201
tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2202
tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2203
tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2205
tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2206
tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2208
tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2210
tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2211
tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2212
tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2213
z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2214
tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2215
tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
/* Sums and differences fill mirrored workspace rows k and 14-k; the middle row 7 comes from tmp27 alone. */
2217
/* Final output stage */
2219
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2220
wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2221
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2222
wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2223
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2224
wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2225
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2226
wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2227
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2228
wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2229
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2230
wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2231
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2232
wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2233
wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
2236
/* Pass 2: process 15 rows from work array, store into output array. */
/* Each iteration reads wsptr[0..7] and ends by advancing wsptr by 8. */
2239
for (ctr = 0; ctr < 15; ctr++) {
2240
outptr = output_buf[ctr] + output_col;
2244
/* Add fudge factor here for final descale. */
2245
z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
2248
z2 = (INT32) wsptr[2];
2249
z3 = (INT32) wsptr[4];
2250
z4 = (INT32) wsptr[6];
2252
tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2253
tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
/* NOTE(review): as in pass 1, statements around the update of z1 below are not shown in this listing; they presumably define tmp12 and tmp13 -- confirm. */
2257
z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2261
tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2262
tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2263
z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2265
tmp20 = tmp13 + tmp10 + tmp11;
2266
tmp23 = tmp12 - tmp10 + tmp11 + z2;
2268
tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2269
tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2271
tmp25 = tmp13 - tmp10 - tmp11;
2272
tmp26 = tmp12 + tmp10 - tmp11 - z2;
2274
tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2275
tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2277
tmp21 = tmp12 + tmp10 + tmp11;
2278
tmp24 = tmp13 - tmp10 + tmp11;
2280
tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2281
tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
/* Odd-indexed workspace entries: z4 first holds wsptr[5] (scaled into z3), then is reloaded from wsptr[7]. */
2285
z1 = (INT32) wsptr[1];
2286
z2 = (INT32) wsptr[3];
2287
z4 = (INT32) wsptr[5];
2288
z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2289
z4 = (INT32) wsptr[7];
2292
tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2293
tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2294
tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2296
tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2297
tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2299
tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2301
tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2302
tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2303
tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2304
z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2305
tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2306
tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
/*
 * Mirrored outputs k and 14-k, plus the middle output 7 from tmp27: each
 * value is shifted right by CONST_BITS+PASS1_BITS+3 and used to index
 * range_limit.
 * NOTE(review): the tail of every statement below (whatever follows the
 * shift count and closes the subscript) is not part of this listing.
 */
2308
/* Final output stage */
2310
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2311
CONST_BITS+PASS1_BITS+3)
2313
outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2314
CONST_BITS+PASS1_BITS+3)
2316
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2317
CONST_BITS+PASS1_BITS+3)
2319
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2320
CONST_BITS+PASS1_BITS+3)
2322
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2323
CONST_BITS+PASS1_BITS+3)
2325
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2326
CONST_BITS+PASS1_BITS+3)
2328
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2329
CONST_BITS+PASS1_BITS+3)
2331
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2332
CONST_BITS+PASS1_BITS+3)
2334
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2335
CONST_BITS+PASS1_BITS+3)
2337
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2338
CONST_BITS+PASS1_BITS+3)
2340
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2341
CONST_BITS+PASS1_BITS+3)
2343
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2344
CONST_BITS+PASS1_BITS+3)
2346
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2347
CONST_BITS+PASS1_BITS+3)
2349
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2350
CONST_BITS+PASS1_BITS+3)
2352
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27,
2353
CONST_BITS+PASS1_BITS+3)
2356
wsptr += 8; /* advance pointer to next row */
2362
* Perform dequantization and inverse DCT on one block of coefficients,
2363
* producing a 16x16 output block.
2365
* Optimized algorithm with 28 multiplications in the 1-D kernel.
2366
* cK represents sqrt(2) * cos(K*pi/32).
2370
/*
 * jpeg_idct_16x16: dequantize one block of coefficients and run a two-pass
 * fixed-point inverse DCT that writes 16 output rows of 16 samples each.
 *
 *   cinfo       - decompressor state; only used here to obtain the
 *                 range-limit lookup table via IDCT_range_limit().
 *   compptr     - component info; compptr->dct_table supplies the
 *                 per-coefficient multipliers consumed by DEQUANTIZE().
 *   coef_block  - input coefficient block.
 *                 NOTE(review): the statement binding inptr to coef_block is
 *                 not visible in this listing - confirm.
 *   output_buf  - array of output rows; row ctr is written starting at
 *                 output_buf[ctr] + output_col.
 *   output_col  - column offset applied to every output row.
 *
 * Pass 1 walks 8 input columns, reading coefficient rows 0..7 of each, and
 * stores 16 intermediate values per column into workspace with a stride
 * of 8 ints.  Pass 2 walks the 16 workspace rows (8 ints each) and emits
 * 16 range-limited samples per row.
 *
 * NOTE(review): several identifiers used below (inptr, wsptr, outptr, ctr)
 * have no visible declaration or initialisation in this listing; they are
 * presumably declared on lines that are not shown - confirm against the
 * complete source.
 */
jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2371
JCOEFPTR coef_block,
2372
JSAMPARRAY output_buf, JDIMENSION output_col)
2374
/* tmp0..tmp3 and tmp10..tmp13 are scratch values that are reused between the
 * even and odd parts; tmp20..tmp27 hold the finished even-part terms. */
INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2375
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2376
INT32 z1, z2, z3, z4;
2378
ISLOW_MULT_TYPE * quantptr;
2381
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2383
/* 16 intermediate rows of 8 ints each: pass 1 writes wsptr[8*0..8*15] for
 * each of 8 columns, pass 2 reads 8 ints per row. */
int workspace[8*16]; /* buffers data between passes */
2386
/* Pass 1: process columns from input, store into work array. */
2389
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2391
/* One iteration per input column; all three pointers step one column. */
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2394
/* Even part: built from coefficient rows 0, 4, 2 and 6 of this column. */
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2395
tmp0 <<= CONST_BITS;
2396
/* Add fudge factor here for final descale. */
/* The constant is half of 2^(CONST_BITS-PASS1_BITS), i.e. half the divisor
 * implied by the pass-1 right shift below, so that shift rounds. */
2397
tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
2399
z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2400
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2401
tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2403
/* Combine the scaled row-0 term with the two row-4 products. */
tmp10 = tmp0 + tmp1;
2404
tmp11 = tmp0 - tmp1;
2405
tmp12 = tmp0 + tmp2;
2406
tmp13 = tmp0 - tmp2;
2408
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2409
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2411
/* NOTE(review): z3 is read on the next statement but no assignment to it is
 * visible in this listing; presumably it is derived from z1 and z2 on a line
 * that is not shown - confirm against the complete source. */
z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2412
z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2414
/* tmp0..tmp3 are reused here for the row-2/row-6 contributions. */
tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2415
tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2416
tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2417
tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2419
/* Eight even-part terms; tmp2k is later paired with one odd-part term to
 * produce a mirrored pair of outputs (k and 15-k). */
tmp20 = tmp10 + tmp0;
2420
tmp27 = tmp10 - tmp0;
2421
tmp21 = tmp12 + tmp1;
2422
tmp26 = tmp12 - tmp1;
2423
tmp22 = tmp13 + tmp2;
2424
tmp25 = tmp13 - tmp2;
2425
tmp23 = tmp11 + tmp3;
2426
tmp24 = tmp11 - tmp3;
2430
/* Odd part: built from coefficient rows 1, 3, 5 and 7 of this column. */
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2431
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2432
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2433
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2437
/* NOTE(review): tmp11 is multiplied below as an odd-part operand, yet the
 * only visible assignment to it is the even-part value above; a reassignment
 * presumably sits on a line not shown in this listing - confirm. */
tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2438
tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2439
tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2440
tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2441
tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2442
tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2443
tmp0 = tmp1 + tmp2 + tmp3 -
2444
MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2445
tmp13 = tmp10 + tmp11 + tmp12 -
2446
MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2447
/* z1 is no longer the row-1 coefficient from here on; it is reused as a
 * shared product that feeds two accumulators at a time. */
z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2448
tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2449
tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2450
z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2451
tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2452
tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2454
/* NOTE(review): in this listing the two z2 products computed at the end of
 * the odd part have no visible consumer, and z2 is overwritten in between;
 * the accumulating statements are presumably on lines not shown - confirm. */
z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2456
tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2457
z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2458
tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2460
z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2463
z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2467
/* Final output stage */
/* Each even term is added to / subtracted from one odd term, descaled by
 * CONST_BITS-PASS1_BITS, and stored at workspace rows k and 15-k (stride 8). */
2469
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
2470
wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
2471
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
2472
wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
2473
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
2474
wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
2475
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
2476
wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
2477
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
2478
wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
2479
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
2480
wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
2481
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
2482
wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
2483
wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
2484
wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
2487
/* Pass 2: process 16 rows from work array, store into output array. */
/* NOTE(review): wsptr is expected to point at the start of workspace again
 * before this loop; that reset is not visible in this listing - confirm. */
2490
for (ctr = 0; ctr < 16; ctr++) {
2491
/* Destination for this row: row ctr of output_buf, shifted by output_col. */
outptr = output_buf[ctr] + output_col;
2495
/* Even part: built from workspace entries 0, 4, 2 and 6 of this row. */
/* Add fudge factor here for final descale. */
/* After the shift by CONST_BITS the added constant equals
 * 2^(CONST_BITS+PASS1_BITS+2), half the divisor implied by the final
 * right shift of CONST_BITS+PASS1_BITS+3 in the output stage. */
2496
tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
2497
tmp0 <<= CONST_BITS;
2499
z1 = (INT32) wsptr[4];
2500
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2501
tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2503
tmp10 = tmp0 + tmp1;
2504
tmp11 = tmp0 - tmp1;
2505
tmp12 = tmp0 + tmp2;
2506
tmp13 = tmp0 - tmp2;
2508
z1 = (INT32) wsptr[2];
2509
z2 = (INT32) wsptr[6];
2511
/* NOTE(review): as in pass 1, z3 is read here with no assignment visible in
 * this listing for the current row - confirm against the complete source. */
z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2512
z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2514
tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2515
tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2516
tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2517
tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2519
/* Same eight even-part combinations as in pass 1. */
tmp20 = tmp10 + tmp0;
2520
tmp27 = tmp10 - tmp0;
2521
tmp21 = tmp12 + tmp1;
2522
tmp26 = tmp12 - tmp1;
2523
tmp22 = tmp13 + tmp2;
2524
tmp25 = tmp13 - tmp2;
2525
tmp23 = tmp11 + tmp3;
2526
tmp24 = tmp11 - tmp3;
2530
/* Odd part: built from workspace entries 1, 3, 5 and 7 of this row. */
z1 = (INT32) wsptr[1];
2531
z2 = (INT32) wsptr[3];
2532
z3 = (INT32) wsptr[5];
2533
z4 = (INT32) wsptr[7];
2537
/* NOTE(review): tmp11 is used below as an odd-part operand; its
 * reassignment is not visible in this listing - confirm. */
tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2538
tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2539
tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2540
tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2541
tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2542
tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2543
tmp0 = tmp1 + tmp2 + tmp3 -
2544
MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2545
tmp13 = tmp10 + tmp11 + tmp12 -
2546
MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2547
/* From here z1 is reused as a shared product, as in pass 1. */
z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2548
tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2549
tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2550
z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2551
tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2552
tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2554
z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2556
tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2557
z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2558
tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2560
z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2563
z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2567
/* Final output stage */
/* Sum and difference of each even/odd pair give output columns k and 15-k.
 * The descaled value (shift of CONST_BITS+PASS1_BITS+3) indexes range_limit.
 * NOTE(review): the remainder of each index expression (closing bracket and
 * any masking of the index) is not visible in this listing - confirm. */
2569
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
2570
CONST_BITS+PASS1_BITS+3)
2572
outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
2573
CONST_BITS+PASS1_BITS+3)
2575
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
2576
CONST_BITS+PASS1_BITS+3)
2578
outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
2579
CONST_BITS+PASS1_BITS+3)
2581
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
2582
CONST_BITS+PASS1_BITS+3)
2584
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
2585
CONST_BITS+PASS1_BITS+3)
2587
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
2588
CONST_BITS+PASS1_BITS+3)
2590
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
2591
CONST_BITS+PASS1_BITS+3)
2593
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
2594
CONST_BITS+PASS1_BITS+3)
2596
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
2597
CONST_BITS+PASS1_BITS+3)
2599
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
2600
CONST_BITS+PASS1_BITS+3)
2602
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
2603
CONST_BITS+PASS1_BITS+3)
2605
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
2606
CONST_BITS+PASS1_BITS+3)
2608
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
2609
CONST_BITS+PASS1_BITS+3)
2611
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
2612
CONST_BITS+PASS1_BITS+3)
2614
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
2615
CONST_BITS+PASS1_BITS+3)
2618
/* Each workspace row holds 8 ints, matching the stride used in pass 1. */
wsptr += 8; /* advance pointer to next row */
2622
#endif /* IDCT_SCALING_SUPPORTED */
389
2623
#endif /* DCT_ISLOW_SUPPORTED */