~ubuntu-branches/ubuntu/trusty/libdv/trusty

« back to all changes in this revision

Viewing changes to libdv/enc_input.c

  • Committer: Bazaar Package Importer
  • Author(s): Daniel Kobras
  • Date: 2006-09-26 14:22:15 UTC
  • mfrom: (1.3.1 upstream) (3.1.4 edgy)
  • Revision ID: james.westby@ubuntu.com-20060926142215-q2arp4stpw6lrb5p
Tags: 1.0.0-1
* New upstream version.
* Removed patches:
  + [01_changelog_update_CVS]
  + [10_amd64_linkage_fix_CVS]
  + [10_inline_fixes_CVS]
  + [20_no_exec_stack_CVS]
  + [30_unload_memleak_fix_CVS]
  + [40_playdv_exit_fix_CVS]
  + [50_gcc41_asm_constraint_fixes_CVS]
    All of the above are part of the new upstream version.
* debian/control: In Build-Depends, remove alternative dependencies on
  obsolete SDL and X packages.
* debian/control: Complies with version 3.7.2 of Debian policy.
* debian/libdv4.README.Debian: Document lack of position-independent code
  in i386 version of libdv's shared library as mandated by recent versions
  of Debian policy.

Show diffs side-by-side

added added

removed removed

Lines of Context:
36
36
#include "encode.h"
37
37
#include "dct.h"
38
38
#include "dv_types.h"
39
 
#if ARCH_X86
 
39
#if ARCH_X86 || ARCH_X86_64
40
40
#include "mmx.h"
41
41
#else
42
42
#include <math.h>
61
61
 
62
62
// #define ARCH_X86 0
63
63
 
64
 
#if !ARCH_X86
 
64
#if (!ARCH_X86) && (!ARCH_X86_64)
65
65
static inline int f2b(float f)
66
66
{
67
67
        int b = rint(f);
85
85
extern void _dv_rgbtoycb_mmx(unsigned char* inPtr, int rows, int columns,
86
86
                         short* outyPtr, short* outuPtr, short* outvPtr);
87
87
 
 
88
extern void _dv_rgbtoycb_mmx_x86_64(unsigned char* inPtr, int rows, int columns,
 
89
                         short* outyPtr, short* outuPtr, short* outvPtr);
 
90
 
88
91
void dv_enc_rgb_to_ycb(unsigned char* img_rgb, int height,
89
92
                       short* img_y, short* img_cr, short* img_cb)
90
93
{
91
 
#if !ARCH_X86
 
94
#if (!ARCH_X86) && (!ARCH_X86_64)
92
95
#if 1
93
96
       int i;
94
97
       int ti;
162
165
                }
163
166
        }
164
167
#endif
 
168
#elif ARCH_X86_64
 
169
        _dv_rgbtoycb_mmx_x86_64(img_rgb, height, DV_WIDTH, (short*) img_y,
 
170
                     (short*) img_cr, (short*) img_cb);
 
171
        emms();
165
172
#else
166
173
        _dv_rgbtoycb_mmx(img_rgb, height, DV_WIDTH, (short*) img_y,
167
174
                     (short*) img_cr, (short*) img_cb);
179
186
static short* img_cr = NULL; /* [DV_PAL_HEIGHT * DV_WIDTH / 2]; */
180
187
static short* img_cb = NULL; /* [DV_PAL_HEIGHT * DV_WIDTH / 2]; */
181
188
 
182
 
#if !ARCH_X86
 
189
#if (!ARCH_X86) && (!ARCH_X86_64)
183
190
 
184
191
static int need_dct_248_transposed(dv_coeff_t * bl)
185
192
{
210
217
        return ((res_cols * 65536 / res_rows) > DCT_248_THRESHOLD);
211
218
}
212
219
 
213
 
#else
 
220
#elif ARCH_X86
214
221
 
215
222
extern int _dv_need_dct_248_mmx_rows(dv_coeff_t * bl);
216
223
 
252
259
        }
253
260
}
254
261
 
 
262
#else
 
263
 
 
264
extern int _dv_need_dct_248_mmx_x86_64_rows(dv_coeff_t * bl);
 
265
 
 
266
extern void _dv_transpose_mmx_x86_64(short * dst);
 
267
extern void _dv_ppm_copy_y_block_mmx_x86_64(short * dst, short * src);
 
268
extern void _dv_ppm_copy_pal_c_block_mmx_x86_64(short * dst, short * src);
 
269
extern void _dv_ppm_copy_ntsc_c_block_mmx_x86_64(short * dst, short * src);
 
270
 
 
271
/*
 * Set dct_mode on each of the six blocks in mb->b and run
 * _dv_transpose_mmx_x86_64() on each block's coefficients.
 *
 * When force_dct is not -1, its value is copied into every block's
 * dct_mode.  When force_dct is -1, the mode is chosen per block from two
 * calls to _dv_need_dct_248_mmx_x86_64_rows(): one before and one after
 * the block has been transposed.
 *
 * NOTE(review): force_dct, DCT_248_THRESHOLD and the *_mmx_x86_64 helpers
 * are defined outside this excerpt; their exact semantics should be
 * confirmed against their definitions.
 */
static void finish_mb_mmx_x86_64(dv_macroblock_t* mb)
 
272
{
 
273
        int b;
 
274
        int need_dct_248_rows[6];
 
275
        dv_block_t* bl = mb->b;
 
276
 
 
277
        /* A forced mode is applied to all six blocks as-is. */
        if (force_dct != -1) {
 
278
                for (b = 0; b < 6; b++) {
 
279
                        bl[b].dct_mode = force_dct;
 
280
                }
 
281
        } else {
 
282
                /* First measurement, taken before the transposes below;
                 * the helper's result is stored with 1 added. */
                for (b = 0; b < 6; b++) {
 
283
                        need_dct_248_rows[b]
 
284
                                = _dv_need_dct_248_mmx_x86_64_rows(bl[b].coeffs) + 1;
 
285
                }
 
286
        }
 
287
        /* All six blocks are transposed regardless of which branch ran. */
        _dv_transpose_mmx_x86_64(bl[0].coeffs);
 
288
        _dv_transpose_mmx_x86_64(bl[1].coeffs);
 
289
        _dv_transpose_mmx_x86_64(bl[2].coeffs);
 
290
        _dv_transpose_mmx_x86_64(bl[3].coeffs);
 
291
        _dv_transpose_mmx_x86_64(bl[4].coeffs);
 
292
        _dv_transpose_mmx_x86_64(bl[5].coeffs);
 
293
 
 
294
        /* need_dct_248_rows[] is read only under the same condition that
         * filled it in above.  The helper is called again on the transposed
         * data; if (first value * 65536) / (second value + 1) exceeds
         * DCT_248_THRESHOLD the block gets DV_DCT_248, otherwise DV_DCT_88.
         * NOTE(review): the "+ 1" presumably keeps the divisor non-zero,
         * assuming the helper never returns -1 -- confirm. */
        if (force_dct == -1) {
 
295
                for (b = 0; b < 6; b++) {
 
296
                        bl[b].dct_mode = 
 
297
                                ((need_dct_248_rows[b] * 65536 / 
 
298
                                  (_dv_need_dct_248_mmx_x86_64_rows(bl[b].coeffs) + 1))
 
299
                                 > DCT_248_THRESHOLD) ? DV_DCT_248 : DV_DCT_88;
 
300
                }
 
301
        }
 
302
}
 
303
 
255
304
#endif /* ARCH_X86 */
256
305
 
257
306
static int read_ppm_stream(FILE* f, int * isPAL, int * height_)
363
412
        int x = mb->x;
364
413
        dv_block_t* bl = mb->b;
365
414
 
366
 
#if !ARCH_X86
 
415
#if (!ARCH_X86) && (!ARCH_X86_64)
367
416
        if (isPAL) { /* PAL */
368
417
                int i,j;
369
418
                for (j = 0; j < 8; j++) {
463
512
                                ? DV_DCT_248 : DV_DCT_88;
464
513
                }
465
514
        }
466
 
#else
 
515
#elif ARCH_X86
467
516
        if (isPAL) { /* PAL */
468
517
                short* start_y = img_y + y * DV_WIDTH + x;
469
518
                _dv_ppm_copy_y_block_mmx(bl[0].coeffs, start_y);
523
572
        finish_mb_mmx(mb);
524
573
 
525
574
        emms();
 
575
 
 
576
#else
 
577
 
 
578
        if (isPAL) { /* PAL */
 
579
                short* start_y = img_y + y * DV_WIDTH + x;
 
580
                _dv_ppm_copy_y_block_mmx_x86_64(bl[0].coeffs, start_y);
 
581
                _dv_ppm_copy_y_block_mmx_x86_64(bl[1].coeffs, start_y + 8);
 
582
                _dv_ppm_copy_y_block_mmx_x86_64(bl[2].coeffs, start_y + 8 * DV_WIDTH);
 
583
                _dv_ppm_copy_y_block_mmx_x86_64(bl[3].coeffs, start_y + 8 * DV_WIDTH + 8);
 
584
                _dv_ppm_copy_pal_c_block_mmx_x86_64(bl[4].coeffs,
 
585
                                         img_cr+y * DV_WIDTH/2+ x/2);
 
586
                _dv_ppm_copy_pal_c_block_mmx_x86_64(bl[5].coeffs,
 
587
                                         img_cb+y * DV_WIDTH/2+ x/2);
 
588
        } else if (mb->x == DV_WIDTH- 16) { /* rightmost NTSC block */
 
589
                short* start_y = img_y + y * DV_WIDTH + x;
 
590
                int i,j;
 
591
 
 
592
                _dv_ppm_copy_y_block_mmx_x86_64(bl[0].coeffs, start_y);
 
593
                _dv_ppm_copy_y_block_mmx_x86_64(bl[1].coeffs, start_y + 8);
 
594
                _dv_ppm_copy_y_block_mmx_x86_64(bl[2].coeffs, start_y + 8 * DV_WIDTH);
 
595
                _dv_ppm_copy_y_block_mmx_x86_64(bl[3].coeffs, start_y + 8 * DV_WIDTH + 8);
 
596
 
 
597
                for (j = 0; j < 8; j++) {
 
598
                        for (i = 0; i < 4; i++) {
 
599
                                bl[4].coeffs[8 * j + i] = 
 
600
                                        (img_cr[(y + j) * DV_WIDTH/2
 
601
                                               + x / 2 + i*2]
 
602
                                         + img_cr[(y + j) * DV_WIDTH/2 
 
603
                                                 + x / 2 + 1 + i*2]) >> 1;
 
604
                                bl[5].coeffs[8 * j + i] = 
 
605
                                        (img_cb[(y + j) * DV_WIDTH/2
 
606
                                               + x / 2 + i*2]
 
607
                                         + img_cb[(y + j) * DV_WIDTH/2 
 
608
                                                 + x / 2 + 1 + i*2]) >> 1;
 
609
                                bl[4].coeffs[8 * j + i + 4] = 
 
610
                                        (img_cr[(y + j + 8) * DV_WIDTH/2
 
611
                                               + x / 2 + i*2]
 
612
                                         + img_cr[(y + j + 8) * DV_WIDTH/2 
 
613
                                                 + x / 2 + 1 + i*2]) >> 1;
 
614
                                bl[5].coeffs[8 * j + i + 4] = 
 
615
                                        (img_cb[(y + j + 8) * DV_WIDTH/2
 
616
                                               + x / 2 + i*2]
 
617
                                         + img_cb[(y + j + 8) * DV_WIDTH/2 
 
618
                                                 + x / 2 + 1 + i*2]) >> 1;
 
619
                        }
 
620
                }
 
621
 
 
622
        } else { /* NTSC */
 
623
                short* start_y = img_y + y * DV_WIDTH + x;
 
624
                _dv_ppm_copy_y_block_mmx_x86_64(bl[0].coeffs, start_y);
 
625
                _dv_ppm_copy_y_block_mmx_x86_64(bl[1].coeffs, start_y + 8);
 
626
                _dv_ppm_copy_y_block_mmx_x86_64(bl[2].coeffs, start_y + 16);
 
627
                _dv_ppm_copy_y_block_mmx_x86_64(bl[3].coeffs, start_y + 24);
 
628
                _dv_ppm_copy_ntsc_c_block_mmx_x86_64(bl[4].coeffs,
 
629
                                          img_cr + y*DV_WIDTH/2 + x/2);
 
630
                _dv_ppm_copy_ntsc_c_block_mmx_x86_64(bl[5].coeffs,
 
631
                                          img_cb + y*DV_WIDTH/2 + x/2);
 
632
        }
 
633
 
 
634
        finish_mb_mmx_x86_64(mb);
 
635
 
 
636
        emms();
526
637
#endif
527
638
}
528
639
 
640
751
                                                << (DCT_YUV_PRECISION - 1);
641
752
}
642
753
 
643
 
#if !ARCH_X86
 
754
#if (!ARCH_X86) && (!ARCH_X86_64)
644
755
static inline short pgm_get_y(int y, int x)
645
756
{
646
757
        return (((short) real_readbuf[y * DV_WIDTH + x]) - 128 + 16)
662
773
 
663
774
}
664
775
 
665
 
#else
 
776
#elif ARCH_X86
666
777
extern void _dv_pgm_copy_y_block_mmx(short * dst, unsigned char * src);
667
778
extern void _dv_pgm_copy_pal_c_block_mmx(short * dst, unsigned char * src);
668
779
extern void _dv_pgm_copy_ntsc_c_block_mmx(short * dst, unsigned char * src);
 
780
#else
 
781
extern void _dv_pgm_copy_y_block_mmx_x86_64(short * dst, unsigned char * src);
 
782
extern void _dv_pgm_copy_pal_c_block_mmx_x86_64(short * dst, unsigned char * src);
 
783
extern void _dv_pgm_copy_ntsc_c_block_mmx_x86_64(short * dst, unsigned char * src);
669
784
#endif
670
785
 
671
786
static void pgm_fill_macroblock(dv_macroblock_t *mb, int isPAL)
673
788
        int y = mb->y;
674
789
        int x = mb->x;
675
790
        dv_block_t* bl = mb->b;
676
 
#if !ARCH_X86
 
791
#if (!ARCH_X86) && (!ARCH_X86_64)
677
792
        if (isPAL) { /* PAL */
678
793
                int i,j;
679
794
                for (j = 0; j < 8; j++) {
743
858
                                ? DV_DCT_248 : DV_DCT_88;
744
859
                }
745
860
        }
746
 
#else
 
861
#elif ARCH_X86
747
862
        if (isPAL) { /* PAL */
748
863
                unsigned char* start_y = real_readbuf + y * DV_WIDTH + x;
749
864
                unsigned char* img_cr = real_readbuf 
811
926
        finish_mb_mmx(mb);
812
927
 
813
928
        emms();
 
929
#else
 
930
 
 
931
 
 
932
        if (isPAL) { /* PAL */
 
933
                unsigned char* start_y = real_readbuf + y * DV_WIDTH + x;
 
934
                unsigned char* img_cr = real_readbuf 
 
935
                        + DV_WIDTH * DV_PAL_HEIGHT + DV_WIDTH / 2;
 
936
                unsigned char* img_cb = real_readbuf 
 
937
                        + DV_WIDTH * DV_PAL_HEIGHT;
 
938
 
 
939
                _dv_pgm_copy_y_block_mmx_x86_64(bl[0].coeffs, start_y);
 
940
                _dv_pgm_copy_y_block_mmx_x86_64(bl[1].coeffs, start_y + 8);
 
941
                _dv_pgm_copy_y_block_mmx_x86_64(bl[2].coeffs, start_y + 8 * DV_WIDTH);
 
942
                _dv_pgm_copy_y_block_mmx_x86_64(bl[3].coeffs, start_y + 8 * DV_WIDTH + 8);
 
943
                _dv_pgm_copy_pal_c_block_mmx_x86_64(bl[4].coeffs,
 
944
                                         img_cr + y * DV_WIDTH / 2 + x / 2);
 
945
                _dv_pgm_copy_pal_c_block_mmx_x86_64(bl[5].coeffs,
 
946
                                         img_cb + y * DV_WIDTH / 2 + x / 2);
 
947
        } else if (x == DV_WIDTH- 16) {  /* rightmost NTSC block */
 
948
                unsigned char* start_y = real_readbuf + y * DV_WIDTH + x;
 
949
#if 0
 
950
                unsigned char* img_cr = real_readbuf 
 
951
                        + (isPAL ? (DV_WIDTH * DV_PAL_HEIGHT)
 
952
                           : (DV_WIDTH * DV_NTSC_HEIGHT)) + DV_WIDTH / 2;
 
953
                unsigned char* img_cb = real_readbuf 
 
954
                        + (isPAL ? (DV_WIDTH * DV_PAL_HEIGHT) 
 
955
                           : (DV_WIDTH * DV_NTSC_HEIGHT));
 
956
#endif
 
957
                int i,j;
 
958
 
 
959
                _dv_pgm_copy_y_block_mmx_x86_64(bl[0].coeffs, start_y);
 
960
                _dv_pgm_copy_y_block_mmx_x86_64(bl[1].coeffs, start_y + 8);
 
961
                _dv_pgm_copy_y_block_mmx_x86_64(bl[2].coeffs, start_y + 8 * DV_WIDTH);
 
962
                _dv_pgm_copy_y_block_mmx_x86_64(bl[3].coeffs, start_y + 8 * DV_WIDTH + 8);
 
963
 
 
964
                for (j = 0; j < 8; j++) {
 
965
                        for (i = 0; i < 4; i++) {
 
966
                                bl[4].coeffs[8*j + i*2] = 
 
967
                                        bl[4].coeffs[8*j + i*2 + 1] = 
 
968
                                        pgm_get_cr_ntsc(y/2 + j, x/2 + i * 2);
 
969
                                bl[5].coeffs[8*j + i*2] = 
 
970
                                        bl[5].coeffs[8*j + i*2 + 1] = 
 
971
                                        pgm_get_cb_ntsc(y/2 + j, x/2 + i * 2);
 
972
                                bl[4].coeffs[8*j + (i+4)*2] = 
 
973
                                        bl[4].coeffs[8*j + (i+4)*2 + 1] = 
 
974
                                        pgm_get_cr_ntsc(y/2 + j +8, x/2 + i * 2);
 
975
                                bl[5].coeffs[8*j + (i+4)*2] = 
 
976
                                        bl[5].coeffs[8*j + (i+4)*2 + 1] = 
 
977
                                        pgm_get_cb_ntsc(y/2 + j +8, x/2 + i * 2);
 
978
                        }
 
979
                }
 
980
        } else {                              /* NTSC */
 
981
                unsigned char* start_y = real_readbuf + y * DV_WIDTH + x;
 
982
                unsigned char* img_cr = real_readbuf 
 
983
                        + DV_WIDTH * DV_NTSC_HEIGHT + DV_WIDTH / 2;
 
984
                unsigned char* img_cb = real_readbuf 
 
985
                        + DV_WIDTH * DV_NTSC_HEIGHT;
 
986
                _dv_pgm_copy_y_block_mmx_x86_64(bl[0].coeffs, start_y);
 
987
                _dv_pgm_copy_y_block_mmx_x86_64(bl[1].coeffs, start_y + 8);
 
988
                _dv_pgm_copy_y_block_mmx_x86_64(bl[2].coeffs, start_y + 16);
 
989
                _dv_pgm_copy_y_block_mmx_x86_64(bl[3].coeffs, start_y + 24);
 
990
                _dv_pgm_copy_ntsc_c_block_mmx_x86_64(bl[4].coeffs,
 
991
                                          img_cr + y * DV_WIDTH / 2 + x / 2);
 
992
                _dv_pgm_copy_ntsc_c_block_mmx_x86_64(bl[5].coeffs,
 
993
                                          img_cb + y * DV_WIDTH / 2 + x / 2);
 
994
        }
 
995
 
 
996
        finish_mb_mmx_x86_64(mb);
 
997
 
 
998
        emms();
814
999
#endif
815
1000
        {
816
1001
                register int i, j;
937
1122
                                                << (DCT_YUV_PRECISION - 1);
938
1123
}
939
1124
 
940
 
#if !ARCH_X86
 
1125
#if (!ARCH_X86) && (!ARCH_X86_64)
941
1126
static inline short video_get_y(int y, int x)
942
1127
{
943
1128
        return (((short) real_readbuf[y * DV_WIDTH + x]) - 128)
963
1148
}
964
1149
 
965
1150
 
966
 
#else
 
1151
#elif ARCH_X86
967
1152
extern void _dv_video_copy_y_block_mmx(short * dst, unsigned char * src);
968
1153
extern void _dv_video_copy_pal_c_block_mmx(short * dst, unsigned char * src);
969
1154
extern void _dv_video_copy_ntsc_c_block_mmx(short * dst, unsigned char * src);
 
1155
#else
 
1156
extern void _dv_video_copy_y_block_mmx_x86_64(short * dst, unsigned char * src);
 
1157
extern void _dv_video_copy_pal_c_block_mmx_x86_64(short * dst, unsigned char * src);
 
1158
extern void _dv_video_copy_ntsc_c_block_mmx_x86_64(short * dst, unsigned char * src);
970
1159
#endif
971
1160
 
972
1161
 
975
1164
        int y = mb->y;
976
1165
        int x = mb->x;
977
1166
        dv_block_t* bl = mb->b;
978
 
#if !ARCH_X86
 
1167
#if (!ARCH_X86) && (!ARCH_X86_64)
979
1168
        if (isPAL) { /* PAL */
980
1169
                int i,j;
981
1170
                for (j = 0; j < 8; j++) {
1038
1227
                                ? DV_DCT_248 : DV_DCT_88;
1039
1228
                }
1040
1229
        }
1041
 
#else
 
1230
#elif ARCH_X86
1042
1231
        if (isPAL) { /* PAL */
1043
1232
                unsigned char* start_y = real_readbuf + y * DV_WIDTH + x;
1044
1233
                unsigned char* img_cr = real_readbuf 
1105
1294
        finish_mb_mmx(mb);
1106
1295
 
1107
1296
        emms();
 
1297
#else
 
1298
        if (isPAL) { /* PAL */
 
1299
                unsigned char* start_y = real_readbuf + y * DV_WIDTH + x;
 
1300
                unsigned char* img_cr = real_readbuf 
 
1301
                        + (isPAL ? DV_WIDTH * DV_PAL_HEIGHT * 3/2  
 
1302
                           : DV_WIDTH * DV_NTSC_HEIGHT * 3/2);
 
1303
                unsigned char* img_cb = real_readbuf 
 
1304
                        + (isPAL ? DV_WIDTH * DV_PAL_HEIGHT
 
1305
                           : DV_WIDTH * DV_NTSC_HEIGHT);
 
1306
 
 
1307
                _dv_video_copy_y_block_mmx_x86_64(bl[0].coeffs, start_y);
 
1308
                _dv_video_copy_y_block_mmx_x86_64(bl[1].coeffs, start_y + 8);
 
1309
                _dv_video_copy_y_block_mmx_x86_64(bl[2].coeffs, start_y + 8 * DV_WIDTH);
 
1310
                _dv_video_copy_y_block_mmx_x86_64(bl[3].coeffs, start_y + 8 * DV_WIDTH+8);
 
1311
                _dv_video_copy_pal_c_block_mmx_x86_64(bl[4].coeffs,
 
1312
                                         img_cr + y * DV_WIDTH / 2 + x / 2);
 
1313
                _dv_video_copy_pal_c_block_mmx_x86_64(bl[5].coeffs,
 
1314
                                         img_cb + y * DV_WIDTH / 2 + x / 2);
 
1315
        } else if (x == DV_WIDTH- 16) {       /* rightmost NTSC block */
 
1316
                unsigned char* start_y = real_readbuf + y * DV_WIDTH + x;
 
1317
#if 0
 
1318
                unsigned char* img_cr = real_readbuf 
 
1319
                        + (isPAL ? DV_WIDTH * DV_PAL_HEIGHT * 3/2  
 
1320
                           : DV_WIDTH * DV_NTSC_HEIGHT * 3/2);
 
1321
                unsigned char* img_cb = real_readbuf 
 
1322
                        + (isPAL ? DV_WIDTH * DV_PAL_HEIGHT
 
1323
                           : DV_WIDTH * DV_NTSC_HEIGHT);
 
1324
#endif
 
1325
                int i,j;
 
1326
 
 
1327
                _dv_video_copy_y_block_mmx_x86_64(bl[0].coeffs, start_y);
 
1328
                _dv_video_copy_y_block_mmx_x86_64(bl[1].coeffs, start_y + 8);
 
1329
                _dv_video_copy_y_block_mmx_x86_64(bl[2].coeffs, start_y + 8 * DV_WIDTH);
 
1330
                _dv_video_copy_y_block_mmx_x86_64(bl[3].coeffs, start_y + 8 * DV_WIDTH+8);
 
1331
                
 
1332
 
 
1333
                for (j = 0; j < 8; j++) {
 
1334
                        for (i = 0; i < 4; i++) {
 
1335
                                bl[4].coeffs[8*j + i] = 
 
1336
                                        video_get_cr_ntsc(y/2+j, x/2+i);
 
1337
                                bl[5].coeffs[8*j + i] = 
 
1338
                                        video_get_cb_ntsc(y/2+j, x/2+i);
 
1339
                                bl[4].coeffs[8*j + (i+4)] = 
 
1340
                                        video_get_cr_ntsc(y/2+j+8, x/2+i);
 
1341
                                bl[5].coeffs[8*j + (i+4)] = 
 
1342
                                        video_get_cb_ntsc(y/2+j+8, x/2+i);
 
1343
                        }
 
1344
                }
 
1345
        } else {                              /* NTSC */
 
1346
                unsigned char* start_y = real_readbuf + y * DV_WIDTH + x;
 
1347
                unsigned char* img_cr = real_readbuf 
 
1348
                        + DV_WIDTH * DV_NTSC_HEIGHT * 3 / 2;
 
1349
                unsigned char* img_cb = real_readbuf 
 
1350
                        + DV_WIDTH * DV_NTSC_HEIGHT;
 
1351
                _dv_video_copy_y_block_mmx_x86_64(bl[0].coeffs, start_y);
 
1352
                _dv_video_copy_y_block_mmx_x86_64(bl[1].coeffs, start_y + 8);
 
1353
                _dv_video_copy_y_block_mmx_x86_64(bl[2].coeffs, start_y + 16);
 
1354
                _dv_video_copy_y_block_mmx_x86_64(bl[3].coeffs, start_y + 24);
 
1355
                _dv_video_copy_ntsc_c_block_mmx_x86_64(bl[4].coeffs,
 
1356
                                          img_cr + y * DV_WIDTH / 2 + x / 2);
 
1357
                _dv_video_copy_ntsc_c_block_mmx_x86_64(bl[5].coeffs,
 
1358
                                          img_cb + y * DV_WIDTH / 2 + x / 2);
 
1359
        }
 
1360
 
 
1361
        finish_mb_mmx_x86_64(mb);
 
1362
 
 
1363
        emms();
1108
1364
#endif
1109
1365
}
1110
1366
 
1151
1407
        dv_block_t *bl = mb->b;
1152
1408
 
1153
1409
 
1154
 
#if !ARCH_X86
 
1410
#if (!ARCH_X86) && (!ARCH_X86_64)
1155
1411
        if (dv_enc->isPAL) { /* PAL */
1156
1412
                int i,j;
1157
1413
                for (j = 0; j < 8; j++) {
1286
1542
                                ? DV_DCT_248 : DV_DCT_88;
1287
1543
                }
1288
1544
        }
1289
 
#else
 
1545
#elif ARCH_X86
1290
1546
        int b;
1291
1547
        int need_dct_248_rows[6];
1292
1548
 
1374
1630
        }
1375
1631
 
1376
1632
        emms();
 
1633
#else
 
1634
        int b;
 
1635
        int need_dct_248_rows[6];
 
1636
 
 
1637
        if (dv_enc->isPAL) { /* PAL or rightmost NTSC block */
 
1638
                short* start_y = dv_enc->img_y + y * DV_WIDTH + x;
 
1639
                _dv_ppm_copy_y_block_mmx_x86_64(bl[0].coeffs, start_y);
 
1640
                _dv_ppm_copy_y_block_mmx_x86_64(bl[1].coeffs, start_y + 8);
 
1641
                _dv_ppm_copy_y_block_mmx_x86_64(bl[2].coeffs, start_y + 8 * DV_WIDTH);
 
1642
                _dv_ppm_copy_y_block_mmx_x86_64(bl[3].coeffs, start_y + 8 * DV_WIDTH + 8);
 
1643
                _dv_ppm_copy_pal_c_block_mmx_x86_64(bl[4].coeffs,
 
1644
                                         dv_enc->img_cr+y * DV_WIDTH/2+ x/2);
 
1645
                _dv_ppm_copy_pal_c_block_mmx_x86_64(bl[5].coeffs,
 
1646
                                         dv_enc->img_cb+y * DV_WIDTH/2+ x/2);
 
1647
        } else if (x == DV_WIDTH- 16) { /* rightmost NTSC block */
 
1648
                short* start_y = dv_enc->img_y + y * DV_WIDTH + x;
 
1649
                int i,j;
 
1650
 
 
1651
                _dv_ppm_copy_y_block_mmx_x86_64(bl[0].coeffs, start_y);
 
1652
                _dv_ppm_copy_y_block_mmx_x86_64(bl[1].coeffs, start_y + 8);
 
1653
                _dv_ppm_copy_y_block_mmx_x86_64(bl[2].coeffs, start_y + 8 * DV_WIDTH);
 
1654
                _dv_ppm_copy_y_block_mmx_x86_64(bl[3].coeffs, start_y + 8 * DV_WIDTH + 8);
 
1655
 
 
1656
                for (j = 0; j < 8; j++) {
 
1657
                        for (i = 0; i < 4; i++) {
 
1658
                                bl[4].coeffs[8 * j + i] = 
 
1659
                                        (dv_enc->img_cr[(y + j) * DV_WIDTH/2
 
1660
                                               + x / 2 + i*2]
 
1661
                                         + dv_enc->img_cr[(y + j) * DV_WIDTH/2 
 
1662
                                                 + x / 2 + 1 + i*2]) >> 1;
 
1663
                                bl[5].coeffs[8 * j + i] = 
 
1664
                                        (dv_enc->img_cb[(y + j) * DV_WIDTH/2
 
1665
                                               + x / 2 + i*2]
 
1666
                                         + dv_enc->img_cb[(y + j) * DV_WIDTH/2 
 
1667
                                                 + x / 2 + 1 + i*2]) >> 1;
 
1668
                                bl[4].coeffs[8 * j + i + 4] = 
 
1669
                                        (dv_enc->img_cr[(y + j + 8) * DV_WIDTH/2
 
1670
                                               + x / 2 + i*2]
 
1671
                                         + dv_enc->img_cr[(y + j + 8) * DV_WIDTH/2 
 
1672
                                                 + x / 2 + 1 + i*2]) >> 1;
 
1673
                                bl[5].coeffs[8 * j + i + 4] = 
 
1674
                                        (dv_enc->img_cb[(y + j + 8) * DV_WIDTH/2
 
1675
                                               + x / 2 + i*2]
 
1676
                                         + dv_enc->img_cb[(y + j + 8) * DV_WIDTH/2 
 
1677
                                                 + x / 2 + 1 + i*2]) >> 1;
 
1678
                        }
 
1679
                }
 
1680
        } else {                              /* NTSC */
 
1681
                short* start_y = dv_enc->img_y + y * DV_WIDTH + x;
 
1682
                _dv_ppm_copy_y_block_mmx_x86_64(bl[0].coeffs, start_y);
 
1683
                _dv_ppm_copy_y_block_mmx_x86_64(bl[1].coeffs, start_y + 8);
 
1684
                _dv_ppm_copy_y_block_mmx_x86_64(bl[2].coeffs, start_y + 16);
 
1685
                _dv_ppm_copy_y_block_mmx_x86_64(bl[3].coeffs, start_y + 24);
 
1686
                _dv_ppm_copy_ntsc_c_block_mmx_x86_64(bl[4].coeffs,
 
1687
                                          dv_enc->img_cr + y*DV_WIDTH/2 + x/2);
 
1688
                _dv_ppm_copy_ntsc_c_block_mmx_x86_64(bl[5].coeffs,
 
1689
                                          dv_enc->img_cb + y*DV_WIDTH/2 + x/2);
 
1690
        }
 
1691
 
 
1692
        
 
1693
        /* from finish_mb_mmx() */
 
1694
        if (dv_enc->force_dct != -1) {
 
1695
                for (b = 0; b < 6; b++) {
 
1696
                        bl[b].dct_mode = dv_enc->force_dct;
 
1697
                }
 
1698
        } else {
 
1699
                for (b = 0; b < 6; b++) {
 
1700
                        need_dct_248_rows[b]
 
1701
                                = _dv_need_dct_248_mmx_x86_64_rows(bl[b].coeffs) + 1;
 
1702
                }
 
1703
        }
 
1704
        _dv_transpose_mmx_x86_64(bl[0].coeffs);
 
1705
        _dv_transpose_mmx_x86_64(bl[1].coeffs);
 
1706
        _dv_transpose_mmx_x86_64(bl[2].coeffs);
 
1707
        _dv_transpose_mmx_x86_64(bl[3].coeffs);
 
1708
        _dv_transpose_mmx_x86_64(bl[4].coeffs);
 
1709
        _dv_transpose_mmx_x86_64(bl[5].coeffs);
 
1710
 
 
1711
        if (dv_enc->force_dct == -1) {
 
1712
                for (b = 0; b < 6; b++) {
 
1713
                        bl[b].dct_mode = 
 
1714
                                ((need_dct_248_rows[b] * 65536 / 
 
1715
                                  (_dv_need_dct_248_mmx_x86_64_rows(bl[b].coeffs) + 1))
 
1716
                                 > DCT_248_THRESHOLD) ? DV_DCT_248 : DV_DCT_88;
 
1717
                }
 
1718
        }
 
1719
 
 
1720
        emms();
1377
1721
#endif
1378
1722
}