445
445
memset( pixf, 0, sizeof(*pixf) );
447
#define INIT( name, cpu ) \
447
#define INIT2( name, cpu ) \
448
448
pixf->name[PIXEL_16x16] = x264_pixel_##name##_16x16##cpu;\
449
pixf->name[PIXEL_16x8] = x264_pixel_##name##_16x8##cpu;\
449
pixf->name[PIXEL_16x8] = x264_pixel_##name##_16x8##cpu;
450
#define INIT4( name, cpu ) \
450
452
pixf->name[PIXEL_8x16] = x264_pixel_##name##_8x16##cpu;\
451
pixf->name[PIXEL_8x8] = x264_pixel_##name##_8x8##cpu;\
452
pixf->name[PIXEL_8x4] = x264_pixel_##name##_8x4##cpu;\
453
pixf->name[PIXEL_8x8] = x264_pixel_##name##_8x8##cpu;
454
#define INIT5( name, cpu ) \
456
pixf->name[PIXEL_8x4] = x264_pixel_##name##_8x4##cpu;
457
#define INIT7( name, cpu ) \
453
459
pixf->name[PIXEL_4x8] = x264_pixel_##name##_4x8##cpu;\
454
460
pixf->name[PIXEL_4x4] = x264_pixel_##name##_4x4##cpu;
462
pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16;
463
pixf->sa8d[PIXEL_16x8] = x264_pixel_sa8d_16x8;
464
pixf->sa8d[PIXEL_8x16] = x264_pixel_sa8d_8x16;
465
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8;
466
469
pixf->ssim_4x4x2_core = ssim_4x4x2_core;
467
470
pixf->ssim_end4 = ssim_end4;
474
477
if( cpu&X264_CPU_MMX )
479
482
if( cpu&X264_CPU_MMXEXT )
481
INIT( sad, _mmxext );
482
INIT( sad_x3, _mmxext );
483
INIT( sad_x4, _mmxext );
484
INIT( satd, _mmxext );
486
pixf->sad_pde[PIXEL_16x16] = x264_pixel_sad_pde_16x16_mmxext;
487
pixf->sad_pde[PIXEL_16x8 ] = x264_pixel_sad_pde_16x8_mmxext;
488
pixf->sad_pde[PIXEL_8x16 ] = x264_pixel_sad_pde_8x16_mmxext;
484
INIT7( sad, _mmxext );
485
INIT7( sad_x3, _mmxext );
486
INIT7( sad_x4, _mmxext );
487
INIT7( satd, _mmxext );
490
489
pixf->ads[PIXEL_16x16] = x264_pixel_ads4_mmxext;
491
490
pixf->ads[PIXEL_16x8 ] = x264_pixel_ads2_mmxext;
496
495
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_mmxext;
497
496
pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
498
497
pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_mmxext;
499
if( cpu&X264_CPU_CACHELINE_SPLIT )
501
if( cpu&X264_CPU_CACHELINE_32 )
503
INIT5( sad, _cache32_mmxext );
504
INIT4( sad_x3, _cache32_mmxext );
505
INIT4( sad_x4, _cache32_mmxext );
509
INIT5( sad, _cache64_mmxext );
510
INIT4( sad_x3, _cache64_mmxext );
511
INIT4( sad_x4, _cache64_mmxext );
515
if( cpu&X264_CPU_CACHELINE_SPLIT )
517
pixf->sad[PIXEL_8x16] = x264_pixel_sad_8x16_cache64_mmxext;
518
pixf->sad[PIXEL_8x8] = x264_pixel_sad_8x8_cache64_mmxext;
519
pixf->sad[PIXEL_8x4] = x264_pixel_sad_8x4_cache64_mmxext;
520
pixf->sad_x3[PIXEL_8x16] = x264_pixel_sad_x3_8x16_cache64_mmxext;
521
pixf->sad_x3[PIXEL_8x8] = x264_pixel_sad_x3_8x8_cache64_mmxext;
522
pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_mmxext;
523
pixf->sad_x4[PIXEL_8x8] = x264_pixel_sad_x4_8x8_cache64_mmxext;
500
526
pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmxext;
501
527
pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_mmxext;
505
531
// disable on AMD processors since it is slower
506
532
if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_3DNOW) )
508
pixf->sad[PIXEL_16x16] = x264_pixel_sad_16x16_sse2;
509
pixf->sad[PIXEL_16x8 ] = x264_pixel_sad_16x8_sse2;
511
pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_sse2;
512
pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_sse2;
513
pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_sse2;
514
pixf->satd[PIXEL_8x8] = x264_pixel_satd_8x8_sse2;
515
pixf->satd[PIXEL_8x4] = x264_pixel_satd_8x4_sse2;
517
pixf->sad_x3[PIXEL_16x16] = x264_pixel_sad_x3_16x16_sse2;
518
pixf->sad_x3[PIXEL_16x8 ] = x264_pixel_sad_x3_16x8_sse2;
519
pixf->sad_x4[PIXEL_16x16] = x264_pixel_sad_x4_16x16_sse2;
520
pixf->sad_x4[PIXEL_16x8 ] = x264_pixel_sad_x4_16x8_sse2;
535
INIT2( sad_x3, _sse2 );
536
INIT2( sad_x4, _sse2 );
537
INIT5( satd, _sse2 );
540
if( cpu&X264_CPU_CACHELINE_SPLIT )
542
INIT2( sad, _cache64_sse2 );
543
INIT2( sad_x3, _cache64_sse2 );
544
INIT2( sad_x4, _cache64_sse2 );
522
548
// these are faster on both Intel and AMD
523
549
if( cpu&X264_CPU_SSE2 )
525
pixf->ssd[PIXEL_16x16] = x264_pixel_ssd_16x16_sse2;
526
pixf->ssd[PIXEL_16x8] = x264_pixel_ssd_16x8_sse2;
527
552
pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_sse2;
528
553
pixf->ssim_end4 = x264_pixel_ssim_end4_sse2;
563
if( (cpu&X264_CPU_SSE3) && (cpu&X264_CPU_CACHELINE_SPLIT) )
566
INIT2( sad_x3, _sse3 );
567
INIT2( sad_x4, _sse3 );
537
570
if( cpu&X264_CPU_SSSE3 )
540
pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_ssse3;
541
pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_ssse3;
542
pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_ssse3;
543
pixf->satd[PIXEL_8x8] = x264_pixel_satd_8x8_ssse3;
544
pixf->satd[PIXEL_8x4] = x264_pixel_satd_8x4_ssse3;
572
INIT5( satd, _ssse3 );
545
573
#ifdef ARCH_X86_64
546
574
pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
547
575
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_ssse3;
577
if( cpu&X264_CPU_CACHELINE_SPLIT )
579
INIT2( sad, _cache64_ssse3 );
580
INIT2( sad_x3, _cache64_ssse3 );
581
INIT2( sad_x4, _cache64_ssse3 );
551
585
#endif //HAVE_MMX
559
593
#ifdef ARCH_UltraSparc
560
pixf->sad[PIXEL_8x8] = x264_pixel_sad_8x8_vis;
561
pixf->sad[PIXEL_8x16] = x264_pixel_sad_8x16_vis;
562
pixf->sad[PIXEL_16x8] = x264_pixel_sad_16x8_vis;
563
pixf->sad[PIXEL_16x16] = x264_pixel_sad_16x16_vis;
565
pixf->sad_x3[PIXEL_8x8] = x264_pixel_sad_x3_8x8_vis;
566
pixf->sad_x3[PIXEL_8x16] = x264_pixel_sad_x3_8x16_vis;
567
pixf->sad_x3[PIXEL_16x8] = x264_pixel_sad_x3_16x8_vis;
568
pixf->sad_x3[PIXEL_16x16] = x264_pixel_sad_x3_16x16_vis;
570
pixf->sad_x4[PIXEL_8x8] = x264_pixel_sad_x4_8x8_vis;
571
pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_vis;
572
pixf->sad_x4[PIXEL_16x8] = x264_pixel_sad_x4_16x8_vis;
573
pixf->sad_x4[PIXEL_16x16] = x264_pixel_sad_x4_16x16_vis;
595
INIT4( sad_x3, _vis );
596
INIT4( sad_x4, _vis );
576
599
pixf->ads[PIXEL_8x16] =