~ubuntu-branches/ubuntu/vivid/x264/vivid-proposed

Viewing changes to common/pixel.c

  • Committer: Package Import Robot
  • Author(s): Reinhard Tartler
  • Date: 2014-02-13 23:27:06 UTC
  • mfrom: (1.4.5) (21.1.8 sid)
  • Revision ID: package-import@ubuntu.com-20140213232706-pt0zdfm6orvn8r8w
Tags: 2:0.142.2389+git956c8d8-1
Update to new upstream snapshot

--- a/common/pixel.c
+++ b/common/pixel.c
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.c: pixel metrics
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -555,16 +555,26 @@
 
 #if HAVE_MMX
 #if HIGH_BIT_DEPTH
+#define x264_predict_8x8c_v_mmx2 x264_predict_8x8c_v_mmx
+#define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_c
 #define x264_predict_8x8c_v_sse2 x264_predict_8x8c_v_sse
 #define x264_predict_8x16c_v_sse2 x264_predict_8x16c_v_sse
 #define x264_predict_16x16_v_sse2 x264_predict_16x16_v_sse
 INTRA_MBCMP( sad,  4x4,   v, h, dc,  , _mmx2, _c )
-INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _mmx2, _c )
+INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _mmx2, _mmx2 )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _mmx2, _mmx2 )
 INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _sse2, _sse2 )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _sse2, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _sse2, _sse2 )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _sse2, _sse2 )
 INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _ssse3, _sse2 )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _ssse3, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _ssse3, _sse2 )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _ssse3, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _sse4, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _avx, _sse2 )
 #else
 #define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_mmx
 INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
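For context, INTRA_MBCMP is a token-pasting macro: each invocation above generates one helper that runs three intra predictors and scores each with the named metric (sad or satd), with the last two arguments selecting the cpu suffixes of the metric and predictor implementations. The following is a minimal compilable sketch of that pattern; the demo_* names and the simplified signatures are illustrative assumptions, not x264's exact definitions.

#include <stdlib.h>

typedef unsigned char pixel;

#define FENC_STRIDE 16  /* fixed strides, borrowed from x264's buffers */
#define FDEC_STRIDE 32

static void demo_predict_8x8_v ( pixel *dst ) { (void)dst; /* vertical   */ }
static void demo_predict_8x8_h ( pixel *dst ) { (void)dst; /* horizontal */ }
static void demo_predict_8x8_dc( pixel *dst ) { (void)dst; /* DC         */ }

static int demo_sad_8x8( pixel *dec, int i_dec, pixel *enc, int i_enc )
{
    int sum = 0;
    for( int y = 0; y < 8; y++ )
        for( int x = 0; x < 8; x++ )
            sum += abs( dec[y*i_dec+x] - enc[y*i_enc+x] );
    return sum;
}

/* One invocation pastes metric, size and three predictor names into a
 * single function that fills res[] with the cost of each mode. */
#define DEMO_INTRA_MBCMP( mbcmp, size, p1, p2, p3 )\
static void demo_intra_##mbcmp##_x3_##size( pixel *fenc, pixel *fdec, int res[3] )\
{\
    demo_predict_##size##_##p1( fdec );\
    res[0] = demo_##mbcmp##_##size( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
    demo_predict_##size##_##p2( fdec );\
    res[1] = demo_##mbcmp##_##size( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
    demo_predict_##size##_##p3( fdec );\
    res[2] = demo_##mbcmp##_##size( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
}

DEMO_INTRA_MBCMP( sad, 8x8, dc, h, v ) /* emits demo_intra_sad_x3_8x8() */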
@@ -845,6 +855,7 @@
     if( cpu&X264_CPU_MMX2 )
     {
         INIT7( sad, _mmx2 );
+        INIT7_NAME( sad_aligned, sad, _mmx2 );
         INIT7( sad_x3, _mmx2 );
         INIT7( sad_x4, _mmx2 );
         INIT8( satd, _mmx2 );
@@ -867,6 +878,8 @@
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_mmx2;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_mmx2;
         pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmx2;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_mmx2;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_mmx2;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_mmx2;
         pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmx2;
     }
@@ -908,6 +921,8 @@
         pixf->asd8 = x264_pixel_asd8_sse2;
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_sse2;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_sse2;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_sse2;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse2;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_sse2;
     }
     if( cpu&X264_CPU_SSE2_IS_FAST )
@@ -923,6 +938,8 @@
     if( cpu&X264_CPU_SSSE3 )
     {
         INIT4_NAME( sad_aligned, sad, _ssse3_aligned );
+        pixf->sad_aligned[PIXEL_4x4] = x264_pixel_sad_4x4_ssse3;
+        pixf->sad_aligned[PIXEL_4x8] = x264_pixel_sad_4x8_ssse3;
         INIT7( sad, _ssse3 );
         INIT7( sad_x3, _ssse3 );
         INIT7( sad_x4, _ssse3 );
@@ -945,6 +962,8 @@
         pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_ssse3;
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_ssse3;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_ssse3;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_ssse3;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_ssse3;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_ssse3;
     }
     if( cpu&X264_CPU_SSE4 )
@@ -960,9 +979,11 @@
 #if ARCH_X86_64
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4;
 #endif
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4;
     }
     if( cpu&X264_CPU_AVX )
     {
+        INIT5_NAME( sad_aligned, sad, _ssse3 ); /* AVX-capable CPUs doesn't benefit from an aligned version */
         INIT_ADS( _avx );
         INIT6( satd, _avx );
         pixf->satd[PIXEL_4x16] = x264_pixel_satd_4x16_avx;
@@ -981,12 +1002,24 @@
 #if ARCH_X86_64
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx;
 #endif
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_avx;
     }
     if( cpu&X264_CPU_XOP )
     {
         pixf->vsad = x264_pixel_vsad_xop;
         pixf->asd8 = x264_pixel_asd8_xop;
     }
+    if( cpu&X264_CPU_AVX2 )
+    {
+        INIT2( ssd, _avx2 );
+        INIT2( sad, _avx2 );
+        INIT2_NAME( sad_aligned, sad, _avx2 );
+        INIT2( sad_x3, _avx2 );
+        INIT2( sad_x4, _avx2 );
+        pixf->vsad = x264_pixel_vsad_avx2;
+        pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_avx2;
+        pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_avx2;
+    }
 #endif // HAVE_MMX
 #else // !HIGH_BIT_DEPTH
 #if HAVE_MMX
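All of these hunks extend x264's runtime CPU dispatch: the init function checks feature flags in ascending order of capability and overwrites entries in the pixel-function table, so the most capable implementation the host supports ends up in each slot. Below is a stripped-down sketch of that scheme; the DEMO_* flags and stub functions are hypothetical stand-ins for the real X264_CPU_* bits and assembly kernels.

/* Hypothetical feature bits standing in for x264's X264_CPU_* flags. */
#define DEMO_CPU_SSE2 0x1u
#define DEMO_CPU_AVX2 0x2u

typedef unsigned char pixel;
typedef int (*demo_sad_fn)( const pixel *a, const pixel *b );

static int demo_sad_c   ( const pixel *a, const pixel *b ) { (void)a; (void)b; return 0; }
static int demo_sad_sse2( const pixel *a, const pixel *b ) { (void)a; (void)b; return 0; }
static int demo_sad_avx2( const pixel *a, const pixel *b ) { (void)a; (void)b; return 0; }

typedef struct { demo_sad_fn sad; } demo_pixel_function_t;

/* Start from the portable C version, then let each successively newer
 * instruction set overwrite the pointer; the last supported one wins. */
static void demo_pixel_init( unsigned cpu, demo_pixel_function_t *pixf )
{
    pixf->sad = demo_sad_c;
    if( cpu & DEMO_CPU_SSE2 )
        pixf->sad = demo_sad_sse2;
    if( cpu & DEMO_CPU_AVX2 )
        pixf->sad = demo_sad_avx2;
}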
@@ -1083,10 +1116,7 @@
         pixf->satd[PIXEL_4x16]   = x264_pixel_satd_4x16_sse2;
         INIT6( satd_x3, _sse2 );
         INIT6( satd_x4, _sse2 );
-        if( !(cpu&X264_CPU_STACK_MOD4) )
-        {
-            INIT4( hadamard_ac, _sse2 );
-        }
+        INIT4( hadamard_ac, _sse2 );
         INIT_ADS( _sse2 );
         pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_sse2;
         pixf->var[PIXEL_8x16] = x264_pixel_var_8x16_sse2;
@@ -1107,12 +1137,6 @@
                pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_sse2;
            }
         }
-
-        if( cpu&X264_CPU_SSE_MISALIGN )
-        {
-            INIT2( sad_x3, _sse2_misalign );
-            INIT2( sad_x4, _sse2_misalign );
-        }
     }
 
     if( cpu&X264_CPU_SSE2_IS_FAST && !(cpu&X264_CPU_CACHELINE_64) )
@@ -1136,9 +1160,9 @@
 
     if( cpu&X264_CPU_SSSE3 )
     {
+        INIT4( hadamard_ac, _ssse3 );
         if( !(cpu&X264_CPU_STACK_MOD4) )
         {
-            INIT4( hadamard_ac, _ssse3 );
             pixf->intra_sad_x9_4x4  = x264_intra_sad_x9_4x4_ssse3;
             pixf->intra_satd_x9_4x4 = x264_intra_satd_x9_4x4_ssse3;
             pixf->intra_sad_x9_8x8  = x264_intra_sad_x9_8x8_ssse3;
@@ -1155,10 +1179,7 @@
             pixf->satd[PIXEL_4x16]  = x264_pixel_satd_4x16_ssse3_atom;
             INIT6( satd_x3, _ssse3_atom );
             INIT6( satd_x4, _ssse3_atom );
-            if( !(cpu&X264_CPU_STACK_MOD4) )
-            {
-                INIT4( hadamard_ac, _ssse3_atom );
-            }
+            INIT4( hadamard_ac, _ssse3_atom );
 #if ARCH_X86_64
             pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_ssse3_atom;
 #endif
@@ -1190,6 +1211,11 @@
             INIT2( sad_x3, _cache64_ssse3 );
             INIT2( sad_x4, _cache64_ssse3 );
         }
+        else
+        {
+            INIT2( sad_x3, _ssse3 );
+            INIT5( sad_x4, _ssse3 );
+        }
         if( (cpu&X264_CPU_SLOW_ATOM) || (cpu&X264_CPU_SLOW_SHUFFLE) )
         {
             INIT5( ssd, _sse2 ); /* on conroe, sse2 is faster for width8/16 */
@@ -1201,9 +1227,9 @@
         INIT8( satd, _sse4 );
         INIT7( satd_x3, _sse4 );
         INIT7( satd_x4, _sse4 );
+        INIT4( hadamard_ac, _sse4 );
         if( !(cpu&X264_CPU_STACK_MOD4) )
         {
-            INIT4( hadamard_ac, _sse4 );
             pixf->intra_sad_x9_4x4  = x264_intra_sad_x9_4x4_sse4;
             pixf->intra_satd_x9_4x4 = x264_intra_satd_x9_4x4_sse4;
             pixf->intra_sad_x9_8x8  = x264_intra_sad_x9_8x8_sse4;
@@ -1221,13 +1247,16 @@
 
     if( cpu&X264_CPU_AVX )
     {
+        INIT2_NAME( sad_aligned, sad, _sse2 ); /* AVX-capable CPUs doesn't benefit from an aligned version */
+        INIT2( sad_x3, _avx );
+        INIT2( sad_x4, _avx );
         INIT8( satd, _avx );
         INIT7( satd_x3, _avx );
         INIT7( satd_x4, _avx );
         INIT_ADS( _avx );
+        INIT4( hadamard_ac, _avx );
         if( !(cpu&X264_CPU_STACK_MOD4) )
         {
-            INIT4( hadamard_ac, _avx );
             pixf->intra_sad_x9_4x4  = x264_intra_sad_x9_4x4_avx;
             pixf->intra_satd_x9_4x4 = x264_intra_satd_x9_4x4_avx;
             pixf->intra_sad_x9_8x8  = x264_intra_sad_x9_8x8_avx;
@@ -1255,9 +1284,9 @@
         INIT7( satd, _xop );
         INIT7( satd_x3, _xop );
         INIT7( satd_x4, _xop );
+        INIT4( hadamard_ac, _xop );
         if( !(cpu&X264_CPU_STACK_MOD4) )
         {
-            INIT4( hadamard_ac, _xop );
             pixf->intra_satd_x9_4x4 = x264_intra_satd_x9_4x4_xop;
         }
         INIT5( ssd, _xop );
@@ -1273,6 +1302,27 @@
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_xop;
 #endif
     }
+
+    if( cpu&X264_CPU_AVX2 )
+    {
+        INIT2( ssd, _avx2 );
+        INIT2( sad_x3, _avx2 );
+        INIT2( sad_x4, _avx2 );
+        INIT4( satd, _avx2 );
+        INIT2( hadamard_ac, _avx2 );
+        INIT_ADS( _avx2 );
+        pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_avx2;
+        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_avx2;
+        pixf->var2[PIXEL_8x16]  = x264_pixel_var2_8x16_avx2;
+        pixf->var2[PIXEL_8x8]   = x264_pixel_var2_8x8_avx2;
+        pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_avx2;
+        pixf->intra_sad_x9_8x8  = x264_intra_sad_x9_8x8_avx2;
+        pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_avx2;
+        pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_avx2;
+#if ARCH_X86_64
+        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx2;
+#endif
+    }
 #endif //HAVE_MMX
 
 #if HAVE_ARMV6
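Finally, the INIT2/INIT4/INIT5/INIT7/INIT8 helpers that dominate these hunks are cascading macros: INITn assigns the named metric for the n largest partition sizes, token-pasting a cpu suffix onto each symbol. The sketch below mirrors that pattern with simplified types; the DEMO_* names are assumptions for illustration, and the real macros live at the top of common/pixel.c.

enum { PIXEL_16x16, PIXEL_16x8, PIXEL_8x16, PIXEL_8x8, PIXEL_COUNT };

typedef int (*demo_cmp_fn)( void );
typedef struct { demo_cmp_fn sad[PIXEL_COUNT]; } demo_pixel_function_t;

/* Stubs standing in for the cpu-specific assembly kernels. */
static int x264_pixel_sad_16x16_mmx2( void ) { return 0; }
static int x264_pixel_sad_16x8_mmx2 ( void ) { return 0; }
static int x264_pixel_sad_8x16_mmx2 ( void ) { return 0; }
static int x264_pixel_sad_8x8_mmx2  ( void ) { return 0; }

/* DEMO_INIT2 covers the two largest partitions; DEMO_INIT4 extends it
 * downward, the way the real INITn chain grows up to INIT8. */
#define DEMO_INIT2( name, cpu )\
    pixf->name[PIXEL_16x16] = x264_pixel_##name##_16x16##cpu;\
    pixf->name[PIXEL_16x8]  = x264_pixel_##name##_16x8##cpu;
#define DEMO_INIT4( name, cpu )\
    DEMO_INIT2( name, cpu )\
    pixf->name[PIXEL_8x16] = x264_pixel_##name##_8x16##cpu;\
    pixf->name[PIXEL_8x8]  = x264_pixel_##name##_8x8##cpu;

static void demo_init_mmx2( demo_pixel_function_t *pixf )
{
    DEMO_INIT4( sad, _mmx2 ); /* expands to four pointer assignments */
}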