~ubuntu-branches/ubuntu/saucy/x264/saucy-updates

« back to all changes in this revision

Viewing changes to encoder/analyse.c

Committer: Bazaar Package Importer
Author(s): Lionel Le Folgoc
Date: 2009-01-15 23:15:42 UTC
mto: (12.1.1 sid) (1.3.1)
mto: This revision was merged to the branch mainline in revision 14.
Revision ID: james.westby@ubuntu.com-20090115231542-19v12f85z0e7zev7

Tags: upstream-0.svn20081230

Import upstream version 0.svn20081230

files added:
common/vlc.c

common/x86/cpu-a.asm

common/x86/util.h

common/x86/x86util.asm

doc/standards.txt

tools/checkasm-a.asm

files removed:
ChangeLog

build/win32/libx264.dsp

build/win32/x264.dsp

build/win32/x264.dsw

common/vlc.h

common/x86/cpu-32.asm

common/x86/cpu-64.asm

common/x86/x86inc-32.asm

common/x86/x86inc-64.asm

encoder/eval.c

gtk/Makefile

gtk/fr.po

gtk/test.c

gtk/x264.ico

gtk/x264.png

gtk/x264_gtk.c

gtk/x264_gtk.h

gtk/x264_gtk_bitrate.c

gtk/x264_gtk_bitrate.h

gtk/x264_gtk_cqm.c

gtk/x264_gtk_cqm.h

gtk/x264_gtk_demuxers.h

gtk/x264_gtk_encode.c

gtk/x264_gtk_encode_encode.c

gtk/x264_gtk_encode_encode.h

gtk/x264_gtk_encode_main_window.c

gtk/x264_gtk_encode_main_window.h

gtk/x264_gtk_encode_private.h

gtk/x264_gtk_encode_status_window.c

gtk/x264_gtk_encode_status_window.h

gtk/x264_gtk_enum.h

gtk/x264_gtk_i18n.h

gtk/x264_gtk_mb.c

gtk/x264_gtk_mb.h

gtk/x264_gtk_more.c

gtk/x264_gtk_more.h

gtk/x264_gtk_private.h

gtk/x264_gtk_rc.c

gtk/x264_gtk_rc.h

gtk/x264gtk.rc

tools/Jamfile

tools/avc2avi.c

tools/checkasm-32.asm

tools/x264-rd.sh

files modified:
AUTHORS

Makefile

build/win32/libx264.vcproj

build/win32/x264.sln

build/win32/x264.vcproj

common/bs.h

common/cabac.c

common/cabac.h

common/common.c

common/common.h

common/cpu.c

common/cpu.h

common/dct.c

common/dct.h

common/display-x11.c

common/display.h

common/frame.c

common/frame.h

common/macroblock.c

common/macroblock.h

common/mc.c

common/mc.h

common/mdate.c

common/osdep.h

common/pixel.c

common/pixel.h

common/ppc/dct.c

common/ppc/dct.h

common/ppc/deblock.c

common/ppc/mc.c

common/ppc/mc.h

common/ppc/pixel.c

common/ppc/pixel.h

common/ppc/ppccommon.h

common/ppc/predict.c

common/ppc/predict.h

common/ppc/quant.c

common/ppc/quant.h

common/predict.c

common/predict.h

common/quant.c

common/quant.h

common/set.c

common/set.h

common/sparc/pixel.asm

common/sparc/pixel.h

common/visualize.c

common/visualize.h

common/x86/cabac-a.asm

common/x86/dct-32.asm

common/x86/dct-64.asm

common/x86/dct-a.asm

common/x86/dct.h

common/x86/deblock-a.asm

common/x86/mc-a.asm

common/x86/mc-a2.asm

common/x86/mc-c.c

common/x86/mc.h

common/x86/pixel-32.asm

common/x86/pixel-a.asm

common/x86/pixel.h

common/x86/predict-a.asm

common/x86/predict-c.c

common/x86/predict.h

common/x86/quant-a.asm

common/x86/quant.h

common/x86/sad-a.asm

common/x86/x86inc.asm

configure

encoder/analyse.c

encoder/analyse.h

encoder/cabac.c

encoder/cavlc.c

encoder/encoder.c

encoder/macroblock.c

encoder/macroblock.h

encoder/me.c

encoder/me.h

encoder/ratecontrol.c

encoder/ratecontrol.h

encoder/rdo.c

encoder/set.c

encoder/set.h

encoder/slicetype.c

extras/getopt.c

extras/getopt.h

extras/stdint.h

matroska.c

matroska.h

muxers.c

muxers.h

tools/checkasm.c

tools/countquant_x264.pl

tools/regression-test.pl *

tools/xyuv.c

version.sh

x264.c

x264.h

Show diffs side-by-side

added added

removed removed

encoder/analyse.c

/*****************************************************************************

* analyse.c: h264 encoder library

*****************************************************************************

* $Id: analyse.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $

* Authors: Laurent Aimar <fenrir@via.ecp.fr>

* Loren Merritt <lorenm@u.washington.edu>

* Jason Garrett-Glaser <darkshikari@gmail.com>

* This program is free software; you can redistribute it and/or modify

* it under the terms of the GNU General Public License as published by

* You should have received a copy of the GNU General Public License

* along with this program; if not, write to the Free Software

* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.

* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.

*****************************************************************************/

#include <math.h>

/* 8x8 */

int i_cost8x8;

/* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */

DECLARE_ALIGNED_8( int mvc[32][5][2] );

DECLARE_ALIGNED_4( int16_t mvc[32][5][2] );

x264_me_t me8x8[4];

/* Sub 4x4 */

int i_lambda2;

int i_qp;

int16_t *p_cost_mv;

int b_mbrd;

int i_mbrd;

/* I: Intra part */

int i_satd_i4x4;

int i_predict4x4[16];

100

int i_satd_pcm;

101

100

102

/* Chroma part */

101

103

int i_satd_i8x8chroma;

102

104

int i_satd_i8x8chroma_dir[4];

128

130

} x264_mb_analysis_t;

129

131

130

132

/* lambda = pow(2,qp/6-2) */

131

static const int i_qp0_cost_table[52] = {

133

const int x264_lambda_tab[52] = {

132

134

1, 1, 1, 1, 1, 1, 1, 1, /* 0-7 */

133

135

1, 1, 1, 1, /* 8-11 */

134

136

1, 1, 1, 1, 2, 2, 2, 2, /* 12-19 */

139

141

};

140

142

141

143

/* lambda2 = pow(lambda,2) * .9 * 256 */

142

static const int i_qp0_cost2_table[52] = {

144

const int x264_lambda2_tab[52] = {

143

145

14, 18, 22, 28, 36, 45, 57, 72, /* 0 - 7 */

144

146

91, 115, 145, 182, 230, 290, 365, 460, /* 8 - 15 */

145

147

580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16 - 23 */

202

204

203

205

static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )

204

206

{

207

int i = h->param.analyse.i_subpel_refine - (h->sh.i_type == SLICE_TYPE_B);

208

/* mbrd == 1 -> RD mode decision */

209

/* mbrd == 2 -> RD refinement */

210

a->i_mbrd = (i>=6) + (i>=8);

205

211

/* conduct the analysis using this lambda and QP */

206

212

a->i_qp = h->mb.i_qp = i_qp;

207

h->mb.i_chroma_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];

208

a->i_lambda = i_qp0_cost_table[i_qp];

209

a->i_lambda2 = i_qp0_cost2_table[i_qp];

210

a->b_mbrd = h->param.analyse.i_subpel_refine >= 6 &&

211

( h->sh.i_type != SLICE_TYPE_B || h->param.analyse.b_bframe_rdo );

212

213

h->mb.i_chroma_qp = h->chroma_qp_table[i_qp];

214

a->i_lambda = x264_lambda_tab[i_qp];

215

a->i_lambda2 = x264_lambda2_tab[i_qp];

213

216

h->mb.i_me_method = h->param.analyse.i_me_method;

214

217

h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;

215

218

h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P

216

219

&& h->mb.i_subpel_refine >= 5;

217

h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->b_mbrd;

220

h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->i_mbrd;

218

221

h->mb.b_transform_8x8 = 0;

219

222

h->mb.b_noise_reduction = 0;

220

223

224

227

a->i_satd_i4x4 =

225

228

a->i_satd_i8x8chroma = COST_MAX;

226

229

230

/* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it */

231

a->i_satd_pcm = !h->mb.i_psy_rd && a->i_mbrd ? ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8 : COST_MAX;

232

227

233

a->b_fast_intra = 0;

228

234

h->mb.i_skip_intra =

229

235

h->mb.b_lossless ? 0 :

230

a->b_mbrd ? 2 :

236

a->i_mbrd ? 2 :

231

237

!h->param.analyse.i_trellis && !h->param.analyse.i_noise_reduction;

232

238

233

239

/* II: Inter part P/B frame */

340

346

a->b_fast_intra = 1;

341

347

}

342

348

}

349

h->mb.b_skip_mc = 0;

343

350

}

344

351

}

345

352

462

469

}

463

470

}

464

471

472

/* For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we only need to use it on the chosen mode. */

473

static void inline x264_psy_trellis_init( x264_t *h, int do_both_dct )

474

{

475

DECLARE_ALIGNED_16( int16_t dct8x8[4][8][8] );

476

DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );

477

DECLARE_ALIGNED_16( uint8_t zero[16*FDEC_STRIDE] ) = {0};

478

int i;

479

480

if( do_both_dct || h->mb.b_transform_8x8 )

481

{

482

h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], zero );

483

for( i = 0; i < 4; i++ )

484

h->zigzagf.scan_8x8( h->mb.pic.fenc_dct8[i], dct8x8[i] );

485

}

486

if( do_both_dct || !h->mb.b_transform_8x8 )

487

{

488

h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], zero );

489

for( i = 0; i < 16; i++ )

490

h->zigzagf.scan_4x4( h->mb.pic.fenc_dct4[i], dct4x4[i] );

491

}

492

}

493

494

/* Pre-calculate fenc satd scores for psy RD, minus DC coefficients */

495

static inline void x264_mb_cache_fenc_satd( x264_t *h )

496

{

497

DECLARE_ALIGNED_16(uint8_t zero[16]) = {0};

498

uint8_t *fenc;

499

int x, y, satd_sum = 0, sa8d_sum = 0;

500

if( h->param.analyse.i_trellis == 2 && h->mb.i_psy_trellis )

501

x264_psy_trellis_init( h, h->param.analyse.b_transform_8x8 );

502

if( !h->mb.i_psy_rd )

503

return;

504

for( y = 0; y < 4; y++ )

505

for( x = 0; x < 4; x++ )

506

{

507

fenc = h->mb.pic.p_fenc[0]+x*4+y*4*FENC_STRIDE;

508

h->mb.pic.fenc_satd[y][x] = h->pixf.satd[PIXEL_4x4]( zero, 0, fenc, FENC_STRIDE )

509

- (h->pixf.sad[PIXEL_4x4]( zero, 0, fenc, FENC_STRIDE )>>1);

510

satd_sum += h->mb.pic.fenc_satd[y][x];

511

}

512

for( y = 0; y < 2; y++ )

513

for( x = 0; x < 2; x++ )

514

{

515

fenc = h->mb.pic.p_fenc[0]+x*8+y*8*FENC_STRIDE;

516

h->mb.pic.fenc_sa8d[y][x] = h->pixf.sa8d[PIXEL_8x8]( zero, 0, fenc, FENC_STRIDE )

517

- (h->pixf.sad[PIXEL_8x8]( zero, 0, fenc, FENC_STRIDE )>>2);

518

sa8d_sum += h->mb.pic.fenc_sa8d[y][x];

519

}

520

h->mb.pic.fenc_satd_sum = satd_sum;

521

h->mb.pic.fenc_sa8d_sum = sa8d_sum;

522

}

523

465

524

static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )

466

525

{

467

526

int i;

493

552

h->pixf.mbcmp[PIXEL_8x8]( p_dstc[0], FDEC_STRIDE, p_srcc[0], FENC_STRIDE );

494

553

satdv[I_PRED_CHROMA_P] =

495

554

h->pixf.mbcmp[PIXEL_8x8]( p_dstc[1], FDEC_STRIDE, p_srcc[1], FENC_STRIDE );

496

555

497

556

for( i=0; i<i_max; i++ )

498

557

{

499

558

int i_mode = predict_mode[i];

512

571

int i_mode = predict_mode[i];

513

572

514

573

/* we do the prediction */

515

h->predict_8x8c[i_mode]( p_dstc[0] );

516

h->predict_8x8c[i_mode]( p_dstc[1] );

574

if( h->mb.b_lossless )

575

x264_predict_lossless_8x8_chroma( h, i_mode );

576

else

577

{

578

h->predict_8x8c[i_mode]( p_dstc[0] );

579

h->predict_8x8c[i_mode]( p_dstc[1] );

580

}

517

581

518

582

/* we calculate the cost */

519

583

i_satd = h->pixf.mbcmp[PIXEL_8x8]( p_dstc[0], FDEC_STRIDE,

539

603

int i, idx;

540

604

int i_max;

541

605

int predict_mode[9];

542

int b_merged_satd = h->pixf.intra_satd_x3_16x16 && h->pixf.mbcmp[0] == h->pixf.satd[0];

606

int b_merged_satd = !!h->pixf.intra_mbcmp_x3_16x16 && !h->mb.b_lossless;

543

607

544

608

/*---------------- Try all modes and calculate their scores ---------------*/

545

609

548

612

549

613

if( b_merged_satd && i_max == 4 )

550

614

{

551

h->pixf.intra_satd_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );

615

h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );

552

616

h->predict_16x16[I_PRED_16x16_P]( p_dst );

553

617

a->i_satd_i16x16_dir[I_PRED_16x16_P] =

554

618

h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );

564

628

{

565

629

int i_satd;

566

630

int i_mode = predict_mode[i];

567

h->predict_16x16[i_mode]( p_dst );

631

632

if( h->mb.b_lossless )

633

x264_predict_lossless_16x16( h, i_mode );

634

else

635

h->predict_16x16[i_mode]( p_dst );

568

636

569

637

i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +

570

638

a->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );

583

651

if( flags & X264_ANALYSE_I8x8 )

584

652

{

585

653

DECLARE_ALIGNED_16( uint8_t edge[33] );

586

x264_pixel_cmp_t sa8d = (*h->pixf.mbcmp == *h->pixf.sad) ? h->pixf.sad[PIXEL_8x8] : h->pixf.sa8d[PIXEL_8x8];

587

int i_satd_thresh = a->b_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );

654

x264_pixel_cmp_t sa8d = (h->pixf.mbcmp[0] == h->pixf.satd[0]) ? h->pixf.sa8d[PIXEL_8x8] : h->pixf.mbcmp[PIXEL_8x8];

655

int i_satd_thresh = a->i_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );

588

656

int i_cost = 0;

589

657

b_merged_satd = h->pixf.intra_sa8d_x3_8x8 && h->pixf.mbcmp[0] == h->pixf.satd[0];

590

658

606

674

607

675

if( b_merged_satd && i_max == 9 )

608

676

{

609

int satd[3];

677

int satd[9];

610

678

h->pixf.intra_sa8d_x3_8x8( p_src_by, edge, satd );

611

if( i_pred_mode < 3 )

612

satd[i_pred_mode] -= 3 * a->i_lambda;

679

satd[i_pred_mode] -= 3 * a->i_lambda;

613

680

for( i=2; i>=0; i-- )

614

681

{

615

682

int cost = a->i_satd_i8x8_dir[i][idx] = satd[i] + 4 * a->i_lambda;

625

692

int i_satd;

626

693

int i_mode = predict_mode[i];

627

694

628

h->predict_8x8[i_mode]( p_dst_by, edge );

695

if( h->mb.b_lossless )

696

x264_predict_lossless_8x8( h, p_dst_by, idx, i_mode, edge );

697

else

698

h->predict_8x8[i_mode]( p_dst_by, edge );

629

699

630

700

i_satd = sa8d( p_dst_by, FDEC_STRIDE, p_src_by, FENC_STRIDE )

631

701

+ a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4);

660

730

a->i_satd_i8x8 = COST_MAX;

661

731

i_cost = i_cost * 4/(idx+1);

662

732

}

663

if( X264_MIN(i_cost, a->i_satd_i16x16) > i_satd_inter*(5+a->b_mbrd)/4 )

733

if( X264_MIN(i_cost, a->i_satd_i16x16) > i_satd_inter*(5+!!a->i_mbrd)/4 )

664

734

return;

665

735

}

666

736

670

740

int i_cost;

671

741

int i_satd_thresh = X264_MIN3( i_satd_inter, a->i_satd_i16x16, a->i_satd_i8x8 );

672

742

b_merged_satd = h->pixf.intra_satd_x3_4x4 && h->pixf.mbcmp[0] == h->pixf.satd[0];

673

if( a->b_mbrd )

743

if( a->i_mbrd )

674

744

i_satd_thresh = i_satd_thresh * (10-a->b_fast_intra)/8;

675

745

676

746

i_cost = a->i_lambda * 24; /* from JVT (SATD0) */

679

749

680

750

for( idx = 0;; idx++ )

681

751

{

682

int x = block_idx_x[idx];

683

int y = block_idx_y[idx];

684

uint8_t *p_src_by = p_src + 4*x + 4*y*FENC_STRIDE;

685

uint8_t *p_dst_by = p_dst + 4*x + 4*y*FDEC_STRIDE;

752

uint8_t *p_src_by = p_src + block_idx_xy_fenc[idx];

753

uint8_t *p_dst_by = p_dst + block_idx_xy_fdec[idx];

686

754

int i_best = COST_MAX;

687

755

int i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx );

688

756

694

762

695

763

if( b_merged_satd && i_max >= 6 )

696

764

{

697

int satd[3];

765

int satd[9];

698

766

h->pixf.intra_satd_x3_4x4( p_src_by, p_dst_by, satd );

699

if( i_pred_mode < 3 )

700

satd[i_pred_mode] -= 3 * a->i_lambda;

767

satd[i_pred_mode] -= 3 * a->i_lambda;

701

768

for( i=2; i>=0; i-- )

702

769

COPY2_IF_LT( i_best, satd[i] + 4 * a->i_lambda,

703

770

a->i_predict4x4[idx], i );

710

777

{

711

778

int i_satd;

712

779

int i_mode = predict_mode[i];

713

714

h->predict_4x4[i_mode]( p_dst_by );

780

if( h->mb.b_lossless )

781

x264_predict_lossless_4x4( h, p_dst_by, idx, i_mode );

782

else

783

h->predict_4x4[i_mode]( p_dst_by );

715

784

716

785

i_satd = h->pixf.mbcmp[PIXEL_4x4]( p_dst_by, FDEC_STRIDE,

717

786

p_src_by, FENC_STRIDE )

781

850

uint8_t *p_dst = h->mb.pic.p_fdec[0];

782

851

783

852

int i, j, idx, x, y;

784

int i_max, i_satd, i_best, i_mode, i_thresh;

853

int i_max, i_mode, i_thresh;

854

uint64_t i_satd, i_best;

785

855

int i_pred_mode;

786

856

int predict_mode[9];

787

857

h->mb.i_skip_intra = 0;

808

878

int i_nnz = 0;

809

879

for( idx = 0; idx < 16; idx++ )

810

880

{

811

uint8_t *p_src_by;

812

uint8_t *p_dst_by;

813

i_best = COST_MAX;

881

uint8_t *p_dst_by = p_dst + block_idx_xy_fdec[idx];

882

i_best = COST_MAX64;

814

883

815

884

i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx );

816

x = block_idx_x[idx];

817

y = block_idx_y[idx];

818

885

819

p_src_by = p_src + 4*x + 4*y*FENC_STRIDE;

820

p_dst_by = p_dst + 4*x + 4*y*FDEC_STRIDE;

821

886

predict_4x4_mode_available( h->mb.i_neighbour4[idx], predict_mode, &i_max );

822

887

823

888

if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )

827

892

for( i = 0; i < i_max; i++ )

828

893

{

829

894

i_mode = predict_mode[i];

830

h->predict_4x4[i_mode]( p_dst_by );

895

if( h->mb.b_lossless )

896

x264_predict_lossless_4x4( h, p_dst_by, idx, i_mode );

897

else

898

h->predict_4x4[i_mode]( p_dst_by );

831

899

i_satd = x264_rd_cost_i4x4( h, a->i_lambda2, idx, i_mode );

832

900

833

901

if( i_best > i_satd )

864

932

int j;

865

933

i_thresh = a->i_satd_i8x8_dir[a->i_predict8x8[idx]][idx] * 11/8;

866

934

867

i_best = COST_MAX;

935

i_best = COST_MAX64;

868

936

i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );

869

937

x = idx&1;

870

938

y = idx>>1;

879

947

i_mode = predict_mode[i];

880

948

if( a->i_satd_i8x8_dir[i_mode][idx] > i_thresh )

881

949

continue;

882

h->predict_8x8[i_mode]( p_dst_by, edge );

950

if( h->mb.b_lossless )

951

x264_predict_lossless_8x8( h, p_dst_by, idx, i_mode, edge );

952

else

953

h->predict_8x8[i_mode]( p_dst_by, edge );

883

954

i_satd = x264_rd_cost_i8x8( h, a->i_lambda2, idx, i_mode );

884

955

885

956

if( i_best > i_satd )

923

994

924

995

if( i_max > 0 )

925

996

{

926

int i_chroma_lambda = i_qp0_cost2_table[h->mb.i_chroma_qp];

997

int i_chroma_lambda = x264_lambda2_tab[h->mb.i_chroma_qp];

927

998

/* the previous thing encoded was x264_intra_rd(), so the pixels and

928

999

* coefs for the current chroma mode are still around, so we only

929

1000

* have to recount the bits. */

931

1002

for( i = 0; i < i_max; i++ )

932

1003

{

933

1004

i_mode = predict_mode[i];

934

h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );

935

h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );

1005

if( h->mb.b_lossless )

1006

x264_predict_lossless_8x8_chroma( h, i_mode );

1007

else

1008

{

1009

h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );

1010

h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );

1011

}

936

1012

/* if we've already found a mode that needs no residual, then

937

1013

* probably any mode with a residual will be worse.

938

1014

* so avoid dct on the remaining modes to improve speed. */

966

1042

static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )

967

1043

{

968

1044

x264_me_t m;

969

int i_ref;

970

int mvc[7][2], i_mvc;

1045

int i_ref, i_mvc;

1046

DECLARE_ALIGNED_4( int16_t mvc[8][2] );

971

1047

int i_halfpel_thresh = INT_MAX;

972

1048

int *p_halfpel_thresh = h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh : NULL;

973

1049

1009

1085

i_halfpel_thresh += i_ref_cost;

1010

1086

1011

1087

if( m.cost < a->l0.me16x16.cost )

1012

a->l0.me16x16 = m;

1088

h->mc.memcpy_aligned( &a->l0.me16x16, &m, sizeof(x264_me_t) );

1013

1089

1014

1090

/* save mv for predicting neighbors */

1015

a->l0.mvc[i_ref][0][0] =

1016

h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];

1017

a->l0.mvc[i_ref][0][1] =

1018

h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];

1091

*(uint32_t*)a->l0.mvc[i_ref][0] =

1092

*(uint32_t*)h->mb.mvr[0][i_ref][h->mb.i_mb_xy] = *(uint32_t*)m.mv;

1019

1093

}

1020

1094

1021

1095

x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.me16x16.i_ref );

1022

1096

assert( a->l0.me16x16.mv[1] <= h->mb.mv_max_spel[1] || h->param.i_threads == 1 );

1023

1097

1024

1098

h->mb.i_type = P_L0;

1025

if( a->b_mbrd && a->l0.me16x16.i_ref == 0

1026

&& a->l0.me16x16.mv[0] == h->mb.cache.pskip_mv[0]

1027

&& a->l0.me16x16.mv[1] == h->mb.cache.pskip_mv[1] )

1099

if( a->i_mbrd )

1028

1100

{

1029

h->mb.i_partition = D_16x16;

1030

x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );

1031

a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );

1101

x264_mb_cache_fenc_satd( h );

1102

if( a->l0.me16x16.i_ref == 0 && *(uint32_t*)a->l0.me16x16.mv == *(uint32_t*)h->mb.cache.pskip_mv )

1103

{

1104

h->mb.i_partition = D_16x16;

1105

x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );

1106

a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );

1107

}

1032

1108

}

1033

1109

}

1034

1110

1059

1135

}

1060

1136

1061

1137

for( i_ref = 0; i_ref <= i_maxref; i_ref++ )

1062

{

1063

a->l0.mvc[i_ref][0][0] = h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0];

1064

a->l0.mvc[i_ref][0][1] = h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1];

1065

}

1138

*(uint32_t*)a->l0.mvc[i_ref][0] = *(uint32_t*)h->mb.mvr[0][i_ref][h->mb.i_mb_xy];

1066

1139

1067

1140

for( i = 0; i < 4; i++ )

1068

1141

{

1077

1150

l0m->cost = INT_MAX;

1078

1151

for( i_ref = 0; i_ref <= i_maxref; i_ref++ )

1079

1152

{

1080

const int i_ref_cost = REF_COST( 0, i_ref );

1081

i_halfpel_thresh -= i_ref_cost;

1082

m.i_ref_cost = i_ref_cost;

1083

m.i_ref = i_ref;

1084

1085

LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 8*x8, 8*y8 );

1086

x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, i_ref );

1087

x264_mb_predict_mv( h, 0, 4*i, 2, m.mvp );

1088

x264_me_search_ref( h, &m, a->l0.mvc[i_ref], i+1, p_halfpel_thresh );

1089

1090

m.cost += i_ref_cost;

1091

i_halfpel_thresh += i_ref_cost;

1092

*(uint64_t*)a->l0.mvc[i_ref][i+1] = *(uint64_t*)m.mv;

1093

1094

if( m.cost < l0m->cost )

1095

*l0m = m;

1153

const int i_ref_cost = REF_COST( 0, i_ref );

1154

i_halfpel_thresh -= i_ref_cost;

1155

m.i_ref_cost = i_ref_cost;

1156

m.i_ref = i_ref;

1157

1158

LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 8*x8, 8*y8 );

1159

x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, i_ref );

1160

x264_mb_predict_mv( h, 0, 4*i, 2, m.mvp );

1161

x264_me_search_ref( h, &m, a->l0.mvc[i_ref], i+1, p_halfpel_thresh );

1162

1163

m.cost += i_ref_cost;

1164

i_halfpel_thresh += i_ref_cost;

1165

*(uint32_t*)a->l0.mvc[i_ref][i+1] = *(uint32_t*)m.mv;

1166

1167

if( m.cost < l0m->cost )

1168

h->mc.memcpy_aligned( l0m, &m, sizeof(x264_me_t) );

1096

1169

}

1097

x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, l0m->mv[0], l0m->mv[1] );

1170

x264_macroblock_cache_mv_ptr( h, 2*x8, 2*y8, 2, 2, 0, l0m->mv );

1098

1171

x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, l0m->i_ref );

1099

1172

1100

1173

/* mb type cost */

1103

1176

1104

1177

a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +

1105

1178

a->l0.me8x8[2].cost + a->l0.me8x8[3].cost;

1179

/* P_8x8 ref0 has no ref cost */

1180

if( !h->param.b_cabac && !(a->l0.me8x8[0].i_ref | a->l0.me8x8[1].i_ref |

1181

a->l0.me8x8[2].i_ref | a->l0.me8x8[3].i_ref) )

1182

a->l0.i_cost8x8 -= REF_COST( 0, 0 ) * 4;

1106

1183

h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =

1107

1184

h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;

1108

1185

}

1110

1187

static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )

1111

1188

{

1112

1189

const int i_ref = a->l0.me16x16.i_ref;

1113

const int i_ref_cost = REF_COST( 0, i_ref );

1190

const int i_ref_cost = h->param.b_cabac || i_ref ? REF_COST( 0, i_ref ) : 0;

1114

1191

uint8_t **p_fref = h->mb.pic.p_fref[0][i_ref];

1115

1192

uint8_t **p_fenc = h->mb.pic.p_fenc;

1116

1193

int i_mvc;

1117

int (*mvc)[2] = a->l0.mvc[i_ref];

1194

int16_t (*mvc)[2] = a->l0.mvc[i_ref];

1118

1195

int i;

1119

1196

1120

1197

/* XXX Needed for x264_mb_predict_mv */

1121

1198

h->mb.i_partition = D_8x8;

1122

1199

1123

1200

i_mvc = 1;

1124

*(uint64_t*)mvc[0] = *(uint64_t*)a->l0.me16x16.mv;

1201

*(uint32_t*)mvc[0] = *(uint32_t*)a->l0.me16x16.mv;

1125

1202

1126

1203

for( i = 0; i < 4; i++ )

1127

1204

{

1139

1216

x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );

1140

1217

x264_me_search( h, m, mvc, i_mvc );

1141

1218

1142

x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, m->mv[0], m->mv[1] );

1219

x264_macroblock_cache_mv_ptr( h, 2*x8, 2*y8, 2, 2, 0, m->mv );

1143

1220

1144

*(uint64_t*)mvc[i_mvc] = *(uint64_t*)m->mv;

1221

*(uint32_t*)mvc[i_mvc] = *(uint32_t*)m->mv;

1145

1222

i_mvc++;

1146

1223

1147

1224

/* mb type cost */

1149

1226

m->cost += a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x8];

1150

1227

}

1151

1228

1229

a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +

1230

a->l0.me8x8[2].cost + a->l0.me8x8[3].cost;

1152

1231

/* theoretically this should include 4*ref_cost,

1153

1232

* but 3 seems a better approximation of cabac. */

1154

a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +

1155

a->l0.me8x8[2].cost + a->l0.me8x8[3].cost -

1156

REF_COST( 0, a->l0.me16x16.i_ref );

1233

if( h->param.b_cabac )

1234

a->l0.i_cost8x8 -= i_ref_cost;

1157

1235

h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =

1158

1236

h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;

1159

1237

}

1162

1240

{

1163

1241

x264_me_t m;

1164

1242

uint8_t **p_fenc = h->mb.pic.p_fenc;

1165

DECLARE_ALIGNED_8( int mvc[3][2] );

1243

DECLARE_ALIGNED_4( int16_t mvc[3][2] );

1166

1244

int i, j;

1167

1245

1168

1246

/* XXX Needed for x264_mb_predict_mv */

1181

1259

l0m->cost = INT_MAX;

1182

1260

for( j = 0; j < i_ref8s; j++ )

1183

1261

{

1184

const int i_ref = ref8[j];

1185

const int i_ref_cost = REF_COST( 0, i_ref );

1186

m.i_ref_cost = i_ref_cost;

1187

m.i_ref = i_ref;

1188

1189

/* if we skipped the 16x16 predictor, we wouldn't have to copy anything... */

1190

*(uint64_t*)mvc[0] = *(uint64_t*)a->l0.mvc[i_ref][0];

1191

*(uint64_t*)mvc[1] = *(uint64_t*)a->l0.mvc[i_ref][2*i+1];

1192

*(uint64_t*)mvc[2] = *(uint64_t*)a->l0.mvc[i_ref][2*i+2];

1193

1194

LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 0, 8*i );

1195

x264_macroblock_cache_ref( h, 0, 2*i, 4, 2, 0, i_ref );

1196

x264_mb_predict_mv( h, 0, 8*i, 4, m.mvp );

1197

x264_me_search( h, &m, mvc, 3 );

1198

1199

m.cost += i_ref_cost;

1200

1201

if( m.cost < l0m->cost )

1202

*l0m = m;

1262

const int i_ref = ref8[j];

1263

const int i_ref_cost = REF_COST( 0, i_ref );

1264

m.i_ref_cost = i_ref_cost;

1265

m.i_ref = i_ref;

1266

1267

/* if we skipped the 16x16 predictor, we wouldn't have to copy anything... */

1268

*(uint32_t*)mvc[0] = *(uint32_t*)a->l0.mvc[i_ref][0];

1269

*(uint32_t*)mvc[1] = *(uint32_t*)a->l0.mvc[i_ref][2*i+1];

1270

*(uint32_t*)mvc[2] = *(uint32_t*)a->l0.mvc[i_ref][2*i+2];

1271

1272

LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 0, 8*i );

1273

x264_macroblock_cache_ref( h, 0, 2*i, 4, 2, 0, i_ref );

1274

x264_mb_predict_mv( h, 0, 8*i, 4, m.mvp );

1275

x264_me_search( h, &m, mvc, 3 );

1276

1277

m.cost += i_ref_cost;

1278

1279

if( m.cost < l0m->cost )

1280

h->mc.memcpy_aligned( l0m, &m, sizeof(x264_me_t) );

1203

1281

}

1204

x264_macroblock_cache_mv( h, 0, 2*i, 4, 2, 0, l0m->mv[0], l0m->mv[1] );

1282

x264_macroblock_cache_mv_ptr( h, 0, 2*i, 4, 2, 0, l0m->mv );

1205

1283

x264_macroblock_cache_ref( h, 0, 2*i, 4, 2, 0, l0m->i_ref );

1206

1284

}

1207

1285

1212

1290

{

1213

1291

x264_me_t m;

1214

1292

uint8_t **p_fenc = h->mb.pic.p_fenc;

1215

DECLARE_ALIGNED_8( int mvc[3][2] );

1293

DECLARE_ALIGNED_4( int16_t mvc[3][2] );

1216

1294

int i, j;

1217

1295

1218

1296

/* XXX Needed for x264_mb_predict_mv */

1231

1309

l0m->cost = INT_MAX;

1232

1310

for( j = 0; j < i_ref8s; j++ )

1233

1311

{

1234

const int i_ref = ref8[j];

1235

const int i_ref_cost = REF_COST( 0, i_ref );

1236

m.i_ref_cost = i_ref_cost;

1237

m.i_ref = i_ref;

1238

1239

*(uint64_t*)mvc[0] = *(uint64_t*)a->l0.mvc[i_ref][0];

1240

*(uint64_t*)mvc[1] = *(uint64_t*)a->l0.mvc[i_ref][i+1];

1241

*(uint64_t*)mvc[2] = *(uint64_t*)a->l0.mvc[i_ref][i+3];

1242

1243

LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 8*i, 0 );

1244

x264_macroblock_cache_ref( h, 2*i, 0, 2, 4, 0, i_ref );

1245

x264_mb_predict_mv( h, 0, 4*i, 2, m.mvp );

1246

x264_me_search( h, &m, mvc, 3 );

1247

1248

m.cost += i_ref_cost;

1249

1250

if( m.cost < l0m->cost )

1251

*l0m = m;

1312

const int i_ref = ref8[j];

1313

const int i_ref_cost = REF_COST( 0, i_ref );

1314

m.i_ref_cost = i_ref_cost;

1315

m.i_ref = i_ref;

1316

1317

*(uint32_t*)mvc[0] = *(uint32_t*)a->l0.mvc[i_ref][0];

1318

*(uint32_t*)mvc[1] = *(uint32_t*)a->l0.mvc[i_ref][i+1];

1319

*(uint32_t*)mvc[2] = *(uint32_t*)a->l0.mvc[i_ref][i+3];

1320

1321

LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 8*i, 0 );

1322

x264_macroblock_cache_ref( h, 2*i, 0, 2, 4, 0, i_ref );

1323

x264_mb_predict_mv( h, 0, 4*i, 2, m.mvp );

1324

x264_me_search( h, &m, mvc, 3 );

1325

1326

m.cost += i_ref_cost;

1327

1328

if( m.cost < l0m->cost )

1329

h->mc.memcpy_aligned( l0m, &m, sizeof(x264_me_t) );

1252

1330

}

1253

x264_macroblock_cache_mv( h, 2*i, 0, 2, 4, 0, l0m->mv[0], l0m->mv[1] );

1331

x264_macroblock_cache_mv_ptr( h, 2*i, 0, 2, 4, 0, l0m->mv );

1254

1332

x264_macroblock_cache_ref( h, 2*i, 0, 2, 4, 0, l0m->i_ref );

1255

1333

}

1256

1334

1319

1397

x264_mb_predict_mv( h, 0, idx, 1, m->mvp );

1320

1398

x264_me_search( h, m, &a->l0.me8x8[i8x8].mv, i_mvc );

1321

1399

1322

x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, m->mv[0], m->mv[1] );

1400

x264_macroblock_cache_mv_ptr( h, x4, y4, 1, 1, 0, m->mv );

1323

1401

}

1324

1402

a->l0.i_cost4x4[i8x8] = a->l0.me4x4[i8x8][0].cost +

1325

1403

a->l0.me4x4[i8x8][1].cost +

1359

1437

x264_mb_predict_mv( h, 0, idx, 2, m->mvp );

1360

1438

x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );

1361

1439

1362

x264_macroblock_cache_mv( h, x4, y4, 2, 1, 0, m->mv[0], m->mv[1] );

1440

x264_macroblock_cache_mv_ptr( h, x4, y4, 2, 1, 0, m->mv );

1363

1441

}

1364

1442

a->l0.i_cost8x4[i8x8] = a->l0.me8x4[i8x8][0].cost + a->l0.me8x4[i8x8][1].cost +

1365

1443

REF_COST( 0, i_ref ) +

1396

1474

x264_mb_predict_mv( h, 0, idx, 1, m->mvp );

1397

1475

x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );

1398

1476

1399

x264_macroblock_cache_mv( h, x4, y4, 1, 2, 0, m->mv[0], m->mv[1] );

1477

x264_macroblock_cache_mv_ptr( h, x4, y4, 1, 2, 0, m->mv );

1400

1478

}

1401

1479

a->l0.i_cost4x8[i8x8] = a->l0.me4x8[i8x8][0].cost + a->l0.me4x8[i8x8][1].cost +

1402

1480

REF_COST( 0, i_ref ) +

1428

1506

}

1429

1507

}

1430

1508

1431

#define WEIGHTED_AVG( size, pix1, stride1, src2, stride2 ) \

1432

{ \

1433

if( h->param.analyse.b_weighted_bipred ) \

1434

h->mc.avg_weight[size]( pix1, stride1, src2, stride2, \

1435

h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] ); \

1436

else \

1437

h->mc.avg[size]( pix1, stride1, src2, stride2 ); \

1438

}

1509

#define WEIGHTED_AVG( size, pix, stride, src1, stride1, src2, stride2 ) \

1510

{ \

1511

h->mc.avg[size]( pix, stride, src1, stride1, src2, stride2, h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] ); \

1512

}

1439

1513

1440

1514

static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )

1441

1515

{

1516

DECLARE_ALIGNED_16( uint8_t pix0[16*16] );

1442

1517

DECLARE_ALIGNED_16( uint8_t pix1[16*16] );

1443

DECLARE_ALIGNED_16( uint8_t pix2[16*16] );

1444

uint8_t *src2;

1445

int stride2 = 16;

1446

int weight;

1518

uint8_t *src0, *src1;

1519

int stride0 = 16, stride1 = 16;

1447

1520

1448

1521

x264_me_t m;

1449

int i_ref;

1450

int mvc[8][2], i_mvc;

1522

int i_ref, i_mvc;

1523

DECLARE_ALIGNED_4( int16_t mvc[9][2] );

1451

1524

int i_halfpel_thresh = INT_MAX;

1452

1525

int *p_halfpel_thresh = h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh : NULL;

1453

1526

1472

1545

if( m.cost < a->l0.me16x16.cost )

1473

1546

{

1474

1547

a->l0.i_ref = i_ref;

1475

a->l0.me16x16 = m;

1548

h->mc.memcpy_aligned( &a->l0.me16x16, &m, sizeof(x264_me_t) );

1476

1549

}

1477

1550

1478

1551

/* save mv for predicting neighbors */

1479

h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];

1480

h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];

1552

*(uint32_t*)h->mb.mvr[0][i_ref][h->mb.i_mb_xy] = *(uint32_t*)m.mv;

1481

1553

}

1482

1554

/* subtract ref cost, so we don't have to add it for the other MB types */

1483

1555

a->l0.me16x16.cost -= REF_COST( 0, a->l0.i_ref );

1500

1572

if( m.cost < a->l1.me16x16.cost )

1501

1573

{

1502

1574

a->l1.i_ref = i_ref;

1503

a->l1.me16x16 = m;

1575

h->mc.memcpy_aligned( &a->l1.me16x16, &m, sizeof(x264_me_t) );

1504

1576

}

1505

1577

1506

1578

/* save mv for predicting neighbors */

1507

h->mb.mvr[1][i_ref][h->mb.i_mb_xy][0] = m.mv[0];

1508

h->mb.mvr[1][i_ref][h->mb.i_mb_xy][1] = m.mv[1];

1579

*(uint32_t*)h->mb.mvr[1][i_ref][h->mb.i_mb_xy] = *(uint32_t*)m.mv;

1509

1580

}

1510

1581

/* subtract ref cost, so we don't have to add it for the other MB types */

1511

1582

a->l1.me16x16.cost -= REF_COST( 1, a->l1.i_ref );

1515

1586

x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );

1516

1587

1517

1588

/* get cost of BI mode */

1518

weight = h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref];

1519

if ( ((a->l0.me16x16.mv[0] | a->l0.me16x16.mv[1]) & 1) == 0 )

1520

{

1521

/* l0 reference is halfpel, so get_ref on it will make it faster */

1522

src2 =

1523

h->mc.get_ref( pix2, &stride2,

1524

h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],

1525

a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],

1526

16, 16 );

1527

h->mc.mc_luma( pix1, 16,

1528

h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],

1529

a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],

1530

16, 16 );

1531

weight = 64 - weight;

1532

}

1533

else

1534

{

1535

/* if l0 was qpel, we'll use get_ref on l1 instead */

1536

h->mc.mc_luma( pix1, 16,

1537

h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],

1538

a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],

1539

16, 16 );

1540

src2 =

1541

h->mc.get_ref( pix2, &stride2,

1542

h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],

1543

a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],

1544

16, 16 );

1545

}

1546

1547

if( h->param.analyse.b_weighted_bipred )

1548

h->mc.avg_weight[PIXEL_16x16]( pix1, 16, src2, stride2, weight );

1549

else

1550

h->mc.avg[PIXEL_16x16]( pix1, 16, src2, stride2 );

1551

1552

a->i_cost16x16bi = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix1, 16 )

1589

src0 = h->mc.get_ref( pix0, &stride0,

1590

h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],

1591

a->l0.me16x16.mv[0], a->l0.me16x16.mv[1], 16, 16 );

1592

src1 = h->mc.get_ref( pix1, &stride1,

1593

h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],

1594

a->l1.me16x16.mv[0], a->l1.me16x16.mv[1], 16, 16 );

1595

1596

h->mc.avg[PIXEL_16x16]( pix0, 16, src0, stride0, src1, stride1, h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] );

1597

1598

a->i_cost16x16bi = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )

1553

1599

+ REF_COST( 0, a->l0.i_ref )

1554

1600

+ REF_COST( 1, a->l1.i_ref )

1555

1601

+ a->l0.me16x16.cost_mv

1569

1615

switch( h->mb.i_sub_partition[i] )

1570

1616

{

1571

1617

case D_L0_8x8:

1572

x264_macroblock_cache_mv( h, x, y, 2, 2, 0, a->l0.me8x8[i].mv[0], a->l0.me8x8[i].mv[1] );

1618

x264_macroblock_cache_mv_ptr( h, x, y, 2, 2, 0, a->l0.me8x8[i].mv );

1573

1619

break;

1574

1620

case D_L0_8x4:

1575

x264_macroblock_cache_mv( h, x, y+0, 2, 1, 0, a->l0.me8x4[i][0].mv[0], a->l0.me8x4[i][0].mv[1] );

1576

x264_macroblock_cache_mv( h, x, y+1, 2, 1, 0, a->l0.me8x4[i][1].mv[0], a->l0.me8x4[i][1].mv[1] );

1621

x264_macroblock_cache_mv_ptr( h, x, y+0, 2, 1, 0, a->l0.me8x4[i][0].mv );

1622

x264_macroblock_cache_mv_ptr( h, x, y+1, 2, 1, 0, a->l0.me8x4[i][1].mv );

1577

1623

break;

1578

1624

case D_L0_4x8:

1579

x264_macroblock_cache_mv( h, x+0, y, 1, 2, 0, a->l0.me4x8[i][0].mv[0], a->l0.me4x8[i][0].mv[1] );

1580

x264_macroblock_cache_mv( h, x+1, y, 1, 2, 0, a->l0.me4x8[i][1].mv[0], a->l0.me4x8[i][1].mv[1] );

1625

x264_macroblock_cache_mv_ptr( h, x+0, y, 1, 2, 0, a->l0.me4x8[i][0].mv );

1626

x264_macroblock_cache_mv_ptr( h, x+1, y, 1, 2, 0, a->l0.me4x8[i][1].mv );

1581

1627

break;

1582

1628

case D_L0_4x4:

1583

x264_macroblock_cache_mv( h, x+0, y+0, 1, 1, 0, a->l0.me4x4[i][0].mv[0], a->l0.me4x4[i][0].mv[1] );

1584

x264_macroblock_cache_mv( h, x+1, y+0, 1, 1, 0, a->l0.me4x4[i][1].mv[0], a->l0.me4x4[i][1].mv[1] );

1585

x264_macroblock_cache_mv( h, x+0, y+1, 1, 1, 0, a->l0.me4x4[i][2].mv[0], a->l0.me4x4[i][2].mv[1] );

1586

x264_macroblock_cache_mv( h, x+1, y+1, 1, 1, 0, a->l0.me4x4[i][3].mv[0], a->l0.me4x4[i][3].mv[1] );

1629

x264_macroblock_cache_mv_ptr( h, x+0, y+0, 1, 1, 0, a->l0.me4x4[i][0].mv );

1630

x264_macroblock_cache_mv_ptr( h, x+1, y+0, 1, 1, 0, a->l0.me4x4[i][1].mv );

1631

x264_macroblock_cache_mv_ptr( h, x+0, y+1, 1, 1, 0, a->l0.me4x4[i][2].mv );

1632

x264_macroblock_cache_mv_ptr( h, x+1, y+1, 1, 1, 0, a->l0.me4x4[i][3].mv );

1587

1633

break;

1588

1634

default:

1589

1635

x264_log( h, X264_LOG_ERROR, "internal error\n" );

1595

1641

if( x264_mb_partition_listX_table[0][part] ) \

1596

1642

{ \

1597

1643

x264_macroblock_cache_ref( h, x,y,dx,dy, 0, a->l0.i_ref ); \

1598

x264_macroblock_cache_mv( h, x,y,dx,dy, 0, me0.mv[0], me0.mv[1] ); \

1644

x264_macroblock_cache_mv_ptr( h, x,y,dx,dy, 0, me0.mv ); \

1599

1645

} \

1600

1646

else \

1601

1647

{ \

1602

1648

x264_macroblock_cache_ref( h, x,y,dx,dy, 0, -1 ); \

1603

x264_macroblock_cache_mv( h, x,y,dx,dy, 0, 0, 0 ); \

1649

x264_macroblock_cache_mv( h, x,y,dx,dy, 0, 0 ); \

1604

1650

if( b_mvd ) \

1605

x264_macroblock_cache_mvd( h, x,y,dx,dy, 0, 0, 0 ); \

1651

x264_macroblock_cache_mvd( h, x,y,dx,dy, 0, 0 ); \

1606

1652

} \

1607

1653

if( x264_mb_partition_listX_table[1][part] ) \

1608

1654

{ \

1609

1655

x264_macroblock_cache_ref( h, x,y,dx,dy, 1, a->l1.i_ref ); \

1610

x264_macroblock_cache_mv( h, x,y,dx,dy, 1, me1.mv[0], me1.mv[1] ); \

1656

x264_macroblock_cache_mv_ptr( h, x,y,dx,dy, 1, me1.mv ); \

1611

1657

} \

1612

1658

else \

1613

1659

{ \

1614

1660

x264_macroblock_cache_ref( h, x,y,dx,dy, 1, -1 ); \

1615

x264_macroblock_cache_mv( h, x,y,dx,dy, 1, 0, 0 ); \

1661

x264_macroblock_cache_mv( h, x,y,dx,dy, 1, 0 ); \

1616

1662

if( b_mvd ) \

1617

x264_macroblock_cache_mvd( h, x,y,dx,dy, 1, 0, 0 ); \

1663

x264_macroblock_cache_mvd( h, x,y,dx,dy, 1, 0 ); \

1618

1664

}

1619

1665

1620

1666

static inline void x264_mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )

1626

1672

x264_mb_load_mv_direct8x8( h, i );

1627

1673

if( b_mvd )

1628

1674

{

1629

x264_macroblock_cache_mvd( h, x, y, 2, 2, 0, 0, 0 );

1630

x264_macroblock_cache_mvd( h, x, y, 2, 2, 1, 0, 0 );

1675

x264_macroblock_cache_mvd( h, x, y, 2, 2, 0, 0 );

1676

x264_macroblock_cache_mvd( h, x, y, 2, 2, 1, 0 );

1631

1677

x264_macroblock_cache_skip( h, x, y, 2, 2, 1 );

1632

1678

}

1633

1679

}

1665

1711

const int y8 = i/2;

1666

1712

int i_part_cost;

1667

1713

int i_part_cost_bi = 0;

1714

int stride[2] = {8,8};

1715

uint8_t *src[2];

1668

1716

1669

1717

for( l = 0; l < 2; l++ )

1670

1718

{

1680

1728

x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );

1681

1729

x264_me_search( h, m, &lX->me16x16.mv, 1 );

1682

1730

1683

x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, l, m->mv[0], m->mv[1] );

1731

x264_macroblock_cache_mv_ptr( h, 2*x8, 2*y8, 2, 2, l, m->mv );

1684

1732

1685

1733

/* BI mode */

1686

h->mc.mc_luma( pix[l], 8, m->p_fref, m->i_stride[0],

1687

m->mv[0], m->mv[1], 8, 8 );

1734

src[l] = h->mc.get_ref( pix[l], &stride[l], m->p_fref, m->i_stride[0],

1735

m->mv[0], m->mv[1], 8, 8 );

1688

1736

i_part_cost_bi += m->cost_mv;

1689

1737

/* FIXME: ref cost */

1690

1738

}

1691

1692

WEIGHTED_AVG( PIXEL_8x8, pix[0], 8, pix[1], 8 );

1739

h->mc.avg[PIXEL_8x8]( pix[0], 8, src[0], stride[0], src[1], stride[1], h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] );

1693

1740

i_part_cost_bi += h->pixf.mbcmp[PIXEL_8x8]( a->l0.me8x8[i].p_fenc[0], FENC_STRIDE, pix[0], 8 )

1694

1741

+ a->i_lambda * i_sub_mb_b_cost_table[D_BI_8x8];

1695

1742

a->l0.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];

1715

1762

uint8_t **p_fref[2] =

1716

1763

{ h->mb.pic.p_fref[0][a->l0.i_ref],

1717

1764

h->mb.pic.p_fref[1][a->l1.i_ref] };

1718

DECLARE_ALIGNED_16( uint8_t pix[2][16*8] );

1719

DECLARE_ALIGNED_8( int mvc[2][2] );

1765

DECLARE_ALIGNED_16( uint8_t pix[2][16*8] );

1766

DECLARE_ALIGNED_4( int16_t mvc[2][2] );

1720

1767

int i, l;

1721

1768

1722

1769

h->mb.i_partition = D_16x8;

1726

1773

{

1727

1774

int i_part_cost;

1728

1775

int i_part_cost_bi = 0;

1776

int stride[2] = {16,16};

1777

uint8_t *src[2];

1729

1778

1730

1779

/* TODO: check only the list(s) that were used in b8x8? */

1731

1780

for( l = 0; l < 2; l++ )

1739

1788

LOAD_FENC( m, h->mb.pic.p_fenc, 0, 8*i );

1740

1789

LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 0, 8*i );

1741

1790

1742

*(uint64_t*)mvc[0] = *(uint64_t*)lX->me8x8[2*i].mv;

1743

*(uint64_t*)mvc[1] = *(uint64_t*)lX->me8x8[2*i+1].mv;

1791

*(uint32_t*)mvc[0] = *(uint32_t*)lX->me8x8[2*i].mv;

1792

*(uint32_t*)mvc[1] = *(uint32_t*)lX->me8x8[2*i+1].mv;

1744

1793

1745

1794

x264_mb_predict_mv( h, l, 8*i, 2, m->mvp );

1746

1795

x264_me_search( h, m, mvc, 2 );

1747

1796

1748

1797

/* BI mode */

1749

h->mc.mc_luma( pix[l], 16, m->p_fref, m->i_stride[0],

1750

m->mv[0], m->mv[1], 16, 8 );

1798

src[l] = h->mc.get_ref( pix[l], &stride[l], m->p_fref, m->i_stride[0],

1799

m->mv[0], m->mv[1], 16, 8 );

1751

1800

/* FIXME: ref cost */

1752

1801

i_part_cost_bi += m->cost_mv;

1753

1802

}

1754

1755

WEIGHTED_AVG( PIXEL_16x8, pix[0], 16, pix[1], 16 );

1803

h->mc.avg[PIXEL_16x8]( pix[0], 16, src[0], stride[0], src[1], stride[1], h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] );

1756

1804

i_part_cost_bi += h->pixf.mbcmp[PIXEL_16x8]( a->l0.me16x8[i].p_fenc[0], FENC_STRIDE, pix[0], 16 );

1757

1805

1758

1806

i_part_cost = a->l0.me16x8[i].cost;

1785

1833

{ h->mb.pic.p_fref[0][a->l0.i_ref],

1786

1834

h->mb.pic.p_fref[1][a->l1.i_ref] };

1787

1835

DECLARE_ALIGNED_8( uint8_t pix[2][8*16] );

1788

DECLARE_ALIGNED_8( int mvc[2][2] );

1836

DECLARE_ALIGNED_4( int16_t mvc[2][2] );

1789

1837

int i, l;

1790

1838

1791

1839

h->mb.i_partition = D_8x16;

1795

1843

{

1796

1844

int i_part_cost;

1797

1845

int i_part_cost_bi = 0;

1846

int stride[2] = {8,8};

1847

uint8_t *src[2];

1798

1848

1799

1849

for( l = 0; l < 2; l++ )

1800

1850

{

1807

1857

LOAD_FENC( m, h->mb.pic.p_fenc, 8*i, 0 );

1808

1858

LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 8*i, 0 );

1809

1859

1810

*(uint64_t*)mvc[0] = *(uint64_t*)lX->me8x8[i].mv;

1811

*(uint64_t*)mvc[1] = *(uint64_t*)lX->me8x8[i+2].mv;

1860

*(uint32_t*)mvc[0] = *(uint32_t*)lX->me8x8[i].mv;

1861

*(uint32_t*)mvc[1] = *(uint32_t*)lX->me8x8[i+2].mv;

1812

1862

1813

1863

x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );

1814

1864

x264_me_search( h, m, mvc, 2 );

1815

1865

1816

1866

/* BI mode */

1817

h->mc.mc_luma( pix[l], 8, m->p_fref, m->i_stride[0],

1818

m->mv[0], m->mv[1], 8, 16 );

1867

src[l] = h->mc.get_ref( pix[l], &stride[l], m->p_fref, m->i_stride[0],

1868

m->mv[0], m->mv[1], 8, 16 );

1819

1869

/* FIXME: ref cost */

1820

1870

i_part_cost_bi += m->cost_mv;

1821

1871

}

1822

1872

1823

WEIGHTED_AVG( PIXEL_8x16, pix[0], 8, pix[1], 8 );

1873

h->mc.avg[PIXEL_8x16]( pix[0], 8, src[0], stride[0], src[1], stride[1], h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] );

1824

1874

i_part_cost_bi += h->pixf.mbcmp[PIXEL_8x16]( a->l0.me8x16[i].p_fenc[0], FENC_STRIDE, pix[0], 8 );

1825

1875

1826

1876

i_part_cost = a->l0.me8x16[i].cost;

1881

1931

if( a->l0.i_cost8x8 <= thresh )

1882

1932

{

1883

1933

h->mb.i_type = P_8x8;

1884

x264_analyse_update_cache( h, a );

1934

h->mb.i_partition = D_8x8;

1935

if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 )

1936

{

1937

int i;

1938

x264_macroblock_cache_ref( h, 0, 0, 2, 2, 0, a->l0.me8x8[0].i_ref );

1939

x264_macroblock_cache_ref( h, 2, 0, 2, 2, 0, a->l0.me8x8[1].i_ref );

1940

x264_macroblock_cache_ref( h, 0, 2, 2, 2, 0, a->l0.me8x8[2].i_ref );

1941

x264_macroblock_cache_ref( h, 2, 2, 2, 2, 0, a->l0.me8x8[3].i_ref );

1942

for( i = 0; i < 4; i++ )

1943

{

1944

int costs[4] = {a->l0.i_cost4x4[i], a->l0.i_cost8x4[i], a->l0.i_cost4x8[i], a->l0.me8x8[i].cost};

1945

int thresh = X264_MIN4( costs[0], costs[1], costs[2], costs[3] ) * 5 / 4;

1946

int subtype, btype = D_L0_8x8;

1947

uint64_t bcost = COST_MAX64;

1948

for( subtype = D_L0_4x4; subtype <= D_L0_8x8; subtype++ )

1949

{

1950

uint64_t cost;

1951

if( costs[subtype] > thresh || (subtype == D_L0_8x8 && bcost == COST_MAX64) )

1952

continue;

1953

h->mb.i_sub_partition[i] = subtype;

1954

x264_mb_cache_mv_p8x8( h, a, i );

1955

cost = x264_rd_cost_part( h, a->i_lambda2, i<<2, PIXEL_8x8 );

1956

COPY2_IF_LT( bcost, cost, btype, subtype );

1957

}

1958

h->mb.i_sub_partition[i] = btype;

1959

x264_mb_cache_mv_p8x8( h, a, i );

1960

}

1961

}

1962

else

1963

x264_analyse_update_cache( h, a );

1885

1964

a->l0.i_cost8x8 = x264_rd_cost_mb( h, a->i_lambda2 );

1886

1887

if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 )

1888

{

1889

/* FIXME: RD per subpartition */

1890

int part_bak[4];

1891

int i, i_cost;

1892

int b_sub8x8 = 0;

1893

for( i=0; i<4; i++ )

1894

{

1895

part_bak[i] = h->mb.i_sub_partition[i];

1896

b_sub8x8 |= (part_bak[i] != D_L0_8x8);

1897

}

1898

if( b_sub8x8 )

1899

{

1900

h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =

1901

h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;

1902

i_cost = x264_rd_cost_mb( h, a->i_lambda2 );

1903

if( a->l0.i_cost8x8 < i_cost )

1904

{

1905

for( i=0; i<4; i++ )

1906

h->mb.i_sub_partition[i] = part_bak[i];

1907

}

1908

else

1909

a->l0.i_cost8x8 = i_cost;

1910

}

1911

}

1912

1965

}

1913

1966

else

1914

1967

a->l0.i_cost8x8 = COST_MAX;

1916

1969

1917

1970

static void x264_mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )

1918

1971

{

1919

int thresh = i_satd_inter * 17/16;

1972

int thresh = i_satd_inter * (17 + (!!h->mb.i_psy_rd))/16;

1920

1973

1921

1974

if( a->b_direct_available && a->i_rd16x16direct == COST_MAX )

1922

1975

{

1923

1976

h->mb.i_type = B_DIRECT;

1977

/* Assumes direct/skip MC is still in fdec */

1978

/* Requires b-rdo to be done before intra analysis */

1979

h->mb.b_skip_mc = 1;

1924

1980

x264_analyse_update_cache( h, a );

1925

1981

a->i_rd16x16direct = x264_rd_cost_mb( h, a->i_lambda2 );

1982

h->mb.b_skip_mc = 0;

1926

1983

}

1927

1984

1928

1985

//FIXME not all the update_cache calls are needed

1980

2037

}

1981

2038

}

1982

2039

1983

static void refine_bidir( x264_t *h, x264_mb_analysis_t *a )

2040

static void x264_refine_bidir( x264_t *h, x264_mb_analysis_t *a )

1984

2041

{

1985

2042

const int i_biweight = h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref];

1986

2043

int i;

1987

2044

2045

if( IS_INTRA(h->mb.i_type) )

2046

return;

2047

1988

2048

switch( h->mb.i_partition )

1989

2049

{

1990

2050

case D_16x16:

1991

2051

if( h->mb.i_type == B_BI_BI )

1992

x264_me_refine_bidir( h, &a->l0.me16x16, &a->l1.me16x16, i_biweight );

2052

x264_me_refine_bidir_satd( h, &a->l0.me16x16, &a->l1.me16x16, i_biweight );

1993

2053

break;

1994

2054

case D_16x8:

1995

2055

for( i=0; i<2; i++ )

1996

2056

if( a->i_mb_partition16x8[i] == D_BI_8x8 )

1997

x264_me_refine_bidir( h, &a->l0.me16x8[i], &a->l1.me16x8[i], i_biweight );

2057

x264_me_refine_bidir_satd( h, &a->l0.me16x8[i], &a->l1.me16x8[i], i_biweight );

1998

2058

break;

1999

2059

case D_8x16:

2000

2060

for( i=0; i<2; i++ )

2001

2061

if( a->i_mb_partition8x16[i] == D_BI_8x8 )

2002

x264_me_refine_bidir( h, &a->l0.me8x16[i], &a->l1.me8x16[i], i_biweight );

2062

x264_me_refine_bidir_satd( h, &a->l0.me8x16[i], &a->l1.me8x16[i], i_biweight );

2003

2063

break;

2004

2064

case D_8x8:

2005

2065

for( i=0; i<4; i++ )

2006

2066

if( h->mb.i_sub_partition[i] == D_BI_8x8 )

2007

x264_me_refine_bidir( h, &a->l0.me8x8[i], &a->l1.me8x8[i], i_biweight );

2067

x264_me_refine_bidir_satd( h, &a->l0.me8x8[i], &a->l1.me8x8[i], i_biweight );

2008

2068

break;

2009

2069

}

2010

2070

}

2011

2071

2012

2072

static inline void x264_mb_analyse_transform( x264_t *h )

2013

2073

{

2014

if( x264_mb_transform_8x8_allowed( h ) && h->param.analyse.b_transform_8x8 )

2074

if( x264_mb_transform_8x8_allowed( h ) && h->param.analyse.b_transform_8x8 && !h->mb.b_lossless )

2015

2075

{

2016

2076

int i_cost4, i_cost8;

2017

/* FIXME only luma mc is needed */

2077

/* Only luma MC is really needed, but the full MC is re-used in macroblock_encode. */

2018

2078

x264_mb_mc( h );

2019

2079

2020

2080

i_cost8 = h->pixf.sa8d[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE,

2023

2083

h->mb.pic.p_fdec[0], FDEC_STRIDE );

2024

2084

2025

2085

h->mb.b_transform_8x8 = i_cost8 < i_cost4;

2086

h->mb.b_skip_mc = 1;

2026

2087

}

2027

2088

}

2028

2089

2070

2131

/*--------------------------- Do the analysis ---------------------------*/

2071

2132

if( h->sh.i_type == SLICE_TYPE_I )

2072

2133

{

2134

if( analysis.i_mbrd )

2135

x264_mb_cache_fenc_satd( h );

2073

2136

x264_mb_analyse_intra( h, &analysis, COST_MAX );

2074

if( analysis.b_mbrd )

2137

if( analysis.i_mbrd )

2075

2138

x264_intra_rd( h, &analysis, COST_MAX );

2076

2139

2077

2140

i_cost = analysis.i_satd_i16x16;

2078

2141

h->mb.i_type = I_16x16;

2079

if( analysis.i_satd_i4x4 < i_cost )

2080

{

2081

i_cost = analysis.i_satd_i4x4;

2082

h->mb.i_type = I_4x4;

2083

}

2084

if( analysis.i_satd_i8x8 < i_cost )

2085

h->mb.i_type = I_8x8;

2142

COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, h->mb.i_type, I_4x4 );

2143

COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, h->mb.i_type, I_8x8 );

2144

if( analysis.i_satd_pcm < i_cost )

2145

h->mb.i_type = I_PCM;

2086

2146

2087

if( h->mb.i_subpel_refine >= 7 )

2147

else if( analysis.i_mbrd >= 2 )

2088

2148

x264_intra_rd_refine( h, &analysis );

2089

2149

}

2090

2150

else if( h->sh.i_type == SLICE_TYPE_P )

2196

2256

2197

2257

/* refine qpel */

2198

2258

//FIXME mb_type costs?

2199

if( analysis.b_mbrd )

2259

if( analysis.i_mbrd )

2200

2260

{

2201

2261

/* refine later */

2202

2262

}

2275

2335

analysis.i_satd_i8x8,

2276

2336

analysis.i_satd_i4x4 );

2277

2337

2278

if( analysis.b_mbrd )

2338

if( analysis.i_mbrd )

2279

2339

{

2280

2340

x264_mb_analyse_p_rd( h, &analysis, X264_MIN(i_satd_inter, i_satd_intra) );

2281

2341

i_type = P_L0;

2295

2355

i_intra_cost = analysis.i_satd_i16x16;

2296

2356

COPY2_IF_LT( i_intra_cost, analysis.i_satd_i8x8, i_intra_type, I_8x8 );

2297

2357

COPY2_IF_LT( i_intra_cost, analysis.i_satd_i4x4, i_intra_type, I_4x4 );

2358

COPY2_IF_LT( i_intra_cost, analysis.i_satd_pcm, i_intra_type, I_PCM );

2298

2359

COPY2_IF_LT( i_cost, i_intra_cost, i_type, i_intra_type );

2299

2360

2300

2361

if( i_intra_cost == COST_MAX )

2305

2366

h->stat.frame.i_inter_cost += i_cost;

2306

2367

h->stat.frame.i_mbs_analysed++;

2307

2368

2308

if( h->mb.i_subpel_refine >= 7 )

2369

if( analysis.i_mbrd >= 2 && h->mb.i_type != I_PCM )

2309

2370

{

2310

2371

if( IS_INTRA( h->mb.i_type ) )

2311

2372

{

2314

2375

else if( i_partition == D_16x16 )

2315

2376

{

2316

2377

x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.i_ref );

2317

x264_me_refine_qpel_rd( h, &analysis.l0.me16x16, analysis.i_lambda2, 0 );

2378

x264_me_refine_qpel_rd( h, &analysis.l0.me16x16, analysis.i_lambda2, 0, 0 );

2318

2379

}

2319

2380

else if( i_partition == D_16x8 )

2320

2381

{

2382

h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =

2383

h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;

2321

2384

x264_macroblock_cache_ref( h, 0, 0, 4, 2, 0, analysis.l0.me16x8[0].i_ref );

2322

2385

x264_macroblock_cache_ref( h, 0, 2, 4, 2, 0, analysis.l0.me16x8[1].i_ref );

2323

x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[0], analysis.i_lambda2, 0 );

2324

x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[1], analysis.i_lambda2, 2 );

2386

x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[0], analysis.i_lambda2, 0, 0 );

2387

x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[1], analysis.i_lambda2, 8, 0 );

2325

2388

}

2326

2389

else if( i_partition == D_8x16 )

2327

2390

{

2391

h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =

2392

h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;

2328

2393

x264_macroblock_cache_ref( h, 0, 0, 2, 4, 0, analysis.l0.me8x16[0].i_ref );

2329

2394

x264_macroblock_cache_ref( h, 2, 0, 2, 4, 0, analysis.l0.me8x16[1].i_ref );

2330

x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[0], analysis.i_lambda2, 0 );

2331

x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[1], analysis.i_lambda2, 1 );

2395

x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[0], analysis.i_lambda2, 0, 0 );

2396

x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[1], analysis.i_lambda2, 4, 0 );

2332

2397

}

2333

2398

else if( i_partition == D_8x8 )

2334

2399

{

2335

2400

int i8x8;

2336

2401

x264_analyse_update_cache( h, &analysis );

2337

2402

for( i8x8 = 0; i8x8 < 4; i8x8++ )

2338

if( h->mb.i_sub_partition[i8x8] == D_L0_8x8 )

2339

x264_me_refine_qpel_rd( h, &analysis.l0.me8x8[i8x8], analysis.i_lambda2, i8x8 );

2403

{

2404

if( h->mb.i_sub_partition[i8x8] == D_L0_8x8 )

2405

{

2406

x264_me_refine_qpel_rd( h, &analysis.l0.me8x8[i8x8], analysis.i_lambda2, i8x8*4, 0 );

2407

}

2408

else if( h->mb.i_sub_partition[i8x8] == D_L0_8x4 )

2409

{

2410

x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );

2411

x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][1], analysis.i_lambda2, i8x8*4+2, 0 );

2412

}

2413

else if( h->mb.i_sub_partition[i8x8] == D_L0_4x8 )

2414

{

2415

x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );

2416

x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 );

2417

}

2418

else if( h->mb.i_sub_partition[i8x8] == D_L0_4x4 )

2419

{

2420

x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );

2421

x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 );

2422

x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][2], analysis.i_lambda2, i8x8*4+2, 0 );

2423

x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][3], analysis.i_lambda2, i8x8*4+3, 0 );

2424

}

2425

}

2340

2426

}

2341

2427

}

2342

2428

}

2346

2432

int i_bskip_cost = COST_MAX;

2347

2433

int b_skip = 0;

2348

2434

2435

if( analysis.i_mbrd )

2436

x264_mb_cache_fenc_satd( h );

2437

2349

2438

h->mb.i_type = B_SKIP;

2350

2439

if( h->mb.b_direct_auto_write )

2351

2440

{

2379

2468

{

2380

2469

/* chance of skip is too small to bother */

2381

2470

}

2382

else if( analysis.b_mbrd )

2471

else if( analysis.i_mbrd )

2383

2472

{

2384

2473

i_bskip_cost = ssd_mb( h );

2385

2386

2474

/* 6 = minimum cavlc cost of a non-skipped MB */

2387

if( i_bskip_cost <= ((6 * analysis.i_lambda2 + 128) >> 8) )

2388

{

2389

h->mb.i_type = B_SKIP;

2390

x264_analyse_update_cache( h, &analysis );

2391

return;

2392

}

2475

b_skip = h->mb.b_skip_mc = i_bskip_cost <= ((6 * analysis.i_lambda2 + 128) >> 8);

2393

2476

}

2394

2477

else if( !h->mb.b_direct_auto_write )

2395

2478

{

2404

2487

const unsigned int flags = h->param.analyse.inter;

2405

2488

int i_type;

2406

2489

int i_partition;

2490

int i_satd_inter = 0; // shut up uninitialized warning

2491

h->mb.b_skip_mc = 0;

2407

2492

2408

2493

x264_mb_analyse_load_costs( h, &analysis );

2409

2494

2421

2506

COPY2_IF_LT( i_cost, analysis.i_cost16x16bi, i_type, B_BI_BI );

2422

2507

COPY2_IF_LT( i_cost, analysis.i_cost16x16direct, i_type, B_DIRECT );

2423

2508

2424

if( analysis.b_mbrd && analysis.i_cost16x16direct <= i_cost * 33/32 )

2509

if( analysis.i_mbrd && analysis.i_cost16x16direct <= i_cost * 33/32 )

2425

2510

{

2426

2511

x264_mb_analyse_b_rd( h, &analysis, i_cost );

2427

2512

if( i_bskip_cost < analysis.i_rd16x16direct &&

2463

2548

}

2464

2549

}

2465

2550

2466

if( analysis.b_mbrd )

2551

if( analysis.i_mbrd )

2467

2552

{

2468

2553

/* refine later */

2469

2554

}

2546

2631

}

2547

2632

}

2548

2633

2549

x264_mb_analyse_intra( h, &analysis, i_cost );

2550

2551

if( analysis.b_mbrd )

2634

if( analysis.i_mbrd )

2552

2635

{

2553

int i_satd_inter = i_cost;

2636

i_satd_inter = i_cost;

2554

2637

x264_mb_analyse_b_rd( h, &analysis, i_satd_inter );

2555

2638

i_type = B_SKIP;

2556

2639

i_cost = i_bskip_cost;

2565

2648

2566

2649

h->mb.i_type = i_type;

2567

2650

h->mb.i_partition = i_partition;

2651

}

2652

2653

x264_mb_analyse_intra( h, &analysis, i_satd_inter );

2654

2655

if( analysis.i_mbrd )

2656

{

2568

2657

x264_mb_analyse_transform_rd( h, &analysis, &i_satd_inter, &i_cost );

2569

2658

x264_intra_rd( h, &analysis, i_satd_inter * 17/16 );

2570

2659

}

2572

2661

COPY2_IF_LT( i_cost, analysis.i_satd_i16x16, i_type, I_16x16 );

2573

2662

COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, i_type, I_8x8 );

2574

2663

COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, i_type, I_4x4 );

2664

COPY2_IF_LT( i_cost, analysis.i_satd_pcm, i_type, I_PCM );

2575

2665

2576

2666

h->mb.i_type = i_type;

2577

2667

h->mb.i_partition = i_partition;

2578

2668

2579

if( h->mb.i_subpel_refine >= 7 && IS_INTRA( i_type ) )

2669

if( analysis.i_mbrd >= 2 && IS_INTRA( i_type ) && i_type != I_PCM )

2580

2670

x264_intra_rd_refine( h, &analysis );

2581

else if( h->param.analyse.b_bidir_me )

2582

refine_bidir( h, &analysis );

2671

if( h->mb.i_subpel_refine >= 5 )

2672

x264_refine_bidir( h, &analysis );

2673

2674

if( analysis.i_mbrd >= 2 && i_type > B_DIRECT && i_type < B_SKIP )

2675

{

2676

const int i_biweight = h->mb.bipred_weight[analysis.l0.i_ref][analysis.l1.i_ref];

2677

x264_analyse_update_cache( h, &analysis );

2678

2679

if( i_partition == D_16x16 )

2680

{

2681

if( i_type == B_L0_L0 )

2682

x264_me_refine_qpel_rd( h, &analysis.l0.me16x16, analysis.i_lambda2, 0, 0 );

2683

else if( i_type == B_L1_L1 )

2684

x264_me_refine_qpel_rd( h, &analysis.l1.me16x16, analysis.i_lambda2, 0, 1 );

2685

else if( i_type == B_BI_BI )

2686

x264_me_refine_bidir_rd( h, &analysis.l0.me16x16, &analysis.l1.me16x16, i_biweight, 0, analysis.i_lambda2 );

2687

}

2688

else if( i_partition == D_16x8 )

2689

{

2690

for( i = 0; i < 2; i++ )

2691

{

2692

h->mb.i_sub_partition[i*2] = h->mb.i_sub_partition[i*2+1] = analysis.i_mb_partition16x8[i];

2693

if( analysis.i_mb_partition16x8[i] == D_L0_8x8 )

2694

x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[i], analysis.i_lambda2, i*8, 0 );

2695

else if( analysis.i_mb_partition16x8[i] == D_L1_8x8 )

2696

x264_me_refine_qpel_rd( h, &analysis.l1.me16x8[i], analysis.i_lambda2, i*8, 1 );

2697

else if( analysis.i_mb_partition16x8[i] == D_BI_8x8 )

2698

x264_me_refine_bidir_rd( h, &analysis.l0.me16x8[i], &analysis.l1.me16x8[i], i_biweight, i*2, analysis.i_lambda2 );

2699

}

2700

}

2701

else if( i_partition == D_8x16 )

2702

{

2703

for( i = 0; i < 2; i++ )

2704

{

2705

h->mb.i_sub_partition[i] = h->mb.i_sub_partition[i+2] = analysis.i_mb_partition8x16[i];

2706

if( analysis.i_mb_partition8x16[i] == D_L0_8x8 )

2707

x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[i], analysis.i_lambda2, i*4, 0 );

2708

else if( analysis.i_mb_partition8x16[i] == D_L1_8x8 )

2709

x264_me_refine_qpel_rd( h, &analysis.l1.me8x16[i], analysis.i_lambda2, i*4, 1 );

2710

else if( analysis.i_mb_partition8x16[i] == D_BI_8x8 )

2711

x264_me_refine_bidir_rd( h, &analysis.l0.me8x16[i], &analysis.l1.me8x16[i], i_biweight, i, analysis.i_lambda2 );

2712

}

2713

}

2714

else if( i_partition == D_8x8 )

2715

{

2716

for( i = 0; i < 4; i++ )

2717

{

2718

if( h->mb.i_sub_partition[i] == D_L0_8x8 )

2719

x264_me_refine_qpel_rd( h, &analysis.l0.me8x8[i], analysis.i_lambda2, i*4, 0 );

2720

else if( h->mb.i_sub_partition[i] == D_L1_8x8 )

2721

x264_me_refine_qpel_rd( h, &analysis.l1.me8x8[i], analysis.i_lambda2, i*4, 1 );

2722

else if( h->mb.i_sub_partition[i] == D_BI_8x8 )

2723

x264_me_refine_bidir_rd( h, &analysis.l0.me8x8[i], &analysis.l1.me8x8[i], i_biweight, i, analysis.i_lambda2 );

2724

}

2725

}

2726

}

2583

2727

}

2584

2728

}

2585

2729

2586

2730

x264_analyse_update_cache( h, &analysis );

2587

2731

2588

if( !analysis.b_mbrd )

2732

if( !analysis.i_mbrd )

2589

2733

x264_mb_analyse_transform( h );

2590

2734

2591

2735

h->mb.b_trellis = h->param.analyse.i_trellis;

2592

h->mb.b_noise_reduction = h->param.analyse.i_noise_reduction;

2736

h->mb.b_noise_reduction = !!h->param.analyse.i_noise_reduction;

2737

if( !IS_SKIP(h->mb.i_type) && h->mb.i_psy_trellis && h->param.analyse.i_trellis == 1 )

2738

x264_psy_trellis_init( h, 0 );

2593

2739

if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction )

2594

2740

h->mb.i_skip_intra = 0;

2595

2741

}

2618

2764

x264_mb_analyse_intra_chroma( h, a );

2619

2765

break;

2620

2766

2767

case I_PCM:

2768

break;

2769

2621

2770

case P_L0:

2622

2771

switch( h->mb.i_partition )

2623

2772

{

2624

2773

case D_16x16:

2625

2774

x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.me16x16.i_ref );

2626

x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );

2775

x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );

2627

2776

break;

2628

2777

2629

2778

case D_16x8:

2630

2779

x264_macroblock_cache_ref( h, 0, 0, 4, 2, 0, a->l0.me16x8[0].i_ref );

2631

2780

x264_macroblock_cache_ref( h, 0, 2, 4, 2, 0, a->l0.me16x8[1].i_ref );

2632

x264_macroblock_cache_mv ( h, 0, 0, 4, 2, 0, a->l0.me16x8[0].mv[0], a->l0.me16x8[0].mv[1] );

2633

x264_macroblock_cache_mv ( h, 0, 2, 4, 2, 0, a->l0.me16x8[1].mv[0], a->l0.me16x8[1].mv[1] );

2781

x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 2, 0, a->l0.me16x8[0].mv );

2782

x264_macroblock_cache_mv_ptr( h, 0, 2, 4, 2, 0, a->l0.me16x8[1].mv );

2634

2783

break;

2635

2784

2636

2785

case D_8x16:

2637

2786

x264_macroblock_cache_ref( h, 0, 0, 2, 4, 0, a->l0.me8x16[0].i_ref );

2638

2787

x264_macroblock_cache_ref( h, 2, 0, 2, 4, 0, a->l0.me8x16[1].i_ref );

2639

x264_macroblock_cache_mv ( h, 0, 0, 2, 4, 0, a->l0.me8x16[0].mv[0], a->l0.me8x16[0].mv[1] );

2640

x264_macroblock_cache_mv ( h, 2, 0, 2, 4, 0, a->l0.me8x16[1].mv[0], a->l0.me8x16[1].mv[1] );

2788

x264_macroblock_cache_mv_ptr( h, 0, 0, 2, 4, 0, a->l0.me8x16[0].mv );

2789

x264_macroblock_cache_mv_ptr( h, 2, 0, 2, 4, 0, a->l0.me8x16[1].mv );

2641

2790

break;

2642

2791

2643

2792

default:

2659

2808

{

2660

2809

h->mb.i_partition = D_16x16;

2661

2810

x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );

2662

x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, h->mb.cache.pskip_mv[0],

2663

h->mb.cache.pskip_mv[1] );

2811

x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, h->mb.cache.pskip_mv );

2664

2812

break;

2665

2813

}

2666

2814

2686

2834

{

2687

2835

case B_L0_L0:

2688

2836

x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );

2689

x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );

2837

x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );

2690

2838

2691

2839

x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, -1 );

2692

x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, 0, 0 );

2693

x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 1, 0, 0 );

2840

x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, 0 );

2841

x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 1, 0 );

2694

2842

break;

2695

2843

case B_L1_L1:

2696

2844

x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, -1 );

2697

x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, 0, 0 );

2698

x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 0, 0, 0 );

2845

x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, 0 );

2846

x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 0, 0 );

2699

2847

2700

2848

x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );

2701

x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv[0], a->l1.me16x16.mv[1] );

2849

x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv );

2702

2850

break;

2703

2851

case B_BI_BI:

2704

2852

x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );

2705

x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );

2853

x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );

2706

2854

2707

2855

x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );

2708

x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv[0], a->l1.me16x16.mv[1] );

2856

x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv );

2709

2857

break;

2710

2858

}

2711

2859

break;

Older »