358
#define MAX_LENGTH (X264_BFRAME_MAX*4)
366
/* If MB-tree changes the quantizers, we need to recalculate the frame cost without
 * re-running lookahead.
 *
 * Visits every macroblock of frames[b], scales its cached lowres cost by the
 * macroblock's qp offset, rebuilds the per-row cost sums and adds the scaled
 * cost to the running frame score.
 * NOTE(review): the braces, the declaration of i_score and the return
 * statement are not part of the text under review. */
368
static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames,
369
int p0, int p1, int b )
372
/* Row totals stored for the (b-p0, p1-b) reference distances; rewritten below. */
int *row_satd = frames[b]->i_row_satds[b-p0][p1-b];
374
/* Rows, and the columns inside each row, are walked from the last index down to 0. */
for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
376
row_satd[ h->mb.i_mb_y ] = 0;
377
for( h->mb.i_mb_x = h->sps->i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- )
379
int i_mb_xy = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
380
int i_mb_cost = frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy];
381
float qp_adj = frames[b]->f_qp_offset[i_mb_xy];
382
/* Fixed-point scale with rounding: multiply, add 128, shift right by 8.
 * NOTE(review): x264_exp2fix8 presumably returns 2^x scaled by 256 -- confirm. */
i_mb_cost = (i_mb_cost * x264_exp2fix8(qp_adj*(-1.f/6.f)) + 128) >> 8;
383
row_satd[ h->mb.i_mb_y ] += i_mb_cost;
384
/* Only interior macroblocks are added to the frame score, unless the frame is
 * at most 2 macroblocks wide or tall, in which case every macroblock is added. */
if( (h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 &&
385
h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1) ||
386
h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
388
i_score += i_mb_cost;
395
/* Propagate the MB-tree cost of frames[b] back onto its reference frames.
 *
 * For every macroblock of frames[b] whose inter cost is lower than its intra
 * cost, the inherited share of (intra cost + already propagated cost) is
 * distributed over the up to four macroblocks each motion vector points into
 * and added to i_propagate_cost of frames[p0] (list 0) and frames[p1] (list 1).
 *
 *   h       encoder context; h->mb.i_mb_x / h->mb.i_mb_y serve as loop counters
 *   frames  lookahead frame array
 *   p0, p1  indices of the list-0 and list-1 reference frames
 *   b       index of the frame whose costs are propagated
 *
 * NOTE(review): the braces, the declarations of mv/list/x/y/idx1 and the guard
 * around the list-1 MV load were absent from the reviewed text and have been
 * restored from the way those names are used -- confirm against the
 * authoritative source. */
static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b )
{
    x264_frame_t *refs[2] = {frames[p0],frames[p1]};
    /* Position of b between p0 and p1 in 1/256 units, rounded to nearest.
     * The division is only performed when the references differ; with
     * coinciding references the midpoint (128) is used instead, so the
     * divisor can never be zero. */
    int dist_scale_factor = p1 != p0 ? ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0) : 128;
    /* Weight (out of 64) applied to list 0 when both lists are used. */
    int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;

    for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
    {
        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
        {
            int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
            int inter_cost = frames[b]->lowres_costs[b-p0][p1-b][mb_index];
            int intra_cost = (frames[b]->i_intra_cost[mb_index] * frames[b]->i_inv_qscale_factor[mb_index]+128)>>8;
            int lists_used = frames[b]->lowres_inter_types[b-p0][p1-b][mb_index];

            /* Don't propagate for an intra block. */
            if( inter_cost < intra_cost )
            {
                int mv[2][2] = {{0}};
                int list;
                /* The approximate amount of data that this block contains. */
                int propagate_amount = intra_cost + frames[b]->i_propagate_cost[mb_index];

                /* Keep only the fraction (intra-inter)/intra of that amount.
                 * Done inside the guard: the result is unused otherwise, and
                 * here intra_cost is non-zero for any non-negative inter_cost.
                 * Divide by 64 for per-pixel summing. */
                propagate_amount = (((uint64_t)propagate_amount*(intra_cost-inter_cost)) / intra_cost + 32) >> 6;

                mv[0][0] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][0];
                mv[0][1] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][1];
                /* The list-1 index p1-b-1 is only valid when b precedes p1. */
                if( b < p1 )
                {
                    mv[1][0] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][0];
                    mv[1][1] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][1];
                }

                /* Follow the MVs to the previous frame(s). */
                for( list = 0; list < 2; list++ )
                    if( (lists_used >> list)&1 )
                    {
                        int x = mv[list][0];
                        int y = mv[list][1];
                        int listamount = propagate_amount;
                        /* Whole-macroblock part of the MV (32 MV units per macroblock). */
                        int mbx = (x>>5)+h->mb.i_mb_x;
                        int mby = ((y>>5)+h->mb.i_mb_y);
                        /* The 2x2 group of macroblocks the displaced block overlaps. */
                        int idx0 = mbx + mby*h->mb.i_mb_stride;
                        int idx1 = idx0 + 1;
                        int idx2 = idx0 + h->mb.i_mb_stride;
                        int idx3 = idx0 + h->mb.i_mb_stride + 1;
                        /* Bilinear weights from the 5-bit remainders; they sum to 1024. */
                        int idx0weight = (32-(y&31))*(32-(x&31));
                        int idx1weight = (32-(y&31))*(x&31);
                        int idx2weight = (y&31)*(32-(x&31));
                        int idx3weight = (y&31)*(x&31);

                        /* Apply bipred weighting. */
                        if( lists_used == 3 )
                            listamount = (listamount * (list?(64-i_bipred_weight):i_bipred_weight) + 32) >> 6;

/* Saturating add: the accumulated cost is capped at 65535. */
#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<16)-1)

                        /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
                         * be counted. */
                        if( mbx < h->sps->i_mb_width-1 && mby < h->sps->i_mb_height-1 && mbx >= 0 && mby >= 0 )
                        {
                            CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 );
                            CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 );
                            CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 );
                            CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 );
                        }
                        else /* Check offsets individually */
                        {
                            if( mbx < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx >= 0 && mby >= 0 )
                                CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 );
                            if( mbx+1 < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx+1 >= 0 && mby >= 0 )
                                CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 );
                            if( mbx < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx >= 0 && mby+1 >= 0 )
                                CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 );
                            if( mbx+1 < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
                                CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 );
                        }
                    }
            }
        }
    }
}
477
/* Macroblock-tree driver: refreshes frame costs, propagates them between
 * frames, and finally converts the accumulated propagate cost into
 * per-macroblock qp offsets.
 * NOTE(review): the braces and several statements of this function (loop
 * bodies, the initialisation of i and last_nonb, any conditions around the
 * calls) are not part of the text under review; the notes below describe only
 * the statements that are shown. */
static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra )
479
int i, idx = !b_intra;
480
/* Only cur_nonb is initialised by this declaration. */
int last_nonb, cur_nonb = 1;
482
x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 );
485
while( i > 0 && frames[i]->i_type == X264_TYPE_B )
492
/* Zero the propagate-cost accumulator of frames[last_nonb]. */
memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
496
while( frames[cur_nonb]->i_type == X264_TYPE_B && cur_nonb > 0 )
500
/* Cost frames[last_nonb] against frames[cur_nonb], clear the accumulator of
 * frames[cur_nonb], then propagate into it. */
x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb, 0 );
501
memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
502
x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, last_nonb );
503
while( frames[i]->i_type == X264_TYPE_B && i > 0 )
505
/* Same three steps for frames[i], using cur_nonb and last_nonb as references. */
x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i, 0 );
506
memset( frames[i]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
507
x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, i );
510
last_nonb = cur_nonb;
514
/* Convert the accumulated propagate cost of frames[last_nonb] into a qp offset
 * for each macroblock. */
for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
516
for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
518
int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
519
int intra_cost = (frames[last_nonb]->i_intra_cost[mb_index] * frames[last_nonb]->i_inv_qscale_factor[mb_index]+128)>>8;
523
int propagate_cost = frames[last_nonb]->i_propagate_cost[mb_index];
524
/* Difference of logs, i.e. log of (intra_cost + propagate_cost) / intra_cost.
 * NOTE(review): x264_log2 is presumably a base-2 logarithm -- confirm. */
float log2_ratio = x264_log2(intra_cost + propagate_cost) - x264_log2(intra_cost);
525
/* Allow the constant to be adjusted via qcompress, since the two
 * concepts are very similar. */
frames[last_nonb]->f_qp_offset[mb_index] -= 5.0 * (1.0 - h->param.rc.f_qcompress) * log2_ratio;
360
533
static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, char *path, int threshold )
497
661
frames[j+1] = h->frames.next[j];
498
662
keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->frames.i_last_idr - 1;
499
663
num_frames = X264_MIN( j, keyint_limit );
500
if( num_frames == 0 )
665
if( num_frames == 0 && (!j || !h->param.rc.b_mb_tree) )
503
668
x264_lowres_context_init( h, &a );
504
669
idr_frame_type = frames[1]->i_frame - h->frames.i_last_idr >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
506
if( num_frames == 1 )
671
if( num_frames == 1 && !h->param.rc.b_mb_tree )
509
673
frames[1]->i_type = X264_TYPE_P;
510
674
if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
511
675
frames[1]->i_type = idr_frame_type;
515
if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
518
int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
519
if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
521
frames[1]->i_type = idr_frame_type;
524
num_bframes = x264_slicetype_path_search( h, &a, frames, num_frames, max_bframes, num_frames-max_bframes );
525
assert(num_bframes < num_frames);
679
/* This is important psy-wise: if we have a non-scenecut keyframe,
680
* there will be significant visual artifacts if the frames just before
681
* go down in quality due to being referenced less, despite it being
682
* more RD-optimal. */
683
if( h->param.analyse.b_psy && h->param.rc.b_mb_tree )
686
char best_paths[X264_LOOKAHEAD_MAX][X264_LOOKAHEAD_MAX] = {"","P"};
689
int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
690
int num_analysed_frames = num_frames;
692
if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
694
frames[1]->i_type = idr_frame_type;
698
if( h->param.i_bframe )
700
if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
702
/* Perform the frametype analysis. */
703
for( n = 2; n < num_frames-1; n++ )
704
x264_slicetype_path( h, &a, frames, n, max_bframes, num_frames-max_bframes, best_paths );
705
num_bframes = strspn( best_paths[num_frames-2], "B" );
706
/* Load the results of the analysis into the frame types. */
707
for( j = 1; j < num_frames; j++ )
708
frames[j]->i_type = best_paths[num_frames-2][j-1] == 'B' ? X264_TYPE_B : X264_TYPE_P;
709
frames[num_frames]->i_type = X264_TYPE_P;
711
else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST )
713
for( i = 0; i < num_frames-(2-!i); )
715
cost2p1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+2, 1 );
716
if( frames[i+2]->i_intra_mbs[2] > i_mb_count / 2 )
718
frames[i+1]->i_type = X264_TYPE_P;
719
frames[i+2]->i_type = X264_TYPE_P;
724
cost1b1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+1, 0 );
725
cost1p0 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+1, i+1, 0 );
726
cost2p0 = x264_slicetype_frame_cost( h, &a, frames, i+1, i+2, i+2, 0 );
728
if( cost1p0 + cost2p0 < cost1b1 + cost2p1 )
730
frames[i+1]->i_type = X264_TYPE_P;
731
frames[i+2]->i_type = X264_TYPE_P;
736
// arbitrary and untuned
737
#define INTER_THRESH 300
738
#define P_SENS_BIAS (50 - h->param.i_bframe_bias)
739
frames[i+1]->i_type = X264_TYPE_B;
740
frames[i+2]->i_type = X264_TYPE_P;
742
for( j = i+2; j <= X264_MIN( h->param.i_bframe, num_frames-2 ); j++ )
744
int pthresh = X264_MAX(INTER_THRESH - P_SENS_BIAS * (j-i-1), INTER_THRESH/10);
745
int pcost = x264_slicetype_frame_cost( h, &a, frames, i+0, j+1, j+1, 1 );
747
if( pcost > pthresh*i_mb_count || frames[j+1]->i_intra_mbs[j-i+1] > i_mb_count/3 )
749
frames[j]->i_type = X264_TYPE_P;
753
frames[j]->i_type = X264_TYPE_B;
757
frames[i+!i]->i_type = X264_TYPE_P;
759
while( num_bframes < num_frames && frames[num_bframes+1]->i_type == X264_TYPE_B )
764
num_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
765
for( j = 1; j < num_frames; j++ )
766
frames[j]->i_type = (j%(num_bframes+1)) ? X264_TYPE_B : X264_TYPE_P;
767
frames[num_frames]->i_type = X264_TYPE_P;
770
/* Check scenecut on the first minigop. */
527
771
for( j = 1; j < num_bframes+1; j++ )
529
772
if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
531
774
frames[j]->i_type = X264_TYPE_P;
534
frames[j]->i_type = X264_TYPE_B;
536
frames[num_bframes+1]->i_type = X264_TYPE_P;
538
else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST )
540
cost2p1 = x264_slicetype_frame_cost( h, &a, frames, 0, 2, 2, 1 );
541
if( frames[2]->i_intra_mbs[2] > i_mb_count / 2 )
544
cost1b1 = x264_slicetype_frame_cost( h, &a, frames, 0, 2, 1, 0 );
545
cost1p0 = x264_slicetype_frame_cost( h, &a, frames, 0, 1, 1, 0 );
546
cost2p0 = x264_slicetype_frame_cost( h, &a, frames, 1, 2, 2, 0 );
548
if( cost1p0 + cost2p0 < cost1b1 + cost2p1 )
551
// arbitrary and untuned
552
#define INTER_THRESH 300
553
#define P_SENS_BIAS (50 - h->param.i_bframe_bias)
554
frames[1]->i_type = X264_TYPE_B;
556
for( j = 2; j <= X264_MIN( h->param.i_bframe, num_frames-1 ); j++ )
558
int pthresh = X264_MAX(INTER_THRESH - P_SENS_BIAS * (j-1), INTER_THRESH/10);
559
int pcost = x264_slicetype_frame_cost( h, &a, frames, 0, j+1, j+1, 1 );
561
if( pcost > pthresh*i_mb_count || frames[j+1]->i_intra_mbs[j+1] > i_mb_count/3 )
563
frames[j]->i_type = X264_TYPE_P;
775
num_analysed_frames = j;
567
frames[j]->i_type = X264_TYPE_B;
779
reset_start = keyframe ? 1 : X264_MIN( num_bframes+2, num_analysed_frames+1 );
572
int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
573
if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
575
frames[1]->i_type = idr_frame_type;
579
for( j = 1; j < max_bframes+1; j++ )
581
if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
783
for( j = 1; j < num_frames; j++ )
784
frames[j]->i_type = X264_TYPE_P;
785
reset_start = !keyframe + 1;
788
/* Perform the actual macroblock tree analysis.
789
* Don't go farther than the lookahead parameter; this helps in short GOPs. */
790
if( h->param.rc.b_mb_tree )
791
x264_macroblock_tree( h, &a, frames, X264_MIN(num_analysed_frames, h->param.rc.i_lookahead), keyframe );
793
/* Enforce keyframe limit. */
794
if( h->param.i_bframe )
795
for( j = 0; j <= num_bframes; j++ )
796
if( j+1 > keyint_limit )
583
frames[j]->i_type = X264_TYPE_P;
799
frames[j]->i_type = X264_TYPE_P;
800
frames[j+1]->i_type = idr_frame_type;
586
frames[j]->i_type = X264_TYPE_B;
588
frames[max_bframes+1]->i_type = X264_TYPE_P;
805
/* Restore frametypes for all frames that haven't actually been decided yet. */
806
for( j = reset_start; j <= num_frames; j++ )
807
frames[j]->i_type = X264_TYPE_AUTO;
592
810
void x264_slicetype_decide( x264_t *h )