513
ysh = 3 - (chroma_idc == 2 /* yuv422 */);
514
if(chroma_idc == 1 /* yuv420 */ && MB_FIELD){
515
515
// chroma offset when predicting from a field of opposite parity
516
my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
516
my += 2 * ((s->mb_y & 1) - (pic->f.reference - 1));
517
517
emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
519
src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
520
src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
520
src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize;
521
src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize;
523
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
524
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
525
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
526
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
524
527
src_cb= s->edge_emu_buffer;
526
chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
529
chroma_op(dest_cb, src_cb, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
530
mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7);
529
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
533
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
534
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
535
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
530
536
src_cr= s->edge_emu_buffer;
532
chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
538
chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
539
mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7);
535
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
536
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
537
int x_offset, int y_offset,
538
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
539
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
540
int list0, int list1, int pixel_shift, int chroma444){
542
static av_always_inline void
543
mc_part_std(H264Context *h, int n, int square, int height, int delta,
544
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
545
int x_offset, int y_offset,
546
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
547
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
548
int list0, int list1, int pixel_shift, int chroma_idc)
541
550
MpegEncContext * const s = &h->s;
542
551
qpel_mc_func *qpix_op= qpix_put;
543
552
h264_chroma_mc_func chroma_op= chroma_put;
545
554
dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
555
if (chroma_idc == 3 /* yuv444 */) {
547
556
dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
548
557
dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
558
} else if (chroma_idc == 2 /* yuv422 */) {
559
dest_cb += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
560
dest_cr += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
561
} else /* yuv420 */ {
550
562
dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
551
563
dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
567
579
Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
568
mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
580
mc_dir_part(h, ref, n, square, height, delta, 1,
569
581
dest_y, dest_cb, dest_cr, x_offset, y_offset,
570
qpix_op, chroma_op, pixel_shift, chroma444);
582
qpix_op, chroma_op, pixel_shift, chroma_idc);
574
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
575
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
576
int x_offset, int y_offset,
577
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
578
h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
579
h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
580
int list0, int list1, int pixel_shift, int chroma444){
586
static av_always_inline void
587
mc_part_weighted(H264Context *h, int n, int square, int height, int delta,
588
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
589
int x_offset, int y_offset,
590
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
591
h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
592
h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
593
int list0, int list1, int pixel_shift, int chroma_idc){
581
594
MpegEncContext * const s = &h->s;
583
597
dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
598
if (chroma_idc == 3 /* yuv444 */) {
599
chroma_height = height;
585
600
chroma_weight_avg = luma_weight_avg;
586
601
chroma_weight_op = luma_weight_op;
587
602
dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
588
603
dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
604
} else if (chroma_idc == 2 /* yuv422 */) {
605
chroma_height = height;
606
dest_cb += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
607
dest_cr += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
608
} else /* yuv420 */ {
609
chroma_height = height >> 1;
590
610
dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
591
611
dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
602
622
int refn0 = h->ref_cache[0][ scan8[n] ];
603
623
int refn1 = h->ref_cache[1][ scan8[n] ];
605
mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
625
mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
606
626
dest_y, dest_cb, dest_cr,
607
x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
608
mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
627
x_offset, y_offset, qpix_put, chroma_put,
628
pixel_shift, chroma_idc);
629
mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
609
630
tmp_y, tmp_cb, tmp_cr,
610
x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
631
x_offset, y_offset, qpix_put, chroma_put,
632
pixel_shift, chroma_idc);
612
634
if(h->use_weight == 2){
613
635
int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
614
636
int weight1 = 64 - weight0;
615
luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
616
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
617
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
637
luma_weight_avg( dest_y, tmp_y, h-> mb_linesize,
638
height, 5, weight0, weight1, 0);
639
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
640
chroma_height, 5, weight0, weight1, 0);
641
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
642
chroma_height, 5, weight0, weight1, 0);
619
luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
644
luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, h->luma_log2_weight_denom,
620
645
h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
621
646
h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
622
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
647
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
623
648
h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
624
649
h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
625
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
650
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
626
651
h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
627
652
h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
630
655
int list = list1 ? 1 : 0;
631
656
int refn = h->ref_cache[list][ scan8[n] ];
632
657
Picture *ref= &h->ref_list[list][refn];
633
mc_dir_part(h, ref, n, square, chroma_height, delta, list,
658
mc_dir_part(h, ref, n, square, height, delta, list,
634
659
dest_y, dest_cb, dest_cr, x_offset, y_offset,
635
qpix_put, chroma_put, pixel_shift, chroma444);
660
qpix_put, chroma_put, pixel_shift, chroma_idc);
637
luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
662
luma_weight_op(dest_y, h->mb_linesize, height, h->luma_log2_weight_denom,
638
663
h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
639
664
if(h->use_weight_chroma){
640
chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
665
chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
641
666
h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
642
chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
667
chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
643
668
h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
648
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
649
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
650
int x_offset, int y_offset,
651
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
652
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
653
h264_weight_func *weight_op, h264_biweight_func *weight_avg,
654
int list0, int list1, int pixel_shift, int chroma444){
673
static av_always_inline void
674
mc_part(H264Context *h, int n, int square, int height, int delta,
675
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
676
int x_offset, int y_offset,
677
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
678
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
679
h264_weight_func *weight_op, h264_biweight_func *weight_avg,
680
int list0, int list1, int pixel_shift, int chroma_idc)
655
682
if((h->use_weight==2 && list0 && list1
656
683
&& (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
657
684
|| h->use_weight==1)
658
mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
685
mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
659
686
x_offset, y_offset, qpix_put, chroma_put,
660
weight_op[0], weight_op[3], weight_avg[0],
661
weight_avg[3], list0, list1, pixel_shift, chroma444);
687
weight_op[0], weight_op[1], weight_avg[0],
688
weight_avg[1], list0, list1, pixel_shift, chroma_idc);
663
mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
690
mc_part_std(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
664
691
x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
665
chroma_avg, list0, list1, pixel_shift, chroma444);
692
chroma_avg, list0, list1, pixel_shift, chroma_idc);
668
static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){
695
static av_always_inline void
696
prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma_idc)
669
698
/* fetch pixels for estimated mv 4 macroblocks ahead
670
699
* optimized for 64-byte cache lines */
671
700
MpegEncContext * const s = &h->s;
690
719
qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
691
720
qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
692
721
h264_weight_func *weight_op, h264_biweight_func *weight_avg,
693
int pixel_shift, int chroma444){
722
int pixel_shift, int chroma_idc)
694
724
MpegEncContext * const s = &h->s;
695
725
const int mb_xy= h->mb_xy;
696
const int mb_type= s->current_picture.mb_type[mb_xy];
726
const int mb_type = s->current_picture.f.mb_type[mb_xy];
698
728
assert(IS_INTER(mb_type));
700
if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
730
if(HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
701
731
await_references(h);
702
prefetch_motion(h, 0, pixel_shift, chroma444);
732
prefetch_motion(h, 0, pixel_shift, chroma_idc);
704
734
if(IS_16X16(mb_type)){
705
mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
735
mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
706
736
qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
707
737
weight_op, weight_avg,
708
738
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
709
pixel_shift, chroma444);
739
pixel_shift, chroma_idc);
710
740
}else if(IS_16X8(mb_type)){
711
mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
741
mc_part(h, 0, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
712
742
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
713
&weight_op[1], &weight_avg[1],
743
weight_op, weight_avg,
714
744
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
715
pixel_shift, chroma444);
716
mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
745
pixel_shift, chroma_idc);
746
mc_part(h, 8, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
717
747
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
718
&weight_op[1], &weight_avg[1],
748
weight_op, weight_avg,
719
749
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
720
pixel_shift, chroma444);
750
pixel_shift, chroma_idc);
721
751
}else if(IS_8X16(mb_type)){
722
mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
752
mc_part(h, 0, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
723
753
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
724
&weight_op[2], &weight_avg[2],
754
&weight_op[1], &weight_avg[1],
725
755
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
726
pixel_shift, chroma444);
727
mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
756
pixel_shift, chroma_idc);
757
mc_part(h, 4, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
728
758
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
729
&weight_op[2], &weight_avg[2],
759
&weight_op[1], &weight_avg[1],
730
760
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
731
pixel_shift, chroma444);
761
pixel_shift, chroma_idc);
741
771
int y_offset= (i&2)<<1;
743
773
if(IS_SUB_8X8(sub_mb_type)){
744
mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
774
mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
745
775
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
746
&weight_op[3], &weight_avg[3],
776
&weight_op[1], &weight_avg[1],
747
777
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
748
pixel_shift, chroma444);
778
pixel_shift, chroma_idc);
749
779
}else if(IS_SUB_8X4(sub_mb_type)){
750
mc_part(h, n , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
751
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
752
&weight_op[4], &weight_avg[4],
753
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
754
pixel_shift, chroma444);
755
mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
756
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
757
&weight_op[4], &weight_avg[4],
758
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
759
pixel_shift, chroma444);
780
mc_part(h, n , 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
781
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
782
&weight_op[1], &weight_avg[1],
783
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
784
pixel_shift, chroma_idc);
785
mc_part(h, n+2, 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
786
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
787
&weight_op[1], &weight_avg[1],
788
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
789
pixel_shift, chroma_idc);
760
790
}else if(IS_SUB_4X8(sub_mb_type)){
761
mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
762
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
763
&weight_op[5], &weight_avg[5],
764
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
765
pixel_shift, chroma444);
766
mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
767
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
768
&weight_op[5], &weight_avg[5],
769
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
770
pixel_shift, chroma444);
791
mc_part(h, n , 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
792
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
793
&weight_op[2], &weight_avg[2],
794
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
795
pixel_shift, chroma_idc);
796
mc_part(h, n+1, 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
797
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
798
&weight_op[2], &weight_avg[2],
799
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
800
pixel_shift, chroma_idc);
773
803
assert(IS_SUB_4X4(sub_mb_type));
774
804
for(j=0; j<4; j++){
775
805
int sub_x_offset= x_offset + 2*(j&1);
776
806
int sub_y_offset= y_offset + (j&2);
777
mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
807
mc_part(h, n+j, 1, 4, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
778
808
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
779
&weight_op[6], &weight_avg[6],
809
&weight_op[2], &weight_avg[2],
780
810
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
781
pixel_shift, chroma444);
811
pixel_shift, chroma_idc);
787
prefetch_motion(h, 1, pixel_shift, chroma444);
790
#define hl_motion_fn(sh, bits) \
791
static av_always_inline void hl_motion_ ## bits(H264Context *h, \
793
uint8_t *dest_cb, uint8_t *dest_cr, \
794
qpel_mc_func (*qpix_put)[16], \
795
h264_chroma_mc_func (*chroma_put), \
796
qpel_mc_func (*qpix_avg)[16], \
797
h264_chroma_mc_func (*chroma_avg), \
798
h264_weight_func *weight_op, \
799
h264_biweight_func *weight_avg, \
802
hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
803
qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \
817
prefetch_motion(h, 1, pixel_shift, chroma_idc);
820
static av_always_inline void
821
hl_motion_420(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
822
qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
823
qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
824
h264_weight_func *weight_op, h264_biweight_func *weight_avg,
827
hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
828
qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 1);
831
static av_always_inline void
832
hl_motion_422(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
833
qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
834
qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
835
h264_weight_func *weight_op, h264_biweight_func *weight_avg,
838
hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
839
qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 2);
808
842
static void free_tables(H264Context *h, int free_rbsp){
1406
1450
assert(pics <= MAX_DELAYED_PIC_COUNT);
1408
1452
h->delayed_pic[pics++] = cur;
1409
if(cur->reference == 0)
1410
cur->reference = DELAYED_PIC_REF;
1453
if (cur->f.reference == 0)
1454
cur->f.reference = DELAYED_PIC_REF;
1456
/* Frame reordering. This code takes pictures from coding order and sorts
1457
* them by their incremental POC value into display order. It supports POC
1458
* gaps, MMCO reset codes and random resets.
1459
* A "display group" can start either with an IDR frame (f.key_frame = 1),
1460
* and/or can be closed down with a MMCO reset code. In sequences where
1461
* there is no delay, we can't detect that (since the frame was already
1462
* output to the user), so we also set h->mmco_reset to detect the MMCO
1464
* FIXME: if we detect insufficient delays (as per s->avctx->has_b_frames),
1465
* we increase the delay between input and output. All frames affected by
1466
* the lag (e.g. those that should have been output before another frame
1467
* that we already returned to the user) will be dropped. This is a bug
1468
* that we will fix later. */
1469
for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
1470
cnt += out->poc < h->last_pocs[i];
1471
invalid += out->poc == INT_MIN;
1473
if (!h->mmco_reset && !cur->f.key_frame && cnt + invalid == MAX_DELAYED_PIC_COUNT && cnt > 0) {
1476
h->delayed_pic[pics - 2]->mmco_reset = 2;
1478
if (h->mmco_reset || cur->f.key_frame) {
1479
for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
1480
h->last_pocs[i] = INT_MIN;
1482
invalid = MAX_DELAYED_PIC_COUNT;
1412
1484
out = h->delayed_pic[0];
1414
for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
1486
for (i = 1; i < MAX_DELAYED_PIC_COUNT && h->delayed_pic[i] &&
1487
!h->delayed_pic[i-1]->mmco_reset && !h->delayed_pic[i]->f.key_frame; i++)
1415
1489
if(h->delayed_pic[i]->poc < out->poc){
1416
1490
out = h->delayed_pic[i];
1419
if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
1420
h->next_outputed_poc= INT_MIN;
1421
out_of_order = out->poc < h->next_outputed_poc;
1494
if (s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->f.key_frame || h->mmco_reset))
1495
h->next_outputed_poc = INT_MIN;
1496
out_of_order = !out->f.key_frame && !h->mmco_reset && (out->poc < h->next_outputed_poc);
1423
1498
if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
1425
else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
1427
((h->next_outputed_poc != INT_MIN && out->poc > h->next_outputed_poc + 2)
1428
|| cur->pict_type == AV_PICTURE_TYPE_B)))
1500
else if (out_of_order && pics-1 == s->avctx->has_b_frames &&
1501
s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) {
1502
if (invalid + cnt < MAX_DELAYED_PIC_COUNT) {
1503
s->avctx->has_b_frames = FFMAX(s->avctx->has_b_frames, cnt);
1506
} else if (s->low_delay &&
1507
((h->next_outputed_poc != INT_MIN && out->poc > h->next_outputed_poc + 2) ||
1508
cur->f.pict_type == AV_PICTURE_TYPE_B)) {
1430
1509
s->low_delay = 0;
1431
1510
s->avctx->has_b_frames++;
1434
if(out_of_order || pics > s->avctx->has_b_frames){
1435
out->reference &= ~DELAYED_PIC_REF;
1513
if(pics > s->avctx->has_b_frames){
1514
out->f.reference &= ~DELAYED_PIC_REF;
1436
1515
out->owner2 = s; // for frame threading, the owner must be the second field's thread
1437
1516
// or else the first thread can release the picture and reuse it unsafely
1438
1517
for(i=out_idx; h->delayed_pic[i]; i++)
1439
1518
h->delayed_pic[i] = h->delayed_pic[i+1];
1520
memmove(h->last_pocs, &h->last_pocs[1], sizeof(*h->last_pocs) * (MAX_DELAYED_PIC_COUNT - 1));
1521
h->last_pocs[MAX_DELAYED_PIC_COUNT - 1] = cur->poc;
1441
1522
if(!out_of_order && pics > s->avctx->has_b_frames){
1442
1523
h->next_output_pic = out;
1443
if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
1444
h->next_outputed_poc = INT_MIN;
1446
h->next_outputed_poc = out->poc;
1524
if (out->mmco_reset) {
1526
h->next_outputed_poc = out->poc;
1527
h->delayed_pic[out_idx - 1]->mmco_reset = out->mmco_reset;
1529
h->next_outputed_poc = INT_MIN;
1532
if (out_idx == 0 && pics > 1 && h->delayed_pic[0]->f.key_frame) {
1533
h->next_outputed_poc = INT_MIN;
1535
h->next_outputed_poc = out->poc;
1448
1540
av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");
3246
static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncContext * const s, int mb_type, int top_xy,
3247
int left_xy[LEFT_MBS], int top_type, int left_type[LEFT_MBS], int mb_xy, int list)
3249
int b_stride = h->b_stride;
3250
int16_t (*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
3251
int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
3252
if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
3253
if(USES_LIST(top_type, list)){
3254
const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride;
3255
const int b8_xy= 4*top_xy + 2;
3256
int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
3257
AV_COPY128(mv_dst - 1*8, s->current_picture.f.motion_val[list][b_xy + 0]);
3259
ref_cache[1 - 1*8]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 0]];
3261
ref_cache[3 - 1*8]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 1]];
3263
AV_ZERO128(mv_dst - 1*8);
3264
AV_WN32A(&ref_cache[0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3267
if(!IS_INTERLACED(mb_type^left_type[LTOP])){
3268
if(USES_LIST(left_type[LTOP], list)){
3269
const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3;
3270
const int b8_xy= 4*left_xy[LTOP] + 1;
3271
int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[LTOP]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
3272
AV_COPY32(mv_dst - 1 + 0, s->current_picture.f.motion_val[list][b_xy + b_stride*0]);
3273
AV_COPY32(mv_dst - 1 + 8, s->current_picture.f.motion_val[list][b_xy + b_stride*1]);
3274
AV_COPY32(mv_dst - 1 + 16, s->current_picture.f.motion_val[list][b_xy + b_stride*2]);
3275
AV_COPY32(mv_dst - 1 + 24, s->current_picture.f.motion_val[list][b_xy + b_stride*3]);
3277
ref_cache[-1 + 8]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2*0]];
3279
ref_cache[-1 + 24]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2*1]];
3281
AV_ZERO32(mv_dst - 1 + 0);
3282
AV_ZERO32(mv_dst - 1 + 8);
3283
AV_ZERO32(mv_dst - 1 +16);
3284
AV_ZERO32(mv_dst - 1 +24);
3288
ref_cache[-1 + 24]= LIST_NOT_USED;
3293
if(!USES_LIST(mb_type, list)){
3294
fill_rectangle(mv_dst, 4, 4, 8, pack16to32(0,0), 4);
3295
AV_WN32A(&ref_cache[0*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3296
AV_WN32A(&ref_cache[1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3297
AV_WN32A(&ref_cache[2*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3298
AV_WN32A(&ref_cache[3*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3303
int8_t *ref = &s->current_picture.f.ref_index[list][4*mb_xy];
3304
int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
3305
uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
3306
uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]],ref2frm[list][ref[3]])&0x00FF00FF)*0x0101;
3307
AV_WN32A(&ref_cache[0*8], ref01);
3308
AV_WN32A(&ref_cache[1*8], ref01);
3309
AV_WN32A(&ref_cache[2*8], ref23);
3310
AV_WN32A(&ref_cache[3*8], ref23);
3314
int16_t (*mv_src)[2] = &s->current_picture.f.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
3315
AV_COPY128(mv_dst + 8*0, mv_src + 0*b_stride);
3316
AV_COPY128(mv_dst + 8*1, mv_src + 1*b_stride);
3317
AV_COPY128(mv_dst + 8*2, mv_src + 2*b_stride);
3318
AV_COPY128(mv_dst + 8*3, mv_src + 3*b_stride);
3058
* @return non zero if the loop filter can be skiped
3324
* @return non zero if the loop filter can be skipped
3060
3326
static int fill_filter_caches(H264Context *h, int mb_type){
3061
3327
MpegEncContext * const s = &h->s;
3062
3328
const int mb_xy= h->mb_xy;
3063
int top_xy, left_xy[2];
3064
int top_type, left_type[2];
3329
int top_xy, left_xy[LEFT_MBS];
3330
int top_type, left_type[LEFT_MBS];
3066
3334
top_xy = mb_xy - (s->mb_stride << MB_FIELD);
3068
//FIXME deblocking could skip the intra and nnz parts.
3070
3336
/* Wow, what a mess, why didn't they simplify the interlacing & intra
3071
3337
* stuff, I can't imagine that these complex rules are worth it. */
3073
left_xy[1] = left_xy[0] = mb_xy-1;
3339
left_xy[LBOT] = left_xy[LTOP] = mb_xy-1;
3074
3340
if(FRAME_MBAFF){
3075
const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
3341
const int left_mb_field_flag = IS_INTERLACED(s->current_picture.f.mb_type[mb_xy - 1]);
3076
3342
const int curr_mb_field_flag = IS_INTERLACED(mb_type);
3078
3344
if (left_mb_field_flag != curr_mb_field_flag) {
3079
left_xy[0] -= s->mb_stride;
3345
left_xy[LTOP] -= s->mb_stride;
3082
3348
if(curr_mb_field_flag){
3083
top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1);
3349
top_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy] >> 7) & 1) - 1);
3085
3351
if (left_mb_field_flag != curr_mb_field_flag) {
3086
left_xy[1] += s->mb_stride;
3352
left_xy[LBOT] += s->mb_stride;
3091
3357
h->top_mb_xy = top_xy;
3092
h->left_mb_xy[0] = left_xy[0];
3093
h->left_mb_xy[1] = left_xy[1];
3358
h->left_mb_xy[LTOP] = left_xy[LTOP];
3359
h->left_mb_xy[LBOT] = left_xy[LBOT];
3095
3361
//for sufficiently low qp, filtering wouldn't do anything
3096
3362
//this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
3097
3363
int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
3098
int qp = s->current_picture.qscale_table[mb_xy];
3364
int qp = s->current_picture.f.qscale_table[mb_xy];
3099
3365
if(qp <= qp_thresh
3100
&& (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
3101
&& (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
3366
&& (left_xy[LTOP] < 0 || ((qp + s->current_picture.f.qscale_table[left_xy[LTOP]] + 1) >> 1) <= qp_thresh)
3367
&& (top_xy < 0 || ((qp + s->current_picture.f.qscale_table[top_xy ] + 1) >> 1) <= qp_thresh)) {
3102
3368
if(!FRAME_MBAFF)
3104
if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh)
3105
&& (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))
3370
if ((left_xy[LTOP] < 0 || ((qp + s->current_picture.f.qscale_table[left_xy[LBOT] ] + 1) >> 1) <= qp_thresh) &&
3371
(top_xy < s->mb_stride || ((qp + s->current_picture.f.qscale_table[top_xy - s->mb_stride] + 1) >> 1) <= qp_thresh))
3110
top_type = s->current_picture.mb_type[top_xy] ;
3111
left_type[0] = s->current_picture.mb_type[left_xy[0]];
3112
left_type[1] = s->current_picture.mb_type[left_xy[1]];
3376
top_type = s->current_picture.f.mb_type[top_xy];
3377
left_type[LTOP] = s->current_picture.f.mb_type[left_xy[LTOP]];
3378
left_type[LBOT] = s->current_picture.f.mb_type[left_xy[LBOT]];
3113
3379
if(h->deblocking_filter == 2){
3114
if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
3115
if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
3380
if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
3381
if(h->slice_table[left_xy[LBOT]] != h->slice_num) left_type[LTOP]= left_type[LBOT]= 0;
3117
if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
3118
if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
3383
if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
3384
if(h->slice_table[left_xy[LBOT]] == 0xFFFF) left_type[LTOP]= left_type[LBOT] =0;
3120
h->top_type = top_type ;
3121
h->left_type[0]= left_type[0];
3122
h->left_type[1]= left_type[1];
3386
h->top_type = top_type;
3387
h->left_type[LTOP]= left_type[LTOP];
3388
h->left_type[LBOT]= left_type[LBOT];
3124
3390
if(IS_INTRA(mb_type))
3127
AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]);
3128
AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]);
3129
AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]);
3130
AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]);
3393
fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 0);
3394
if(h->list_count == 2)
3395
fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 1);
3397
nnz = h->non_zero_count[mb_xy];
3398
nnz_cache = h->non_zero_count_cache;
3399
AV_COPY32(&nnz_cache[4+8*1], &nnz[ 0]);
3400
AV_COPY32(&nnz_cache[4+8*2], &nnz[ 4]);
3401
AV_COPY32(&nnz_cache[4+8*3], &nnz[ 8]);
3402
AV_COPY32(&nnz_cache[4+8*4], &nnz[12]);
3132
3403
h->cbp= h->cbp_table[mb_xy];
3136
for(list=0; list<h->list_count; list++){
3139
int16_t (*mv_dst)[2];
3140
int16_t (*mv_src)[2];
3142
if(!USES_LIST(mb_type, list)){
3143
fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
3144
AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3145
AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3146
AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3147
AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3151
ref = &s->current_picture.ref_index[list][4*mb_xy];
3153
int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
3154
AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
3155
AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
3157
AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
3158
AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
3161
b_stride = h->b_stride;
3162
mv_dst = &h->mv_cache[list][scan8[0]];
3163
mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
3165
AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
3180
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
3182
AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]);
3406
nnz = h->non_zero_count[top_xy];
3407
AV_COPY32(&nnz_cache[4+8*0], &nnz[3*4]);
3186
h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4];
3187
h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4];
3188
h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4];
3189
h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4];
3410
if(left_type[LTOP]){
3411
nnz = h->non_zero_count[left_xy[LTOP]];
3412
nnz_cache[3+8*1]= nnz[3+0*4];
3413
nnz_cache[3+8*2]= nnz[3+1*4];
3414
nnz_cache[3+8*3]= nnz[3+2*4];
3415
nnz_cache[3+8*4]= nnz[3+3*4];
3192
3418
// CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
3193
3419
if(!CABAC && h->pps.transform_8x8_mode){
3194
3420
if(IS_8x8DCT(top_type)){
3195
h->non_zero_count_cache[4+8*0]=
3196
h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
3197
h->non_zero_count_cache[6+8*0]=
3198
h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
3200
if(IS_8x8DCT(left_type[0])){
3201
h->non_zero_count_cache[3+8*1]=
3202
h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF
3204
if(IS_8x8DCT(left_type[1])){
3205
h->non_zero_count_cache[3+8*3]=
3206
h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF
3422
nnz_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
3424
nnz_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
3426
if(IS_8x8DCT(left_type[LTOP])){
3428
nnz_cache[3+8*2]= (h->cbp_table[left_xy[LTOP]]&0x2000) >> 12; //FIXME check MBAFF
3430
if(IS_8x8DCT(left_type[LBOT])){
3432
nnz_cache[3+8*4]= (h->cbp_table[left_xy[LBOT]]&0x8000) >> 12; //FIXME check MBAFF
3209
3435
if(IS_8x8DCT(mb_type)){
3210
h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
3211
h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= (h->cbp & 0x1000) >> 12;
3213
h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
3214
h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;
3216
h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
3217
h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;
3219
h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
3220
h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
3224
if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
3226
for(list=0; list<h->list_count; list++){
3227
if(USES_LIST(top_type, list)){
3228
const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
3229
const int b8_xy= 4*top_xy + 2;
3230
int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
3231
AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
3232
h->ref_cache[list][scan8[0] + 0 - 1*8]=
3233
h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
3234
h->ref_cache[list][scan8[0] + 2 - 1*8]=
3235
h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
3237
AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
3238
AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3241
if(!IS_INTERLACED(mb_type^left_type[0])){
3242
if(USES_LIST(left_type[0], list)){
3243
const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
3244
const int b8_xy= 4*left_xy[0] + 1;
3245
int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
3246
AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
3247
AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
3248
AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
3249
AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
3250
h->ref_cache[list][scan8[0] - 1 + 0 ]=
3251
h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
3252
h->ref_cache[list][scan8[0] - 1 +16 ]=
3253
h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
3255
AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
3256
AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
3257
AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
3258
AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
3259
h->ref_cache[list][scan8[0] - 1 + 0 ]=
3260
h->ref_cache[list][scan8[0] - 1 + 8 ]=
3261
h->ref_cache[list][scan8[0] - 1 + 16 ]=
3262
h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
3436
nnz_cache[scan8[0 ]]= nnz_cache[scan8[1 ]]=
3437
nnz_cache[scan8[2 ]]= nnz_cache[scan8[3 ]]= (h->cbp & 0x1000) >> 12;
3439
nnz_cache[scan8[0+ 4]]= nnz_cache[scan8[1+ 4]]=
3440
nnz_cache[scan8[2+ 4]]= nnz_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;
3442
nnz_cache[scan8[0+ 8]]= nnz_cache[scan8[1+ 8]]=
3443
nnz_cache[scan8[2+ 8]]= nnz_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;
3445
nnz_cache[scan8[0+12]]= nnz_cache[scan8[1+12]]=
3446
nnz_cache[scan8[2+12]]= nnz_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;