int direct_spatial_mv_pred;
int col_fieldoff;
int dist_scale_factor[32];
int dist_scale_factor_field[2][32];
int map_col_to_list0[2][16 + 32];
int map_col_to_list0_field[2][2][16 + 32];
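/* Note (editorial, not part of the original header): the tables above hold the
 * per-reference scaling factors and colocated-reference remapping used by
 * temporal/spatial direct prediction; the *_field variants are the field-pair
 * versions used for PAFF/MBAFF macroblocks. */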
/**
 * num_ref_idx_l0/1_active_minus1 + 1
 */
unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode
unsigned int list_count;
uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type
Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs.
* Reordered version of default_ref_list
* according to picture reordering in slice header */
int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1
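/* Indexing sketch (illustrative, inferred from the comment above rather than
 * taken from the original header): lookups are expected to be of the form
 *     frame_num = ref2frm[slice_num][list][ref_idx + 2];
 * so the two leading entries absorb the "unavailable" reference values -2/-1. */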
GetBitContext intra_gb;
GetBitContext inter_gb;
GetBitContext *intra_gb_ptr;
GetBitContext *inter_gb_ptr;
DECLARE_ALIGNED(16, DCTELEM, mb)[16 * 48 * 2]; ///< as a DCT coefficient is int32_t at high bit depths, we need to reserve twice the space.
DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16 * 2];
DCTELEM mb_padding[256 * 2]; ///< as mb is addressed by scantable[i] and scantable is uint8_t, we can either check that i is not too large or ensure that there is some unused space after mb
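/* Note (editorial, not part of the original header): for bit depths above 8
 * the transform coefficients are 32-bit and the same storage is reinterpreted
 * with the wider element type by the high-bit-depth template code, which is
 * why the element counts above are doubled. */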
CABACContext cabac;
uint8_t cabac_state[1024];
/* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0, 1, 2), 0x0? luma_cbp */
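/* Decoding sketch (illustrative, derived from the comment above):
 *     cbp & 0x0F          luma_cbp, one bit per 8x8 luma block
 *     (cbp >> 4) & 0x3    chroma_cbp: 0 = none, 1 = DC only, 2 = DC + AC
 *     cbp & 0x40          non-null Cr DC
 *     cbp & 0x80          non-null Cb DC
 *     cbp & 0x100         non-null luma DC
 */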
/* chroma_pred_mode for i4x4 or i16x16, else 0 */
uint8_t *chroma_pred_mode_table;
int last_qscale_diff;
uint8_t (*mvd_table[2])[2];
DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5 * 8][2];
uint8_t *direct_table;
uint8_t direct_cache[5 * 8];
uint8_t zigzag_scan[16];
uint8_t zigzag_scan8x8[64];
#define LUMA_DC_BLOCK_INDEX 48
#define CHROMA_DC_BLOCK_INDEX 49
// This table must be here because scan8[constant] must be known at compiletime
static const uint8_t scan8[16 * 3 + 3] = {
    4 +  1 * 8, 5 +  1 * 8, 4 +  2 * 8, 5 +  2 * 8,
    6 +  1 * 8, 7 +  1 * 8, 6 +  2 * 8, 7 +  2 * 8,
    4 +  3 * 8, 5 +  3 * 8, 4 +  4 * 8, 5 +  4 * 8,
    6 +  3 * 8, 7 +  3 * 8, 6 +  4 * 8, 7 +  4 * 8,
    4 +  6 * 8, 5 +  6 * 8, 4 +  7 * 8, 5 +  7 * 8,
    6 +  6 * 8, 7 +  6 * 8, 6 +  7 * 8, 7 +  7 * 8,
    4 +  8 * 8, 5 +  8 * 8, 4 +  9 * 8, 5 +  9 * 8,
    6 +  8 * 8, 7 +  8 * 8, 6 +  9 * 8, 7 +  9 * 8,
    4 + 11 * 8, 5 + 11 * 8, 4 + 12 * 8, 5 + 12 * 8,
    6 + 11 * 8, 7 + 11 * 8, 6 + 12 * 8, 7 + 12 * 8,
    4 + 13 * 8, 5 + 13 * 8, 4 + 14 * 8, 5 + 14 * 8,
    6 + 13 * 8, 7 + 13 * 8, 6 + 14 * 8, 7 + 14 * 8,
    0 +  0 * 8, 0 +  5 * 8, 0 + 10 * 8
};
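/* Layout note (illustrative, not part of the original header): scan8[] maps a
 * block index onto an 8-entry-wide cache grid, so for block n the cache slot
 * to its left is scan8[n] - 1 and the slot above it is scan8[n] - 8 (this is
 * what pred_intra_mode() below relies on); the last three entries are the luma
 * and chroma DC positions named by the defines above. */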
static av_always_inline uint32_t pack16to32(int a, int b)
{
#if HAVE_BIGENDIAN
    return (b & 0xFFFF) + (a << 16);
#else
    return (a & 0xFFFF) + (b << 16);
#endif
}
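/* Usage sketch (illustrative, not part of the original header): a motion
 * vector pair (mx, my) packed this way can be stored with one aligned 32-bit
 * write and read back component-wise, e.g.
 *     AV_WN32A(h->mv_cache[list][scan8[n]], pack16to32(mx, my));
 * the endianness switch above keeps the two int16_t halves in the same byte
 * order as direct array access. */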
static av_always_inline uint16_t pack8to16(int a, int b)
{
#if HAVE_BIGENDIAN
    return (b & 0xFF) + (a << 8);
#else
    return (a & 0xFF) + (b << 8);
#endif
}
/**
 * Get the chroma qp.
 */
static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale)
{
    return h->pps.chroma_qp_table[t][qscale];
}

/**
 * Get the predicted intra4x4 prediction mode.
 */
static av_always_inline int pred_intra_mode(H264Context *h, int n)
{
    const int index8 = scan8[n];
    const int left   = h->intra4x4_pred_mode_cache[index8 - 1];
    const int top    = h->intra4x4_pred_mode_cache[index8 - 8];
    const int min    = FFMIN(left, top);

    tprintf(h->s.avctx, "mode:%d %d min:%d\n", left, top, min);

    if (min < 0)
        return DC_PRED;
    else
        return min;
}
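/* Usage sketch (illustrative, not part of the original header): an intra4x4
 * mode parser would typically apply the H.264 "most probable mode" rule on top
 * of this prediction, along the lines of
 *     int pred = pred_intra_mode(h, n);
 *     int mode = prev_intra4x4_pred_mode_flag ? pred
 *                                             : rem_mode + (rem_mode >= pred);
 * where prev_intra4x4_pred_mode_flag and rem_mode are the bitstream elements. */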
static av_always_inline void write_back_intra_pred_mode(H264Context *h)
{
    int8_t *i4x4       = h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
    int8_t *i4x4_cache = h->intra4x4_pred_mode_cache;

    AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
    i4x4[4] = i4x4_cache[7 + 8 * 3];
    i4x4[5] = i4x4_cache[7 + 8 * 2];
    i4x4[6] = i4x4_cache[7 + 8 * 1];
}

static av_always_inline void write_back_non_zero_count(H264Context *h)
{
    const int mb_xy    = h->mb_xy;
    uint8_t *nnz       = h->non_zero_count[mb_xy];
    uint8_t *nnz_cache = h->non_zero_count_cache;

    AV_COPY32(&nnz[ 0], &nnz_cache[4 + 8 * 1]);
    AV_COPY32(&nnz[ 4], &nnz_cache[4 + 8 * 2]);
    AV_COPY32(&nnz[ 8], &nnz_cache[4 + 8 * 3]);
    AV_COPY32(&nnz[12], &nnz_cache[4 + 8 * 4]);
    AV_COPY32(&nnz[16], &nnz_cache[4 + 8 * 6]);
    AV_COPY32(&nnz[20], &nnz_cache[4 + 8 * 7]);
    AV_COPY32(&nnz[32], &nnz_cache[4 + 8 * 11]);
    AV_COPY32(&nnz[36], &nnz_cache[4 + 8 * 12]);

    if (!h->s.chroma_y_shift) {
        AV_COPY32(&nnz[24], &nnz_cache[4 + 8 * 8]);
        AV_COPY32(&nnz[28], &nnz_cache[4 + 8 * 9]);
        AV_COPY32(&nnz[40], &nnz_cache[4 + 8 * 13]);
        AV_COPY32(&nnz[44], &nnz_cache[4 + 8 * 14]);
    }
}
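/* Note (editorial, not part of the original header): chroma_y_shift is 0 for
 * 4:2:2 and 4:4:4 content, so the extra block of copies above covers the
 * additional chroma rows that only exist for those subsampling modes. */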
static av_always_inline void write_back_motion_list(H264Context *h,
                                                    MpegEncContext *const s,
                                                    int b_stride,
                                                    int b_xy, int b8_xy,
                                                    int mb_type, int list)
{
    int16_t(*mv_dst)[2] = &s->current_picture.f.motion_val[list][b_xy];
    int16_t(*mv_src)[2] = &h->mv_cache[list][scan8[0]];
    AV_COPY128(mv_dst + 0 * b_stride, mv_src + 8 * 0);
    AV_COPY128(mv_dst + 1 * b_stride, mv_src + 8 * 1);
    AV_COPY128(mv_dst + 2 * b_stride, mv_src + 8 * 2);
    AV_COPY128(mv_dst + 3 * b_stride, mv_src + 8 * 3);

    if (CABAC) {
        uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8 * h->mb_xy
                                                        : h->mb2br_xy[h->mb_xy]];
        uint8_t(*mvd_src)[2]  = &h->mvd_cache[list][scan8[0]];
        if (IS_SKIP(mb_type)) {
            AV_ZERO128(mvd_dst);
        } else {
            AV_COPY64(mvd_dst, mvd_src + 8 * 3);
            AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8 * 0);
            AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8 * 1);
            AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8 * 2);
        }
    }

    {
        int8_t *ref_index = &s->current_picture.f.ref_index[list][b8_xy];
        int8_t *ref_cache = h->ref_cache[list];
        ref_index[0 + 0 * 2] = ref_cache[scan8[0]];
        ref_index[1 + 0 * 2] = ref_cache[scan8[4]];
        ref_index[0 + 1 * 2] = ref_cache[scan8[8]];
        ref_index[1 + 1 * 2] = ref_cache[scan8[12]];
    }
}
static av_always_inline void write_back_motion(H264Context *h, int mb_type)
{
    MpegEncContext *const s = &h->s;
    const int b_stride      = h->b_stride;
    const int b_xy  = 4 * s->mb_x + 4 * s->mb_y * h->b_stride; // try mb2b(8)_xy
    const int b8_xy = 4 * h->mb_xy;

    if (USES_LIST(mb_type, 0)) {
        write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 0);
    } else {
        fill_rectangle(&s->current_picture.f.ref_index[0][b8_xy],
                       2, 2, 2, (uint8_t)LIST_NOT_USED, 1);
    }
    if (USES_LIST(mb_type, 1))
        write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 1);

    if (h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC) {
        if (IS_8X8(mb_type)) {
            uint8_t *direct_table = &h->direct_table[4 * h->mb_xy];
            direct_table[1] = h->sub_mb_type[1] >> 1;
            direct_table[2] = h->sub_mb_type[2] >> 1;
            direct_table[3] = h->sub_mb_type[3] >> 1;
        }
    }
}
static av_always_inline int get_dct8x8_allowed(H264Context *h)
{
    if (h->sps.direct_8x8_inference_flag)
        return !(AV_RN64A(h->sub_mb_type) &
                 ((MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8) *
                  0x0001000100010001ULL));
    else
        return !(AV_RN64A(h->sub_mb_type) &
                 ((MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8 | MB_TYPE_DIRECT2) *
                  0x0001000100010001ULL));
}
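/* Note (editorial, not part of the original header; assumes sub_mb_type is an
 * array of four 16-bit sub-macroblock types, which is not shown in this
 * excerpt): AV_RN64A() loads all four entries in one 64-bit read, and
 * multiplying the MB_TYPE_* mask by 0x0001000100010001ULL replicates it into
 * each 16-bit lane, so a single AND tests every 8x8 partition at once. */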
#endif /* AVCODEC_H264_H */