379
385
PutBitContext pb, vs_pb;
380
386
GetBitContext gb;
381
387
BlockInfo mb_data[5 * 6], *mb, *mb1;
382
DCTELEM sblock[5*6][64] __align8;
383
uint8_t mb_bit_buffer[80 + 4] __align8; /* allow some slack */
384
uint8_t vs_bit_buffer[5 * 80 + 4] __align8; /* allow some slack */
388
DECLARE_ALIGNED_8(DCTELEM, sblock[5*6][64]);
389
DECLARE_ALIGNED_8(uint8_t, mb_bit_buffer[80 + 4]); /* allow some slack */
390
DECLARE_ALIGNED_8(uint8_t, vs_bit_buffer[5 * 80 + 4]); /* allow some slack */
385
391
const int log2_blocksize= 3-s->avctx->lowres;
387
393
assert((((int)mb_bit_buffer)&7)==0);
488
495
v = *mb_pos_ptr++;
491
y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x)<<log2_blocksize);
492
if (s->sys->pix_fmt == PIX_FMT_YUV411P)
498
if (s->sys->pix_fmt == PIX_FMT_YUV422P) {
499
y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + (mb_x>>1))<<log2_blocksize);
493
500
c_offset = ((mb_y * s->picture.linesize[1] + (mb_x >> 2))<<log2_blocksize);
495
c_offset = (((mb_y >> 1) * s->picture.linesize[1] + (mb_x >> 1))<<log2_blocksize);
501
} else { /* 4:1:1 or 4:2:0 */
502
y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x)<<log2_blocksize);
503
if (s->sys->pix_fmt == PIX_FMT_YUV411P)
504
c_offset = ((mb_y * s->picture.linesize[1] + (mb_x >> 2))<<log2_blocksize);
506
c_offset = (((mb_y >> 1) * s->picture.linesize[1] + (mb_x >> 1))<<log2_blocksize);
496
508
for(j = 0;j < 6; j++) {
497
509
idct_put = s->idct_put[mb->dct_mode && log2_blocksize==3];
499
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
500
/* NOTE: at end of line, the macroblock is handled as 420 */
501
idct_put(y_ptr + (j<<log2_blocksize), s->picture.linesize[0], block);
503
idct_put(y_ptr + (((j & 1) + (j >> 1) * s->picture.linesize[0])<<log2_blocksize),
510
if (s->sys->pix_fmt == PIX_FMT_YUV422P) { /* 4:2:2 */
511
if (j == 0 || j == 2) {
513
idct_put(y_ptr + ((j >> 1)<<log2_blocksize),
504
514
s->picture.linesize[0], block);
507
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) {
508
uint64_t aligned_pixels[64/8];
509
uint8_t *pixels= (uint8_t*)aligned_pixels;
510
uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1;
512
/* NOTE: at end of line, the macroblock is handled as 420 */
513
idct_put(pixels, 8, block);
514
linesize = s->picture.linesize[6 - j];
515
c_ptr = s->picture.data[6 - j] + c_offset;
517
for(y = 0;y < (1<<log2_blocksize); y++) {
518
ptr1= ptr + (1<<(log2_blocksize-1));
519
c_ptr1 = c_ptr + (linesize<<log2_blocksize);
520
for(x=0; x < (1<<(log2_blocksize-1)); x++){
521
c_ptr[x]= ptr[x]; c_ptr1[x]= ptr1[x];
527
/* don't ask me why they inverted Cb and Cr ! */
528
517
idct_put(s->picture.data[6 - j] + c_offset,
529
518
s->picture.linesize[6 - j], block);
520
/* note: j=1 and j=3 are "dummy" blocks in 4:2:2 */
521
} else { /* 4:1:1 or 4:2:0 */
523
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
524
/* NOTE: at end of line, the macroblock is handled as 420 */
525
idct_put(y_ptr + (j<<log2_blocksize), s->picture.linesize[0], block);
527
idct_put(y_ptr + (((j & 1) + (j >> 1) * s->picture.linesize[0])<<log2_blocksize),
528
s->picture.linesize[0], block);
531
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) {
532
uint64_t aligned_pixels[64/8];
533
uint8_t *pixels= (uint8_t*)aligned_pixels;
534
uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1;
536
/* NOTE: at end of line, the macroblock is handled as 420 */
537
idct_put(pixels, 8, block);
538
linesize = s->picture.linesize[6 - j];
539
c_ptr = s->picture.data[6 - j] + c_offset;
541
for(y = 0;y < (1<<log2_blocksize); y++) {
542
ptr1= ptr + (1<<(log2_blocksize-1));
543
c_ptr1 = c_ptr + (linesize<<log2_blocksize);
544
for(x=0; x < (1<<(log2_blocksize-1)); x++){
545
c_ptr[x]= ptr[x]; c_ptr1[x]= ptr1[x];
551
/* don't ask me why they inverted Cb and Cr ! */
552
idct_put(s->picture.data[6 - j] + c_offset,
553
s->picture.linesize[6 - j], block);
650
675
static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
651
const uint8_t* zigzag_scan, int bias)
676
const uint8_t* zigzag_scan, const int *weight, int bias)
679
/* We offer two different methods for class number assignment: the
680
method suggested in SMPTE 314M Table 22, and an improved
681
method. The SMPTE method is very conservative; it assigns class
682
3 (i.e. severe quantization) to any block where the largest AC
683
component is greater than 36. ffmpeg's DV encoder tracks AC bit
684
consumption precisely, so there is no need to bias most blocks
685
towards strongly lossy compression. Instead, we assign class 2
686
to most blocks, and use class 3 only when strictly necessary
687
(for blocks whose largest AC component exceeds 255). */
689
#if 0 /* SMPTE spec method */
654
690
static const int classes[] = {12, 24, 36, 0xffff};
691
#else /* improved ffmpeg method */
692
static const int classes[] = {-1, -1, 255, 0xffff};
658
697
bi->mb[0] = blk[0];
745
789
b->bit_size[a] = 1; // 4 areas 4 bits for EOB :)
747
791
prev= b->prev[a];
792
assert(b->next[prev] >= mb_area_start[a+1] || b->mb[prev]);
748
793
for (k= b->next[prev] ; k<mb_area_start[a+1]; k= b->next[k]) {
751
796
b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
799
if(b->next[k] >= mb_area_start[a+1] && b->next[k]<64){
800
for(a2=a+1; b->next[k] >= mb_area_start[a2+1]; a2++)
803
assert(b->mb[b->next[k]]);
804
b->bit_size[a2] += dv_rl2vlc_size(b->next[k] - prev - 1, b->mb[b->next[k]])
805
-dv_rl2vlc_size(b->next[k] - k - 1, b->mb[b->next[k]]);
806
assert(b->prev[a2]==k && (a2+1 >= 4 || b->prev[a2+1]!=k));
754
809
b->next[prev] = b->next[k];
759
814
size[i] += b->bit_size[a];
817
if(vs_total_ac_bits >= size[0] + size[1] + size[2] + size[3] + size[4])
763
} while ((vs_total_ac_bits < size[0] + size[1] + size[2] + size[3] + size[4]) &&
764
(qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4]));
820
} while (qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4]);
823
for(a=2; a==2 || vs_total_ac_bits < size[0]; a+=a){
825
size[0] = 5*6*4; //EOB
826
for (j=0; j<6*5; j++, b++) {
828
for (k= b->next[prev]; k<64; k= b->next[k]) {
829
if(b->mb[k] < a && b->mb[k] > -a){
830
b->next[prev] = b->next[k];
832
size[0] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
768
* This is a very rough initial implementation. The performance is
769
* horrible and the weighting is missing. But it's missing from the
770
* decoding step also -- so at least we're on the same page with decoder ;-)
772
840
static inline void dv_encode_video_segment(DVVideoContext *s,
774
842
const uint16_t *mb_pos_ptr)
795
863
v = *mb_pos_ptr++;
798
y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8);
799
c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ?
800
((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) :
801
(((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8));
866
if (s->sys->pix_fmt == PIX_FMT_YUV422P) {
867
y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 4);
869
y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8);
871
if (s->sys->pix_fmt == PIX_FMT_YUV420P) {
872
c_offset = (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8));
873
} else { /* 4:2:2 or 4:1:1 */
874
c_offset = ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8));
802
876
do_edge_wrap = 0;
803
877
qnos[mb_index] = 15; /* No quantization */
804
878
ptr = dif + mb_index*80 + 4;
805
879
for(j = 0;j < 6; j++) {
806
if (j < 4) { /* Four Y blocks */
807
/* NOTE: at end of line, the macroblock is handled as 420 */
808
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
809
data = y_ptr + (j * 8);
881
if (s->sys->pix_fmt == PIX_FMT_YUV422P) { /* 4:2:2 */
882
if (j == 0 || j == 2) {
884
data = y_ptr + ((j>>1) * 8);
885
linesize = s->picture.linesize[0];
888
data = s->picture.data[6 - j] + c_offset;
889
linesize = s->picture.linesize[6 - j];
811
data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]);
813
linesize = s->picture.linesize[0];
814
} else { /* Cr and Cb blocks */
815
/* don't ask Fabrice why they inverted Cb and Cr ! */
816
data = s->picture.data[6 - j] + c_offset;
817
linesize = s->picture.linesize[6 - j];
818
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8))
891
/* j=1 and j=3 are "dummy" blocks, used for AC data only */
896
} else { /* 4:1:1 or 4:2:0 */
897
if (j < 4) { /* Four Y blocks */
898
/* NOTE: at end of line, the macroblock is handled as 420 */
899
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
900
data = y_ptr + (j * 8);
902
data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]);
904
linesize = s->picture.linesize[0];
905
} else { /* Cr and Cb blocks */
906
/* don't ask Fabrice why they inverted Cb and Cr ! */
907
data = s->picture.data[6 - j] + c_offset;
908
linesize = s->picture.linesize[6 - j];
909
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8))
822
914
/* Everything is set up -- now just copy data -> DCT block */
843
936
enc_blk->partial_bit_buffer = 0;
844
937
enc_blk->cur_ac = 0;
846
s->fdct[enc_blk->dct_mode](block);
940
/* We rely on the fact that encoding all zeros leads to an immediate EOB,
941
which is precisely what the spec calls for in the "dummy" blocks. */
942
memset(block, 0, sizeof(block));
944
s->fdct[enc_blk->dct_mode](block);
848
947
dv_set_class_number(block, enc_blk,
849
enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4);
948
enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct,
949
enc_blk->dct_mode ? dv_weight_248 : dv_weight_88,
851
952
init_put_bits(pb, ptr, block_sizes[j]/8);
852
953
put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2));
897
1000
DVVideoContext *s = avctx->priv_data;
898
1001
int slice = (size_t)sl;
899
dv_decode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80],
1003
/* which DIF channel is this? */
1004
int chan = slice / (s->sys->difseg_size * 27);
1006
/* slice within the DIF channel */
1007
int chan_slice = slice % (s->sys->difseg_size * 27);
1009
/* byte offset of this channel's data */
1010
int chan_offset = chan * s->sys->difseg_size * 150 * 80;
1012
dv_decode_video_segment(s, &s->buf[((chan_slice/27)*6+(chan_slice/3)+chan_slice*5+7)*80 + chan_offset],
900
1013
&s->sys->video_place[slice*5]);
906
1019
DVVideoContext *s = avctx->priv_data;
907
1020
int slice = (size_t)sl;
908
dv_encode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80],
1022
/* which DIF channel is this? */
1023
int chan = slice / (s->sys->difseg_size * 27);
1025
/* slice within the DIF channel */
1026
int chan_slice = slice % (s->sys->difseg_size * 27);
1028
/* byte offset of this channel's data */
1029
int chan_offset = chan * s->sys->difseg_size * 150 * 80;
1031
dv_encode_video_segment(s, &s->buf[((chan_slice/27)*6+(chan_slice/3)+chan_slice*5+7)*80 + chan_offset],
909
1032
&s->sys->video_place[slice*5]);
913
1036
/* NOTE: exactly one frame must be given (120000 bytes for NTSC,
914
144000 bytes for PAL) */
1037
144000 bytes for PAL - or twice those for 50Mbps) */
915
1038
static int dvvideo_decode_frame(AVCodecContext *avctx,
916
1039
void *data, int *data_size,
917
1040
uint8_t *buf, int buf_size)
970
1093
c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL,
971
s->sys->difseg_size * 27);
1094
s->sys->n_difchan * s->sys->difseg_size * 27);
1098
/* Fill in just enough of the header for dv_frame_profile() to
1099
return the correct result, so that the frame can be decoded
1100
correctly. The rest of the metadata is filled in by the dvvideo
1101
avformat. (this should probably change so that encode_frame()
1102
fills in ALL of the metadata - e.g. for Quicktime-wrapped DV
1105
/* NTSC/PAL format */
1106
buf[3] = s->sys->dsf ? 0x80 : 0x00;
1108
/* 25Mbps or 50Mbps */
1109
buf[80*5 + 48 + 3] = (s->sys->pix_fmt == PIX_FMT_YUV422P) ? 0x4 : 0x0;
974
1111
return s->sys->frame_size;