~ubuntu-branches/ubuntu/saucy/gst-libav1.0/saucy-proposed

« back to all changes in this revision

Viewing changes to gst-libs/ext/libav/libavcodec/svq1enc.c

  • Committer: Package Import Robot
  • Author(s): Sebastian Dröge
  • Date: 2013-07-30 09:00:15 UTC
  • mfrom: (1.1.16) (7.1.7 experimental)
  • Revision ID: package-import@ubuntu.com-20130730090015-sc1ou2yssu7q5w4e
Tags: 1.1.3-1
* New upstream development snapshot:
  + debian/control:
    - Build depend on GStreamer and gst-plugins-base >= 1.1.3.

Show diffs side-by-side

added

removed

Lines of Context:
26
26
 *   http://www.pcisys.net/~melanson/codecs/
27
27
 */
28
28
 
29
 
 
30
29
#include "avcodec.h"
31
30
#include "dsputil.h"
32
31
#include "mpegvideo.h"
33
32
#include "h263.h"
34
33
#include "internal.h"
35
 
 
36
34
#include "svq1.h"
37
35
#include "svq1enc_cb.h"
38
36
 
39
37
#undef NDEBUG
40
38
#include <assert.h>
41
39
 
42
 
 
43
40
typedef struct SVQ1Context {
44
 
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
 
41
    /* FIXME: Needed for motion estimation, should not be used for anything
 
42
     * else, the idea is to make the motion estimation eventually independent
 
43
     * of MpegEncContext, so this will be removed then. */
 
44
    MpegEncContext m;
45
45
    AVCodecContext *avctx;
46
46
    DSPContext dsp;
47
47
    AVFrame picture;
50
50
    PutBitContext pb;
51
51
    GetBitContext gb;
52
52
 
53
 
    PutBitContext reorder_pb[6]; //why ooh why this sick breadth first order, everything is slower and more complex
 
53
    /* why ooh why this sick breadth first order,
 
54
     * everything is slower and more complex */
 
55
    PutBitContext reorder_pb[6];
54
56
 
55
57
    int frame_width;
56
58
    int frame_height;
87
89
    put_bits(&s->pb, 2, frame_type - 1);
88
90
 
89
91
    if (frame_type == AV_PICTURE_TYPE_I) {
90
 
 
91
92
        /* no checksum since frame code is 0x20 */
92
 
 
93
93
        /* no embedded string either */
94
 
 
95
94
        /* output 5 unknown bits (2 + 2 + 1) */
96
95
        put_bits(&s->pb, 5, 2); /* 2 needed by quicktime decoder */
97
96
 
98
 
        i= ff_match_2uint16(ff_svq1_frame_size_table, FF_ARRAY_ELEMS(ff_svq1_frame_size_table), s->frame_width, s->frame_height);
 
97
        i = ff_match_2uint16(ff_svq1_frame_size_table,
 
98
                             FF_ARRAY_ELEMS(ff_svq1_frame_size_table),
 
99
                             s->frame_width, s->frame_height);
99
100
        put_bits(&s->pb, 3, i);
100
101
 
101
 
        if (i == 7)
102
 
        {
103
 
                put_bits(&s->pb, 12, s->frame_width);
104
 
                put_bits(&s->pb, 12, s->frame_height);
 
102
        if (i == 7) {
 
103
            put_bits(&s->pb, 12, s->frame_width);
 
104
            put_bits(&s->pb, 12, s->frame_height);
105
105
        }
106
106
    }
107
107
 
109
109
    put_bits(&s->pb, 2, 0);
110
110
}
111
111
 
112
 
 
113
 
#define QUALITY_THRESHOLD 100
 
112
#define QUALITY_THRESHOLD    100
114
113
#define THRESHOLD_MULTIPLIER 0.6
115
114
 
116
 
static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *decoded, int stride, int level, int threshold, int lambda, int intra){
 
115
static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref,
 
116
                        uint8_t *decoded, int stride, int level,
 
117
                        int threshold, int lambda, int intra)
 
118
{
117
119
    int count, y, x, i, j, split, best_mean, best_score, best_count;
118
120
    int best_vector[6];
119
 
    int block_sum[7]= {0, 0, 0, 0, 0, 0};
120
 
    int w= 2<<((level+2)>>1);
121
 
    int h= 2<<((level+1)>>1);
122
 
    int size=w*h;
 
121
    int block_sum[7] = { 0, 0, 0, 0, 0, 0 };
 
122
    int w            = 2 << (level + 2 >> 1);
 
123
    int h            = 2 << (level + 1 >> 1);
 
124
    int size         = w * h;
123
125
    int16_t block[7][256];
124
126
    const int8_t *codebook_sum, *codebook;
125
 
    const uint16_t (*mean_vlc)[2];
126
 
    const uint8_t (*multistage_vlc)[2];
 
127
    const uint16_t(*mean_vlc)[2];
 
128
    const uint8_t(*multistage_vlc)[2];
127
129
 
128
 
    best_score=0;
129
 
    //FIXME optimize, this doenst need to be done multiple times
130
 
    if(intra){
131
 
        codebook_sum= svq1_intra_codebook_sum[level];
132
 
        codebook= ff_svq1_intra_codebooks[level];
133
 
        mean_vlc= ff_svq1_intra_mean_vlc;
134
 
        multistage_vlc= ff_svq1_intra_multistage_vlc[level];
135
 
        for(y=0; y<h; y++){
136
 
            for(x=0; x<w; x++){
137
 
                int v= src[x + y*stride];
138
 
                block[0][x + w*y]= v;
139
 
                best_score += v*v;
140
 
                block_sum[0] += v;
 
130
    best_score = 0;
 
131
    // FIXME: Optimize, this does not need to be done multiple times.
 
132
    if (intra) {
 
133
        codebook_sum   = svq1_intra_codebook_sum[level];
 
134
        codebook       = ff_svq1_intra_codebooks[level];
 
135
        mean_vlc       = ff_svq1_intra_mean_vlc;
 
136
        multistage_vlc = ff_svq1_intra_multistage_vlc[level];
 
137
        for (y = 0; y < h; y++) {
 
138
            for (x = 0; x < w; x++) {
 
139
                int v = src[x + y * stride];
 
140
                block[0][x + w * y] = v;
 
141
                best_score         += v * v;
 
142
                block_sum[0]       += v;
141
143
            }
142
144
        }
143
 
    }else{
144
 
        codebook_sum= svq1_inter_codebook_sum[level];
145
 
        codebook= ff_svq1_inter_codebooks[level];
146
 
        mean_vlc= ff_svq1_inter_mean_vlc + 256;
147
 
        multistage_vlc= ff_svq1_inter_multistage_vlc[level];
148
 
        for(y=0; y<h; y++){
149
 
            for(x=0; x<w; x++){
150
 
                int v= src[x + y*stride] - ref[x + y*stride];
151
 
                block[0][x + w*y]= v;
152
 
                best_score += v*v;
153
 
                block_sum[0] += v;
 
145
    } else {
 
146
        codebook_sum   = svq1_inter_codebook_sum[level];
 
147
        codebook       = ff_svq1_inter_codebooks[level];
 
148
        mean_vlc       = ff_svq1_inter_mean_vlc + 256;
 
149
        multistage_vlc = ff_svq1_inter_multistage_vlc[level];
 
150
        for (y = 0; y < h; y++) {
 
151
            for (x = 0; x < w; x++) {
 
152
                int v = src[x + y * stride] - ref[x + y * stride];
 
153
                block[0][x + w * y] = v;
 
154
                best_score         += v * v;
 
155
                block_sum[0]       += v;
154
156
            }
155
157
        }
156
158
    }
157
159
 
158
 
    best_count=0;
159
 
    best_score -= (int)(((unsigned)block_sum[0]*block_sum[0])>>(level+3));
160
 
    best_mean= (block_sum[0] + (size>>1)) >> (level+3);
 
160
    best_count  = 0;
 
161
    best_score -= (int)((unsigned)block_sum[0] * block_sum[0] >> (level + 3));
 
162
    best_mean   = block_sum[0] + (size >> 1) >> (level + 3);
161
163
 
162
 
    if(level<4){
163
 
        for(count=1; count<7; count++){
164
 
            int best_vector_score= INT_MAX;
165
 
            int best_vector_sum=-999, best_vector_mean=-999;
166
 
            const int stage= count-1;
 
164
    if (level < 4) {
 
165
        for (count = 1; count < 7; count++) {
 
166
            int best_vector_score = INT_MAX;
 
167
            int best_vector_sum   = -999, best_vector_mean = -999;
 
168
            const int stage       = count - 1;
167
169
            const int8_t *vector;
168
170
 
169
 
            for(i=0; i<16; i++){
170
 
                int sum= codebook_sum[stage*16 + i];
 
171
            for (i = 0; i < 16; i++) {
 
172
                int sum = codebook_sum[stage * 16 + i];
171
173
                int sqr, diff, score;
172
174
 
173
 
                vector = codebook + stage*size*16 + i*size;
174
 
                sqr = s->dsp.ssd_int8_vs_int16(vector, block[stage], size);
175
 
                diff= block_sum[stage] - sum;
176
 
                score= sqr - ((diff*(int64_t)diff)>>(level+3)); //FIXME 64bit slooow
177
 
                if(score < best_vector_score){
178
 
                    int mean= (diff + (size>>1)) >> (level+3);
179
 
                    assert(mean >-300 && mean<300);
180
 
                    mean= av_clip(mean, intra?0:-256, 255);
181
 
                    best_vector_score= score;
182
 
                    best_vector[stage]= i;
183
 
                    best_vector_sum= sum;
184
 
                    best_vector_mean= mean;
 
175
                vector = codebook + stage * size * 16 + i * size;
 
176
                sqr    = s->dsp.ssd_int8_vs_int16(vector, block[stage], size);
 
177
                diff   = block_sum[stage] - sum;
 
178
                score  = sqr - (diff * (int64_t)diff >> (level + 3)); // FIXME: 64bit slooow
 
179
                if (score < best_vector_score) {
 
180
                    int mean = diff + (size >> 1) >> (level + 3);
 
181
                    assert(mean > -300 && mean < 300);
 
182
                    mean               = av_clip(mean, intra ? 0 : -256, 255);
 
183
                    best_vector_score  = score;
 
184
                    best_vector[stage] = i;
 
185
                    best_vector_sum    = sum;
 
186
                    best_vector_mean   = mean;
185
187
                }
186
188
            }
187
189
            assert(best_vector_mean != -999);
188
 
            vector= codebook + stage*size*16 + best_vector[stage]*size;
189
 
            for(j=0; j<size; j++){
190
 
                block[stage+1][j] = block[stage][j] - vector[j];
191
 
            }
192
 
            block_sum[stage+1]= block_sum[stage] - best_vector_sum;
193
 
            best_vector_score +=
194
 
                lambda*(+ 1 + 4*count
195
 
                        + multistage_vlc[1+count][1]
196
 
                        + mean_vlc[best_vector_mean][1]);
 
190
            vector = codebook + stage * size * 16 + best_vector[stage] * size;
 
191
            for (j = 0; j < size; j++)
 
192
                block[stage + 1][j] = block[stage][j] - vector[j];
 
193
            block_sum[stage + 1] = block_sum[stage] - best_vector_sum;
 
194
            best_vector_score   += lambda *
 
195
                                   (+1 + 4 * count +
 
196
                                    multistage_vlc[1 + count][1]
 
197
                                    + mean_vlc[best_vector_mean][1]);
197
198
 
198
 
            if(best_vector_score < best_score){
199
 
                best_score= best_vector_score;
200
 
                best_count= count;
201
 
                best_mean= best_vector_mean;
 
199
            if (best_vector_score < best_score) {
 
200
                best_score = best_vector_score;
 
201
                best_count = count;
 
202
                best_mean  = best_vector_mean;
202
203
            }
203
204
        }
204
205
    }
205
206
 
206
 
    split=0;
207
 
    if(best_score > threshold && level){
208
 
        int score=0;
209
 
        int offset= (level&1) ? stride*h/2 : w/2;
 
207
    split = 0;
 
208
    if (best_score > threshold && level) {
 
209
        int score  = 0;
 
210
        int offset = level & 1 ? stride * h / 2 : w / 2;
210
211
        PutBitContext backup[6];
211
212
 
212
 
        for(i=level-1; i>=0; i--){
213
 
            backup[i]= s->reorder_pb[i];
214
 
        }
215
 
        score += encode_block(s, src         , ref         , decoded         , stride, level-1, threshold>>1, lambda, intra);
216
 
        score += encode_block(s, src + offset, ref + offset, decoded + offset, stride, level-1, threshold>>1, lambda, intra);
 
213
        for (i = level - 1; i >= 0; i--)
 
214
            backup[i] = s->reorder_pb[i];
 
215
        score += encode_block(s, src, ref, decoded, stride, level - 1,
 
216
                              threshold >> 1, lambda, intra);
 
217
        score += encode_block(s, src + offset, ref + offset, decoded + offset,
 
218
                              stride, level - 1, threshold >> 1, lambda, intra);
217
219
        score += lambda;
218
220
 
219
 
        if(score < best_score){
220
 
            best_score= score;
221
 
            split=1;
222
 
        }else{
223
 
            for(i=level-1; i>=0; i--){
224
 
                s->reorder_pb[i]= backup[i];
225
 
            }
 
221
        if (score < best_score) {
 
222
            best_score = score;
 
223
            split      = 1;
 
224
        } else {
 
225
            for (i = level - 1; i >= 0; i--)
 
226
                s->reorder_pb[i] = backup[i];
226
227
        }
227
228
    }
228
229
    if (level > 0)
229
230
        put_bits(&s->reorder_pb[level], 1, split);
230
231
 
231
 
    if(!split){
232
 
        assert((best_mean >= 0 && best_mean<256) || !intra);
233
 
        assert(best_mean >= -256 && best_mean<256);
234
 
        assert(best_count >=0 && best_count<7);
235
 
        assert(level<4 || best_count==0);
 
232
    if (!split) {
 
233
        assert(best_mean >= 0 && best_mean < 256 || !intra);
 
234
        assert(best_mean >= -256 && best_mean < 256);
 
235
        assert(best_count >= 0 && best_count < 7);
 
236
        assert(level < 4 || best_count == 0);
236
237
 
237
238
        /* output the encoding */
238
239
        put_bits(&s->reorder_pb[level],
239
 
            multistage_vlc[1 + best_count][1],
240
 
            multistage_vlc[1 + best_count][0]);
 
240
                 multistage_vlc[1 + best_count][1],
 
241
                 multistage_vlc[1 + best_count][0]);
241
242
        put_bits(&s->reorder_pb[level], mean_vlc[best_mean][1],
242
 
            mean_vlc[best_mean][0]);
 
243
                 mean_vlc[best_mean][0]);
243
244
 
244
 
        for (i = 0; i < best_count; i++){
245
 
            assert(best_vector[i]>=0 && best_vector[i]<16);
 
245
        for (i = 0; i < best_count; i++) {
 
246
            assert(best_vector[i] >= 0 && best_vector[i] < 16);
246
247
            put_bits(&s->reorder_pb[level], 4, best_vector[i]);
247
248
        }
248
249
 
249
 
        for(y=0; y<h; y++){
250
 
            for(x=0; x<w; x++){
251
 
                decoded[x + y*stride]= src[x + y*stride] - block[best_count][x + w*y] + best_mean;
252
 
            }
253
 
        }
 
250
        for (y = 0; y < h; y++)
 
251
            for (x = 0; x < w; x++)
 
252
                decoded[x + y * stride] = src[x + y * stride] -
 
253
                                          block[best_count][x + w * y] +
 
254
                                          best_mean;
254
255
    }
255
256
 
256
257
    return best_score;
257
258
}
258
259
 
259
 
 
260
 
static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane,
261
 
    int width, int height, int src_stride, int stride)
 
260
static int svq1_encode_plane(SVQ1Context *s, int plane,
 
261
                             unsigned char *src_plane,
 
262
                             unsigned char *ref_plane,
 
263
                             unsigned char *decoded_plane,
 
264
                             int width, int height, int src_stride, int stride)
262
265
{
263
266
    int x, y;
264
267
    int i;
265
268
    int block_width, block_height;
266
269
    int level;
267
270
    int threshold[6];
268
 
    uint8_t *src = s->scratchbuf + stride * 16;
269
 
    const int lambda= (s->picture.quality*s->picture.quality) >> (2*FF_LAMBDA_SHIFT);
 
271
    uint8_t *src     = s->scratchbuf + stride * 16;
 
272
    const int lambda = (s->picture.quality * s->picture.quality) >>
 
273
                       (2 * FF_LAMBDA_SHIFT);
270
274
 
271
275
    /* figure out the acceptable level thresholds in advance */
272
276
    threshold[5] = QUALITY_THRESHOLD;
273
277
    for (level = 4; level >= 0; level--)
274
278
        threshold[level] = threshold[level + 1] * THRESHOLD_MULTIPLIER;
275
279
 
276
 
    block_width = (width + 15) / 16;
 
280
    block_width  = (width  + 15) / 16;
277
281
    block_height = (height + 15) / 16;
278
282
 
279
 
    if(s->picture.pict_type == AV_PICTURE_TYPE_P){
280
 
        s->m.avctx= s->avctx;
281
 
        s->m.current_picture_ptr= &s->m.current_picture;
282
 
        s->m.last_picture_ptr   = &s->m.last_picture;
283
 
        s->m.last_picture.f.data[0] = ref_plane;
284
 
        s->m.linesize=
285
 
        s->m.last_picture.f.linesize[0] =
286
 
        s->m.new_picture.f.linesize[0] =
 
283
    if (s->picture.pict_type == AV_PICTURE_TYPE_P) {
 
284
        s->m.avctx                         = s->avctx;
 
285
        s->m.current_picture_ptr           = &s->m.current_picture;
 
286
        s->m.last_picture_ptr              = &s->m.last_picture;
 
287
        s->m.last_picture.f.data[0]        = ref_plane;
 
288
        s->m.linesize                      =
 
289
        s->m.last_picture.f.linesize[0]    =
 
290
        s->m.new_picture.f.linesize[0]     =
287
291
        s->m.current_picture.f.linesize[0] = stride;
288
 
        s->m.width= width;
289
 
        s->m.height= height;
290
 
        s->m.mb_width= block_width;
291
 
        s->m.mb_height= block_height;
292
 
        s->m.mb_stride= s->m.mb_width+1;
293
 
        s->m.b8_stride= 2*s->m.mb_width+1;
294
 
        s->m.f_code=1;
295
 
        s->m.pict_type= s->picture.pict_type;
296
 
        s->m.me_method= s->avctx->me_method;
297
 
        s->m.me.scene_change_score=0;
298
 
        s->m.flags= s->avctx->flags;
299
 
//        s->m.out_format = FMT_H263;
300
 
//        s->m.unrestricted_mv= 1;
301
 
 
302
 
        s->m.lambda= s->picture.quality;
303
 
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
304
 
        s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
305
 
 
306
 
        if(!s->motion_val8[plane]){
307
 
            s->motion_val8 [plane]= av_mallocz((s->m.b8_stride*block_height*2 + 2)*2*sizeof(int16_t));
308
 
            s->motion_val16[plane]= av_mallocz((s->m.mb_stride*(block_height + 2) + 1)*2*sizeof(int16_t));
 
292
        s->m.width                         = width;
 
293
        s->m.height                        = height;
 
294
        s->m.mb_width                      = block_width;
 
295
        s->m.mb_height                     = block_height;
 
296
        s->m.mb_stride                     = s->m.mb_width + 1;
 
297
        s->m.b8_stride                     = 2 * s->m.mb_width + 1;
 
298
        s->m.f_code                        = 1;
 
299
        s->m.pict_type                     = s->picture.pict_type;
 
300
        s->m.me_method                     = s->avctx->me_method;
 
301
        s->m.me.scene_change_score         = 0;
 
302
        s->m.flags                         = s->avctx->flags;
 
303
        // s->m.out_format                    = FMT_H263;
 
304
        // s->m.unrestricted_mv               = 1;
 
305
        s->m.lambda                        = s->picture.quality;
 
306
        s->m.qscale                        = s->m.lambda * 139 +
 
307
                                             FF_LAMBDA_SCALE * 64 >>
 
308
                                             FF_LAMBDA_SHIFT + 7;
 
309
        s->m.lambda2                       = s->m.lambda * s->m.lambda +
 
310
                                             FF_LAMBDA_SCALE / 2 >>
 
311
                                             FF_LAMBDA_SHIFT;
 
312
 
 
313
        if (!s->motion_val8[plane]) {
 
314
            s->motion_val8[plane]  = av_mallocz((s->m.b8_stride *
 
315
                                                 block_height * 2 + 2) *
 
316
                                                2 * sizeof(int16_t));
 
317
            s->motion_val16[plane] = av_mallocz((s->m.mb_stride *
 
318
                                                 (block_height + 2) + 1) *
 
319
                                                2 * sizeof(int16_t));
309
320
        }
310
321
 
311
 
        s->m.mb_type= s->mb_type;
 
322
        s->m.mb_type = s->mb_type;
312
323
 
313
 
        //dummies, to avoid segfaults
314
 
        s->m.current_picture.mb_mean=   (uint8_t *)s->dummy;
315
 
        s->m.current_picture.mb_var=    (uint16_t*)s->dummy;
316
 
        s->m.current_picture.mc_mb_var= (uint16_t*)s->dummy;
 
324
        // dummies, to avoid segfaults
 
325
        s->m.current_picture.mb_mean   = (uint8_t *)s->dummy;
 
326
        s->m.current_picture.mb_var    = (uint16_t *)s->dummy;
 
327
        s->m.current_picture.mc_mb_var = (uint16_t *)s->dummy;
317
328
        s->m.current_picture.f.mb_type = s->dummy;
318
329
 
319
330
        s->m.current_picture.f.motion_val[0] = s->motion_val8[plane] + 2;
320
 
        s->m.p_mv_table= s->motion_val16[plane] + s->m.mb_stride + 1;
321
 
        s->m.dsp= s->dsp; //move
 
331
        s->m.p_mv_table                      = s->motion_val16[plane] +
 
332
                                               s->m.mb_stride + 1;
 
333
        s->m.dsp                             = s->dsp; // move
322
334
        ff_init_me(&s->m);
323
335
 
324
 
        s->m.me.dia_size= s->avctx->dia_size;
325
 
        s->m.first_slice_line=1;
 
336
        s->m.me.dia_size      = s->avctx->dia_size;
 
337
        s->m.first_slice_line = 1;
326
338
        for (y = 0; y < block_height; y++) {
327
 
            s->m.new_picture.f.data[0] = src - y*16*stride; //ugly
328
 
            s->m.mb_y= y;
 
339
            s->m.new_picture.f.data[0] = src - y * 16 * stride; // ugly
 
340
            s->m.mb_y                  = y;
329
341
 
330
 
            for(i=0; i<16 && i + 16*y<height; i++){
331
 
                memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
332
 
                for(x=width; x<16*block_width; x++)
333
 
                    src[i*stride+x]= src[i*stride+x-1];
 
342
            for (i = 0; i < 16 && i + 16 * y < height; i++) {
 
343
                memcpy(&src[i * stride], &src_plane[(i + 16 * y) * src_stride],
 
344
                       width);
 
345
                for (x = width; x < 16 * block_width; x++)
 
346
                    src[i * stride + x] = src[i * stride + x - 1];
334
347
            }
335
 
            for(; i<16 && i + 16*y<16*block_height; i++)
336
 
                memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
 
348
            for (; i < 16 && i + 16 * y < 16 * block_height; i++)
 
349
                memcpy(&src[i * stride], &src[(i - 1) * stride],
 
350
                       16 * block_width);
337
351
 
338
352
            for (x = 0; x < block_width; x++) {
339
 
                s->m.mb_x= x;
 
353
                s->m.mb_x = x;
340
354
                ff_init_block_index(&s->m);
341
355
                ff_update_block_index(&s->m);
342
356
 
343
357
                ff_estimate_p_frame_motion(&s->m, x, y);
344
358
            }
345
 
            s->m.first_slice_line=0;
 
359
            s->m.first_slice_line = 0;
346
360
        }
347
361
 
348
362
        ff_fix_long_p_mvs(&s->m);
349
 
        ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code, CANDIDATE_MB_TYPE_INTER, 0);
 
363
        ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code,
 
364
                        CANDIDATE_MB_TYPE_INTER, 0);
350
365
    }
351
366
 
352
 
    s->m.first_slice_line=1;
 
367
    s->m.first_slice_line = 1;
353
368
    for (y = 0; y < block_height; y++) {
354
 
        for(i=0; i<16 && i + 16*y<height; i++){
355
 
            memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
356
 
            for(x=width; x<16*block_width; x++)
357
 
                src[i*stride+x]= src[i*stride+x-1];
 
369
        for (i = 0; i < 16 && i + 16 * y < height; i++) {
 
370
            memcpy(&src[i * stride], &src_plane[(i + 16 * y) * src_stride],
 
371
                   width);
 
372
            for (x = width; x < 16 * block_width; x++)
 
373
                src[i * stride + x] = src[i * stride + x - 1];
358
374
        }
359
 
        for(; i<16 && i + 16*y<16*block_height; i++)
360
 
            memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
 
375
        for (; i < 16 && i + 16 * y < 16 * block_height; i++)
 
376
            memcpy(&src[i * stride], &src[(i - 1) * stride], 16 * block_width);
361
377
 
362
 
        s->m.mb_y= y;
 
378
        s->m.mb_y = y;
363
379
        for (x = 0; x < block_width; x++) {
364
 
            uint8_t reorder_buffer[3][6][7*32];
 
380
            uint8_t reorder_buffer[3][6][7 * 32];
365
381
            int count[3][6];
366
 
            int offset = y * 16 * stride + x * 16;
367
 
            uint8_t *decoded= decoded_plane + offset;
368
 
            uint8_t *ref= ref_plane + offset;
369
 
            int score[4]={0,0,0,0}, best;
370
 
            uint8_t *temp = s->scratchbuf;
 
382
            int offset       = y * 16 * stride + x * 16;
 
383
            uint8_t *decoded = decoded_plane + offset;
 
384
            uint8_t *ref     = ref_plane + offset;
 
385
            int score[4]     = { 0, 0, 0, 0 }, best;
 
386
            uint8_t *temp    = s->scratchbuf;
371
387
 
372
 
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3000){ //FIXME check size
 
388
            if (s->pb.buf_end - s->pb.buf -
 
389
                (put_bits_count(&s->pb) >> 3) < 3000) { // FIXME: check size
373
390
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
374
391
                return -1;
375
392
            }
376
393
 
377
 
            s->m.mb_x= x;
 
394
            s->m.mb_x = x;
378
395
            ff_init_block_index(&s->m);
379
396
            ff_update_block_index(&s->m);
380
397
 
381
 
            if(s->picture.pict_type == AV_PICTURE_TYPE_I || (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTRA)){
382
 
                for(i=0; i<6; i++){
383
 
                    init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i], 7*32);
384
 
                }
385
 
                if(s->picture.pict_type == AV_PICTURE_TYPE_P){
386
 
                    const uint8_t *vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_INTRA];
 
398
            if (s->picture.pict_type == AV_PICTURE_TYPE_I ||
 
399
                (s->m.mb_type[x + y * s->m.mb_stride] &
 
400
                 CANDIDATE_MB_TYPE_INTRA)) {
 
401
                for (i = 0; i < 6; i++)
 
402
                    init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i],
 
403
                                  7 * 32);
 
404
                if (s->picture.pict_type == AV_PICTURE_TYPE_P) {
 
405
                    const uint8_t *vlc = ff_svq1_block_type_vlc[SVQ1_BLOCK_INTRA];
387
406
                    put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
388
 
                    score[0]= vlc[1]*lambda;
 
407
                    score[0] = vlc[1] * lambda;
389
408
                }
390
 
                score[0]+= encode_block(s, src+16*x, NULL, temp, stride, 5, 64, lambda, 1);
391
 
                for(i=0; i<6; i++){
392
 
                    count[0][i]= put_bits_count(&s->reorder_pb[i]);
 
409
                score[0] += encode_block(s, src + 16 * x, NULL, temp, stride,
 
410
                                         5, 64, lambda, 1);
 
411
                for (i = 0; i < 6; i++) {
 
412
                    count[0][i] = put_bits_count(&s->reorder_pb[i]);
393
413
                    flush_put_bits(&s->reorder_pb[i]);
394
414
                }
395
 
            }else
396
 
                score[0]= INT_MAX;
397
 
 
398
 
            best=0;
399
 
 
400
 
            if(s->picture.pict_type == AV_PICTURE_TYPE_P){
401
 
                const uint8_t *vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_INTER];
 
415
            } else
 
416
                score[0] = INT_MAX;
 
417
 
 
418
            best = 0;
 
419
 
 
420
            if (s->picture.pict_type == AV_PICTURE_TYPE_P) {
 
421
                const uint8_t *vlc = ff_svq1_block_type_vlc[SVQ1_BLOCK_INTER];
402
422
                int mx, my, pred_x, pred_y, dxy;
403
423
                int16_t *motion_ptr;
404
424
 
405
 
                motion_ptr= ff_h263_pred_motion(&s->m, 0, 0, &pred_x, &pred_y);
406
 
                if(s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER){
407
 
                    for(i=0; i<6; i++)
408
 
                        init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i], 7*32);
 
425
                motion_ptr = ff_h263_pred_motion(&s->m, 0, 0, &pred_x, &pred_y);
 
426
                if (s->m.mb_type[x + y * s->m.mb_stride] &
 
427
                    CANDIDATE_MB_TYPE_INTER) {
 
428
                    for (i = 0; i < 6; i++)
 
429
                        init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i],
 
430
                                      7 * 32);
409
431
 
410
432
                    put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
411
433
 
412
 
                    s->m.pb= s->reorder_pb[5];
413
 
                    mx= motion_ptr[0];
414
 
                    my= motion_ptr[1];
415
 
                    assert(mx>=-32 && mx<=31);
416
 
                    assert(my>=-32 && my<=31);
417
 
                    assert(pred_x>=-32 && pred_x<=31);
418
 
                    assert(pred_y>=-32 && pred_y<=31);
 
434
                    s->m.pb = s->reorder_pb[5];
 
435
                    mx      = motion_ptr[0];
 
436
                    my      = motion_ptr[1];
 
437
                    assert(mx     >= -32 && mx     <= 31);
 
438
                    assert(my     >= -32 && my     <= 31);
 
439
                    assert(pred_x >= -32 && pred_x <= 31);
 
440
                    assert(pred_y >= -32 && pred_y <= 31);
419
441
                    ff_h263_encode_motion(&s->m, mx - pred_x, 1);
420
442
                    ff_h263_encode_motion(&s->m, my - pred_y, 1);
421
 
                    s->reorder_pb[5]= s->m.pb;
422
 
                    score[1] += lambda*put_bits_count(&s->reorder_pb[5]);
423
 
 
424
 
                    dxy= (mx&1) + 2*(my&1);
425
 
 
426
 
                    s->dsp.put_pixels_tab[0][dxy](temp+16, ref + (mx>>1) + stride*(my>>1), stride, 16);
427
 
 
428
 
                    score[1]+= encode_block(s, src+16*x, temp+16, decoded, stride, 5, 64, lambda, 0);
429
 
                    best= score[1] <= score[0];
430
 
 
431
 
                    vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_SKIP];
432
 
                    score[2]= s->dsp.sse[0](NULL, src+16*x, ref, stride, 16);
433
 
                    score[2]+= vlc[1]*lambda;
434
 
                    if(score[2] < score[best] && mx==0 && my==0){
435
 
                        best=2;
 
443
                    s->reorder_pb[5] = s->m.pb;
 
444
                    score[1]        += lambda * put_bits_count(&s->reorder_pb[5]);
 
445
 
 
446
                    dxy = (mx & 1) + 2 * (my & 1);
 
447
 
 
448
                    s->dsp.put_pixels_tab[0][dxy](temp + 16,
 
449
                                                  ref + (mx >> 1) +
 
450
                                                  stride * (my >> 1),
 
451
                                                  stride, 16);
 
452
 
 
453
                    score[1] += encode_block(s, src + 16 * x, temp + 16,
 
454
                                             decoded, stride, 5, 64, lambda, 0);
 
455
                    best      = score[1] <= score[0];
 
456
 
 
457
                    vlc       = ff_svq1_block_type_vlc[SVQ1_BLOCK_SKIP];
 
458
                    score[2]  = s->dsp.sse[0](NULL, src + 16 * x, ref,
 
459
                                              stride, 16);
 
460
                    score[2] += vlc[1] * lambda;
 
461
                    if (score[2] < score[best] && mx == 0 && my == 0) {
 
462
                        best = 2;
436
463
                        s->dsp.put_pixels_tab[0][0](decoded, ref, stride, 16);
437
 
                        for(i=0; i<6; i++){
438
 
                            count[2][i]=0;
439
 
                        }
 
464
                        for (i = 0; i < 6; i++)
 
465
                            count[2][i] = 0;
440
466
                        put_bits(&s->pb, vlc[1], vlc[0]);
441
467
                    }
442
468
                }
443
469
 
444
 
                if(best==1){
445
 
                    for(i=0; i<6; i++){
446
 
                        count[1][i]= put_bits_count(&s->reorder_pb[i]);
 
470
                if (best == 1) {
 
471
                    for (i = 0; i < 6; i++) {
 
472
                        count[1][i] = put_bits_count(&s->reorder_pb[i]);
447
473
                        flush_put_bits(&s->reorder_pb[i]);
448
474
                    }
449
 
                }else{
450
 
                    motion_ptr[0                 ] = motion_ptr[1                 ]=
451
 
                    motion_ptr[2                 ] = motion_ptr[3                 ]=
452
 
                    motion_ptr[0+2*s->m.b8_stride] = motion_ptr[1+2*s->m.b8_stride]=
453
 
                    motion_ptr[2+2*s->m.b8_stride] = motion_ptr[3+2*s->m.b8_stride]=0;
 
475
                } else {
 
476
                    motion_ptr[0]                      =
 
477
                    motion_ptr[1]                      =
 
478
                    motion_ptr[2]                      =
 
479
                    motion_ptr[3]                      =
 
480
                    motion_ptr[0 + 2 * s->m.b8_stride] =
 
481
                    motion_ptr[1 + 2 * s->m.b8_stride] =
 
482
                    motion_ptr[2 + 2 * s->m.b8_stride] =
 
483
                    motion_ptr[3 + 2 * s->m.b8_stride] = 0;
454
484
                }
455
485
            }
456
486
 
457
487
            s->rd_total += score[best];
458
488
 
459
 
            for(i=5; i>=0; i--){
460
 
                avpriv_copy_bits(&s->pb, reorder_buffer[best][i], count[best][i]);
461
 
            }
462
 
            if(best==0){
 
489
            for (i = 5; i >= 0; i--)
 
490
                avpriv_copy_bits(&s->pb, reorder_buffer[best][i],
 
491
                                 count[best][i]);
 
492
            if (best == 0)
463
493
                s->dsp.put_pixels_tab[0][0](decoded, temp, stride, 16);
464
 
            }
465
494
        }
466
 
        s->m.first_slice_line=0;
 
495
        s->m.first_slice_line = 0;
467
496
    }
468
497
    return 0;
469
498
}
470
499
 
471
500
/**
 * Initialise the SVQ1 encoder private context.
 *
 * Derives the luma/chroma block grid from the frame dimensions and
 * allocates the motion-estimation work buffers plus the per-block
 * mb_type/dummy tables.
 *
 * @param avctx codec context; avctx->priv_data is the SVQ1Context
 * @return 0 on success, AVERROR(ENOMEM) if any buffer cannot be allocated
 */
static av_cold int svq1_encode_init(AVCodecContext *avctx)
{
    SVQ1Context *const s = avctx->priv_data;

    ff_dsputil_init(&s->dsp, avctx);
    avctx->coded_frame = &s->picture;

    s->frame_width  = avctx->width;
    s->frame_height = avctx->height;

    /* Number of 16x16 blocks needed to cover the luma plane (rounded up). */
    s->y_block_width  = (s->frame_width  + 15) / 16;
    s->y_block_height = (s->frame_height + 15) / 16;

    /* Chroma planes are a quarter of the luma size in each direction. */
    s->c_block_width  = (s->frame_width  / 4 + 15) / 16;
    s->c_block_height = (s->frame_height / 4 + 15) / 16;

    s->avctx               = avctx;
    s->m.avctx             = avctx;
    s->m.picture_structure = PICT_FRAME;
    /* me.temp shares the scratchpad allocation; only scratchpad is freed. */
    s->m.me.temp           =
    s->m.me.scratchpad     = av_mallocz((avctx->width + 64) *
                                        2 * 16 * 2 * sizeof(uint8_t));
    s->m.me.map            = av_mallocz(ME_MAP_SIZE * sizeof(uint32_t));
    s->m.me.score_map      = av_mallocz(ME_MAP_SIZE * sizeof(uint32_t));
    s->mb_type             = av_mallocz((s->y_block_width + 1) *
                                        s->y_block_height * sizeof(int16_t));
    s->dummy               = av_mallocz((s->y_block_width + 1) *
                                        s->y_block_height * sizeof(int32_t));

    /* Fail cleanly instead of letting later code dereference NULL;
     * release whatever was obtained so nothing leaks. */
    if (!s->m.me.scratchpad || !s->m.me.map || !s->m.me.score_map ||
        !s->mb_type || !s->dummy) {
        av_log(avctx, AV_LOG_ERROR, "Cannot allocate encoder buffers.\n");
        s->m.me.temp = NULL;
        av_freep(&s->m.me.scratchpad);
        av_freep(&s->m.me.map);
        av_freep(&s->m.me.score_map);
        av_freep(&s->mb_type);
        av_freep(&s->dummy);
        return AVERROR(ENOMEM);
    }

    ff_h263_encode_init(&s->m); // mv_penalty

    return 0;
}
499
532
 
500
 
static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf,
501
 
    int buf_size, void *data)
 
533
static int svq1_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
534
                             const AVFrame *pict, int *got_packet)
502
535
{
503
 
    SVQ1Context * const s = avctx->priv_data;
504
 
    AVFrame *pict = data;
505
 
    AVFrame * const p= (AVFrame*)&s->picture;
 
536
    SVQ1Context *const s = avctx->priv_data;
 
537
    AVFrame *const p     = &s->picture;
506
538
    AVFrame temp;
507
 
    int i;
508
 
 
509
 
    if(avctx->pix_fmt != PIX_FMT_YUV410P){
 
539
    int i, ret;
 
540
 
 
541
    if (!pkt->data &&
 
542
        (ret = av_new_packet(pkt, s->y_block_width * s->y_block_height *
 
543
                             MAX_MB_BYTES * 3 + FF_MIN_BUFFER_SIZE) < 0)) {
 
544
        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
 
545
        return ret;
 
546
    }
 
547
 
 
548
    if (avctx->pix_fmt != AV_PIX_FMT_YUV410P) {
510
549
        av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
511
550
        return -1;
512
551
    }
513
552
 
514
 
    if(!s->current_picture.data[0]){
515
 
        avctx->get_buffer(avctx, &s->current_picture);
516
 
        avctx->get_buffer(avctx, &s->last_picture);
 
553
    if (!s->current_picture.data[0]) {
 
554
        ff_get_buffer(avctx, &s->current_picture);
 
555
        ff_get_buffer(avctx, &s->last_picture);
517
556
        s->scratchbuf = av_malloc(s->current_picture.linesize[0] * 16 * 2);
518
557
    }
519
558
 
520
 
    temp= s->current_picture;
521
 
    s->current_picture= s->last_picture;
522
 
    s->last_picture= temp;
523
 
 
524
 
    init_put_bits(&s->pb, buf, buf_size);
525
 
 
526
 
    *p = *pict;
527
 
    p->pict_type = avctx->gop_size && avctx->frame_number % avctx->gop_size ? AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_I;
 
559
    temp               = s->current_picture;
 
560
    s->current_picture = s->last_picture;
 
561
    s->last_picture    = temp;
 
562
 
 
563
    init_put_bits(&s->pb, pkt->data, pkt->size);
 
564
 
 
565
    *p           = *pict;
 
566
    p->pict_type = avctx->gop_size && avctx->frame_number % avctx->gop_size ?
 
567
                   AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_I;
528
568
    p->key_frame = p->pict_type == AV_PICTURE_TYPE_I;
529
569
 
530
570
    svq1_write_header(s, p->pict_type);
531
 
    for(i=0; i<3; i++){
532
 
        if(svq1_encode_plane(s, i,
533
 
            s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i],
534
 
            s->frame_width / (i?4:1), s->frame_height / (i?4:1),
535
 
            s->picture.linesize[i], s->current_picture.linesize[i]) < 0)
536
 
                return -1;
537
 
    }
 
571
    for (i = 0; i < 3; i++)
 
572
        if (svq1_encode_plane(s, i,
 
573
                              s->picture.data[i],
 
574
                              s->last_picture.data[i],
 
575
                              s->current_picture.data[i],
 
576
                              s->frame_width  / (i ? 4 : 1),
 
577
                              s->frame_height / (i ? 4 : 1),
 
578
                              s->picture.linesize[i],
 
579
                              s->current_picture.linesize[i]) < 0)
 
580
            return -1;
538
581
 
539
 
//    avpriv_align_put_bits(&s->pb);
540
 
    while(put_bits_count(&s->pb) & 31)
 
582
    // avpriv_align_put_bits(&s->pb);
 
583
    while (put_bits_count(&s->pb) & 31)
541
584
        put_bits(&s->pb, 1, 0);
542
585
 
543
586
    flush_put_bits(&s->pb);
544
587
 
545
 
    return put_bits_count(&s->pb) / 8;
 
588
    pkt->size = put_bits_count(&s->pb) / 8;
 
589
    if (p->pict_type == AV_PICTURE_TYPE_I)
 
590
        pkt->flags |= AV_PKT_FLAG_KEY;
 
591
    *got_packet = 1;
 
592
 
 
593
    return 0;
546
594
}
547
595
 
548
596
static av_cold int svq1_encode_end(AVCodecContext *avctx)
549
597
{
550
 
    SVQ1Context * const s = avctx->priv_data;
 
598
    SVQ1Context *const s = avctx->priv_data;
551
599
    int i;
552
600
 
553
 
    av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", s->rd_total/(double)(avctx->width*avctx->height*avctx->frame_number));
 
601
    av_log(avctx, AV_LOG_DEBUG, "RD: %f\n",
 
602
           s->rd_total / (double)(avctx->width * avctx->height *
 
603
                                  avctx->frame_number));
554
604
 
555
605
    av_freep(&s->m.me.scratchpad);
556
606
    av_freep(&s->m.me.map);
559
609
    av_freep(&s->dummy);
560
610
    av_freep(&s->scratchbuf);
561
611
 
562
 
    for(i=0; i<3; i++){
 
612
    for (i = 0; i < 3; i++) {
563
613
        av_freep(&s->motion_val8[i]);
564
614
        av_freep(&s->motion_val16[i]);
565
615
    }
567
617
    return 0;
568
618
}
569
619
 
570
 
 
571
620
AVCodec ff_svq1_encoder = {
572
621
    .name           = "svq1",
573
622
    .type           = AVMEDIA_TYPE_VIDEO,
574
 
    .id             = CODEC_ID_SVQ1,
 
623
    .id             = AV_CODEC_ID_SVQ1,
575
624
    .priv_data_size = sizeof(SVQ1Context),
576
625
    .init           = svq1_encode_init,
577
 
    .encode         = svq1_encode_frame,
 
626
    .encode2        = svq1_encode_frame,
578
627
    .close          = svq1_encode_end,
579
 
    .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV410P, PIX_FMT_NONE},
580
 
    .long_name= NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 1 / Sorenson Video 1 / SVQ1"),
 
628
    .pix_fmts       = (const enum PixelFormat[]) { AV_PIX_FMT_YUV410P,
 
629
                                                   AV_PIX_FMT_NONE },
 
630
    .long_name      = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 1 / Sorenson Video 1 / SVQ1"),
581
631
};