~siretart/x264/trunk

« back to all changes in this revision

Viewing changes to encoder/lookahead.c

  • Committer: Fiona Glaser
  • Author(s): Steven Walters
  • Date: 2009-09-02 04:06:20 UTC
  • Revision ID: git-v1:6940dcaef140d8a0c43c9a62db158e9d71a8fdeb
Threaded lookahead
Move lookahead into a separate thread, set to higher priority than the other threads, for optimal performance.
Reduces the amount that lookahead bottlenecks encoding, greatly increasing performance with lookahead-intensive settings (e.g. b-adapt 2) on many-core CPUs.
Buffer size can be controlled with --sync-lookahead, which defaults to auto (threads+bframes buffer size).
Note that this buffer is separate from the rc-lookahead value.
Note also that this does not split lookahead itself into multiple threads yet; this may be added in the future.
Additionally, split frames into "fdec" and "fenc" frame types and keep the two separate.
This split greatly reduces memory usage, which helps compensate for the larger lookahead size.
Extremely special thanks to Michael Kazmier and Alex Giladi of Avail Media, the original authors of this patch.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*****************************************************************************
 
2
 * lookahead.c: Lookahead slicetype decisions for x264
 
3
 *****************************************************************************
 
4
 * Lookahead.c and associated modifications:
 
5
 *     Copyright (C) 2008 Avail Media
 
6
 *
 
7
 * Authors: Michael Kazmier <mkazmier@availmedia.com>
 
8
 *          Alex Giladi <agiladi@availmedia.com>
 
9
 *          Steven Walters <kemuri9@gmail.com>
 
10
 *
 
11
 * This program is free software; you can redistribute it and/or modify
 
12
 * it under the terms of the GNU General Public License as published by
 
13
 * the Free Software Foundation; either version 2 of the License, or
 
14
 * (at your option) any later version.
 
15
 *
 
16
 * This program is distributed in the hope that it will be useful,
 
17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
19
 * GNU General Public License for more details.
 
20
 *
 
21
 * You should have received a copy of the GNU General Public License
 
22
 * along with this program; if not, write to the Free Software
 
23
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 
24
 *****************************************************************************/
 
25
 
 
26
/* LOOKAHEAD (threaded and non-threaded mode)
 
27
 *
 
28
 * Lookahead types:
 
29
 *     [1] Slice type / scene cut;
 
30
 *
 
31
 * In non-threaded mode, we run the existing slicetype decision code as it was.
 
32
 * In threaded mode, we run in a separate thread, that lives between the calls
 
33
 * to x264_encoder_open() and x264_encoder_close(), and performs lookahead for
 
34
 * the number of frames specified in rc_lookahead.  Recommended setting is
 
35
 * # of bframes + # of threads.
 
36
 */
 
37
#include "common/common.h"
 
38
#include "common/cpu.h"
 
39
#include "analyse.h"
 
40
 
 
41
/* Move `count` frames from the head of `src` to the tail of `dst`, then wake
 * anything waiting for `dst` to fill or for `src` to drain.
 * This function takes no locks itself; the threaded callers in this file hold
 * the list mutexes around the call. */
static void x264_lookahead_shift( x264_synch_frame_list_t *dst, x264_synch_frame_list_t *src, int count )
{
    int remaining;

    for( remaining = count; remaining != 0; remaining-- )
    {
        x264_frame_t *moved;

        /* dst needs a free slot and src needs a frame to give */
        assert( dst->i_size != dst->i_max_size );
        assert( src->i_size );

        moved = x264_frame_shift( src->list );
        dst->list[ dst->i_size ] = moved;
        dst->i_size++;
        src->i_size--;
    }

    /* nothing moved, so nobody needs waking */
    if( !count )
        return;

    x264_pthread_cond_broadcast( &dst->cv_fill );
    x264_pthread_cond_broadcast( &src->cv_empty );
}
 
57
 
 
58
/* Replace the remembered "last non-B frame" with `new_nonb`.
 * The previous frame (if any) is handed to x264_frame_push_unused() first,
 * then `new_nonb` gains one reference for being held by the lookahead. */
static void x264_lookahead_update_last_nonb( x264_t *h, x264_frame_t *new_nonb )
{
    x264_lookahead_t *look = h->lookahead;
    x264_frame_t *previous = look->last_nonb;

    /* release the old frame before taking the new reference */
    if( previous )
        x264_frame_push_unused( h, previous );

    new_nonb->i_reference_count++;
    look->last_nonb = new_nonb;
}
 
65
 
 
66
#ifdef HAVE_PTHREAD
 
67
/* Threaded-mode helper: run the slice type decision on the `next` list, then
 * move the leading run of B-frames plus the first non-B frame into the output
 * buffer (`ofbuf`), blocking while that buffer is full. */
static void x264_lookahead_slicetype_decide( x264_t *h )
{
    x264_lookahead_t *look = h->lookahead;
    int n_b;

    x264_stack_align( x264_slicetype_decide, h );

    /* count the B-frames in front of the first non-B frame */
    for( n_b = 0; IS_X264_TYPE_B( look->next.list[n_b]->i_type ); n_b++ )
        ;
    x264_lookahead_update_last_nonb( h, look->next.list[n_b] );

    /* NOTE(review): the wait below only guarantees one free slot, while
     * n_b + 1 frames are shifted afterwards -- confirm ofbuf is sized so
     * the assert in x264_lookahead_shift() cannot trip. */
    x264_pthread_mutex_lock( &look->ofbuf.mutex );
    while( look->ofbuf.i_size == look->ofbuf.i_max_size )
        x264_pthread_cond_wait( &look->ofbuf.cv_empty, &look->ofbuf.mutex );

    /* lock order here is ofbuf -> next */
    x264_pthread_mutex_lock( &look->next.mutex );
    x264_lookahead_shift( &look->ofbuf, &look->next, n_b + 1 );
    x264_pthread_mutex_unlock( &look->next.mutex );

    /* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too.
     * This runs while ofbuf.mutex is still held. */
    if( look->b_analyse_keyframe && IS_X264_TYPE_I( look->last_nonb->i_type ) )
        x264_stack_align( x264_slicetype_analyse, h, 1 );

    x264_pthread_mutex_unlock( &look->ofbuf.mutex );
}
 
90
 
 
91
/* Body of the lookahead thread.
 *
 * Repeatedly moves frames from the input buffer (`ifbuf`) into `next` and,
 * once `next` holds more than i_slicetype_length frames, runs the slice type
 * decision which pushes decided frames into `ofbuf`.  When b_exit_thread is
 * raised the remaining input is drained, every frame left in `next` is
 * decided, and the thread marks itself inactive.
 *
 * Synchronisation notes:
 *  - b_exit_thread is tested while holding ifbuf.mutex, the same mutex the
 *    cv_fill wait below uses, so the flag check and the wait share one
 *    critical section.
 *  - b_thread_active is cleared under ofbuf.mutex and followed by a
 *    broadcast on ofbuf.cv_fill, because x264_lookahead_get_frames() sleeps
 *    on that condition with b_thread_active as part of its predicate; without
 *    the broadcast a consumer already waiting would never re-check it. */
static void x264_lookahead_thread( x264_t *h )
{
    int shift;
#ifdef HAVE_MMX
    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
        x264_cpu_mask_misalign_sse();
#endif
    h->lookahead->b_thread_active = 1;
    while( 1 )
    {
        x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex );
        if( h->lookahead->b_exit_thread )
        {
            x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
            break;
        }
        /* move as many input frames into `next` as it has room for */
        x264_pthread_mutex_lock( &h->lookahead->next.mutex );
        shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
        x264_lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, shift );
        x264_pthread_mutex_unlock( &h->lookahead->next.mutex );
        if( h->lookahead->next.i_size <= h->lookahead->i_slicetype_length )
        {
            /* not enough frames to decide on yet: sleep until more input or exit */
            while( !h->lookahead->ifbuf.i_size && !h->lookahead->b_exit_thread )
                x264_pthread_cond_wait( &h->lookahead->ifbuf.cv_fill, &h->lookahead->ifbuf.mutex );
            x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
        }
        else
        {
            x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
            x264_lookahead_slicetype_decide( h );
        }
    }   /* end of input frames */

    /* flush whatever is still queued on the input side */
    x264_pthread_mutex_lock( &h->lookahead->next.mutex );
    x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex );
    x264_lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, h->lookahead->ifbuf.i_size );
    x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
    x264_pthread_mutex_unlock( &h->lookahead->next.mutex );
    while( h->lookahead->next.i_size )
        x264_lookahead_slicetype_decide( h );

    /* publish "inactive" under the consumer's mutex and wake any waiter */
    x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex );
    h->lookahead->b_thread_active = 0;
    x264_pthread_cond_broadcast( &h->lookahead->ofbuf.cv_fill );
    x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex );
}
 
127
#endif
 
128
 
 
129
/* Allocate and initialise the shared lookahead state and, when
 * param.i_sync_lookahead is non-zero, start the lookahead thread.
 *
 * h                  : encoder handle; every h->thread[i] for
 *                      i < param.i_threads is pointed at the new state.
 * i_slicetype_length : stored as the lookahead's i_slicetype_length.
 *
 * Returns 0 on success, -1 on failure (the lookahead struct is freed). */
int x264_lookahead_init( x264_t *h, int i_slicetype_length )
{
    x264_lookahead_t *look;
    CHECKED_MALLOCZERO( look, sizeof(x264_lookahead_t) );
    int i;
    for( i = 0; i < h->param.i_threads; i++ )
        h->thread[i]->lookahead = look;

    look->i_last_idr = - h->param.i_keyint_max;
    look->b_analyse_keyframe = (h->param.rc.b_mb_tree || (h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead))
                               && !h->param.rc.b_stat_read;
    look->i_slicetype_length = i_slicetype_length;

    /* init frame lists */
    if( x264_synch_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
        x264_synch_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
        x264_synch_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
        goto fail;

    /* no separate lookahead thread requested: the lists are all we need */
    if( !h->param.i_sync_lookahead )
        return 0;

    /* the lookahead thread gets its own context in the slot after the
     * regular threads, initialised as a copy of h */
    x264_t *look_h = h->thread[h->param.i_threads];
    *look_h = *h;
    if( x264_macroblock_cache_init( look_h ) )
        goto fail;

    UNUSED x264_pthread_attr_t attr;
    if( x264_pthread_attr_init( &attr ) )
        goto fail;
#if defined(USE_REAL_PTHREAD) && !defined(SYS_LINUX)
    int offset = sched_get_priority_max( SCHED_OTHER );
    x264_log( h, X264_LOG_DEBUG, "setting priority of lookahead thread to %d\n", offset );
    struct sched_param sp;
    pthread_attr_getschedparam( &attr, &sp );
    sp.sched_priority = offset;
    pthread_attr_setschedparam( &attr, &sp );
#endif

    /* Mark the thread active before it exists: x264_lookahead_get_frames()
     * only waits for output while b_thread_active is set, and it may run
     * before the new thread has been scheduled and set the flag itself. */
    look->b_thread_active = 1;

    if( x264_pthread_create( &look_h->thread_handle, &attr, (void *)x264_lookahead_thread, look_h ) )
    {
        /* do not leak the attribute object on the error path */
        x264_pthread_attr_destroy( &attr );
        goto fail;
    }

    x264_pthread_attr_destroy( &attr );

    return 0;
fail:
    x264_free( look );
    return -1;
}
 
178
 
 
179
/* Stop the lookahead thread (if one was started) and release all lookahead
 * state: the thread's private context, the three frame lists, the retained
 * last non-B frame and the lookahead struct itself. */
void x264_lookahead_delete( x264_t *h )
{
    if( h->param.i_sync_lookahead )
    {
        /* Raise the exit flag and signal while holding ifbuf.mutex.  The
         * lookahead thread tests b_exit_thread and then waits on cv_fill
         * under this same mutex; changing the flag outside it would let the
         * broadcast land between the thread's test and its wait, leaving
         * the thread asleep and the join below blocked forever. */
        x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex );
        h->lookahead->b_exit_thread = 1;
        x264_pthread_cond_broadcast( &h->lookahead->ifbuf.cv_fill );
        x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );

        x264_pthread_join( h->thread[h->param.i_threads]->thread_handle, NULL );
        x264_macroblock_cache_end( h->thread[h->param.i_threads] );
        x264_free( h->thread[h->param.i_threads] );
    }
    x264_synch_frame_list_delete( &h->lookahead->ifbuf );
    x264_synch_frame_list_delete( &h->lookahead->next );
    x264_synch_frame_list_delete( &h->lookahead->ofbuf );
    if( h->lookahead->last_nonb )
        x264_frame_delete( h->lookahead->last_nonb );
    x264_free( h->lookahead );
}
 
196
 
 
197
/* Queue an input frame for lookahead.
 * With a lookahead thread (param.i_sync_lookahead != 0) the frame goes to the
 * thread's input buffer; otherwise it is appended directly to `next`. */
void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame )
{
    x264_synch_frame_list_t *target = h->param.i_sync_lookahead
                                    ? &h->lookahead->ifbuf
                                    : &h->lookahead->next;

    x264_synch_frame_list_push( target, frame );
}
 
204
 
 
205
/* Returns 1 when both the output buffer and the `next` list hold no frames,
 * 0 otherwise.  The input buffer (`ifbuf`) is not consulted. */
int x264_lookahead_is_empty( x264_t *h )
{
    /* output buffer is checked first; `next` only if it was empty */
    if( x264_synch_frame_list_get_size( &h->lookahead->ofbuf ) )
        return 0;

    return !x264_synch_frame_list_get_size( &h->lookahead->next );
}
 
210
 
 
211
/* Move one decided group of frames from the lookahead output buffer to
 * h->frames.current: first the non-B frame that follows the leading run of
 * B-frames, then (with b_bframe_pyramid and more than one B-frame) the middle
 * B-frame re-tagged as BREF, then the remaining B-frames in list order.
 * Does nothing if the list ends before a non-B frame is found. */
static void x264_lookahead_encoder_shift( x264_t *h )
{
    x264_lookahead_t *look = h->lookahead;
    x264_synch_frame_list_t *out = &look->ofbuf;
    int n_b;

    /* count the B-frames at the head of the (NULL-terminated) list */
    for( n_b = 0; out->list[n_b]; n_b++ )
    {
        while( look->b_thread_active && !out->i_size )
            x264_pthread_cond_wait( &out->cv_fill, &out->mutex );
        if( !IS_X264_TYPE_B( out->list[n_b]->i_type ) )
            break;
    }

    /* ran off the end of the list without meeting a non-B frame */
    if( !out->list[n_b] )
        return;

    /* the non-B frame is queued ahead of the B-frames that precede it */
    x264_frame_push( h->frames.current, x264_frame_shift( &out->list[n_b] ) );
    out->i_size--;

    if( h->param.b_bframe_pyramid && n_b > 1 )
    {
        x264_frame_t *mid = x264_frame_shift( &out->list[n_b/2] );
        out->i_size--;
        mid->i_type = X264_TYPE_BREF;
        x264_frame_push( h->frames.current, mid );
        n_b--;
    }

    for( ; n_b != 0; n_b-- )
    {
        x264_frame_push( h->frames.current, x264_frame_shift( out->list ) );
        out->i_size--;
    }

    /* room was freed in the output buffer */
    x264_pthread_cond_broadcast( &out->cv_empty );
}
 
246
 
 
247
/* Fetch the next decided frames into h->frames.current.
 * Threaded mode: wait (under ofbuf.mutex) until the lookahead thread has
 * produced output or is no longer active, then pull frames from ofbuf.
 * Non-threaded mode: run the slice type decision inline, unless frames are
 * already pending in h->frames.current or `next` is empty. */
void x264_lookahead_get_frames( x264_t *h )
{
    x264_lookahead_t *look = h->lookahead;
    int n_b;

    if( h->param.i_sync_lookahead )
    {
        /* We have a lookahead thread, so get frames from there */
        x264_pthread_mutex_lock( &look->ofbuf.mutex );
        while( !look->ofbuf.i_size && look->b_thread_active )
            x264_pthread_cond_wait( &look->ofbuf.cv_fill, &look->ofbuf.mutex );
        x264_lookahead_encoder_shift( h );
        x264_pthread_mutex_unlock( &look->ofbuf.mutex );
        return;
    }

    /* We are not running a lookahead thread, so perform all the slicetype decide on the fly */
    if( h->frames.current[0] || !look->next.i_size )
        return;

    x264_stack_align( x264_slicetype_decide, h );

    /* count the B-frames in front of the first non-B frame */
    for( n_b = 0; IS_X264_TYPE_B( look->next.list[n_b]->i_type ); n_b++ )
        ;

    x264_lookahead_update_last_nonb( h, look->next.list[n_b] );
    x264_lookahead_shift( &look->ofbuf, &look->next, n_b + 1 );

    /* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too. */
    if( look->b_analyse_keyframe && IS_X264_TYPE_I( look->last_nonb->i_type ) )
        x264_stack_align( x264_slicetype_analyse, h, 1 );

    x264_lookahead_encoder_shift( h );
}