1
/*****************************************************************************
2
* algo_phosphor.c : Phosphor algorithm for the VLC deinterlacer
3
*****************************************************************************
4
* Copyright (C) 2011 the VideoLAN team
5
* $Id: 0d9aa74080c266c0a39cb8b7611c2f05bda141ce $
7
* Author: Juha Jeronen <juha.jeronen@jyu.fi>
9
* This program is free software; you can redistribute it and/or modify
10
* it under the terms of the GNU General Public License as published by
11
* the Free Software Foundation; either version 2 of the License, or
12
* (at your option) any later version.
14
* This program is distributed in the hope that it will be useful,
15
* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
* GNU General Public License for more details.
19
* You should have received a copy of the GNU General Public License
20
* along with this program; if not, write to the Free Software
21
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22
*****************************************************************************/
28
#ifdef CAN_COMPILE_MMXEXT
35
#include <vlc_common.h>
37
#include <vlc_picture.h>
38
#include <vlc_filter.h>
40
#include "deinterlace.h" /* filter_sys_t */
41
#include "helpers.h" /* ComposeFrame() */
43
#include "algo_phosphor.h"
45
/*****************************************************************************
47
*****************************************************************************/
50
* Internal helper function: dims (darkens) the given field
51
* of the given picture.
53
* This is used for simulating CRT light output decay in RenderPhosphor().
55
* The strength "1" is recommended. It's a matter of taste,
56
* so it's parametrized.
58
* Note on chroma formats:
59
* - If input is 4:2:2, all planes are processed.
60
* - If input is 4:2:0, only the luma plane is processed, because both fields
61
* have the same chroma. This will distort colours, especially for high
62
* filter strengths, especially for pixels whose U and/or V values are
63
* far away from the origin (which is at 128 in uint8 format).
65
* @param p_dst Input/output picture. Will be modified in-place.
66
* @param i_field Darken which field? 0 = top, 1 = bottom.
67
* @param i_strength Strength of effect: 1, 2 or 3 (division by 2, 4 or 8).
68
* @see RenderPhosphor()
71
static void DarkenField( picture_t *p_dst, const int i_field,
72
const int i_strength )
74
assert( p_dst != NULL );
75
assert( i_field == 0 || i_field == 1 );
76
assert( i_strength >= 1 && i_strength <= 3 );
78
/* Bitwise ANDing with this clears the i_strength highest bits of each byte. */
80
#ifdef CAN_COMPILE_MMXEXT
81
unsigned u_cpu = vlc_CPU();
82
uint64_t i_strength_u64 = i_strength; /* for MMX version (needs to know
85
const uint8_t remove_high_u8 = 0xFF >> i_strength;
86
const uint64_t remove_high_u64 = remove_high_u8 *
87
INT64_C(0x0101010101010101);
91
For luma, the operation is just a shift + bitwise AND, so we vectorize
92
even in the C version.
94
There is an MMX version, too, because it is about twice as fast.
96
int i_plane = Y_PLANE;
97
uint8_t *p_out, *p_out_end;
98
int w = p_dst->p[i_plane].i_visible_pitch;
99
p_out = p_dst->p[i_plane].p_pixels;
100
p_out_end = p_out + p_dst->p[i_plane].i_pitch
101
* p_dst->p[i_plane].i_visible_lines;
103
/* skip first line for bottom field */
105
p_out += p_dst->p[i_plane].i_pitch;
107
int wm8 = w % 8; /* remainder */
108
int w8 = w - wm8; /* part of width that is divisible by 8 */
109
for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
111
uint64_t *po = (uint64_t *)p_out;
114
#ifdef CAN_COMPILE_MMXEXT
115
if( u_cpu & CPU_CAPABILITY_MMXEXT )
117
movq_m2r( i_strength_u64, mm1 );
118
movq_m2r( remove_high_u64, mm2 );
119
for( ; x < w8; x += 8 )
121
movq_m2r( (*po), mm0 );
123
psrlq_r2r( mm1, mm0 );
124
pand_r2r( mm2, mm0 );
126
movq_r2m( mm0, (*po++) );
132
for( ; x < w8; x += 8, ++po )
133
(*po) = ( ((*po) >> i_strength) & remove_high_u64 );
134
#ifdef CAN_COMPILE_MMXEXT
138
/* handle the width remainder */
139
uint8_t *po_temp = (uint8_t *)po;
140
for( ; x < w; ++x, ++po_temp )
141
(*po_temp) = ( ((*po_temp) >> i_strength) & remove_high_u8 );
144
/* Process chroma if the field chromas are independent.
146
The origin (black) is at YUV = (0, 128, 128) in the uint8 format.
147
The chroma processing is a bit more complicated than luma,
148
and needs MMX for vectorization. */
150
if( p_dst->format.i_chroma == VLC_CODEC_I422 ||
151
p_dst->format.i_chroma == VLC_CODEC_J422 )
153
for( i_plane = 0 ; i_plane < p_dst->i_planes ; i_plane++ )
155
if( i_plane == Y_PLANE )
156
continue; /* luma already handled */
158
int w = p_dst->p[i_plane].i_visible_pitch;
159
#ifdef CAN_COMPILE_MMXEXT
160
int wm8 = w % 8; /* remainder */
161
int w8 = w - wm8; /* part of width that is divisible by 8 */
163
p_out = p_dst->p[i_plane].p_pixels;
164
p_out_end = p_out + p_dst->p[i_plane].i_pitch
165
* p_dst->p[i_plane].i_visible_lines;
167
/* skip first line for bottom field */
169
p_out += p_dst->p[i_plane].i_pitch;
171
for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
175
#ifdef CAN_COMPILE_MMXEXT
176
/* See also easy-to-read C version below. */
177
if( u_cpu & CPU_CAPABILITY_MMXEXT )
179
static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
180
movq_m2r( b128, mm5 );
181
movq_m2r( i_strength_u64, mm6 );
182
movq_m2r( remove_high_u64, mm7 );
184
uint64_t *po = (uint64_t *)p_out;
185
for( ; x < w8; x += 8 )
187
movq_m2r( (*po), mm0 );
189
movq_r2r( mm5, mm2 ); /* 128 */
190
movq_r2r( mm0, mm1 ); /* copy of data */
191
psubusb_r2r( mm2, mm1 ); /* mm1 = max(data - 128, 0) */
192
psubusb_r2r( mm0, mm2 ); /* mm2 = max(128 - data, 0) */
195
psrlq_r2r( mm6, mm1 );
196
psrlq_r2r( mm6, mm2 );
197
pand_r2r( mm7, mm1 );
198
pand_r2r( mm7, mm2 );
200
/* collect results from pos./neg. parts */
201
psubb_r2r( mm2, mm1 );
202
paddb_r2r( mm5, mm1 );
204
movq_r2m( mm1, (*po++) );
209
/* C version - handle the width remainder
210
(or everything if no MMX) */
212
for( ; x < w; ++x, ++po )
213
(*po) = 128 + ( ((*po) - 128) / (1 << i_strength) );
215
} /* for i_plane... */
218
#ifdef CAN_COMPILE_MMXEXT
219
if( u_cpu & CPU_CAPABILITY_MMXEXT )
224
/*****************************************************************************
226
*****************************************************************************/
228
/* See header for function doc. */
229
int RenderPhosphor( filter_t *p_filter,
231
int i_order, int i_field )
233
assert( p_filter != NULL );
234
assert( p_dst != NULL );
235
assert( i_order >= 0 && i_order <= 2 ); /* 2 = soft field repeat */
236
assert( i_field == 0 || i_field == 1 );
238
filter_sys_t *p_sys = p_filter->p_sys;
240
/* Last two input frames */
241
picture_t *p_in = p_sys->pp_history[HISTORY_SIZE-1];
242
picture_t *p_old = p_sys->pp_history[HISTORY_SIZE-2];
244
/* Use the same input picture as "old" at the first frame after startup */
248
/* If the history mechanism has failed, we can't do anything. */
252
assert( p_old != NULL );
253
assert( p_in != NULL );
255
/* Decide sources for top & bottom fields of output. */
256
picture_t *p_in_top = p_in;
257
picture_t *p_in_bottom = p_in;
258
/* For the first output field this frame,
259
grab "old" field from previous frame. */
262
if( i_field == 0 ) /* rendering top field */
264
else /* i_field == 1, rendering bottom field */
268
compose_chroma_t cc = CC_ALTLINE; /* initialize to prevent compiler warning */
269
switch( p_sys->phosphor.i_chroma_for_420 )
277
else /* i_field == 1 */
278
cc = CC_SOURCE_BOTTOM;
287
/* The above are the only possibilities, if there are no bugs. */
292
ComposeFrame( p_filter, p_dst, p_in_top, p_in_bottom, cc );
294
/* Simulate phosphor light output decay for the old field.
296
The dimmer can also be switched off in the configuration, but that is
297
more of a technical curiosity or an educational toy for advanced users
298
than a useful deinterlacer mode (although it does make telecined
299
material look slightly better than without any filtering).
301
In most use cases the dimmer is used. */
303
if( p_sys->phosphor.i_dimmer_strength > 0 )
304
DarkenField( p_dst, !i_field, p_sys->phosphor.i_dimmer_strength );