1
by Jens Kuske
First release |
1 |
/*
|
2 |
* Copyright (c) 2013 Jens Kuske <jenskuske@gmail.com>
|
|
3 |
*
|
|
4 |
* This library is free software; you can redistribute it and/or
|
|
5 |
* modify it under the terms of the GNU Lesser General Public
|
|
6 |
* License as published by the Free Software Foundation; either
|
|
7 |
* version 2.1 of the License, or (at your option) any later version.
|
|
8 |
*
|
|
9 |
* This library is distributed in the hope that it will be useful,
|
|
10 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
12 |
* Lesser General Public License for more details.
|
|
13 |
*
|
|
14 |
* You should have received a copy of the GNU Lesser General Public
|
|
15 |
* License along with this library; if not, write to the Free Software
|
|
16 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
17 |
*
|
|
18 |
*/
|
|
19 |
||
20 |
#include <stdlib.h> |
|
21 |
#include <string.h> |
|
22 |
#include <unistd.h> |
|
71
by Jens Kuske
Use libcedrus |
23 |
#include <cedrus/cedrus.h> |
24 |
#include <cedrus/cedrus_regs.h> |
|
1
by Jens Kuske
First release |
25 |
#include "vdpau_private.h" |
26 |
||
27 |
/*
 * Scan data[start..len) for the byte pattern 00 00 01 (any number of
 * additional leading zero bytes is accepted).
 *
 * Returns the index of the second-to-last zero byte before the 0x01, i.e.
 * the position where the three byte start code begins, or -1 if no start
 * code is found. Zero bytes located before `start` are not counted.
 */
static int find_startcode(const uint8_t *data, int len, int start)
{
	int idx;
	int zero_run = 0;	/* consecutive 0x00 bytes seen right before data[idx] */

	for (idx = start; idx < len; idx++)
	{
		if (data[idx] == 0x00)
		{
			zero_run++;
			continue;
		}

		if (data[idx] == 0x01 && zero_run >= 2)
			return idx - 2;

		zero_run = 0;
	}

	return -1;
}
|
|
42 |
||
43 |
static uint32_t get_u(void *regs, int num) |
|
44 |
{
|
|
45 |
writel(0x00000002 | (num << 8), regs + VE_H264_TRIGGER); |
|
46 |
||
7
by Jens Kuske
Fix waiting condition for basic bits read |
47 |
while (readl(regs + VE_H264_STATUS) & (1 << 8)); |
1
by Jens Kuske
First release |
48 |
|
49 |
return readl(regs + VE_H264_BASIC_BITS); |
|
50 |
}
|
|
51 |
||
52 |
static uint32_t get_ue(void *regs) |
|
53 |
{
|
|
54 |
writel(0x00000005, regs + VE_H264_TRIGGER); |
|
55 |
||
7
by Jens Kuske
Fix waiting condition for basic bits read |
56 |
while (readl(regs + VE_H264_STATUS) & (1 << 8)); |
1
by Jens Kuske
First release |
57 |
|
58 |
return readl(regs + VE_H264_BASIC_BITS); |
|
59 |
}
|
|
60 |
||
61 |
static int32_t get_se(void *regs) |
|
62 |
{
|
|
63 |
writel(0x00000004, regs + VE_H264_TRIGGER); |
|
64 |
||
7
by Jens Kuske
Fix waiting condition for basic bits read |
65 |
while (readl(regs + VE_H264_STATUS) & (1 << 8)); |
1
by Jens Kuske
First release |
66 |
|
67 |
return readl(regs + VE_H264_BASIC_BITS); |
|
68 |
}
|
|
69 |
||
39
by Jens Kuske
Add support for interlaced h264 |
70 |
#define PIC_TOP_FIELD 0x1
|
71 |
#define PIC_BOTTOM_FIELD 0x2
|
|
72 |
#define PIC_FRAME 0x3
|
|
1
by Jens Kuske
First release |
73 |
|
74 |
typedef struct |
|
75 |
{
|
|
76 |
video_surface_ctx_t *surface; |
|
77 |
uint16_t top_pic_order_cnt; |
|
78 |
uint16_t bottom_pic_order_cnt; |
|
79 |
uint16_t frame_idx; |
|
39
by Jens Kuske
Add support for interlaced h264 |
80 |
uint8_t field; |
81 |
} h264_picture_t; |
|
1
by Jens Kuske
First release |
82 |
|
83 |
||
84 |
#define SLICE_TYPE_P 0
|
|
85 |
#define SLICE_TYPE_B 1
|
|
86 |
#define SLICE_TYPE_I 2
|
|
87 |
#define SLICE_TYPE_SP 3
|
|
88 |
#define SLICE_TYPE_SI 4
|
|
89 |
||
90 |
typedef struct |
|
91 |
{
|
|
92 |
uint8_t nal_unit_type; |
|
93 |
uint16_t first_mb_in_slice; |
|
94 |
uint8_t slice_type; |
|
95 |
uint8_t pic_parameter_set_id; |
|
96 |
uint16_t frame_num; |
|
97 |
uint8_t field_pic_flag; |
|
98 |
uint8_t bottom_field_flag; |
|
99 |
uint16_t idr_pic_id; |
|
100 |
uint32_t pic_order_cnt_lsb; |
|
101 |
int32_t delta_pic_order_cnt_bottom; |
|
102 |
int32_t delta_pic_order_cnt[2]; |
|
103 |
uint8_t redundant_pic_cnt; |
|
104 |
uint8_t direct_spatial_mv_pred_flag; |
|
105 |
uint8_t num_ref_idx_active_override_flag; |
|
106 |
uint8_t num_ref_idx_l0_active_minus1; |
|
107 |
uint8_t num_ref_idx_l1_active_minus1; |
|
108 |
uint8_t cabac_init_idc; |
|
109 |
int8_t slice_qp_delta; |
|
110 |
uint8_t sp_for_switch_flag; |
|
111 |
int8_t slice_qs_delta; |
|
112 |
uint8_t disable_deblocking_filter_idc; |
|
113 |
int8_t slice_alpha_c0_offset_div2; |
|
114 |
int8_t slice_beta_offset_div2; |
|
115 |
||
116 |
uint8_t luma_log2_weight_denom; |
|
117 |
uint8_t chroma_log2_weight_denom; |
|
118 |
int8_t luma_weight_l0[32]; |
|
119 |
int8_t luma_offset_l0[32]; |
|
120 |
int8_t chroma_weight_l0[32][2]; |
|
121 |
int8_t chroma_offset_l0[32][2]; |
|
122 |
int8_t luma_weight_l1[32]; |
|
123 |
int8_t luma_offset_l1[32]; |
|
124 |
int8_t chroma_weight_l1[32][2]; |
|
125 |
int8_t chroma_offset_l1[32][2]; |
|
126 |
||
39
by Jens Kuske
Add support for interlaced h264 |
127 |
h264_picture_t RefPicList0[32]; |
128 |
h264_picture_t RefPicList1[32]; |
|
1
by Jens Kuske
First release |
129 |
} h264_header_t; |
130 |
||
131 |
typedef struct |
|
132 |
{
|
|
133 |
void *regs; |
|
134 |
h264_header_t header; |
|
135 |
VdpPictureInfoH264 const *info; |
|
136 |
video_surface_ctx_t *output; |
|
137 |
uint8_t picture_width_in_mbs_minus1; |
|
138 |
uint8_t picture_height_in_mbs_minus1; |
|
13
by Jens Kuske
Add support for custom h264 scaling lists |
139 |
uint8_t default_scaling_lists; |
39
by Jens Kuske
Add support for interlaced h264 |
140 |
int video_extra_data_len; |
1
by Jens Kuske
First release |
141 |
|
142 |
int ref_count; |
|
39
by Jens Kuske
Add support for interlaced h264 |
143 |
h264_picture_t ref_pic[16]; |
1
by Jens Kuske
First release |
144 |
} h264_context_t; |
145 |
||
24
by Jens Kuske
Unify interface to different decoders (now more object-oriented) |
146 |
typedef struct |
147 |
{
|
|
71
by Jens Kuske
Use libcedrus |
148 |
cedrus_mem_t *extra_data; |
24
by Jens Kuske
Unify interface to different decoders (now more object-oriented) |
149 |
} h264_private_t; |
150 |
||
151 |
/*
 * Release the decoder's private H.264 data: first the cedrus buffer it
 * owns, then the private structure itself.
 */
static void h264_private_free(decoder_ctx_t *decoder)
{
	h264_private_t *priv = (h264_private_t *)decoder->private;

	cedrus_mem_free(priv->extra_data);
	free(priv);
}
|
|
157 |
||
40
by Jens Kuske
Add support for mbaff h264 |
158 |
#define PIC_TYPE_FRAME 0x0
|
159 |
#define PIC_TYPE_FIELD 0x1
|
|
160 |
#define PIC_TYPE_MBAFF 0x2
|
|
161 |
||
24
by Jens Kuske
Unify interface to different decoders (now more object-oriented) |
162 |
typedef struct |
163 |
{
|
|
71
by Jens Kuske
Use libcedrus |
164 |
cedrus_mem_t *extra_data; |
40
by Jens Kuske
Add support for mbaff h264 |
165 |
uint8_t pos; |
166 |
uint8_t pic_type; |
|
24
by Jens Kuske
Unify interface to different decoders (now more object-oriented) |
167 |
} h264_video_private_t; |
168 |
||
169 |
/*
 * Release a surface's private H.264 data: first the cedrus buffer it owns,
 * then the private structure itself.
 */
static void h264_video_private_free(video_surface_ctx_t *surface)
{
	h264_video_private_t *priv = (h264_video_private_t *)surface->decoder_private;

	cedrus_mem_free(priv->extra_data);
	free(priv);
}
|
|
175 |
||
40
by Jens Kuske
Add support for mbaff h264 |
176 |
/*
 * Return the H.264 private data attached to `surface`, creating it on first
 * use.
 *
 * On creation a zeroed h264_video_private_t is allocated together with a
 * cedrus buffer of 2 * c->video_extra_data_len bytes, and the surface is
 * given h264_video_private_free as its cleanup callback.
 *
 * Returns NULL if either allocation fails; the surface is left untouched
 * in that case.
 */
static h264_video_private_t *get_surface_priv(h264_context_t *c, video_surface_ctx_t *surface)
{
	h264_video_private_t *priv = surface->decoder_private;

	// already attached, nothing to do
	if (priv)
		return priv;

	priv = calloc(1, sizeof(h264_video_private_t));
	if (!priv)
		return NULL;

	priv->extra_data = cedrus_mem_alloc(surface->device->cedrus, c->video_extra_data_len * 2);
	if (!priv->extra_data)
	{
		free(priv);
		return NULL;
	}

	surface->decoder_private = priv;
	surface->decoder_private_free = h264_video_private_free;

	return priv;
}
|
|
199 |
||
1
by Jens Kuske
First release |
200 |
/*
 * Parse the ref_pic_list_modification() part of the slice header and apply
 * short-term reordering commands (modification_of_pic_nums_idc 0 and 1) to
 * RefPicList0.
 *
 * Long-term commands (idc == 2) and all list 1 commands are only read from
 * the bitstream so parsing stays in sync; they are not applied.
 *
 * The reordering is done on a scratch copy with one extra slot, because the
 * insert/compact step temporarily needs one entry more than the active list
 * length. With 32 active entries that would be index 32 of the 32-entry
 * RefPicList0. Commands that reference a picture not present in c->ref_pic,
 * or that exceed the active list length, are skipped instead of indexing
 * past the valid entries.
 */
static void ref_pic_list_modification(h264_context_t *c)
{
	h264_header_t *h = &c->header;
	VdpPictureInfoH264 const *info = c->info;
	const int MaxFrameNum = 1 << (info->log2_max_frame_num_minus4 + 4);
	const int MaxPicNum = (info->field_pic_flag) ? 2 * MaxFrameNum : MaxFrameNum;

	if (h->slice_type != SLICE_TYPE_I && h->slice_type != SLICE_TYPE_SI)
	{
		int ref_pic_list_modification_flag_l0 = get_u(c->regs, 1);
		if (ref_pic_list_modification_flag_l0)
		{
			// scratch list: RefPicList0 plus one temporary slot
			h264_picture_t list[sizeof(h->RefPicList0) / sizeof(h->RefPicList0[0]) + 1];
			const int list_cap = (int)(sizeof(list) / sizeof(list[0])) - 1;

			// highest scratch index touched while inserting
			int last = h->num_ref_idx_l0_active_minus1 + 1;
			if (last > list_cap)
				last = list_cap;

			unsigned int modification_of_pic_nums_idc;
			int refIdxL0 = 0;
			unsigned int picNumL0 = info->frame_num;
			if (h->field_pic_flag)
				picNumL0 = picNumL0 * 2 + 1;

			memcpy(list, h->RefPicList0, sizeof(h->RefPicList0));
			memset(&list[list_cap], 0, sizeof(list[0]));

			do
			{
				modification_of_pic_nums_idc = get_ue(c->regs);
				if (modification_of_pic_nums_idc == 0 || modification_of_pic_nums_idc == 1)
				{
					unsigned int abs_diff_pic_num_minus1 = get_ue(c->regs);

					// the prediction is updated even if the command is skipped below
					if (modification_of_pic_nums_idc == 0)
						picNumL0 -= (abs_diff_pic_num_minus1 + 1);
					else
						picNumL0 += (abs_diff_pic_num_minus1 + 1);

					picNumL0 &= (MaxPicNum - 1);

					int frame_num = picNumL0;
					int field = PIC_FRAME;

					if (h->field_pic_flag)
					{
						// odd picNum: same parity as the current field, even: opposite parity
						field = h->bottom_field_flag ? PIC_BOTTOM_FIELD : PIC_TOP_FIELD;
						if (!(frame_num & 1))
							field ^= PIC_FRAME;

						frame_num /= 2;
					}

					int i, j;
					for (i = 0; i < c->ref_count; i++)
					{
						if (c->ref_pic[i].frame_idx == frame_num)
							break;
					}

					if (i == c->ref_count || refIdxL0 >= last)
					{
						// no such reference, or more commands than list entries
						VDPAU_DBG("ref_pic_list_modification: skipping invalid reordering command");
						continue;
					}

					// make room at refIdxL0 and insert the selected picture
					for (j = last; j > refIdxL0; j--)
						list[j] = list[j - 1];
					list[refIdxL0] = c->ref_pic[i];
					if (h->field_pic_flag)
						list[refIdxL0].field = field;

					// drop the old occurrence of the inserted picture further down
					i = ++refIdxL0;
					for (j = refIdxL0; j <= last; j++)
						if (list[j].frame_idx != frame_num || list[j].field != field)
							list[i++] = list[j];
				}
				else if (modification_of_pic_nums_idc == 2)
				{
					VDPAU_DBG("NOT IMPLEMENTED: modification_of_pic_nums_idc == 2");
					(void)get_ue(c->regs);	// long_term_pic_num, read and ignored
				}
			} while (modification_of_pic_nums_idc != 3);

			memcpy(h->RefPicList0, list, sizeof(h->RefPicList0));
		}
	}

	if (h->slice_type == SLICE_TYPE_B)
	{
		int ref_pic_list_modification_flag_l1 = get_u(c->regs, 1);
		if (ref_pic_list_modification_flag_l1)
		{
			VDPAU_DBG("NOT IMPLEMENTED: ref_pic_list_modification_flag_l1 == 1");
			unsigned int modification_of_pic_nums_idc;
			do
			{
				modification_of_pic_nums_idc = get_ue(c->regs);
				// operands are consumed but not applied
				if (modification_of_pic_nums_idc == 0 || modification_of_pic_nums_idc == 1)
					(void)get_ue(c->regs);	// abs_diff_pic_num_minus1
				else if (modification_of_pic_nums_idc == 2)
					(void)get_ue(c->regs);	// long_term_pic_num
			} while (modification_of_pic_nums_idc != 3);
		}
	}
}
|
|
292 |
||
293 |
/*
 * Parse pred_weight_table() from the slice header into c->header and upload
 * the result to the engine (VE_H264_PRED_WEIGHT plus the prediction weight
 * table in SRAM).
 *
 * All table entries are first set to the defaults (weight 1 << denom,
 * offset 0), then overridden per reference index where the bitstream
 * carries explicit values.
 *
 * The per-reference loops are bounded by the table size in addition to
 * num_ref_idx_lX_active_minus1, so an out-of-range count cannot write past
 * the 32-entry tables. The default weight shift is only evaluated for
 * denominators below 8; larger values yield 0, which is what the int8_t
 * store of a wider shift produced anyway, and avoids an undefined shift.
 */
static void pred_weight_table(h264_context_t *c)
{
	h264_header_t *h = &c->header;
	const int table_len = (int)(sizeof(h->luma_weight_l0) / sizeof(h->luma_weight_l0[0]));
	// ChromaArrayType is fixed to 1 here, so the chroma branches are always taken
	int i, j, ChromaArrayType = 1;

	h->luma_log2_weight_denom = get_ue(c->regs);
	if (ChromaArrayType != 0)
		h->chroma_log2_weight_denom = get_ue(c->regs);

	const int luma_default = (h->luma_log2_weight_denom < 8) ? (1 << h->luma_log2_weight_denom) : 0;
	const int chroma_default = (h->chroma_log2_weight_denom < 8) ? (1 << h->chroma_log2_weight_denom) : 0;

	// defaults for every entry; entries without explicit values keep these
	for (i = 0; i < table_len; i++)
	{
		h->luma_weight_l0[i] = luma_default;
		h->luma_weight_l1[i] = luma_default;
		h->luma_offset_l0[i] = 0;
		h->luma_offset_l1[i] = 0;
		for (j = 0; j < 2; j++)
		{
			h->chroma_weight_l0[i][j] = chroma_default;
			h->chroma_weight_l1[i][j] = chroma_default;
			h->chroma_offset_l0[i][j] = 0;
			h->chroma_offset_l1[i][j] = 0;
		}
	}

	for (i = 0; i <= h->num_ref_idx_l0_active_minus1 && i < table_len; i++)
	{
		int luma_weight_l0_flag = get_u(c->regs, 1);
		if (luma_weight_l0_flag)
		{
			h->luma_weight_l0[i] = get_se(c->regs);
			h->luma_offset_l0[i] = get_se(c->regs);
		}
		if (ChromaArrayType != 0)
		{
			int chroma_weight_l0_flag = get_u(c->regs, 1);
			if (chroma_weight_l0_flag)
				for (j = 0; j < 2; j++)
				{
					h->chroma_weight_l0[i][j] = get_se(c->regs);
					h->chroma_offset_l0[i][j] = get_se(c->regs);
				}
		}
	}

	if (h->slice_type == SLICE_TYPE_B)
		for (i = 0; i <= h->num_ref_idx_l1_active_minus1 && i < table_len; i++)
		{
			int luma_weight_l1_flag = get_u(c->regs, 1);
			if (luma_weight_l1_flag)
			{
				h->luma_weight_l1[i] = get_se(c->regs);
				h->luma_offset_l1[i] = get_se(c->regs);
			}
			if (ChromaArrayType != 0)
			{
				int chroma_weight_l1_flag = get_u(c->regs, 1);
				if (chroma_weight_l1_flag)
					for (j = 0; j < 2; j++)
					{
						h->chroma_weight_l1[i][j] = get_se(c->regs);
						h->chroma_offset_l1[i][j] = get_se(c->regs);
					}
			}
		}

	// denominators: chroma in bits 4-7, luma in bits 0-3
	writel(((h->chroma_log2_weight_denom & 0xf) << 4)
		| ((h->luma_log2_weight_denom & 0xf) << 0)
		, c->regs + VE_H264_PRED_WEIGHT);

	// table upload order: luma l0, chroma l0, luma l1, chroma l1;
	// each word holds the offset (9 bits) at bit 16 and the weight in the low 8 bits
	writel(VE_SRAM_H264_PRED_WEIGHT_TABLE, c->regs + VE_H264_RAM_WRITE_PTR);
	for (i = 0; i < table_len; i++)
		writel(((h->luma_offset_l0[i] & 0x1ff) << 16)
			| (h->luma_weight_l0[i] & 0xff), c->regs + VE_H264_RAM_WRITE_DATA);
	for (i = 0; i < table_len; i++)
		for (j = 0; j < 2; j++)
			writel(((h->chroma_offset_l0[i][j] & 0x1ff) << 16)
				| (h->chroma_weight_l0[i][j] & 0xff), c->regs + VE_H264_RAM_WRITE_DATA);
	for (i = 0; i < table_len; i++)
		writel(((h->luma_offset_l1[i] & 0x1ff) << 16)
			| (h->luma_weight_l1[i] & 0xff), c->regs + VE_H264_RAM_WRITE_DATA);
	for (i = 0; i < table_len; i++)
		for (j = 0; j < 2; j++)
			writel(((h->chroma_offset_l1[i][j] & 0x1ff) << 16)
				| (h->chroma_weight_l1[i][j] & 0xff), c->regs + VE_H264_RAM_WRITE_DATA);
}
|
|
373 |
||
374 |
/*
 * Consume the dec_ref_pic_marking() part of the slice header.
 *
 * Only reads the bits so that parsing of the following syntax elements
 * stays aligned; no reference marking is performed and all values read are
 * discarded.
 */
static void dec_ref_pic_marking(h264_context_t *c)
{
	h264_header_t *h = &c->header;

	if (h->nal_unit_type == 5)
	{
		// two one-bit flags
		get_u(c->regs, 1);
		get_u(c->regs, 1);
		return;
	}

	// adaptive_ref_pic_marking_mode_flag
	if (!get_u(c->regs, 1))
		return;

	unsigned int mmco;
	do
	{
		mmco = get_ue(c->regs);
		switch (mmco)
		{
		case 3:
			// operation 3 carries two operands
			get_ue(c->regs);
			/* fallthrough */
		case 1:
		case 2:
		case 4:
		case 6:
			get_ue(c->regs);
			break;
		default:
			// no operand
			break;
		}
	} while (mmco != 0);
}
|
|
412 |
||
39
by Jens Kuske
Add support for interlaced h264 |
413 |
static int pic_order_cnt(const h264_picture_t *pic) |
414 |
{
|
|
415 |
if (pic->field == PIC_FRAME) |
|
416 |
return min(pic->top_pic_order_cnt, pic->bottom_pic_order_cnt); |
|
417 |
else if (pic->field == PIC_TOP_FIELD) |
|
418 |
return pic->top_pic_order_cnt; |
|
419 |
else
|
|
420 |
return pic->bottom_pic_order_cnt; |
|
421 |
}
|
|
422 |
||
423 |
static int sort_ref_pics_by_poc(const void *p1, const void *p2) |
|
424 |
{
|
|
425 |
const h264_picture_t *r1 = p1; |
|
426 |
const h264_picture_t *r2 = p2; |
|
427 |
||
428 |
return pic_order_cnt(r1) - pic_order_cnt(r2); |
|
429 |
}
|
|
430 |
||
431 |
static int sort_ref_pics_by_frame_num(const void *p1, const void *p2) |
|
432 |
{
|
|
433 |
const h264_picture_t *r1 = p1; |
|
434 |
const h264_picture_t *r2 = p2; |
|
37
by Jens Kuske
Fix h264 default reference picture list generation |
435 |
|
436 |
return r1->frame_idx - r2->frame_idx; |
|
437 |
}
|
|
438 |
||
39
by Jens Kuske
Add support for interlaced h264 |
439 |
/*
 * Build a reference list in `out` from the `len` pictures referenced by
 * `in`, alternating between entries that contain the field selected by
 * `cur_field` and entries that contain the complementary field
 * (cur_field ^ PIC_FRAME), each taken in input order.
 *
 * Every emitted entry is a copy of the input picture with `field` replaced
 * by the field it was selected for. Once one kind is exhausted the
 * remaining entries of the other kind follow back to back. With cur_field
 * == PIC_FRAME the complementary mask is 0, so only the first kind is
 * produced.
 *
 * `out` must have room for up to 2 * len entries.
 */
static void split_ref_fields(h264_picture_t *out, h264_picture_t **in, int len, int cur_field)
{
	const int other_field = cur_field ^ PIC_FRAME;
	int same_pos = 0;	/* scan position for pictures holding cur_field */
	int other_pos = 0;	/* scan position for pictures holding other_field */
	int count = 0;		/* entries written to out so far */

	while (same_pos < len || other_pos < len)
	{
		// next picture that has the current field
		for (; same_pos < len; same_pos++)
			if (in[same_pos]->field & cur_field)
				break;
		if (same_pos < len)
		{
			h264_picture_t *dst = &out[count++];
			*dst = *in[same_pos++];
			dst->field = cur_field;
		}

		// next picture that has the opposite field
		for (; other_pos < len; other_pos++)
			if (in[other_pos]->field & other_field)
				break;
		if (other_pos < len)
		{
			h264_picture_t *dst = &out[count++];
			*dst = *in[other_pos++];
			dst->field = other_field;
		}
	}
}
|
|
465 |
||
37
by Jens Kuske
Fix h264 default reference picture list generation |
466 |
/*
 * Build the default reference picture lists for the current slice.
 *
 * P slices: c->ref_pic is sorted by frame_idx; RefPicList0 gets the
 * pictures with frame_idx <= info->frame_num in descending order followed
 * by those with a larger frame_idx, also in descending order.
 *
 * B slices: c->ref_pic is sorted by picture order count; RefPicList0 gets
 * the pictures at or before the current POC (descending) followed by the
 * later ones (ascending), RefPicList1 the later ones (ascending) followed
 * by the earlier ones (descending).
 *
 * In both cases the ordered pictures are passed through split_ref_fields()
 * with the field parity of the current slice. Other slice types leave the
 * lists untouched. Note that c->ref_pic itself is reordered by qsort().
 */
static void fill_default_ref_pic_list(h264_context_t *c)
{
	h264_header_t *h = &c->header;
	VdpPictureInfoH264 const *info = c->info;
	int cur_field;
	int k;

	if (!h->field_pic_flag)
		cur_field = PIC_FRAME;
	else if (h->bottom_field_flag)
		cur_field = PIC_BOTTOM_FIELD;
	else
		cur_field = PIC_TOP_FIELD;

	if (h->slice_type == SLICE_TYPE_P)
	{
		h264_picture_t *order[16];
		int n = 0;

		qsort(c->ref_pic, c->ref_count, sizeof(c->ref_pic[0]), &sort_ref_pics_by_frame_num);

		// frame_idx <= current frame_num first, highest frame_idx first
		for (k = c->ref_count - 1; k >= 0; k--)
			if (c->ref_pic[k].frame_idx <= info->frame_num)
				order[n++] = &c->ref_pic[k];

		// then the remaining ones, again highest frame_idx first
		for (k = c->ref_count - 1; k >= 0; k--)
			if (c->ref_pic[k].frame_idx > info->frame_num)
				order[n++] = &c->ref_pic[k];

		split_ref_fields(h->RefPicList0, order, c->ref_count, cur_field);
	}
	else if (h->slice_type == SLICE_TYPE_B)
	{
		h264_picture_t *order[2][16];
		int n0 = 0, n1 = 0;
		int cur_poc;

		qsort(c->ref_pic, c->ref_count, sizeof(c->ref_pic[0]), &sort_ref_pics_by_poc);

		if (h->field_pic_flag)
			cur_poc = (uint16_t)info->field_order_cnt[cur_field == PIC_BOTTOM_FIELD];
		else
			cur_poc = min((uint16_t)info->field_order_cnt[0], (uint16_t)info->field_order_cnt[1]);

		// list 0: POC <= current (descending), then POC > current (ascending)
		for (k = c->ref_count - 1; k >= 0; k--)
			if (pic_order_cnt(&c->ref_pic[k]) <= cur_poc)
				order[0][n0++] = &c->ref_pic[k];
		for (k = 0; k < c->ref_count; k++)
			if (pic_order_cnt(&c->ref_pic[k]) > cur_poc)
				order[0][n0++] = &c->ref_pic[k];

		// list 1: POC > current (ascending), then POC <= current (descending)
		for (k = 0; k < c->ref_count; k++)
			if (pic_order_cnt(&c->ref_pic[k]) > cur_poc)
				order[1][n1++] = &c->ref_pic[k];
		for (k = c->ref_count - 1; k >= 0; k--)
			if (pic_order_cnt(&c->ref_pic[k]) <= cur_poc)
				order[1][n1++] = &c->ref_pic[k];

		split_ref_fields(h->RefPicList0, order[0], c->ref_count, cur_field);
		split_ref_fields(h->RefPicList1, order[1], c->ref_count, cur_field);
	}
}
|
|
526 |
||
1
by Jens Kuske
First release |
527 |
/*
 * Parse the slice header from the bitstream into c->header, using the
 * engine's bit reader (get_u/get_ue/get_se) and the picture parameters in
 * c->info for everything that lives in the SPS/PPS.
 *
 * h->nal_unit_type must already be set by the caller. As part of parsing,
 * the default reference lists are built, list modification commands are
 * applied, and the prediction weight table is uploaded when present.
 *
 * num_ref_idx_l0/l1_active_minus1 (from c->info or overridden by the
 * bitstream) are clamped to the last valid index of the 32-entry reference
 * lists and weight tables, since later stages use them as loop bounds.
 */
static void decode_slice_header(h264_context_t *c)
{
	h264_header_t *h = &c->header;
	VdpPictureInfoH264 const *info = c->info;
	const uint32_t max_ref_idx = sizeof(h->RefPicList0) / sizeof(h->RefPicList0[0]) - 1;

	h->num_ref_idx_l0_active_minus1 = info->num_ref_idx_l0_active_minus1;
	h->num_ref_idx_l1_active_minus1 = info->num_ref_idx_l1_active_minus1;

	h->first_mb_in_slice = get_ue(c->regs);
	h->slice_type = get_ue(c->regs);
	// slice_type 5..9 are aliases of 0..4
	if (h->slice_type >= 5)
		h->slice_type -= 5;
	h->pic_parameter_set_id = get_ue(c->regs);

	// separate_colour_plane_flag isn't available in VDPAU
	/*if (separate_colour_plane_flag == 1)
		colour_plane_id u(2)*/

	h->frame_num = get_u(c->regs, info->log2_max_frame_num_minus4 + 4);

	if (!info->frame_mbs_only_flag)
	{
		h->field_pic_flag = get_u(c->regs, 1);
		if (h->field_pic_flag)
			h->bottom_field_flag = get_u(c->regs, 1);
	}

	if (h->nal_unit_type == 5)
		h->idr_pic_id = get_ue(c->regs);

	if (info->pic_order_cnt_type == 0)
	{
		h->pic_order_cnt_lsb = get_u(c->regs, info->log2_max_pic_order_cnt_lsb_minus4 + 4);
		if (info->pic_order_present_flag && !info->field_pic_flag)
			h->delta_pic_order_cnt_bottom = get_se(c->regs);
	}

	if (info->pic_order_cnt_type == 1 && !info->delta_pic_order_always_zero_flag)
	{
		h->delta_pic_order_cnt[0] = get_se(c->regs);
		if (info->pic_order_present_flag && !info->field_pic_flag)
			h->delta_pic_order_cnt[1] = get_se(c->regs);
	}

	if (info->redundant_pic_cnt_present_flag)
		h->redundant_pic_cnt = get_ue(c->regs);

	if (h->slice_type == SLICE_TYPE_B)
		h->direct_spatial_mv_pred_flag = get_u(c->regs, 1);

	if (h->slice_type == SLICE_TYPE_P || h->slice_type == SLICE_TYPE_SP || h->slice_type == SLICE_TYPE_B)
	{
		h->num_ref_idx_active_override_flag = get_u(c->regs, 1);
		if (h->num_ref_idx_active_override_flag)
		{
			// clamp before storing: the fields are only 8 bits wide
			uint32_t count = get_ue(c->regs);
			h->num_ref_idx_l0_active_minus1 = (count > max_ref_idx) ? max_ref_idx : count;
			if (h->slice_type == SLICE_TYPE_B)
			{
				count = get_ue(c->regs);
				h->num_ref_idx_l1_active_minus1 = (count > max_ref_idx) ? max_ref_idx : count;
			}
		}
	}

	// also covers the values taken over from c->info
	if (h->num_ref_idx_l0_active_minus1 > max_ref_idx)
		h->num_ref_idx_l0_active_minus1 = max_ref_idx;
	if (h->num_ref_idx_l1_active_minus1 > max_ref_idx)
		h->num_ref_idx_l1_active_minus1 = max_ref_idx;

	fill_default_ref_pic_list(c);

	// nal_unit_type 20 would need ref_pic_list_mvc_modification() (Annex H), which is not supported
	if (h->nal_unit_type != 20)
		ref_pic_list_modification(c);

	if ((info->weighted_pred_flag && (h->slice_type == SLICE_TYPE_P || h->slice_type == SLICE_TYPE_SP)) || (info->weighted_bipred_idc == 1 && h->slice_type == SLICE_TYPE_B))
		pred_weight_table(c);

	if (info->is_reference)
		dec_ref_pic_marking(c);

	if (info->entropy_coding_mode_flag && h->slice_type != SLICE_TYPE_I && h->slice_type != SLICE_TYPE_SI)
		h->cabac_init_idc = get_ue(c->regs);

	h->slice_qp_delta = get_se(c->regs);

	if (h->slice_type == SLICE_TYPE_SP || h->slice_type == SLICE_TYPE_SI)
	{
		if (h->slice_type == SLICE_TYPE_SP)
			h->sp_for_switch_flag = get_u(c->regs, 1);
		h->slice_qs_delta = get_se(c->regs);
	}

	if (info->deblocking_filter_control_present_flag)
	{
		h->disable_deblocking_filter_idc = get_ue(c->regs);
		if (h->disable_deblocking_filter_idc != 1)
		{
			h->slice_alpha_c0_offset_div2 = get_se(c->regs);
			h->slice_beta_offset_div2 = get_se(c->regs);
		}
	}

	// num_slice_groups_minus1 and slice_group_map_type aren't available in VDPAU
	/*if (num_slice_groups_minus1 > 0 && slice_group_map_type >= 3 && slice_group_map_type <= 5)
		slice_group_change_cycle u(v)*/
}
|
|
626 |
||
627 |
||
68
by Jens Kuske
Add missing error check in h264 decoder |
628 |
/*
 * Collect the reference frames from c->info into c->ref_pic and write the
 * 18-entry frame buffer list to the engine's SRAM.
 *
 * Each reference keeps the slot stored in its surface private data (pos).
 * If the output surface is not among the references it is placed in the
 * first free slot and its pos is updated. Unused slots are written as
 * eight zero words. Finally the output slot is written to
 * VE_H264_OUTPUT_FRAME_IDX.
 *
 * Returns 1 on success, 0 if a reference handle cannot be resolved or the
 * per-surface private data cannot be allocated.
 */
static int fill_frame_lists(h264_context_t *c)
{
	int i;
	h264_video_private_t *output_p = (h264_video_private_t *)c->output->decoder_private;

	// collect reference frames
	h264_picture_t *frame_list[18];
	memset(frame_list, 0, sizeof(frame_list));

	int output_placed = 0;

	for (i = 0; i < 16; i++)
	{
		const VdpReferenceFrameH264 *rf = &(c->info->referenceFrames[i]);
		if (rf->surface != VDP_INVALID_HANDLE)
		{
			if (rf->is_long_term)
				VDPAU_DBG("NOT IMPLEMENTED: We got a longterm reference!");

			// NOTE(review): handle_get() is defined elsewhere; it presumably
			// returns NULL for a handle that does not resolve - confirm.
			// get_surface_priv() dereferences the surface unconditionally.
			video_surface_ctx_t *surface = handle_get(rf->surface);
			if (!surface)
				return 0;

			h264_video_private_t *surface_p = get_surface_priv(c, surface);
			if (!surface_p)
				return 0;

			if (surface == c->output)
				output_placed = 1;

			c->ref_pic[c->ref_count].surface = surface;
			c->ref_pic[c->ref_count].top_pic_order_cnt = rf->field_order_cnt[0];
			c->ref_pic[c->ref_count].bottom_pic_order_cnt = rf->field_order_cnt[1];
			c->ref_pic[c->ref_count].frame_idx = rf->frame_idx;
			c->ref_pic[c->ref_count].field =
				(rf->top_is_reference ? PIC_TOP_FIELD : 0) |
				(rf->bottom_is_reference ? PIC_BOTTOM_FIELD : 0);

			frame_list[surface_p->pos] = &c->ref_pic[c->ref_count];
			c->ref_count++;
		}
	}

	// write picture buffer list
	writel(VE_SRAM_H264_FRAMEBUFFER_LIST, c->regs + VE_H264_RAM_WRITE_PTR);

	for (i = 0; i < 18; i++)
	{
		if (!output_placed && !frame_list[i])
		{
			// first free slot: put the output surface here
			writel((uint16_t)c->info->field_order_cnt[0], c->regs + VE_H264_RAM_WRITE_DATA);
			writel((uint16_t)c->info->field_order_cnt[1], c->regs + VE_H264_RAM_WRITE_DATA);
			writel(output_p->pic_type << 8, c->regs + VE_H264_RAM_WRITE_DATA);
			writel(cedrus_mem_get_bus_addr(c->output->rec), c->regs + VE_H264_RAM_WRITE_DATA);
			writel(cedrus_mem_get_bus_addr(c->output->rec) + c->output->luma_size, c->regs + VE_H264_RAM_WRITE_DATA);
			writel(cedrus_mem_get_bus_addr(output_p->extra_data), c->regs + VE_H264_RAM_WRITE_DATA);
			writel(cedrus_mem_get_bus_addr(output_p->extra_data) + c->video_extra_data_len, c->regs + VE_H264_RAM_WRITE_DATA);
			writel(0, c->regs + VE_H264_RAM_WRITE_DATA);

			output_p->pos = i;
			output_placed = 1;
		}
		else if (!frame_list[i])
		{
			// unused slot
			int j;
			for (j = 0; j < 8; j++)
				writel(0x0, c->regs + VE_H264_RAM_WRITE_DATA);
		}
		else
		{
			// slot occupied by a reference frame
			video_surface_ctx_t *surface = frame_list[i]->surface;
			h264_video_private_t *surface_p = (h264_video_private_t *)surface->decoder_private;

			writel(frame_list[i]->top_pic_order_cnt, c->regs + VE_H264_RAM_WRITE_DATA);
			writel(frame_list[i]->bottom_pic_order_cnt, c->regs + VE_H264_RAM_WRITE_DATA);
			writel(surface_p->pic_type << 8, c->regs + VE_H264_RAM_WRITE_DATA);
			writel(cedrus_mem_get_bus_addr(surface->rec), c->regs + VE_H264_RAM_WRITE_DATA);
			writel(cedrus_mem_get_bus_addr(surface->rec) + surface->luma_size, c->regs + VE_H264_RAM_WRITE_DATA);
			writel(cedrus_mem_get_bus_addr(surface_p->extra_data), c->regs + VE_H264_RAM_WRITE_DATA);
			writel(cedrus_mem_get_bus_addr(surface_p->extra_data) + c->video_extra_data_len, c->regs + VE_H264_RAM_WRITE_DATA);
			writel(0, c->regs + VE_H264_RAM_WRITE_DATA);
		}
	}

	// output index
	writel(output_p->pos, c->regs + VE_H264_OUTPUT_FRAME_IDX);

	return 1;
}
|
714 |
||
13
by Jens Kuske
Add support for custom h264 scaling lists |
715 |
// VDPAU does not tell us if the scaling lists are default or custom
|
716 |
static int check_scaling_lists(h264_context_t *c) |
|
717 |
{
|
|
718 |
const uint32_t *sl4 = (uint32_t *)&c->info->scaling_lists_4x4[0][0]; |
|
719 |
const uint32_t *sl8 = (uint32_t *)&c->info->scaling_lists_8x8[0][0]; |
|
720 |
||
721 |
int i; |
|
722 |
for (i = 0; i < 6 * 16 / 4; i++) |
|
723 |
if (sl4[i] != 0x10101010) |
|
724 |
return 0; |
|
725 |
||
726 |
for (i = 0; i < 2 * 64 / 4; i++) |
|
727 |
if (sl8[i] != 0x10101010) |
|
728 |
return 0; |
|
729 |
||
730 |
return 1; |
|
731 |
}
|
|
732 |
||
28
by rellla
Codestyling of function definition. |
733 |
static VdpStatus h264_decode(decoder_ctx_t *decoder, |
734 |
VdpPictureInfo const *_info, |
|
735 |
const int len, |
|
736 |
video_surface_ctx_t *output) |
|
1
by Jens Kuske
First release |
737 |
{
|
24
by Jens Kuske
Unify interface to different decoders (now more object-oriented) |
738 |
h264_private_t *decoder_p = (h264_private_t *)decoder->private; |
739 |
VdpPictureInfoH264 const *info = (VdpPictureInfoH264 const *)_info; |
|
740 |
||
38
by Jens Kuske
Don't overwrite VideoSurface data while it is still needed for OutputSurface |
741 |
VdpStatus ret = yuv_prepare(output); |
742 |
if (ret != VDP_STATUS_OK) |
|
743 |
return ret; |
|
744 |
||
59.1.4
by Jens Kuske
Add reconstruct buffer to video surface |
745 |
ret = rec_prepare(output); |
746 |
if (ret != VDP_STATUS_OK) |
|
747 |
return ret; |
|
748 |
||
1
by Jens Kuske
First release |
749 |
h264_context_t *c = calloc(1, sizeof(h264_context_t)); |
750 |
c->picture_width_in_mbs_minus1 = (decoder->width - 1) / 16; |
|
40
by Jens Kuske
Add support for mbaff h264 |
751 |
if (!info->frame_mbs_only_flag) |
39
by Jens Kuske
Add support for interlaced h264 |
752 |
c->picture_height_in_mbs_minus1 = ((decoder->height / 2) - 1) / 16; |
753 |
else
|
|
754 |
c->picture_height_in_mbs_minus1 = (decoder->height - 1) / 16; |
|
1
by Jens Kuske
First release |
755 |
c->info = info; |
756 |
c->output = output; |
|
39
by Jens Kuske
Add support for interlaced h264 |
757 |
c->video_extra_data_len = ((decoder->width + 15) / 16) * ((decoder->height + 15) / 16) * 32; |
24
by Jens Kuske
Unify interface to different decoders (now more object-oriented) |
758 |
|
40
by Jens Kuske
Add support for mbaff h264 |
759 |
h264_video_private_t *output_p = get_surface_priv(c, output); |
760 |
if (!output_p) |
|
66
by rellla
Add missing free, if getting output surface fails. |
761 |
{
|
68
by Jens Kuske
Add missing error check in h264 decoder |
762 |
ret = VDP_STATUS_RESOURCES; |
763 |
goto err_free; |
|
66
by rellla
Add missing free, if getting output surface fails. |
764 |
}
|
40
by Jens Kuske
Add support for mbaff h264 |
765 |
|
766 |
if (info->field_pic_flag) |
|
767 |
output_p->pic_type = PIC_TYPE_FIELD; |
|
768 |
else if (info->mb_adaptive_frame_field_flag) |
|
769 |
output_p->pic_type = PIC_TYPE_MBAFF; |
|
770 |
else
|
|
771 |
output_p->pic_type = PIC_TYPE_FRAME; |
|
12
by Jens Kuske
Add more h264 decoding parameters |
772 |
|
1
by Jens Kuske
First release |
773 |
// activate H264 engine
|
71
by Jens Kuske
Use libcedrus |
774 |
c->regs = cedrus_ve_get(decoder->device->cedrus, CEDRUS_ENGINE_H264, (decoder->width >= 2048 ? 0x1 : 0x0) << 21); |
1
by Jens Kuske
First release |
775 |
|
9
by Jens Kuske
Add buffers for VE version 0x1625 |
776 |
// some buffers
|
71
by Jens Kuske
Use libcedrus |
777 |
uint32_t extra_buffers = cedrus_mem_get_bus_addr(decoder_p->extra_data); |
9
by Jens Kuske
Add buffers for VE version 0x1625 |
778 |
writel(extra_buffers, c->regs + VE_H264_EXTRA_BUFFER1); |
779 |
writel(extra_buffers + 0x48000, c->regs + VE_H264_EXTRA_BUFFER2); |
|
71
by Jens Kuske
Use libcedrus |
780 |
if (cedrus_get_ve_version(decoder->device->cedrus) == 0x1625 || decoder->width >= 2048) |
9
by Jens Kuske
Add buffers for VE version 0x1625 |
781 |
{
|
782 |
int size = (c->picture_width_in_mbs_minus1 + 32) * 192; |
|
783 |
size = (size + 4095) & ~4095; |
|
19
by Jens Kuske
Add limited support for 4k h264 decoding |
784 |
writel(decoder->width >= 2048 ? 0x5 : 0xa, c->regs + 0x50); |
9
by Jens Kuske
Add buffers for VE version 0x1625 |
785 |
writel(extra_buffers + 0x50000, c->regs + 0x54); |
786 |
writel(extra_buffers + 0x50000 + size, c->regs + 0x58); |
|
787 |
}
|
|
788 |
||
13
by Jens Kuske
Add support for custom h264 scaling lists |
789 |
// write custom scaling lists
|
790 |
if (!(c->default_scaling_lists = check_scaling_lists(c))) |
|
791 |
{
|
|
792 |
const uint32_t *sl4 = (uint32_t *)&c->info->scaling_lists_4x4[0][0]; |
|
793 |
const uint32_t *sl8 = (uint32_t *)&c->info->scaling_lists_8x8[0][0]; |
|
794 |
||
795 |
writel(VE_SRAM_H264_SCALING_LISTS, c->regs + VE_H264_RAM_WRITE_PTR); |
|
796 |
||
797 |
int i; |
|
798 |
for (i = 0; i < 2 * 64 / 4; i++) |
|
799 |
writel(sl8[i], c->regs + VE_H264_RAM_WRITE_DATA); |
|
800 |
||
801 |
for (i = 0; i < 6 * 16 / 4; i++) |
|
802 |
writel(sl4[i], c->regs + VE_H264_RAM_WRITE_DATA); |
|
803 |
}
|
|
804 |
||
24
by Jens Kuske
Unify interface to different decoders (now more object-oriented) |
805 |
// sdctrl
|
806 |
writel(0x00000000, c->regs + VE_H264_SDROT_CTRL); |
|
75
by Jens Kuske
Add A64 support |
807 |
if (cedrus_get_ve_version(decoder->device->cedrus) >= 0x1680) |
59.1.4
by Jens Kuske
Add reconstruct buffer to video surface |
808 |
{
|
71
by Jens Kuske
Use libcedrus |
809 |
writel(cedrus_mem_get_bus_addr(c->output->yuv->data), c->regs + VE_H264_SDROT_LUMA); |
810 |
writel(cedrus_mem_get_bus_addr(c->output->yuv->data) + c->output->luma_size, c->regs + VE_H264_SDROT_CHROMA); |
|
59.1.4
by Jens Kuske
Add reconstruct buffer to video surface |
811 |
writel((0x2 << 30) | (0x1 << 28) | (c->output->chroma_size / 2), c->regs + VE_EXTRA_OUT_FMT_OFFSET); |
812 |
}
|
|
24
by Jens Kuske
Unify interface to different decoders (now more object-oriented) |
813 |
|
68
by Jens Kuske
Add missing error check in h264 decoder |
814 |
if (!fill_frame_lists(c)) |
815 |
{
|
|
816 |
ret = VDP_STATUS_ERROR; |
|
817 |
goto err_ve_put; |
|
818 |
}
|
|
24
by Jens Kuske
Unify interface to different decoders (now more object-oriented) |
819 |
|
1
by Jens Kuske
First release |
820 |
unsigned int slice, pos = 0; |
821 |
for (slice = 0; slice < info->slice_count; slice++) |
|
822 |
{
|
|
823 |
h264_header_t *h = &c->header; |
|
824 |
memset(h, 0, sizeof(h264_header_t)); |
|
825 |
||
71
by Jens Kuske
Use libcedrus |
826 |
pos = find_startcode(cedrus_mem_get_pointer(decoder->data), len, pos) + 3; |
1
by Jens Kuske
First release |
827 |
|
71
by Jens Kuske
Use libcedrus |
828 |
h->nal_unit_type = ((uint8_t *)cedrus_mem_get_pointer(decoder->data))[pos++] & 0x1f; |
1
by Jens Kuske
First release |
829 |
|
830 |
if (h->nal_unit_type != 5 && h->nal_unit_type != 1) |
|
2
by Jens Kuske
Fix memory leak in h264_decode |
831 |
{
|
68
by Jens Kuske
Add missing error check in h264 decoder |
832 |
ret = VDP_STATUS_ERROR; |
833 |
goto err_ve_put; |
|
2
by Jens Kuske
Fix memory leak in h264_decode |
834 |
}
|
1
by Jens Kuske
First release |
835 |
|
12
by Jens Kuske
Add more h264 decoding parameters |
836 |
// Enable startcode detect and ??
|
75
by Jens Kuske
Add A64 support |
837 |
writel((0x1 << 25) | (0x1 << 10) | ((cedrus_get_ve_version(decoder->device->cedrus) >= 0x1680) << 9), c->regs + VE_H264_CTRL); |
1
by Jens Kuske
First release |
838 |
|
839 |
// input buffer
|
|
840 |
writel((len - pos) * 8, c->regs + VE_H264_VLD_LEN); |
|
841 |
writel(pos * 8, c->regs + VE_H264_VLD_OFFSET); |
|
71
by Jens Kuske
Use libcedrus |
842 |
uint32_t input_addr = cedrus_mem_get_bus_addr(decoder->data); |
1
by Jens Kuske
First release |
843 |
writel(input_addr + VBV_SIZE - 1, c->regs + VE_H264_VLD_END); |
844 |
writel((input_addr & 0x0ffffff0) | (input_addr >> 28) | (0x7 << 28), c->regs + VE_H264_VLD_ADDR); |
|
845 |
||
846 |
// ?? some sort of reset maybe
|
|
847 |
writel(0x7, c->regs + VE_H264_TRIGGER); |
|
848 |
||
849 |
int i; |
|
850 |
||
851 |
decode_slice_header(c); |
|
852 |
||
853 |
// write RefPicLists
|
|
854 |
if (h->slice_type != SLICE_TYPE_I && h->slice_type != SLICE_TYPE_SI) |
|
855 |
{
|
|
856 |
writel(VE_SRAM_H264_REF_LIST0, c->regs + VE_H264_RAM_WRITE_PTR); |
|
857 |
for (i = 0; i < h->num_ref_idx_l0_active_minus1 + 1; i += 4) |
|
858 |
{
|
|
859 |
int j; |
|
860 |
uint32_t list = 0; |
|
861 |
for (j = 0; j < 4; j++) |
|
39
by Jens Kuske
Add support for interlaced h264 |
862 |
if (h->RefPicList0[i + j].surface) |
24
by Jens Kuske
Unify interface to different decoders (now more object-oriented) |
863 |
{
|
39
by Jens Kuske
Add support for interlaced h264 |
864 |
h264_video_private_t *surface_p = (h264_video_private_t *)h->RefPicList0[i + j].surface->decoder_private; |
865 |
list |= ((surface_p->pos * 2 + (h->RefPicList0[i + j].field == PIC_BOTTOM_FIELD)) << (j * 8)); |
|
24
by Jens Kuske
Unify interface to different decoders (now more object-oriented) |
866 |
}
|
1
by Jens Kuske
First release |
867 |
writel(list, c->regs + VE_H264_RAM_WRITE_DATA); |
868 |
}
|
|
869 |
}
|
|
870 |
if (h->slice_type == SLICE_TYPE_B) |
|
871 |
{
|
|
872 |
writel(VE_SRAM_H264_REF_LIST1, c->regs + VE_H264_RAM_WRITE_PTR); |
|
873 |
for (i = 0; i < h->num_ref_idx_l1_active_minus1 + 1; i += 4) |
|
874 |
{
|
|
875 |
int j; |
|
876 |
uint32_t list = 0; |
|
877 |
for (j = 0; j < 4; j++) |
|
39
by Jens Kuske
Add support for interlaced h264 |
878 |
if (h->RefPicList1[i + j].surface) |
24
by Jens Kuske
Unify interface to different decoders (now more object-oriented) |
879 |
{
|
39
by Jens Kuske
Add support for interlaced h264 |
880 |
h264_video_private_t *surface_p = (h264_video_private_t *)h->RefPicList1[i + j].surface->decoder_private; |
881 |
list |= ((surface_p->pos * 2 + (h->RefPicList1[i + j].field == PIC_BOTTOM_FIELD)) << (j * 8)); |
|
24
by Jens Kuske
Unify interface to different decoders (now more object-oriented) |
882 |
}
|
1
by Jens Kuske
First release |
883 |
writel(list, c->regs + VE_H264_RAM_WRITE_DATA); |
884 |
}
|
|
885 |
}
|
|
886 |
||
12
by Jens Kuske
Add more h264 decoding parameters |
887 |
// picture parameters
|
888 |
writel(((info->entropy_coding_mode_flag & 0x1) << 15) |
|
889 |
| ((info->num_ref_idx_l0_active_minus1 & 0x1f) << 10) |
|
890 |
| ((info->num_ref_idx_l1_active_minus1 & 0x1f) << 5) |
|
891 |
| ((info->weighted_pred_flag & 0x1) << 4) |
|
892 |
| ((info->weighted_bipred_idc & 0x3) << 2) |
|
893 |
| ((info->constrained_intra_pred_flag & 0x1) << 1) |
|
894 |
| ((info->transform_8x8_mode_flag & 0x1) << 0) |
|
1
by Jens Kuske
First release |
895 |
, c->regs + VE_H264_PIC_HDR); |
896 |
||
12
by Jens Kuske
Add more h264 decoding parameters |
897 |
// sequence parameters
|
898 |
writel((0x1 << 19) |
|
899 |
| ((c->info->frame_mbs_only_flag & 0x1) << 18) |
|
900 |
| ((c->info->mb_adaptive_frame_field_flag & 0x1) << 17) |
|
901 |
| ((c->info->direct_8x8_inference_flag & 0x1) << 16) |
|
902 |
| ((c->picture_width_in_mbs_minus1 & 0xff) << 8) |
|
903 |
| ((c->picture_height_in_mbs_minus1 & 0xff) << 0) |
|
1
by Jens Kuske
First release |
904 |
, c->regs + VE_H264_FRAME_SIZE); |
905 |
||
12
by Jens Kuske
Add more h264 decoding parameters |
906 |
// slice parameters
|
907 |
writel((((h->first_mb_in_slice % (c->picture_width_in_mbs_minus1 + 1)) & 0xff) << 24) |
|
40
by Jens Kuske
Add support for mbaff h264 |
908 |
| (((h->first_mb_in_slice / (c->picture_width_in_mbs_minus1 + 1)) & 0xff) * |
909 |
(output_p->pic_type == PIC_TYPE_MBAFF ? 2 : 1) << 16) |
|
12
by Jens Kuske
Add more h264 decoding parameters |
910 |
| ((info->is_reference & 0x1) << 12) |
911 |
| ((h->slice_type & 0xf) << 8) |
|
1
by Jens Kuske
First release |
912 |
| ((slice == 0 ? 0x1 : 0x0) << 5) |
12
by Jens Kuske
Add more h264 decoding parameters |
913 |
| ((info->field_pic_flag & 0x1) << 4) |
914 |
| ((info->bottom_field_flag & 0x1) << 3) |
|
915 |
| ((h->direct_spatial_mv_pred_flag & 0x1) << 2) |
|
916 |
| ((h->cabac_init_idc & 0x3) << 0) |
|
1
by Jens Kuske
First release |
917 |
, c->regs + VE_H264_SLICE_HDR); |
918 |
||
12
by Jens Kuske
Add more h264 decoding parameters |
919 |
writel(((h->num_ref_idx_l0_active_minus1 & 0x1f) << 24) |
920 |
| ((h->num_ref_idx_l1_active_minus1 & 0x1f) << 16) |
|
921 |
| ((h->num_ref_idx_active_override_flag & 0x1) << 12) |
|
922 |
| ((h->disable_deblocking_filter_idc & 0x3) << 8) |
|
1
by Jens Kuske
First release |
923 |
| ((h->slice_alpha_c0_offset_div2 & 0xf) << 4) |
924 |
| ((h->slice_beta_offset_div2 & 0xf) << 0) |
|
925 |
, c->regs + VE_H264_SLICE_HDR2); |
|
926 |
||
13
by Jens Kuske
Add support for custom h264 scaling lists |
927 |
writel(((c->default_scaling_lists & 0x1) << 24) |
1
by Jens Kuske
First release |
928 |
| ((info->second_chroma_qp_index_offset & 0x3f) << 16) |
929 |
| ((info->chroma_qp_index_offset & 0x3f) << 8) |
|
12
by Jens Kuske
Add more h264 decoding parameters |
930 |
| (((info->pic_init_qp_minus26 + 26 + h->slice_qp_delta) & 0x3f) << 0) |
1
by Jens Kuske
First release |
931 |
, c->regs + VE_H264_QP_PARAM); |
932 |
||
933 |
// clear status flags
|
|
934 |
writel(readl(c->regs + VE_H264_STATUS), c->regs + VE_H264_STATUS); |
|
935 |
||
936 |
// enable int
|
|
937 |
writel(readl(c->regs + VE_H264_CTRL) | 0x7, c->regs + VE_H264_CTRL); |
|
938 |
||
939 |
// SHOWTIME
|
|
940 |
writel(0x8, c->regs + VE_H264_TRIGGER); |
|
941 |
||
71
by Jens Kuske
Use libcedrus |
942 |
cedrus_ve_wait(decoder->device->cedrus, 1); |
1
by Jens Kuske
First release |
943 |
|
944 |
// clear status flags
|
|
945 |
writel(readl(c->regs + VE_H264_STATUS), c->regs + VE_H264_STATUS); |
|
946 |
||
947 |
pos = (readl(c->regs + VE_H264_VLD_OFFSET) / 8) - 3; |
|
948 |
}
|
|
949 |
||
68
by Jens Kuske
Add missing error check in h264 decoder |
950 |
ret = VDP_STATUS_OK; |
951 |
||
952 |
err_ve_put: |
|
1
by Jens Kuske
First release |
953 |
// stop H264 engine
|
71
by Jens Kuske
Use libcedrus |
954 |
cedrus_ve_put(decoder->device->cedrus); |
68
by Jens Kuske
Add missing error check in h264 decoder |
955 |
err_free: |
2
by Jens Kuske
Fix memory leak in h264_decode |
956 |
free(c); |
68
by Jens Kuske
Add missing error check in h264 decoder |
957 |
return ret; |
24
by Jens Kuske
Unify interface to different decoders (now more object-oriented) |
958 |
}
|
959 |
||
960 |
/*
 * Set up the H.264 specific part of a decoder context.
 *
 * Allocates the private state plus the engine's extra data buffer and hooks
 * h264_decode()/h264_private_free() into the decoder.
 *
 * Returns VDP_STATUS_OK on success or VDP_STATUS_RESOURCES if either
 * allocation fails; on failure the decoder context is left untouched.
 */
VdpStatus new_decoder_h264(decoder_ctx_t *decoder)
{
	h264_private_t *priv = calloc(1, sizeof(*priv));
	if (priv == NULL)
		return VDP_STATUS_RESOURCES;

	// 320 KiB are always required
	int buffer_bytes = 320 * 1024;

	if (cedrus_get_ve_version(decoder->device->cedrus) == 0x1625 || decoder->width >= 2048)
	{
		// Two more buffers follow for engine version 0x1625 (and wide
		// pictures); the second one begins on a 4 KiB boundary
		buffer_bytes += ((decoder->width - 1) / 16 + 32) * 192;
		buffer_bytes = (buffer_bytes + 4095) & ~4095;
		buffer_bytes += ((decoder->width - 1) / 16 + 64) * 80;
	}

	priv->extra_data = cedrus_mem_alloc(decoder->device->cedrus, buffer_bytes);
	if (priv->extra_data)
	{
		decoder->decode = h264_decode;
		decoder->private = priv;
		decoder->private_free = h264_private_free;
		return VDP_STATUS_OK;
	}

	free(priv);
	return VDP_STATUS_RESOURCES;
}
|