2
* Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4
* This file is part of FFmpeg.
6
* FFmpeg is free software; you can redistribute it and/or
7
* modify it under the terms of the GNU Lesser General Public
8
* License as published by the Free Software Foundation; either
9
* version 2.1 of the License, or (at your option) any later version.
11
* FFmpeg is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
* Lesser General Public License for more details.
16
* You should have received a copy of the GNU Lesser General Public
17
* License along with FFmpeg; if not, write to the Free Software
18
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26
#include "rangecoder.h"
28
#include "mpegvideo.h"
33
static const int8_t quant3[256]={
34
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
51
static const int8_t quant3b[256]={
52
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
static const int8_t quant3bA[256]={
70
0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87
static const int8_t quant5[256]={
88
0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
105
static const int8_t quant7[256]={
106
0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
109
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
120
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
121
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
123
static const int8_t quant9[256]={
124
0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
125
3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
139
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
141
static const int8_t quant11[256]={
142
0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
143
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144
4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
156
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
157
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
159
static const int8_t quant13[256]={
160
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
161
4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
163
5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
173
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
175
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
179
static const uint8_t obmc32[1024]={
180
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
182
0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
183
0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
184
0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
185
0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
186
0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
187
0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
188
0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
189
0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
190
0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
191
0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
192
0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
193
0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
194
0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
195
0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
196
0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
197
0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
198
0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
199
0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
200
0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
201
0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
202
0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
203
0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
204
0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
205
0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
206
0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
207
0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
208
0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
209
0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
210
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
211
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
214
static const uint8_t obmc16[256]={
215
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
216
0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
217
0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
218
0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
219
0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
220
0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
221
1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
222
1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
223
1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
224
1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
225
0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
226
0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
227
0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
228
0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
229
0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
230
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
234
static const uint8_t obmc32[1024]={
235
0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
236
0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
237
0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
238
0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
239
4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
240
4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
241
4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
242
4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
243
4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
244
4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
245
4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
246
4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
247
8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
248
8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
249
8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
250
8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251
8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
252
8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
253
8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
254
8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
255
4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
256
4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
257
4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
258
4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
259
4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
260
4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
261
4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
262
4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
263
0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
264
0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
265
0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
266
0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
269
static const uint8_t obmc16[256]={
270
0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
271
4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
272
4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
273
8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
274
8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
275
12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
276
12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
277
16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278
16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
279
12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280
12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
281
8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
282
8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
283
4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
284
4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
285
0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
289
static const uint8_t obmc32[1024]={
290
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
291
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
292
0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
293
0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
294
0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
295
0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
296
0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
297
0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
298
0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
299
0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
300
0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
301
0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
302
0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
303
0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
304
0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
305
0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
306
0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
307
0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
308
0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
309
0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
310
0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
311
0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
312
0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
313
0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
314
0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
315
0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
316
0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
317
0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
318
0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
319
0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
320
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
321
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
324
static const uint8_t obmc16[256]={
325
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
326
0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
327
0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
328
0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
329
0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
330
1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
331
1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
332
0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
333
0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
334
1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
335
1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
336
0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
337
0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
338
0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
339
0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
340
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
346
/* 8x8 table of 64 bytes. Every visible entry equals 4*w[row]*w[col] with
 * w = {1,3,5,7,7,5,3,1} (for example 196 = 4*7*7 and 36 = 4*3*3), so the
 * table is symmetric both horizontally and vertically.
 * NOTE(review): the closing of the initializer is not present in this
 * listing; the numeric-only lines are listing residue, not table data. */
static const uint8_t obmc8[64]={
347
4, 12, 20, 28, 28, 20, 12, 4,
348
12, 36, 60, 84, 84, 60, 36, 12,
349
20, 60,100,140,140,100, 60, 20,
350
28, 84,140,196,196,140, 84, 28,
351
28, 84,140,196,196,140, 84, 28,
352
20, 60,100,140,140,100, 60, 20,
353
12, 36, 60, 84, 84, 60, 36, 12,
354
4, 12, 20, 28, 28, 20, 12, 4,
359
static const uint8_t obmc4[16]={
367
/* Pointers to the four obmc tables declared in this file, ordered from the
 * largest (obmc32, 1024 entries) down to the smallest (obmc4, 16 entries). */
static const uint8_t *obmc_tab[4]={
368
obmc32, obmc16, obmc8, obmc4
371
static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
373
typedef struct BlockNode{
379
//#define TYPE_SPLIT 1
380
#define BLOCK_INTRA 1
382
//#define TYPE_NOCOLOR 4
383
uint8_t level; //FIXME merge into type?
386
static const BlockNode null_block= { //FIXME add border maybe
387
.color= {128,128,128},
395
#define LOG2_MB_SIZE 4
396
#define MB_SIZE (1<<LOG2_MB_SIZE)
398
typedef struct x_and_coeff{
403
/* Per-subband record.
 * NOTE(review): only some members are visible in this listing; the
 * numeric-only lines are listing residue. */
typedef struct SubBand{
408
int qlog; ///< log(qscale)/log[2^(1/6)]
412
int stride_line; ///< Stride measured in lines, not pixels.
413
x_and_coeff * x_coeff;
414
/* Link to another SubBand; presumably the matching band of the next
 * coarser decomposition level -- TODO confirm against the code that sets it. */
struct SubBand *parent;
415
/* 7 + 512 rows of 32 bytes each. The row width matches the state+0..state+31
 * indexing used by put_symbol()/get_symbol(); presumably each row is one
 * context set handed to those functions -- TODO confirm. */
uint8_t state[/*7*2*/ 7 + 512][32];
418
typedef struct Plane{
421
SubBand band[MAX_DECOMPOSITIONS][4];
424
typedef struct SnowContext{
425
// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
427
AVCodecContext *avctx;
431
AVFrame input_picture; ///< new_picture with the internal linesizes
432
AVFrame current_picture;
433
AVFrame last_picture[MAX_REF_FRAMES];
434
AVFrame mconly_picture;
435
// uint8_t q_context[16];
436
uint8_t header_state[32];
437
uint8_t block_state[128 + 32*128];
441
int spatial_decomposition_type;
442
int last_spatial_decomposition_type;
443
int temporal_decomposition_type;
444
int spatial_decomposition_count;
445
int temporal_decomposition_count;
448
int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
449
uint32_t *ref_scores[MAX_REF_FRAMES];
450
DWTELEM *spatial_dwt_buffer;
454
int spatial_scalability;
464
#define QBIAS_SHIFT 3
468
int last_block_max_depth;
469
Plane plane[MAX_PLANES];
471
#define ME_CACHE_SIZE 1024
472
int me_cache[ME_CACHE_SIZE];
473
int me_cache_generation;
476
MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
487
/* Yields the row pointer stored in (slice_buf)->line[line_num] when it is
 * non-NULL, otherwise calls slice_buffer_load_line() to bind a row first.
 * Both macro arguments are expanded more than once, so callers must not pass
 * expressions with side effects. */
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
488
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
490
static void iterative_me(SnowContext *s);
492
/* Initialise a slice_buffer.
 *
 * Records base_buffer, line_count and line_width, stores max_allocated_lines
 * as data_count, allocates a zero-filled array of line_count row pointers
 * (buf->line) and a stack of max_allocated_lines rows (buf->data_stack), each
 * row holding line_width DWTELEMs.
 *
 * NOTE(review): this listing omits some original lines (braces, the
 * declaration of i) and the numeric-only lines are listing residue. No check
 * of the av_malloc()/av_mallocz() results is visible here -- confirm against
 * the full file. */
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
496
buf->base_buffer = base_buffer;
497
buf->line_count = line_count;
498
buf->line_width = line_width;
499
buf->data_count = max_allocated_lines;
500
/* av_mallocz: every line slot starts out NULL, i.e. no row bound yet. */
buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
501
buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
503
for (i = 0; i < max_allocated_lines; i++)
505
buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
508
/* All rows are free initially: the top index refers to the last stack slot. */
buf->data_stack_top = max_allocated_lines - 1;
511
/* Bind a free row to `line`: takes the row at data_stack[data_stack_top],
 * decrements data_stack_top and stores the row in buf->line[line].
 *
 * NOTE(review): several original lines are missing from this listing (local
 * declarations, braces, the final return). The early
 * "return buf->line[line];" is presumably guarded by a check that the line is
 * already bound -- TODO confirm. `offset` is computed but no use of it is
 * visible in this excerpt. */
static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
516
// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
518
/* A free row must be available on the stack. */
assert(buf->data_stack_top >= 0);
519
// assert(!buf->line[line]);
521
return buf->line[line];
523
offset = buf->line_width * line;
524
buffer = buf->data_stack[buf->data_stack_top];
525
buf->data_stack_top--;
526
buf->line[line] = buffer;
528
// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
533
/* Unbind the row held by `line` and push it back onto the free-row stack:
 * data_stack_top is incremented, the row pointer is stored there and
 * buf->line[line] is reset to NULL.
 *
 * `line` must be within [0, line_count) and must currently hold a row; both
 * are enforced only by assert().
 * NOTE(review): `offset` is computed but no use of it is visible in this
 * excerpt; local declarations and braces are missing from the listing. */
static void slice_buffer_release(slice_buffer * buf, int line)
538
assert(line >= 0 && line < buf->line_count);
539
assert(buf->line[line]);
541
offset = buf->line_width * line;
542
buffer = buf->line[line];
543
buf->data_stack_top++;
544
buf->data_stack[buf->data_stack_top] = buffer;
545
buf->line[line] = NULL;
547
// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
550
/* Walk all line_count line slots and hand rows back via
 * slice_buffer_release().
 *
 * NOTE(review): slice_buffer_release() asserts that the slot holds a row, so
 * the call is presumably guarded by an "is this line bound" test that is not
 * visible in this listing -- TODO confirm. */
static void slice_buffer_flush(slice_buffer * buf)
553
for (i = 0; i < buf->line_count; i++)
557
// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
558
slice_buffer_release(buf, i);
563
/* Tear down a slice_buffer: flush it, free each of the data_count rows on
 * data_stack, then free the data_stack array and the line array.
 * av_freep() is used throughout, so each freed pointer is also reset.
 *
 * NOTE(review): braces and the declaration of i are missing from this
 * listing. */
static void slice_buffer_destroy(slice_buffer * buf)
566
/* Return bound rows to the stack first so the loop below reaches them. */
slice_buffer_flush(buf);
568
for (i = buf->data_count - 1; i >= 0; i--)
570
assert(buf->data_stack[i]);
571
av_freep(&buf->data_stack[i]);
573
assert(buf->data_stack);
574
av_freep(&buf->data_stack);
576
av_freep(&buf->line);
580
// Avoid a name clash on SGI IRIX
583
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
584
static uint8_t qexp[QROOT];
586
/* Presumably folds v back into the range [0, m] by reflection -- TODO confirm;
 * only the loop header is visible in this listing (loop body and return are
 * missing).
 * The comparison is done on unsigned values, so for a non-negative m it is
 * true both when v > m and when v is negative: one test covers both sides. */
static inline int mirror(int v, int m){
587
while((unsigned)v > (unsigned)m){
594
/* Write the integer v to range coder c as a series of binary decisions
 * (put_rac), using the adaptive contexts in state[].
 *
 * Context layout as used by the visible calls:
 *   state[0]       first decision; 0 is written ahead of the magnitude
 *                  coding and 1 on the remaining path (presumably the
 *                  "v == 0" flag -- TODO confirm)
 *   state[1..10]   exponent: a run of 1 decisions closed by a 0
 *   state[11..21]  sign (v < 0)
 *   state[22..31]  bits of a = |v| below its leading one, highest first
 *
 * NOTE(review): this listing shows three similar encodings back to back and
 * omits the surrounding if/else, loop headers and any preprocessor
 * conditionals; presumably they are alternatives or branches of one another.
 * The sign writes are presumably conditional on is_signed -- not visible
 * here. */
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
598
const int a= FFABS(v);
599
const int e= av_log2(a);
601
/* Exponent capped at 10 so that state+11+el stays within 11..21. */
const int el= FFMIN(e, 10);
602
put_rac(c, state+0, 0);
605
put_rac(c, state+1+i, 1); //1..10
608
put_rac(c, state+1+9, 1); //1..10
610
put_rac(c, state+1+FFMIN(i,9), 0);
612
/* Bits at positions >= el all share the last mantissa context (22+9). */
for(i=e-1; i>=el; i--){
613
put_rac(c, state+22+9, (a>>i)&1); //22..31
616
put_rac(c, state+22+i, (a>>i)&1); //22..31
620
put_rac(c, state+11 + el, v < 0); //11..21
623
put_rac(c, state+0, 0);
626
put_rac(c, state+1+i, 1); //1..10
628
put_rac(c, state+1+i, 0);
630
for(i=e-1; i>=0; i--){
631
put_rac(c, state+22+i, (a>>i)&1); //22..31
635
put_rac(c, state+11 + e, v < 0); //11..21
638
/* Variant that clamps every context index with FFMIN instead of splitting
 * the loops. */
put_rac(c, state+1+FFMIN(i,9), 1); //1..10
640
put_rac(c, state+1+FFMIN(i,9), 0);
642
for(i=e-1; i>=0; i--){
643
put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
647
put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
651
put_rac(c, state+0, 1);
655
/* Read an integer from range coder c using the contexts in state[]; the
 * context indices mirror those used by put_symbol():
 *   state[0]       first decision (the action taken when it is set is not
 *                  visible in this listing)
 *   state[1..10]   exponent: counted while decisions are 1, index clamped at 9
 *   state[11..21]  sign, read only when is_signed is non-zero
 *   state[22..31]  bits below the leading one, highest first
 *
 * NOTE(review): local declarations, the initial values of e and a, and the
 * return statements are missing from this excerpt. */
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
656
if(get_rac(c, state+0))
661
while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
666
for(i=e-1; i>=0; i--){
667
/* a = 2*a + bit: shift the value left and append the decoded bit. */
a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
670
if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
677
/* Write v to range coder c with an escape-style code parameterised by log2.
 *
 * Visible steps: r starts at 2^log2 (or 1 when log2 is negative); decisions
 * of 1 and then a closing 0 are written on context state[4+log2]; finally the
 * low log2 bits of v are written, highest first, on contexts state[31-i].
 *
 * NOTE(review): the loop that emits the 1 decisions and updates v, r and log2
 * is missing from this listing, as is the declaration of i. */
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
679
int r= log2>=0 ? 1<<log2 : 1;
685
put_rac(c, state+4+log2, 1);
690
put_rac(c, state+4+log2, 0);
692
for(i=log2-1; i>=0; i--){
693
put_rac(c, state+31-i, (v>>i)&1);
697
/* Read a value written in the put_symbol2() format: decisions are read from
 * context state[4+log2] while they are 1, then log2 low bits are read,
 * highest first, from contexts state[31-i] and added into v.
 *
 * NOTE(review): the body of the while loop, the declarations of i and v, and
 * the return statement are missing from this listing. */
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
699
int r= log2>=0 ? 1<<log2 : 1;
704
while(get_rac(c, state+4+log2)){
710
for(i=log2-1; i>=0; i--){
711
v+= get_rac(c, state+31-i)<<i;
717
/* One lifting step over a row of samples:
 *   dst[i] = src[i] +/- ((mul*(ref[i] + ref[i+1]) + add) >> shift)
 * with the sign chosen by `inverse` (non-zero subtracts). dst, src and ref are
 * walked with their own strides (dst_step, src_step, ref_step).
 *
 * The first and last outputs use 2*ref[...] in place of a neighbour pair,
 * i.e. the missing neighbour is replaced by the existing one.
 * NOTE(review): the conditions guarding those edge statements (presumably
 * mirror_left / mirror_right) and the loop header are missing from this
 * listing, along with pointer adjustments between them, if any. */
static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
718
const int mirror_left= !highpass;
719
const int mirror_right= (width&1) ^ highpass;
720
/* For highpass in {0,1}: width/2 - 1, plus one more when highpass is set
 * and width is odd. */
const int w= (width>>1) - 1 + (highpass & width);
723
/* src + ref in the forward direction, src - ref when inv is non-zero. */
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
725
dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
731
dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
735
dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
740
/* Lifting step with the same parameter list and edge layout as lift(), but
 * the correction term r is built from 3*(ref[i] + ref[i+1]) (or 3*2*ref[...]
 * at the last position) rather than from `mul`.
 *
 * NOTE(review): lines between the computation of r and its use are missing
 * from this listing, so r may be adjusted further before
 * (r+add)>>shift is applied; the definition of r used for dst[0], the loop
 * header and the edge conditions are not visible either. `mul` is not
 * referenced by any visible line. Relies on the LIFT macro. */
static av_always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
741
const int mirror_left= !highpass;
742
const int mirror_right= (width&1) ^ highpass;
743
const int w= (width>>1) - 1 + (highpass & width);
750
dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
756
int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
759
dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
763
int r= 3*2*ref[w*ref_step];
766
dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
772
/* Lifting step with the same parameter list and edge layout as lift(), using
 * the LIFTS formula instead of a plain add/subtract.
 *
 * NOTE(review): the loop header and the conditions guarding the first/last
 * statements are missing from this listing. */
static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
773
const int mirror_left= !highpass;
774
const int mirror_right= (width&1) ^ highpass;
775
const int w= (width>>1) - 1 + (highpass & width);
779
/* inv != 0:  src - ((ref - 4*src) >> shift)
 * inv == 0:  (64*src + 4*ref + 8 + (5<<27)) / 80 - (1<<23)
 * (5<<27)/80 equals 1<<23, so the two large constants cancel; presumably the
 * bias keeps the dividend positive so the integer division rounds the same
 * way for negative inputs -- TODO confirm.
 * The macro reads `shift` from the enclosing function; it is not a macro
 * parameter. */
#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
781
dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
787
dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
791
dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
797
/* In-place lifting step over one row.
 *
 * Every second sample starting at `start` is updated with a rounded, shifted
 * weighted sum of other samples of the same row:
 *   sum    = SUM coeffs[i] * dst[x + 2*i - n + 1]
 *   dst[x] += (sum + half) >> shift     (-= when inverse is non-zero)
 * where half = (1<<shift)/2. The sum is accumulated through an int64_t cast.
 * Tap positions at or beyond `width` are reflected to 2*width - x2 - 2.
 *
 * NOTE(review): the inner loop header over i, the declaration of sum and the
 * branch preceding the "else if" (presumably the reflection for x2 < 0) are
 * missing from this listing. */
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
800
for(x=start; x<width; x+=2){
804
int x2= x + 2*i - n + 1;
806
else if(x2>=width) x2= 2*width-x2-2;
807
sum += coeffs[i]*(int64_t)dst[x2];
809
if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
810
else dst[x] += (sum + (1<<shift)/2)>>shift;
814
/* Vertical counterpart of inplace_lift(): for every second row starting at
 * `start`, each of the `width` columns is updated in place with a rounded,
 * shifted weighted sum of samples from the same column:
 *   sum = SUM coeffs[i] * dst[x + (y + 2*i - n + 1)*stride]
 * Row positions at or beyond `height` are reflected to 2*height - y2 - 2.
 * `inverse` non-zero subtracts the correction instead of adding it.
 *
 * NOTE(review): the inner loop header over i, the declaration of sum and the
 * branch preceding the "else if" (presumably the reflection for y2 < 0) are
 * missing from this listing. */
static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
816
for(y=start; y<height; y+=2){
817
for(x=0; x<width; x++){
821
int y2= y + 2*i - n + 1;
823
else if(y2>=height) y2= 2*height-y2-2;
824
sum += coeffs[i]*(int64_t)dst[x + y2*stride];
826
if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
827
else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
836
/* Alternative coefficient sets COEFFS1..COEFFS4. Each macro expands to a
 * compound-literal int array that is passed as the `coeffs` argument of
 * inplace_lift() / inplace_liftV() by the *X transform routines.
 * NOTE(review): each COEFFSn is defined several times here; presumably every
 * group lives in its own #if/#elif/#else branch. Those directives and the
 * companion N1..N4, SHIFT1..SHIFT4, LX0/LX1 and SCALEX definitions are not
 * visible in this listing (the numeric-only lines are listing residue). */
#if 0 // more accurate 9/7
839
#define COEFFS1 (int[]){-25987,-25987}
842
#define COEFFS2 (int[]){-27777,-27777}
845
#define COEFFS3 (int[]){28931,28931}
848
#define COEFFS4 (int[]){14533,14533}
852
#define COEFFS1 (int[]){1,-9,-9,1}
855
#define COEFFS2 (int[]){-1,5,5,-1}
868
#define COEFFS1 (int[]){1,1}
871
#define COEFFS2 (int[]){-1,-1}
884
#define COEFFS2 (int[]){-1,-1}
887
#define COEFFS3 (int[]){-1,-1}
890
#define COEFFS4 (int[]){-5,29,29,-5}
895
#define COEFFS1 (int[]){-203,-203}
898
#define COEFFS2 (int[]){-217,-217}
901
#define COEFFS3 (int[]){113,113}
904
#define COEFFS4 (int[]){227,227}
912
#define COEFFS2 (int[]){-1,-1}
915
#define COEFFS3 (int[]){-1,-1}
918
#define COEFFS4 (int[]){3,3}
922
#define COEFFS1 (int[]){1,-9,-9,1}
925
#define COEFFS2 (int[]){1,1}
935
#define COEFFS1 (int[]){1,-9,-9,1}
938
#define COEFFS2 (int[]){-1,5,5,-1}
946
/* Forward transform of one row of `width` samples, in place.
 *
 * Applies four in-place lifting steps (COEFFS1..COEFFS4, alternating the LX1
 * and LX0 start positions), then de-interleaves the row through `temp`:
 * odd-indexed samples go to temp[w2..], where w2 = (width+1)/2, and temp is
 * copied back over b.
 *
 * NOTE(review): the declarations of temp and x and the statement(s) that copy
 * the even-indexed samples are missing from this listing. */
static void horizontal_decomposeX(DWTELEM *b, int width){
948
const int width2= width>>1;
949
const int w2= (width+1)>>1;
952
inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
953
inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
954
inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
955
inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
957
for(x=0; x<width2; x++){
959
temp[x+w2]= b[2*x + 1];
963
/* NOTE(review): the byte count uses sizeof(int) although b holds DWTELEM;
 * this copies the whole row only if DWTELEM has the size of int -- confirm
 * against the DWTELEM typedef. */
memcpy(b, temp, width*sizeof(int));
966
/**
 * Inverse of horizontal_decomposeX() for one row of `width` samples.
 *
 * Copies the row to a temporary, writes the values stored from offset w2
 * onwards back to the odd positions of b, then undoes the lifting passes by
 * calling inplace_lift() with inverse=1 in the reverse order
 * (COEFFS4 down to COEFFS1).
 */
static void horizontal_composeX(DWTELEM *b, int width){
968
const int width2= width>>1;
970
const int w2= (width+1)>>1;
972
/* NOTE(review): the byte count uses sizeof(int) although b holds DWTELEM --
 * confirm the two types have the same size */
memcpy(temp, b, width*sizeof(int));
973
for(x=0; x<width2; x++){
975
b[2*x + 1]= temp[x+w2];
980
inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
981
inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
982
inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
983
inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
986
/**
 * One level of the forward generic-lifting 2-D transform.
 *
 * Multiplies every sample of the width x height region by SCALEX, applies
 * horizontal_decomposeX() to each row, and then runs the four vertical
 * lifting passes (COEFFS1..COEFFS4) over the whole region.
 */
static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
989
for(y=0; y<height; y++){
990
for(x=0; x<width; x++){
991
buffer[y*stride + x] *= SCALEX;
995
for(y=0; y<height; y++){
996
horizontal_decomposeX(buffer + y*stride, width);
999
inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
1000
inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
1001
inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
1002
inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
1005
/**
 * One level of the inverse generic-lifting 2-D transform; mirrors
 * spatial_decomposeX() step by step in the opposite order.
 *
 * Undoes the vertical lifting passes (COEFFS4 down to COEFFS1, inverse=1),
 * applies horizontal_composeX() to each row, and finally divides every
 * sample by SCALEX (integer division).
 */
static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
1008
inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1009
inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1010
inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1011
inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1013
for(y=0; y<height; y++){
1014
horizontal_composeX(buffer + y*stride, width);
1017
for(y=0; y<height; y++){
1018
for(x=0; x<width; x++){
1019
buffer[y*stride + x] /= SCALEX;
1024
/**
 * Forward horizontal pass of the integer 5/3 transform on one row.
 *
 * The odd-indexed samples of b are gathered into temp at offset w2, and the
 * two lift() calls at the end write the b+w2 half and then the b half of the
 * row. An additional loop working on A1..A4 with (A1 + A3 + 2)>>2 updates is
 * also present.
 * NOTE(review): lift() is defined elsewhere; its argument order is presumably
 * (dst, src, ref, ...) -- confirm. Whether the A1..A4 loop is compiled in
 * cannot be told from the visible lines.
 */
static void horizontal_decompose53i(DWTELEM *b, int width){
1025
/* scratch row: a variable-length array of `width` elements */
DWTELEM temp[width];
1026
const int width2= width>>1;
1028
const int w2= (width+1)>>1;
1030
for(x=0; x<width2; x++){
1032
temp[x+w2]= b[2*x + 1];
1046
for(x=1; x+1<width2; x+=2){
1050
A2 += (A1 + A3 + 2)>>2;
1054
A1= temp[x+1+width2];
1057
A4 += (A1 + A3 + 2)>>2;
1063
A2 += (A1 + A3 + 2)>>2;
1068
lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1069
lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1073
/**
 * Forward 5/3 vertical step on row b1: for each of `width` columns,
 * subtracts (b0 + b2) >> 1 of the two neighbouring rows from b1.
 * vertical_compose53iH0() adds the same term back.
 */
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1076
for(i=0; i<width; i++){
1077
b1[i] -= (b0[i] + b2[i])>>1;
1081
/**
 * Forward 5/3 vertical step on row b1: for each of `width` columns, adds
 * (b0 + b2 + 2) >> 2 of the two neighbouring rows to b1 (the +2 rounds the
 * division by 4). vertical_compose53iL0() subtracts the same term.
 */
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1084
for(i=0; i<width; i++){
1085
b1[i] += (b0[i] + b2[i] + 2)>>2;
1089
/**
 * One level of the forward integer 5/3 2-D transform.
 *
 * Walks the rows two at a time starting at y=-2. Row pointers outside the
 * picture are obtained through mirror(). In each iteration the two newly
 * reached rows (y+1, y+2) are transformed horizontally and then the vertical
 * H0 and L0 steps are applied to the rows they are centred on.
 */
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1091
DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1092
DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1094
for(y=-2; y<height; y+=2){
1095
DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1096
DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1099
/* comparing against (unsigned)height rejects negative row numbers as well
 * as rows at or beyond height with a single test */
if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1100
if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1101
STOP_TIMER("horizontal_decompose53i")}
1104
if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1105
if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1106
STOP_TIMER("vertical_decompose53i*")}
1113
/**
 * Forward horizontal pass of the integer 9/7 transform on one row.
 *
 * Four lifting passes are chained through a temporary row: the first two
 * read b with a step of 2 (the odd samples start at b+1) and write the two
 * halves of temp, the last two read temp and write the two halves of b
 * (second half at offset w2). Each pass uses one of the W_A*, W_B*, W_C*,
 * W_D* constant triples.
 * NOTE(review): lift(), liftS() and lift5() are defined elsewhere; the
 * meaning of their arguments is inferred from how they are called here --
 * confirm.
 */
static void horizontal_decompose97i(DWTELEM *b, int width){
1114
/* scratch row: a variable-length array of `width` elements */
DWTELEM temp[width];
1115
const int w2= (width+1)>>1;
1117
lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1118
liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1119
lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1120
lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1124
/**
 * Forward 9/7 vertical step "A" on row b1: for each of `width` columns,
 * subtracts (W_AM*(b0 + b2) + W_AO) >> W_AS from b1.
 * vertical_compose97iH0() adds the same term back.
 */
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1127
for(i=0; i<width; i++){
1128
b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1132
/**
 * Forward 9/7 vertical step "C" on row b1: adds a term built from the
 * neighbouring rows b0 and b2, using the W_C* constants.
 *
 * Two forms of the update are visible: the generic
 * (W_CM*(b0 + b2) + W_CO) >> W_CS and a variant that starts from
 * r = 3*(b0 + b2).
 * NOTE(review): the preprocessor condition selecting between the two forms,
 * and any intermediate change to r, are not visible here.
 */
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1135
for(i=0; i<width; i++){
1137
b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1139
int r= 3*(b0[i] + b2[i]);
1142
b1[i] += (r+W_CO)>>W_CS;
1147
/**
 * Forward 9/7 vertical step "B" on row b1, using the neighbouring rows b0
 * and b2.
 *
 * Two forms are visible: the generic subtraction of
 * (W_BM*(b0 + b2) + W_BO) >> W_BS, and a closed-form expression that divides
 * by 5*16 after adding the bias 5<<27, then removes the bias again with
 * -(1<<23).
 * NOTE(review): the preprocessor condition selecting between the two forms
 * is not visible here.
 */
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1150
for(i=0; i<width; i++){
1152
b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1154
b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1159
/**
 * Forward 9/7 vertical step "D" on row b1: for each of `width` columns, adds
 * (W_DM*(b0 + b2) + W_DO) >> W_DS to b1.
 * vertical_compose97iL1() subtracts the same term.
 */
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1162
for(i=0; i<width; i++){
1163
b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1167
/**
 * One level of the forward integer 9/7 2-D transform.
 *
 * Walks the rows two at a time starting at y=-4 with a window of six row
 * pointers (b0..b5); rows outside the picture are obtained through mirror().
 * In each iteration the two newly reached rows (y+3, y+4) are transformed
 * horizontally, then the four vertical steps H0, L0, H1, L1 are applied to
 * successively older rows of the window.
 */
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1169
DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1170
DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1171
DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1172
DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1174
for(y=-4; y<height; y+=2){
1175
DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1176
DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1179
/* comparing against (unsigned)height rejects negative row numbers as well
 * as rows at or beyond height with a single test */
if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1180
if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1182
STOP_TIMER("horizontal_decompose97i")
1186
if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1187
if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1188
if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1189
if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1192
STOP_TIMER("vertical_decompose97i")
1202
/**
 * Forward multi-level 2-D wavelet transform, done in place.
 *
 * For level = 0 .. decomposition_count-1 the single-level transform selected
 * by `type` (DWT_97, DWT_53 or DWT_X) is applied with width and height
 * shifted right by the level and stride shifted left by it, so each level
 * works on every 2^level-th row of the buffer.
 */
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1205
for(level=0; level<decomposition_count; level++){
1207
case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1208
case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1209
case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1214
/**
 * Inverse horizontal pass of the integer 5/3 transform on one row.
 *
 * The two lift() calls write the two halves of temp from the two halves of b
 * (second half at offset w2), using the same constants as
 * horizontal_decompose53i() but with the last argument set to 1; the values
 * stored from temp+w2 onwards are then written to the odd positions of b.
 * An additional loop working on A1..A4 is also present.
 * NOTE(review): lift() is defined elsewhere -- confirm its argument meaning.
 * Whether the A1..A4 loop is compiled in cannot be told from the visible
 * lines.
 */
static void horizontal_compose53i(DWTELEM *b, int width){
1215
/* scratch row: a variable-length array of `width` elements */
DWTELEM temp[width];
1216
const int width2= width>>1;
1217
const int w2= (width+1)>>1;
1229
for(x=1; x+1<width2; x+=2){
1233
A2 += (A1 + A3 + 2)>>2;
1237
A1= temp[x+1+width2];
1240
A4 += (A1 + A3 + 2)>>2;
1246
A2 += (A1 + A3 + 2)>>2;
1250
lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1251
lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1253
for(x=0; x<width2; x++){
1255
b[2*x + 1]= temp[x+w2];
1261
/**
 * Inverse 5/3 vertical step on row b1: for each of `width` columns, adds
 * (b0 + b2) >> 1 of the two neighbouring rows to b1, undoing
 * vertical_decompose53iH0().
 */
static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1264
for(i=0; i<width; i++){
1265
b1[i] += (b0[i] + b2[i])>>1;
1269
/**
 * Inverse 5/3 vertical step on row b1: for each of `width` columns,
 * subtracts (b0 + b2 + 2) >> 2 of the two neighbouring rows from b1,
 * undoing vertical_decompose53iL0().
 */
static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1272
for(i=0; i<width; i++){
1273
b1[i] -= (b0[i] + b2[i] + 2)>>2;
1277
/**
 * Prepares the 5/3 inverse-transform state for the slice-buffer variant:
 * loads cs->b0 and cs->b1 with the slice-buffer lines for the mirrored rows
 * -2 and -1, scaled by stride_line.
 */
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1278
cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1279
cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1283
/**
 * Prepares the 5/3 inverse-transform state for a plain buffer: points cs->b0
 * and cs->b1 at the mirrored rows -2 and -1 of `buffer`.
 */
static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1284
cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1285
cs->b1 = buffer + mirror(-1 , height-1)*stride;
1289
/**
 * One incremental step of the 5/3 inverse transform, slice-buffer variant.
 *
 * Takes the two rows kept in cs (b0, b1), fetches the next two rows (y+1,
 * y+2, mirrored at the borders) from the slice buffer, applies the vertical
 * L0 and H0 steps and then runs the horizontal inverse on the two rows that
 * are now vertically complete (y-1 and y).
 */
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1292
DWTELEM *b0= cs->b0;
1293
DWTELEM *b1= cs->b1;
1294
DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1295
DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1298
/* comparing against (unsigned)height also rejects negative row numbers */
if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1299
if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1300
STOP_TIMER("vertical_compose53i*")}
1303
if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1304
if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1305
STOP_TIMER("horizontal_compose53i")}
1312
/**
 * One incremental step of the 5/3 inverse transform on a plain buffer.
 *
 * Takes the two rows kept in cs (b0, b1), addresses the next two rows (y+1,
 * y+2, mirrored at the borders) inside `buffer`, applies the vertical L0 and
 * H0 steps and then runs the horizontal inverse on the two rows that are now
 * vertically complete (y-1 and y).
 */
static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1314
DWTELEM *b0= cs->b0;
1315
DWTELEM *b1= cs->b1;
1316
DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1317
DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1320
/* comparing against (unsigned)height also rejects negative row numbers */
if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1321
if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1322
STOP_TIMER("vertical_compose53i*")}
1325
if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1326
if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1327
STOP_TIMER("horizontal_compose53i")}
1334
/**
 * One full level of the 5/3 inverse transform on a plain buffer: initialises
 * a compose state and keeps calling spatial_compose53i_dy() for as long as
 * cs.y <= height.
 */
static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1336
spatial_compose53i_init(&cs, buffer, height, stride);
1337
while(cs.y <= height)
1338
spatial_compose53i_dy(&cs, buffer, width, height, stride);
1342
/**
 * Inverse horizontal pass of the integer 9/7 transform on one row.
 *
 * Mirrors horizontal_decompose97i(): the four lifting passes run in the
 * opposite order (D, C, B, A) with the final argument set to 1. The first
 * two passes write the two halves of temp from the two halves of b, the last
 * two write b with a step of 2 (even positions from b, odd positions from
 * b+1).
 * NOTE(review): lift(), liftS() and lift5() are defined elsewhere; the
 * meaning of their arguments is inferred from the call sites -- confirm.
 */
void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
1343
/* scratch row: a variable-length array of `width` elements */
DWTELEM temp[width];
1344
const int w2= (width+1)>>1;
1346
lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1347
lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1348
liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1349
lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1352
/**
 * Inverse 9/7 vertical step "A" on row b1: for each of `width` columns, adds
 * (W_AM*(b0 + b2) + W_AO) >> W_AS to b1, undoing vertical_decompose97iH0().
 */
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1355
for(i=0; i<width; i++){
1356
b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1360
/**
 * Inverse 9/7 vertical step "C" on row b1: subtracts a term built from the
 * neighbouring rows b0 and b2, using the W_C* constants.
 *
 * Two forms of the update are visible: the generic
 * (W_CM*(b0 + b2) + W_CO) >> W_CS and a variant that starts from
 * r = 3*(b0 + b2).
 * NOTE(review): the preprocessor condition selecting between the two forms,
 * and any intermediate change to r, are not visible here.
 */
static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1363
for(i=0; i<width; i++){
1365
b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1367
int r= 3*(b0[i] + b2[i]);
1370
b1[i] -= (r+W_CO)>>W_CS;
1375
/**
 * Inverse 9/7 vertical step "B" on row b1, using the neighbouring rows b0
 * and b2 and the W_B* constants.
 *
 * Two forms of the update are visible; the second additionally feeds
 * 4*b1[i] into the shifted sum.
 * NOTE(review): the preprocessor condition selecting between the two forms
 * is not visible here.
 */
static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1378
for(i=0; i<width; i++){
1380
b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1382
b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1387
/**
 * Inverse 9/7 vertical step "D" on row b1: for each of `width` columns,
 * subtracts (W_DM*(b0 + b2) + W_DO) >> W_DS from b1, undoing
 * vertical_decompose97iL1().
 */
static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1390
for(i=0; i<width; i++){
1391
b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1395
/**
 * Fused inverse 9/7 vertical pass over a window of six rows.
 *
 * Inside a single column loop it applies the same updates as
 * vertical_compose97iL1 on (b3,b4,b5), vertical_compose97iH1 on (b2,b3,b4),
 * vertical_compose97iL0 on (b1,b2,b3) and vertical_compose97iH0 on
 * (b0,b1,b2), in that order, so b4, b3, b2 and b1 are modified.
 * NOTE(review): as in the single-step functions, two forms of the "C" and
 * "B" updates are visible; the preprocessor condition choosing between them
 * is not.
 */
void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1398
for(i=0; i<width; i++){
1402
b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1404
b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1406
r= 3*(b2[i] + b4[i]);
1409
b3[i] -= (r+W_CO)>>W_CS;
1412
b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1414
b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1416
b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1420
/**
 * Prepares the 9/7 inverse-transform state for the slice-buffer variant:
 * loads cs->b0..cs->b3 with the slice-buffer lines for the mirrored rows
 * -4, -3, -2 and -1, scaled by stride_line.
 */
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1421
cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1422
cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1423
cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1424
cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1428
/**
 * Prepares the 9/7 inverse-transform state for a plain buffer: points
 * cs->b0..cs->b3 at the mirrored rows -4, -3, -2 and -1 of `buffer`.
 */
static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1429
cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1430
cs->b1 = buffer + mirror(-3 , height-1)*stride;
1431
cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1432
cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1436
/**
 * One incremental step of the 9/7 inverse transform, slice-buffer variant.
 *
 * Uses the four rows kept in cs plus two newly fetched slice-buffer lines
 * (rows y+3 and y+4, mirrored at the borders). When y>0 and y+4<height all
 * four vertical steps are in range and the fused dsp->vertical_compose97i()
 * is called; the individually guarded L1/H1/L0/H0 steps are also listed
 * (presumably the fallback branch -- the `else` itself is not visible here).
 * Afterwards the horizontal inverse runs, through the dsp function pointer,
 * on rows y-1 and y.
 */
static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1439
DWTELEM *b0= cs->b0;
1440
DWTELEM *b1= cs->b1;
1441
DWTELEM *b2= cs->b2;
1442
DWTELEM *b3= cs->b3;
1443
DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1444
DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1447
if(y>0 && y+4<height){
1448
dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1450
/* comparing against (unsigned)height also rejects negative row numbers */
if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1451
if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1452
if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1453
if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1456
STOP_TIMER("vertical_compose97i")}}
1459
if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1460
if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1461
if(width>400 && y+0<(unsigned)height){
1462
STOP_TIMER("horizontal_compose97i")}}
1471
/**
 * One incremental step of the 9/7 inverse transform on a plain buffer.
 *
 * Uses the four rows kept in cs plus rows y+3 and y+4 of `buffer` (mirrored
 * at the borders), applies the guarded vertical steps L1, H1, L0, H0 and
 * then runs ff_snow_horizontal_compose97i() on rows y-1 and y.
 */
static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1473
DWTELEM *b0= cs->b0;
1474
DWTELEM *b1= cs->b1;
1475
DWTELEM *b2= cs->b2;
1476
DWTELEM *b3= cs->b3;
1477
DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1478
DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1481
/* comparing against (unsigned)height also rejects negative row numbers */
if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1482
if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1483
if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1484
if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1486
STOP_TIMER("vertical_compose97i")}}
1489
if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1490
if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1491
if(width>400 && b0 <= b2){
1492
STOP_TIMER("horizontal_compose97i")}}
1501
/**
 * One full level of the 9/7 inverse transform on a plain buffer: initialises
 * a compose state and keeps calling spatial_compose97i_dy() for as long as
 * cs.y <= height.
 */
static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1503
spatial_compose97i_init(&cs, buffer, height, stride);
1504
while(cs.y <= height)
1505
spatial_compose97i_dy(&cs, buffer, width, height, stride);
1508
/**
 * Initialises one compose state per decomposition level for the
 * slice-buffer inverse transform.
 *
 * Levels are visited from decomposition_count-1 down to 0; cs+level is set
 * up by the 9/7 or 5/3 buffered init with height shifted right and
 * stride_line shifted left by the level. DWT_X has no buffered
 * implementation: that case only logs an error.
 */
static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1510
for(level=decomposition_count-1; level>=0; level--){
1512
case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1513
case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1514
/* not slicified yet */
1515
case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1516
av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1521
/**
 * Initialises one compose state per decomposition level for the plain-buffer
 * inverse transform.
 *
 * Levels are visited from decomposition_count-1 down to 0. For DWT_97 and
 * DWT_53 only the state cs+level is prepared; for DWT_X, which is not
 * sliced, the whole level is inverse-transformed right here by
 * spatial_composeX().
 */
static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1523
for(level=decomposition_count-1; level>=0; level--){
1525
case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1526
case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1527
/* not slicified yet */
1528
case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1533
/**
 * Advances the plain-buffer inverse transform far enough to cover output
 * row y.
 *
 * For every level, from the highest index down to 0, the per-level *_dy()
 * step is repeated while cs[level].y has not passed
 * min((y>>level) + support, height>>level).
 */
static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1534
/* extra rows each level must run ahead of y: 3 when type==1, otherwise 5
 * (NOTE(review): type 1 is presumably DWT_53 -- confirm) */
const int support = type==1 ? 3 : 5;
1538
for(level=decomposition_count-1; level>=0; level--){
1539
while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1541
case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1543
case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1551
/**
 * Advances the slice-buffer inverse transform far enough to cover output
 * row y.
 *
 * For every level, from the highest index down to 0, the per-level
 * *_dy_buffered() step is repeated while cs[level].y has not passed
 * min((y>>level) + support, height>>level). The 9/7 path additionally
 * receives the DSPContext.
 */
static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1552
/* extra rows each level must run ahead of y: 3 when type==1, otherwise 5
 * (NOTE(review): type 1 is presumably DWT_53 -- confirm) */
const int support = type==1 ? 3 : 5;
1556
for(level=decomposition_count-1; level>=0; level--){
1557
while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1559
case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1561
case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1569
/**
 * Complete multi-level inverse 2-D wavelet transform on a plain buffer.
 *
 * Two paths are visible: one that simply runs spatial_composeX() for every
 * level from the highest index down to 0, and one that sets up per-level
 * compose states with ff_spatial_idwt_init() and then produces the output
 * four rows at a time through ff_spatial_idwt_slice().
 * NOTE(review): the condition choosing between the two paths is not visible
 * here.
 */
static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1572
for(level=decomposition_count-1; level>=0; level--)
1573
spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1575
dwt_compose_t cs[MAX_DECOMPOSITIONS];
1577
ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1578
for(y=0; y<height; y+=4)
1579
ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1583
/**
 * Range-codes the coefficients of one subband into s->c using context
 * modelling plus zero-run coding.
 *
 * Every coefficient v = src[x + y*stride] is looked at together with its
 * neighbours (left l, top-left lt, top t, top-right rt) and the parent-band
 * sample p. The same neighbourhood code appears twice: once in a scan that
 * appends run values to runs[] where the whole neighbourhood is zero, and
 * once in the scan that emits the symbols. The recorded run count
 * (max_index) is written before the per-coefficient symbols.
 *
 * An error is logged when fewer than w*40 bytes remain in the output buffer.
 * NOTE(review): the loop headers, the bounds checks around the neighbour
 * reads and the return statements are not visible here.
 */
static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1584
const int w= b->width;
1585
const int h= b->height;
1597
int /*ll=0, */l=0, lt=0, t=0, rt=0;
1598
v= src[x + y*stride];
1601
t= src[x + (y-1)*stride];
1603
lt= src[x - 1 + (y-1)*stride];
1606
rt= src[x + 1 + (y-1)*stride];
1610
l= src[x - 1 + y*stride];
1612
if(orientation==1) ll= src[y + (x-2)*stride];
1613
else ll= src[x - 2 + y*stride];
1619
/* parent sample: read with twice the row stride, only when (px,py) lies
 * inside the parent band */
if(px<b->parent->width && py<b->parent->height)
1620
p= parent[px + py*2*stride];
1622
/* all neighbours and the parent are zero */
if(!(/*ll|*/l|lt|t|rt|p)){
1624
runs[run_index++]= run;
1632
max_index= run_index;
1633
runs[run_index++]= run;
1635
run= runs[run_index++];
1637
/* the number of recorded runs is written first */
put_symbol2(&s->c, b->state[30], max_index, 0);
1638
if(run_index <= max_index)
1639
put_symbol2(&s->c, b->state[1], run, 3);
1642
if(s->c.bytestream_end - s->c.bytestream < w*40){
1643
av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1648
int /*ll=0, */l=0, lt=0, t=0, rt=0;
1649
v= src[x + y*stride];
1652
t= src[x + (y-1)*stride];
1654
lt= src[x - 1 + (y-1)*stride];
1657
rt= src[x + 1 + (y-1)*stride];
1661
l= src[x - 1 + y*stride];
1663
if(orientation==1) ll= src[y + (x-2)*stride];
1664
else ll= src[x - 2 + y*stride];
1670
if(px<b->parent->width && py<b->parent->height)
1671
p= parent[px + py*2*stride];
1673
/* some neighbour is nonzero: code the "v != 0" flag with a context taken
 * from the log2 of a weighted neighbour magnitude sum */
if(/*ll|*/l|lt|t|rt|p){
1674
int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1676
put_rac(&s->c, &b->state[0][context], !!v);
1679
run= runs[run_index++];
1681
if(run_index <= max_index)
1682
put_symbol2(&s->c, b->state[1], run, 3);
1690
int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1691
/* 2*|value| plus a sign bit in bit 0 */
int l2= 2*FFABS(l) + (l<0);
1692
int t2= 2*FFABS(t) + (t<0);
1694
/* magnitude minus one, then the sign flag (v<0) with a context derived from
 * the left and top neighbours */
put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1695
put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1703
/**
 * Encodes one subband. Thin wrapper that forwards all arguments to
 * encode_subband_c0run() and returns its result; the commented-out calls
 * name alternative coders that are not used.
 */
static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1704
// encode_subband_qtree(s, b, src, parent, stride, orientation);
1705
// encode_subband_z0run(s, b, src, parent, stride, orientation);
1706
return encode_subband_c0run(s, b, src, parent, stride, orientation);
1707
// encode_subband_dzr(s, b, src, parent, stride, orientation);
1710
/**
 * Decodes the coefficients of one subband from the range coder s->c into the
 * list b->x_coeff of (x, coeff) entries.
 *
 * Contexts are formed from already-decoded entries reached through prev_xc
 * and from the parent band's list (parent_xc), which is only available when
 * `parent` is non-NULL. A decoded value is stored as 2*(magnitude+1) plus a
 * sign flag in bit 0. Entries with x = w+1 are written as end markers.
 * NOTE(review): the loop structure and the run bookkeeping are only partly
 * visible here.
 */
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1711
const int w= b->width;
1712
const int h= b->height;
1717
x_and_coeff *xc= b->x_coeff;
1718
x_and_coeff *prev_xc= NULL;
1719
x_and_coeff *prev2_xc= xc;
1720
x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1721
x_and_coeff *prev_parent_xc= parent_xc;
1723
/* number of runs, followed by the first run length if there is one */
runs= get_symbol2(&s->c, b->state[30], 0);
1724
if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1729
int lt=0, t=0, rt=0;
1731
if(y && prev_xc->x == 0){
1743
if(prev_xc->x == x + 1)
1749
/* the parent list is compared against x>>1 */
if(x>>1 > parent_xc->x){
1752
if(x>>1 == parent_xc->x){
1753
p= parent_xc->coeff;
1756
if(/*ll|*/l|lt|t|rt|p){
1757
int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1759
v=get_rac(&s->c, &b->state[0][context]);
1761
/* magnitude in the upper bits, sign flag added into bit 0 */
v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1762
v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1769
if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1771
v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1772
v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1781
if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1782
else max_run= FFMIN(run, w-x-1);
1784
max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1790
(xc++)->x= w+1; //end marker
1796
/* parent rows are terminated by x == parent->width+1 */
while(parent_xc->x != parent->width+1)
1799
prev_parent_xc= parent_xc;
1801
parent_xc= prev_parent_xc;
1806
(xc++)->x= w+1; //end marker
1810
/**
 * Dequantizes rows start_y .. h-1 of a subband from its (x, coeff) list into
 * the slice buffer.
 *
 * Each row is fetched from the slice buffer, cleared, and then filled at the
 * stored x positions. The read position in b->x_coeff is carried between
 * calls in save_state[0].
 *
 * @param save_state one-element array holding the index into b->x_coeff to
 *                   continue from; updated before returning
 */
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1811
const int w= b->width;
1813
/* quantizer: qlog is clipped to [0, QROOT*16]; qmul is a qexp[] table entry
 * scaled by a power of two, qadd the bias derived from s->qbias */
const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1814
int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1815
int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1820
if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1825
/* If we are on the second or later slice, restore our index. */
1827
new_index = save_state[0];
1830
for(y=start_y; y<h; y++){
1833
DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1834
memset(line, 0, b->width*sizeof(DWTELEM));
1835
v = b->x_coeff[new_index].coeff;
1836
x = b->x_coeff[new_index++].x;
1839
/* v>>1 is the magnitude, bit 0 the sign flag: with u = -(v&1) the
 * expression (t^u) - u yields -t when the flag is set and t otherwise */
register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1840
register int u= -(v&1);
1841
line[x] = (t^u) - u;
1843
v = b->x_coeff[new_index].coeff;
1844
x = b->x_coeff[new_index++].x;
1847
if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1848
STOP_TIMER("decode_subband")
1851
/* Save our variables for the next slice. */
1852
save_state[0] = new_index;
1857
/**
 * Resets all adaptive range-coder contexts to MID_STATE: the state array of
 * every subband in each of the 3 planes, plus the header and block states.
 * Orientation 0 is visited only for level 0; the other levels start at
 * orientation 1.
 */
static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1858
int plane_index, level, orientation;
1860
for(plane_index=0; plane_index<3; plane_index++){
1861
for(level=0; level<s->spatial_decomposition_count; level++){
1862
for(orientation=level ? 1:0; orientation<4; orientation++){
1863
memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1867
memset(s->header_state, MID_STATE, sizeof(s->header_state));
1868
memset(s->block_state, MID_STATE, sizeof(s->block_state));
1871
/**
 * Allocates the zero-initialised BlockNode array s->block.
 *
 * The picture size is converted to units of 1<<LOG2_MB_SIZE, rounded up,
 * and the element count w*h is multiplied by 4^block_max_depth.
 * NOTE(review): no check of the av_mallocz() result and no return statement
 * are visible here -- confirm how allocation failure is reported.
 */
static int alloc_blocks(SnowContext *s){
1872
/* -((-n) >> k) rounds n / 2^k up instead of down */
int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1873
int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1878
s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1882
/**
 * Transfers range-coder state into d while keeping d's own output buffer:
 * d->bytestream and d->bytestream_start are saved on entry and written back
 * at the end.
 * NOTE(review): the statement between the save and the restore (presumably
 * the copy from s into d) is not visible here -- confirm.
 */
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1883
uint8_t *bytestream= d->bytestream;
1884
uint8_t *bytestream_start= d->bytestream_start;
1886
d->bytestream= bytestream;
1887
d->bytestream_start= bytestream_start;
1890
//near copy & paste from dsputil, FIXME
1891
/**
 * Walks a w x w block of 8-bit pixels row by row; after each row the pointer
 * is advanced by line_size - w to reach the start of the next one.
 * NOTE(review): the accumulation statement and the return are not visible
 * here; by its name and its use in encode_q_branch() the function presumably
 * returns the sum of the pixels -- confirm.
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
1896
for (i = 0; i < w; i++) {
1897
for (j = 0; j < w; j++) {
1901
pix += line_size - w;
1906
//near copy & paste from dsputil, FIXME
1907
/**
 * Walks a w x w block of 8-bit pixels row by row with a lookup table sq
 * that points 256 entries into ff_squareTbl.
 * NOTE(review): the accumulation statement and the return are not visible
 * here; presumably the function returns the sum of the squared pixel values
 * -- confirm.
 */
static int pix_norm1(uint8_t * pix, int line_size, int w)
1910
uint32_t *sq = ff_squareTbl + 256;
1913
for (i = 0; i < w; i++) {
1914
for (j = 0; j < w; j ++) {
1918
pix += line_size - w;
1923
/**
 * Writes one BlockNode value into every finest-level cell covered by the
 * block at (x, y) of tree level `level`.
 *
 * The block grid is s->b_width << block_max_depth cells wide; a block at
 * `level` covers a square of 2^(block_max_depth - level) cells per side,
 * starting at index (x + y*w) << rem_depth.
 * NOTE(review): the construction of `block` from l, cb, cr, mx, my, ref and
 * type is not visible here.
 */
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1924
const int w= s->b_width << s->block_max_depth;
1925
const int rem_depth= s->block_max_depth - level;
1926
const int index= (x + y*w) << rem_depth;
1927
const int block_w= 1<<rem_depth;
1940
for(j=0; j<block_w; j++){
1941
for(i=0; i<block_w; i++){
1942
s->block[index + i + j*w]= block;
1947
/**
 * Points the motion-estimation context at the source and reference planes
 * for a block at pixel position (x, y).
 *
 * For each plane i, c->src[0][i] is set to src[i] and c->ref[0][i] to ref[i]
 * plus a per-plane offset; the two visible offsets are
 * (y*uvstride + x) >> 1.
 * NOTE(review): the first entry of offset[] and the loop over i are not
 * visible here; ref2 and ref_index are not used in the visible lines.
 */
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1948
const int offset[3]= {
1950
((y*c->uvstride + x)>>1),
1951
((y*c->uvstride + x)>>1),
1955
c->src[0][i]= src [i];
1956
c->ref[0][i]= ref [i] + offset[i];
1961
/**
 * Predicts a motion vector as the component-wise median of the left, top
 * and top-right neighbours.
 *
 * With a single reference frame the neighbour vectors are used as they are.
 * Otherwise each neighbour vector is first rescaled by
 * scale_mv_ref[ref][neighbour->ref] with rounding (+128, >>8).
 *
 * @param mx,my  receive the predicted vector
 * @param ref    reference index the prediction is made for
 */
static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1962
const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1963
if(s->ref_frames == 1){
1964
*mx = mid_pred(left->mx, top->mx, tr->mx);
1965
*my = mid_pred(left->my, top->my, tr->my);
1967
const int *scale = scale_mv_ref[ref];
1968
*mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1969
(top ->mx * scale[top ->ref] + 128) >>8,
1970
(tr ->mx * scale[tr ->ref] + 128) >>8);
1971
*my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1972
(top ->my * scale[top ->ref] + 128) >>8,
1973
(tr ->my * scale[tr ->ref] + 128) >>8);
1980
/* Named slots of the motion-vector predictor array P used by
 * encode_q_branch(): slot 3 is the top-right neighbour, slot 4 the median
 * predictor. */
#define P_TOPRIGHT P[3]
1981
#define P_MEDIAN P[4]
1983
#define FLAG_QPEL 1 //must be 1
1985
/**
 * Rate-distortion decision and encoding for one node of the block quadtree.
 *
 * For the block at (x, y) of tree level `level` the visible code
 *  - runs a motion search against each of the s->ref_frames references and
 *    trial-encodes the inter choice into p_buffer/p_state (cost: score),
 *  - computes the mean luma and chroma values of the block and trial-encodes
 *    the intra choice into i_buffer/i_state (cost: iscore),
 *  - when level != s->block_max_depth, writes a split flag and recurses into
 *    the four children (cost: score2),
 *  - copies the chosen trial bytes and context state back into s->c and
 *    s->block_state and records the decision with set_blocks().
 * Both trial costs add the bits spent, weighted by s->lambda2.
 * NOTE(review): several conditionals, declarations and all return statements
 * are not visible here; by its use in the recursion the function returns the
 * cost of the chosen option.
 */
static int encode_q_branch(SnowContext *s, int level, int x, int y){
1986
/* scratch output buffers and context copies for the inter (p_) and intra
 * (i_) trial encodings */
uint8_t p_buffer[1024];
1987
uint8_t i_buffer[1024];
1988
uint8_t p_state[sizeof(s->block_state)];
1989
uint8_t i_state[sizeof(s->block_state)];
1991
/* write position of the real coder, so the winning trial can be copied
 * there */
uint8_t *pbbak= s->c.bytestream;
1992
uint8_t *pbbak_start= s->c.bytestream_start;
1993
int score, score2, iscore, i_len, p_len, block_s, sum;
1994
const int w= s->b_width << s->block_max_depth;
1995
const int h= s->b_height << s->block_max_depth;
1996
const int rem_depth= s->block_max_depth - level;
1997
const int index= (x + y*w) << rem_depth;
1998
const int block_w= 1<<(LOG2_MB_SIZE - level);
1999
int trx= (x+1)<<rem_depth;
2000
int try= (y+1)<<rem_depth;
2001
/* neighbouring blocks; null_block stands in at the picture borders */
const BlockNode *left = x ? &s->block[index-1] : &null_block;
2002
const BlockNode *top = y ? &s->block[index-w] : &null_block;
2003
const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2004
const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2005
const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2006
const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2007
/* colour predictors are taken from the left neighbour */
int pl = left->color[0];
2008
int pcb= left->color[1];
2009
int pcr= left->color[2];
2013
const int stride= s->current_picture.linesize[0];
2014
const int uvstride= s->current_picture.linesize[1];
2015
uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2016
s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2017
s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2019
int16_t last_mv[3][2];
2020
int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2021
const int shift= 1+qpel;
2022
MotionEstContext *c= &s->m.me;
2023
/* coding contexts derived from the left and top neighbours */
int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2024
int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2025
int my_context= av_log2(2*FFABS(left->my - top->my));
2026
int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2027
int ref, best_ref, ref_score, ref_mx, ref_my;
2029
assert(sizeof(s->block_state) >= 256);
2031
set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2035
// clip predictors / edge ?
2037
P_LEFT[0]= left->mx;
2038
P_LEFT[1]= left->my;
2041
P_TOPRIGHT[0]= tr->mx;
2042
P_TOPRIGHT[1]= tr->my;
2044
last_mv[0][0]= s->block[index].mx;
2045
last_mv[0][1]= s->block[index].my;
2046
last_mv[1][0]= right->mx;
2047
last_mv[1][1]= right->my;
2048
last_mv[2][0]= bottom->mx;
2049
last_mv[2][1]= bottom->my;
2056
assert(c-> stride == stride);
2057
assert(c->uvstride == uvstride);
2059
c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2060
c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2061
c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2062
c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2064
/* search window relative to the block position, in full pixels */
c->xmin = - x*block_w - 16+2;
2065
c->ymin = - y*block_w - 16+2;
2066
c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2067
c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2069
/* clamp the predictors to the search window, scaled by `shift` */
if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2070
if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2071
if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2072
if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2073
if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2074
if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2075
if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2077
P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2078
P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2081
c->pred_x= P_LEFT[0];
2082
c->pred_y= P_LEFT[1];
2084
c->pred_x = P_MEDIAN[0];
2085
c->pred_y = P_MEDIAN[1];
2090
/* motion search against every reference frame; each score gets a penalty
 * that grows with the reference index */
for(ref=0; ref<s->ref_frames; ref++){
2091
init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2093
ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2094
(1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2096
assert(ref_mx >= c->xmin);
2097
assert(ref_mx <= c->xmax);
2098
assert(ref_my >= c->ymin);
2099
assert(ref_my <= c->ymax);
2101
ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2102
ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2103
ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2104
/* store the per-reference result when the table for this reference exists */
if(s->ref_mvs[ref]){
2105
s->ref_mvs[ref][index][0]= ref_mx;
2106
s->ref_mvs[ref][index][1]= ref_my;
2107
s->ref_scores[ref][index]= ref_score;
2109
if(score > ref_score){
2116
//FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
2120
/* trial-encode the inter choice into p_buffer with a copy of the contexts */
pc.bytestream_start=
2121
pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2122
memcpy(p_state, s->block_state, sizeof(s->block_state));
2124
if(level!=s->block_max_depth)
2125
put_rac(&pc, &p_state[4 + s_context], 1);
2126
put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2127
if(s->ref_frames > 1)
2128
put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2129
pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2130
put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2131
put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2132
p_len= pc.bytestream - pc.bytestream_start;
2133
/* rate term: bytes written, outstanding bytes and the change in log2 of the
 * coder range, weighted by lambda2 */
score += (s->lambda2*(p_len*8
2134
+ (pc.outstanding_count - s->c.outstanding_count)*8
2135
+ (-av_log2(pc.range) + av_log2(s->c.range))
2136
))>>FF_LAMBDA_SHIFT;
2138
/* intra candidate: l, cb, cr are the rounded per-plane means
 * (pix_sum / pixel count); iscore starts from pix_norm1 - 2*l*sum +
 * l*l*block_s for luma */
block_s= block_w*block_w;
2139
sum = pix_sum(current_data[0], stride, block_w);
2140
l= (sum + block_s/2)/block_s;
2141
iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2143
block_s= block_w*block_w>>2;
2144
sum = pix_sum(current_data[1], uvstride, block_w>>1);
2145
cb= (sum + block_s/2)/block_s;
2146
// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2147
sum = pix_sum(current_data[2], uvstride, block_w>>1);
2148
cr= (sum + block_s/2)/block_s;
2149
// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2152
/* trial-encode the intra choice into i_buffer with a copy of the contexts */
ic.bytestream_start=
2153
ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2154
memcpy(i_state, s->block_state, sizeof(s->block_state));
2155
if(level!=s->block_max_depth)
2156
put_rac(&ic, &i_state[4 + s_context], 1);
2157
put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2158
put_symbol(&ic, &i_state[32], l-pl , 1);
2159
put_symbol(&ic, &i_state[64], cb-pcb, 1);
2160
put_symbol(&ic, &i_state[96], cr-pcr, 1);
2161
i_len= ic.bytestream - ic.bytestream_start;
2162
iscore += (s->lambda2*(i_len*8
2163
+ (ic.outstanding_count - s->c.outstanding_count)*8
2164
+ (-av_log2(ic.range) + av_log2(s->c.range))
2165
))>>FF_LAMBDA_SHIFT;
2167
// assert(score==256*256*256*64-1);
2168
assert(iscore < 255*255*256 + s->lambda2*10);
2169
assert(iscore >= 0);
2170
assert(l>=0 && l<=255);
2171
assert(pl>=0 && pl<=255);
2174
/* scene-change statistic built from the intra and inter costs */
int varc= iscore >> 8;
2175
int vard= score >> 8;
2176
if (vard <= 64 || vard < varc)
2177
c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2179
c->scene_change_score+= s->m.qscale;
2182
/* not at the finest level: write the split flag (0) and recurse into the
 * four children, summing their costs */
if(level!=s->block_max_depth){
2183
put_rac(&s->c, &s->block_state[4 + s_context], 0);
2184
score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2185
score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2186
score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2187
score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2188
score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2190
if(score2 < score && score2 < iscore)
2195
/* intra chosen: copy the trial bytes and contexts into the real coder */
pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2196
memcpy(pbbak, i_buffer, i_len);
2198
s->c.bytestream_start= pbbak_start;
2199
s->c.bytestream= pbbak + i_len;
2200
set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2201
memcpy(s->block_state, i_state, sizeof(s->block_state));
2204
/* inter chosen: same, using the inter trial */
memcpy(pbbak, p_buffer, p_len);
2206
s->c.bytestream_start= pbbak_start;
2207
s->c.bytestream= pbbak + p_len;
2208
set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2209
memcpy(s->block_state, p_state, sizeof(s->block_state));
2214
/**
 * Compares two block nodes for equivalence.
 * When both nodes carry BLOCK_INTRA, only the three colour components are
 * compared; otherwise the motion vector, the reference index and the
 * BLOCK_INTRA bit of the type must all be equal.
 * @return 1 if the nodes are equivalent, 0 otherwise
 */
static av_always_inline int same_block(BlockNode *a, BlockNode *b){
    const int both_intra = (a->type & BLOCK_INTRA) && (b->type & BLOCK_INTRA);
    int diff;

    if(both_intra){
        /* OR of the differences is zero only if every component matches */
        diff = (a->color[0] - b->color[0])
             | (a->color[1] - b->color[1])
             | (a->color[2] - b->color[2]);
    }else{
        diff = (a->mx  - b->mx)
             | (a->my  - b->my)
             | (a->ref - b->ref)
             | ((a->type ^ b->type) & BLOCK_INTRA);
    }

    return !diff;
}
2222
/**
 * Writes the block at position (x, y) of tree level `level` to the range
 * coder s->c, using the type, colour, motion vector and reference index that
 * are already stored in s->block. No motion search is performed here.
 *
 * @param s     codec context (block array, range coder, context states)
 * @param level current depth in the block tree, 0 = root
 * @param x     block column on this level
 * @param y     block row on this level
 */
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2223
const int w= s->b_width << s->block_max_depth;
2224
const int rem_depth= s->block_max_depth - level;
2225
const int index= (x + y*w) << rem_depth;
2226
int trx= (x+1)<<rem_depth;
2227
BlockNode *b= &s->block[index];
// neighbours; missing ones fall back to null_block (left/top), left (tl) or tl (tr)
2228
const BlockNode *left = x ? &s->block[index-1] : &null_block;
2229
const BlockNode *top = y ? &s->block[index-w] : &null_block;
2230
const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2231
const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// colour predictors are taken from the left neighbour
2232
int pl = left->color[0];
2233
int pcb= left->color[1];
2234
int pcr= left->color[2];
// context selectors derived from the neighbours (and from b->ref for the mv contexts)
2236
int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2237
int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2238
int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2239
int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// NOTE(review): the condition guarding the next call is not visible in this listing - confirm
2242
set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
// above the deepest level a bit in context 4 + s_context tells leaf (1) from split (0)
2246
if(level!=s->block_max_depth){
2247
if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2248
put_rac(&s->c, &s->block_state[4 + s_context], 1);
// NOTE(review): the else branch delimiter is not visible; the split bit and the four
// recursive calls below presumably belong to it - confirm
2250
put_rac(&s->c, &s->block_state[4 + s_context], 0);
2251
encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2252
encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2253
encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2254
encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
// intra leaf: type bit 1, then the three colour components as differences to the left predictor
2258
if(b->type & BLOCK_INTRA){
2259
pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2260
put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2261
put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2262
put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2263
put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2264
set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
// inter leaf (presumably the else branch): type bit 0, reference index only when more
// than one reference frame is in use, then the mv as a difference to pred_mv()'s prediction
2266
pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2267
put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2268
if(s->ref_frames > 1)
2269
put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2270
put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2271
put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2272
set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2276
/**
 * Reads the block at position (x, y) of tree level `level` from the range
 * coder s->c and stores the result with set_blocks().
 * A leaf is read when level equals s->block_max_depth or when the bit decoded
 * in context 4 + s_context is set; the four recursive calls at the end handle
 * the children on level+1.
 *
 * @param s     codec context (block array, range coder, context states)
 * @param level current depth in the block tree, 0 = root
 * @param x     block column on this level
 * @param y     block row on this level
 */
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2277
const int w= s->b_width << s->block_max_depth;
2278
const int rem_depth= s->block_max_depth - level;
2279
const int index= (x + y*w) << rem_depth;
2280
int trx= (x+1)<<rem_depth;
// neighbours; missing ones fall back to null_block (left/top), left (tl) or tl (tr)
2281
const BlockNode *left = x ? &s->block[index-1] : &null_block;
2282
const BlockNode *top = y ? &s->block[index-w] : &null_block;
2283
const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2284
const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2285
int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// NOTE(review): the condition guarding the next call is not visible in this listing - confirm
2288
set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2292
if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
// colour starts from the left neighbour; decoded deltas are added below
2294
int l = left->color[0];
2295
int cb= left->color[1];
2296
int cr= left->color[2];
2298
int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
// the terms multiplied by 0 do not contribute to the context
2299
int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2300
int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2302
type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
// NOTE(review): the if/else on `type` is not visible; the next four lines look like the
// intra case and the five after them like the inter case - confirm
2305
pred_mv(s, &mx, &my, 0, left, top, tr);
2306
l += get_symbol(&s->c, &s->block_state[32], 1);
2307
cb+= get_symbol(&s->c, &s->block_state[64], 1);
2308
cr+= get_symbol(&s->c, &s->block_state[96], 1);
2310
if(s->ref_frames > 1)
2311
ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2312
pred_mv(s, &mx, &my, ref, left, top, tr);
2313
mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2314
my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2316
set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
// split case: descend into the four children
2318
decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2319
decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2320
decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2321
decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2325
/**
 * Writes block information to the range coder, calling encode_q_branch2()
 * when the motion estimation method is ME_ITER or `search` is 0, and
 * encode_q_branch() otherwise.
 *
 * @param s      codec context
 * @param search non-zero to allow motion search while encoding the blocks
 */
static void encode_blocks(SnowContext *s, int search){
// NOTE(review): the statement controlled by this condition is not visible in this listing
2330
if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
// report an error when less than w*MB_SIZE*MB_SIZE*3 bytes of output space remain
2334
if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2335
av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2339
if(s->avctx->me_method == ME_ITER || !search)
2340
encode_q_branch2(s, 0, x, y);
2342
encode_q_branch (s, 0, x, y);
2347
/**
 * Decodes block information by calling decode_q_branch() at tree level 0.
 * NOTE(review): the loop that supplies x and y is not visible in this listing.
 */
static void decode_blocks(SnowContext *s){
2354
decode_q_branch(s, 0, x, y);
2359
/**
 * Sub-pel motion compensation of a b_w x b_h block in two passes.
 * The first pass produces b_h+5 rows using a 6-tap filter
 * (1, -5, 20, 20, -5, 1) blended according to dx; the second pass applies the
 * same filter down the columns of tmp, blended according to dy.
 *
 * @param dst    destination block
 * @param src    source pixels
 * @param tmp    scratch buffer holding the first-pass rows
 * @param stride line size used for tmp (and, presumably, dst and src)
 * @param b_w    block width
 * @param b_h    block height
 * @param dx     horizontal fractional position; 0..15 is assumed from the
 *               weights (8-dx), (16-dx) used below
 * @param dy     vertical fractional position, same assumed range
 */
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
// first pass; NOTE(review): the loads of a0..a5 and the store are not visible in this listing
2362
for(y=0; y < b_h+5; y++){
2363
for(x=0; x < b_w; x++){
2370
// int am= 9*(a1+a2) - (a0+a3);
2371
int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2372
// int am= 18*(a2+a3) - 2*(a1+a4);
2373
// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2374
// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2376
// if(b_w==16) am= 8*(a1+a2);
// blend the filter output am with sample a2 (dx<8) or a3 (dx>=8); weights sum to 256
2378
if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2379
else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2381
/* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
// clip: a negative am becomes 0, an am above 255 becomes -1 (255 once stored in a byte);
// relies on >> of a negative int being an arithmetic shift
2382
if(am&(~255)) am= ~(am>>31);
2386
/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2387
else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2388
else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2389
else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
// rewind tmp by the b_h+5 rows of the first pass
2394
tmp -= (b_h+5)*stride;
// second pass: six vertically adjacent samples of tmp per output sample
2396
for(y=0; y < b_h; y++){
2397
for(x=0; x < b_w; x++){
2398
int a0= tmp[x + 0*stride];
2399
int a1= tmp[x + 1*stride];
2400
int a2= tmp[x + 2*stride];
2401
int a3= tmp[x + 3*stride];
2402
int a4= tmp[x + 4*stride];
2403
int a5= tmp[x + 5*stride];
2404
int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2405
// int am= 18*(a2+a3) - 2*(a1+a4);
2406
/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2407
int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2409
// if(b_w==16) am= 8*(a1+a2);
2411
if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2412
else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
// same clip as in the first pass
2414
if(am&(~255)) am= ~(am>>31);
2417
/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2418
else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2419
else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2420
else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2425
STOP_TIMER("mc_block")
2428
/* mca(dx,dy,b_w) generates mc_block_hpel<dx><dy><b_w>(dst, src, stride, h):
 * a wrapper around mc_block() for a b_w x b_w block with fixed dx and dy.
 * src is moved back by 2 columns and 2 rows before the call, and the scratch
 * buffer is a variable-length array of stride*(b_w+5) bytes.
 * The parameter h is not used in the lines visible here. */
#define mca(dx,dy,b_w)\
2429
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2430
uint8_t tmp[stride*(b_w+5)];\
2432
mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2444
/**
 * Produces the prediction of one block for one plane into dst.
 * Intra blocks are filled with the block's colour for that plane; other
 * blocks are motion compensated from s->last_picture[block->ref].
 *
 * @param s           codec context
 * @param dst         destination buffer
 * @param tmp         scratch buffer (edge emulation and mc_block())
 * @param stride      line size of dst and of the reference plane
 * @param sx          x position of the block in the plane
 * @param sy          y position of the block in the plane
 * @param b_w         block width
 * @param b_h         block height
 * @param block       block node supplying type, colour, mv and reference
 * @param plane_index plane to predict; 0 uses twice the mv scale of the others
 * @param w           plane width
 * @param h           plane height
 */
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2445
if(block->type & BLOCK_INTRA){
2447
const int color = block->color[plane_index];
// the colour byte replicated into all four bytes of a 32-bit word
2448
const int color4= color*0x01010101;
// NOTE(review): the width tests selecting among the fill loops below are not visible here.
// This loop writes 32 bytes per row with eight 32-bit stores.
2450
for(y=0; y < b_h; y++){
2451
*(uint32_t*)&dst[0 + y*stride]= color4;
2452
*(uint32_t*)&dst[4 + y*stride]= color4;
2453
*(uint32_t*)&dst[8 + y*stride]= color4;
2454
*(uint32_t*)&dst[12+ y*stride]= color4;
2455
*(uint32_t*)&dst[16+ y*stride]= color4;
2456
*(uint32_t*)&dst[20+ y*stride]= color4;
2457
*(uint32_t*)&dst[24+ y*stride]= color4;
2458
*(uint32_t*)&dst[28+ y*stride]= color4;
// 16 bytes per row
2461
for(y=0; y < b_h; y++){
2462
*(uint32_t*)&dst[0 + y*stride]= color4;
2463
*(uint32_t*)&dst[4 + y*stride]= color4;
2464
*(uint32_t*)&dst[8 + y*stride]= color4;
2465
*(uint32_t*)&dst[12+ y*stride]= color4;
// 8 bytes per row
2468
for(y=0; y < b_h; y++){
2469
*(uint32_t*)&dst[0 + y*stride]= color4;
2470
*(uint32_t*)&dst[4 + y*stride]= color4;
// 4 bytes per row
2473
for(y=0; y < b_h; y++){
2474
*(uint32_t*)&dst[0 + y*stride]= color4;
// generic byte-wise fill for any b_w
2477
for(y=0; y < b_h; y++){
2478
for(x=0; x < b_w; x++){
2479
dst[x + y*stride]= color;
// motion compensated prediction from the reference picture selected by block->ref
2484
uint8_t *src= s->last_picture[block->ref].data[plane_index];
2485
const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2486
int mx= block->mx*scale;
2487
int my= block->my*scale;
// low four bits of the scaled mv give the fractional position
2488
const int dx= mx&15;
2489
const int dy= my&15;
// yields 0, 1, 2, 3 for b_w = 16, 8, 4, 2 and a negative value for b_w = 32
2490
const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2493
src += sx + sy*stride;
// the unsigned casts make negative sx/sy fail the range test as well
2494
if( (unsigned)sx >= w - b_w - 4
2495
|| (unsigned)sy >= h - b_h - 4){
2496
ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2499
// assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2500
// assert(!(b_w&(b_w-1)));
2501
assert(b_w>1 && b_h>1);
2502
assert(tab_index>=0 && tab_index<4 || b_w==32);
// mc_block() handles positions that are not a multiple of 4 in dx/dy, shapes other
// than 1:1, 1:2, 2:1 and widths that are not a power of two
2503
if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
2504
mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
// NOTE(review): the conditions selecting the qpel cases below are only partly visible.
// Here the block is covered by two 16-wide calls per 16 rows.
2507
for(y=0; y<b_h; y+=16){
2508
s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2509
s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
// a single call using the table entry for this width
2512
s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
// width is twice the height: two side-by-side calls b_h apart, using the next table entry
2513
else if(b_w==2*b_h){
2514
s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2515
s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
// remaining case: two calls stacked vertically, b_w rows apart
2518
s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2519
s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2524
/**
 * Combines four block predictions with the four quadrants of an OBMC weight
 * window, row by row, against lines fetched from a slice buffer.
 *
 * @param obmc        weight window
 * @param obmc_stride line size of the weight window; half of it is also used
 *                    as the quadrant offset
 * @param block       the four predictions; block[3] pairs with the first
 *                    quadrant and block[0] with the last
 * @param b_w         width of the area to process
 * @param b_h         height of the area to process
 * @param src_x       column offset into each slice-buffer line
 * @param src_y       first slice-buffer line
 * @param src_stride  line size of the predictions and of dst8
 * @param sb          slice buffer supplying the coefficient lines
 * @param add         NOTE(review): its test is not visible in this listing;
 *                    presumably selects between writing dst8 and subtracting
 *                    from the line - confirm
 * @param dst8        8-bit output
 */
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2525
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2528
for(y=0; y<b_h; y++){
2529
//FIXME ugly misuse of obmc_stride
// obmc1..obmc4 address row y in the four quadrants of the window
2530
const uint8_t *obmc1= obmc + y*obmc_stride;
2531
const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2532
const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2533
const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2534
dst = slice_buffer_get_line(sb, src_y + y);
2535
for(x=0; x<b_w; x++){
// weighted sum of the four predictions
2536
int v= obmc1[x] * block[3][x + y*src_stride]
2537
+obmc2[x] * block[2][x + y*src_stride]
2538
+obmc3[x] * block[1][x + y*src_stride]
2539
+obmc4[x] * block[0][x + y*src_stride];
2541
v <<= 8 - LOG2_OBMC_MAX;
// round, then drop 8 - FRAC_BITS bits
2543
v += 1<<(7 - FRAC_BITS);
2544
v >>= 8 - FRAC_BITS;
// add the line value, round away FRAC_BITS, clip to 0..255 and write the 8-bit output
2547
v += dst[x + src_x];
2548
v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2549
if(v&(~255)) v= ~(v>>31);
2550
dst8[x + y*src_stride] = v;
// alternative: subtract the prediction from the line
2552
dst[x + src_x] -= v;
2558
//FIXME name cleanup (b_w, block_w, b_width stuff)
2559
/**
 * Builds the predictions of the four blocks lt, rt, lb, rb that meet at block
 * position (b_x, b_y) with pred_block() and combines them with the OBMC
 * weight window, either through s->dsp.inner_add_yblock() or through the
 * loops at the end of the function.
 *
 * @param s           codec context
 * @param sliced      tested together with offset_dst below
 * @param sb          slice buffer handed to s->dsp.inner_add_yblock()
 * @param dst         coefficient buffer
 * @param dst8        8-bit output
 * @param obmc        OBMC weight window
 * @param src_x       x position of the area in the plane (may be negative)
 * @param src_y       y position of the area in the plane (may be negative)
 * @param b_w         width of the area
 * @param b_h         height of the area
 * @param w           plane width
 * @param h           plane height
 * @param dst_stride  line size of dst
 * @param src_stride  line size of dst8 and of the predictions
 * @param obmc_stride line size of the weight window
 * @param b_x         block column
 * @param b_y         block row
 * @param add         non-zero to add to dst, zero to subtract (see the loops)
 * @param offset_dst  tested together with sliced below
 * @param plane_index plane being processed
 */
static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2560
const int b_width = s->b_width << s->block_max_depth;
2561
const int b_height= s->b_height << s->block_max_depth;
2562
const int b_stride= b_width;
// the four blocks whose predictions overlap in this area
2563
BlockNode *lt= &s->block[b_x + b_y*b_stride];
2564
BlockNode *rt= lt+1;
2565
BlockNode *lb= lt+b_stride;
2566
BlockNode *rb= lb+1;
2568
int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2569
uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
// NOTE(review): the statements in the branches of the two if/else chains below are not
// visible in this listing
2576
}else if(b_x + 1 >= b_width){
2583
}else if(b_y + 1 >= b_height){
2588
if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2591
if(!sliced && !offset_dst)
2594
}else if(src_x + b_w > w){
2598
obmc -= src_y*obmc_stride;
2600
if(!sliced && !offset_dst)
2601
dst -= src_y*dst_stride;
2603
}else if(src_y + b_h> h){
// nothing to do for an empty area
2607
if(b_w<=0 || b_h<=0) return;
2609
assert(src_stride > 2*MB_SIZE + 5);
2610
if(!sliced && offset_dst)
2611
dst += src_x + src_y*dst_stride;
2612
dst8+= src_x + src_y*src_stride;
2613
// src += src_x + src_y*src_stride;
2615
ptmp= tmp + 3*tmp_step;
2618
pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
// NOTE(review): the same_block() tests below presumably let an equal block reuse an
// already computed prediction; the reuse statements are not visible here - confirm
2620
if(same_block(lt, rt)){
2625
pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2628
if(same_block(lt, lb)){
2630
}else if(same_block(rt, lb)){
2635
pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2638
if(same_block(lt, rb) ){
2640
}else if(same_block(rt, rb)){
2642
}else if(same_block(lb, rb)){
2646
pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// four loops, one per quadrant of the weight window, each adding or subtracting
// weight * prediction * (256/OBMC_MAX) in dst
// NOTE(review): whatever encloses these loops (for example a preprocessor guard) is not visible
2649
for(y=0; y<b_h; y++){
2650
for(x=0; x<b_w; x++){
2651
int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2652
if(add) dst[x + y*dst_stride] += v;
2653
else dst[x + y*dst_stride] -= v;
2656
for(y=0; y<b_h; y++){
2657
uint8_t *obmc2= obmc + (obmc_stride>>1);
2658
for(x=0; x<b_w; x++){
2659
int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2660
if(add) dst[x + y*dst_stride] += v;
2661
else dst[x + y*dst_stride] -= v;
2664
for(y=0; y<b_h; y++){
2665
uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2666
for(x=0; x<b_w; x++){
2667
int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2668
if(add) dst[x + y*dst_stride] += v;
2669
else dst[x + y*dst_stride] -= v;
2672
for(y=0; y<b_h; y++){
2673
uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2674
uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2675
for(x=0; x<b_w; x++){
2676
int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2677
if(add) dst[x + y*dst_stride] += v;
2678
else dst[x + y*dst_stride] -= v;
// slice-buffer path through the dsp function pointer
2685
s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2686
STOP_TIMER("inner_add_yblock")
// same arithmetic as ff_snow_inner_add_yblock(), operating on dst with dst_stride
2688
for(y=0; y<b_h; y++){
2689
//FIXME ugly misuse of obmc_stride
2690
const uint8_t *obmc1= obmc + y*obmc_stride;
2691
const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2692
const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2693
const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2694
for(x=0; x<b_w; x++){
2695
int v= obmc1[x] * block[3][x + y*src_stride]
2696
+obmc2[x] * block[2][x + y*src_stride]
2697
+obmc3[x] * block[1][x + y*src_stride]
2698
+obmc4[x] * block[0][x + y*src_stride];
2700
v <<= 8 - LOG2_OBMC_MAX;
2702
v += 1<<(7 - FRAC_BITS);
2703
v >>= 8 - FRAC_BITS;
// add the coefficient, round away FRAC_BITS, clip to 0..255 and write the 8-bit output
2706
v += dst[x + y*dst_stride];
2707
v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2708
if(v&(~255)) v= ~(v>>31);
2709
dst8[x + y*src_stride] = v;
// alternative: subtract the prediction from the coefficient
2711
dst[x + y*dst_stride] -= v;
2718
/**
 * Processes one row of blocks (mb_y) of a plane using lines from a slice
 * buffer.
 * For keyframes, or when bit 512 of avctx->debug is set, no motion
 * compensation is involved: the lines are either converted to 8-bit samples
 * in the current picture or have 128<<FRAC_BITS subtracted. Otherwise
 * add_yblock() is called for every block column 0..mb_w inclusive.
 *
 * @param s           codec context
 * @param sb          slice buffer holding the coefficient lines
 * @param old_buffer  passed to add_yblock() as its dst argument
 * @param plane_index plane to process
 * @param add         passed on to add_yblock(); NOTE(review): presumably also
 *                    selects between the two keyframe loops - that test is
 *                    not visible in this listing
 * @param mb_y        block row to process
 */
static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2719
Plane *p= &s->plane[plane_index];
2720
const int mb_w= s->b_width << s->block_max_depth;
2721
const int mb_h= s->b_height << s->block_max_depth;
2723
int block_size = MB_SIZE >> s->block_max_depth;
// planes other than 0 use half the block size and the next obmc table
2724
int block_w = plane_index ? block_size/2 : block_size;
2725
const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2726
int obmc_stride= plane_index ? block_size : 2*block_size;
2727
int ref_stride= s->current_picture.linesize[plane_index];
2728
uint8_t *dst8= s->current_picture.data[plane_index];
2733
if(s->keyframe || (s->avctx->debug&512)){
// convert the lines of this block row to 8-bit: add the 128 offset and the rounding term, clip, store
2738
for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2740
// DWTELEM * line = slice_buffer_get_line(sb, y);
2741
DWTELEM * line = sb->line[y];
2744
// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2745
int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2747
if(v&(~255)) v= ~(v>>31);
2748
dst8[x + y*ref_stride]= v;
// alternative: only remove the 128 offset from the coefficients
2752
for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2754
// DWTELEM * line = slice_buffer_get_line(sb, y);
2755
DWTELEM * line = sb->line[y];
2758
line[x] -= 128 << FRAC_BITS;
2759
// buf[x + y*w]-= 128<<FRAC_BITS;
// each area starts half a block before the block position; sliced=1, offset_dst=0
2767
for(mb_x=0; mb_x<=mb_w; mb_x++){
2770
add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2771
block_w*mb_x - block_w/2,
2772
block_w*mb_y - block_w/2,
2775
w, ref_stride, obmc_stride,
2777
add, 0, plane_index);
2779
STOP_TIMER("add_yblock")
2782
STOP_TIMER("predict_slice")
2785
/**
 * Processes one row of blocks (mb_y) of a plane on a full coefficient buffer.
 * For keyframes, or when bit 512 of avctx->debug is set, no motion
 * compensation is involved: the coefficients are either converted to 8-bit
 * samples in the current picture or have 128<<FRAC_BITS subtracted. Otherwise
 * add_yblock() is called for every block column 0..mb_w inclusive.
 *
 * @param s           codec context
 * @param buf         coefficient buffer, indexed as buf[x + y*w]
 * @param plane_index plane to process
 * @param add         passed on to add_yblock(); NOTE(review): presumably also
 *                    selects between the two keyframe loops - that test is
 *                    not visible in this listing
 * @param mb_y        block row to process
 */
static av_always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2786
Plane *p= &s->plane[plane_index];
2787
const int mb_w= s->b_width << s->block_max_depth;
2788
const int mb_h= s->b_height << s->block_max_depth;
2790
int block_size = MB_SIZE >> s->block_max_depth;
// planes other than 0 use half the block size and the next obmc table
2791
int block_w = plane_index ? block_size/2 : block_size;
2792
const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2793
const int obmc_stride= plane_index ? block_size : 2*block_size;
2794
int ref_stride= s->current_picture.linesize[plane_index];
2795
uint8_t *dst8= s->current_picture.data[plane_index];
2800
if(s->keyframe || (s->avctx->debug&512)){
// convert the rows of this block row to 8-bit: add the 128 offset and the rounding term, clip, store
2805
for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2807
int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2809
if(v&(~255)) v= ~(v>>31);
2810
dst8[x + y*ref_stride]= v;
// alternative: only remove the 128 offset from the coefficients
2814
for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2816
buf[x + y*w]-= 128<<FRAC_BITS;
// each area starts half a block before the block position; sliced=0, offset_dst=1
2824
for(mb_x=0; mb_x<=mb_w; mb_x++){
2827
add_yblock(s, 0, NULL, buf, dst8, obmc,
2828
block_w*mb_x - block_w/2,
2829
block_w*mb_y - block_w/2,
2832
w, ref_stride, obmc_stride,
2834
add, 1, plane_index);
2836
STOP_TIMER("add_yblock")
2839
STOP_TIMER("predict_slice")
2842
/**
 * Runs predict_slice() over a whole plane, one block row at a time.
 * Rows 0 through (s->b_height << s->block_max_depth) inclusive are visited,
 * i.e. one more than the number of block rows.
 *
 * @param s           codec context
 * @param buf         coefficient buffer of the plane
 * @param plane_index plane to process
 * @param add         passed through to predict_slice()
 */
static av_always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
    const int last_row = s->b_height << s->block_max_depth;
    int row = 0;

    while(row <= last_row){
        predict_slice(s, buf, plane_index, add, row);
        row++;
    }
}
2849
/**
 * Computes a colour value in 0..255 for block (mb_x, mb_y) of one plane.
 * The block is temporarily marked intra with colour 0 for this plane, the
 * surrounding areas are predicted into a scratch buffer with add_yblock(),
 * and the result is the rounded, clipped ratio of
 *   ab = sum of obmc_v * (input sample - rounded prediction)   to
 *   aa = sum of obmc_v * obmc_v,
 * with ab scaled by 1<<LOG2_OBMC_MAX.
 * NOTE(review): `backup` is taken below but the line restoring *b is not
 * visible in this listing - confirm the block is restored before returning.
 *
 * @param s           codec context
 * @param mb_x        block column
 * @param mb_y        block row
 * @param plane_index plane to evaluate
 * @return value clipped to 0..255
 */
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2851
Plane *p= &s->plane[plane_index];
2852
const int block_size = MB_SIZE >> s->block_max_depth;
2853
const int block_w = plane_index ? block_size/2 : block_size;
2854
const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2855
const int obmc_stride= plane_index ? block_size : 2*block_size;
2856
const int ref_stride= s->current_picture.linesize[plane_index];
2857
uint8_t *src= s-> input_picture.data[plane_index];
// scratch area inside obmc_scratchpad, offset per plane
2858
DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2859
const int b_stride = s->b_width << s->block_max_depth;
2860
const int w= p->width;
2861
const int h= p->height;
2862
int index= mb_x + mb_y*b_stride;
2863
BlockNode *b= &s->block[index];
2864
BlockNode backup= *b;
// make this block contribute nothing to the prediction for this plane
2868
b->type|= BLOCK_INTRA;
2869
b->color[plane_index]= 0;
2870
memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
// NOTE(review): the loop header over i is not visible; (i&1) and (i>>1) select the area
2873
int mb_x2= mb_x + (i &1) - 1;
2874
int mb_y2= mb_y + (i>>1) - 1;
2875
int x= block_w*mb_x2 + block_w/2;
2876
int y= block_w*mb_y2 + block_w/2;
// add=0 and offset_dst=0 are passed
2878
add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2879
x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
// visit only the samples of this area that lie inside the plane
2881
for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2882
for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2883
int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2884
int obmc_v= obmc[index];
// when the area crosses a plane edge, add the weight found block_w away in the window
2886
if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2887
if(x<0) obmc_v += obmc[index + block_w];
2888
if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2889
if(x+block_w>w) obmc_v += obmc[index - block_w];
2890
//FIXME precalc this or simplify it somehow else
// negated scratch value plus rounding term; shifted down by FRAC_BITS when used below
2892
d = -dst[index] + (1<<(FRAC_BITS-1));
2894
ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2895
aa += obmc_v * obmc_v; //FIXME precalculate this
// NOTE(review): aa is used as a divisor; no zero check is visible in this listing
2901
return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldn't need clipping
2904
/**
 * Estimates the number of bits needed to code block (x, y).
 * Intra blocks are costed from the colour differences to the left neighbour,
 * other blocks from the motion vector values dmx/dmy and the reference index,
 * each through av_log2(2*|value|).
 *
 * @param s codec context
 * @param x block column
 * @param y block row
 * @param w block width in block units, used to locate the top-right neighbour
 * @return estimated bit count
 */
static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2905
const int b_stride = s->b_width << s->block_max_depth;
2906
const int b_height = s->b_height<< s->block_max_depth;
2907
int index= x + y*b_stride;
2908
const BlockNode *b = &s->block[index];
// neighbours; missing ones fall back to null_block (left/top), left (tl) or tl (tr)
2909
const BlockNode *left = x ? &s->block[index-1] : &null_block;
2910
const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2911
const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2912
const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2914
// int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2915
// int my_context= av_log2(2*FFABS(left->my - top->my));
// NOTE(review): the statement executed for out-of-range positions is not visible here;
// the pointers above are formed before this test
2917
if(x<0 || x>=b_stride || y>=b_height)
// NOTE(review): the next line looks like a fragment of a comment whose delimiters are not visible
2924
00001XXXX 15-30 8-15
2926
//FIXME try accurate rate
2927
//FIXME intra and inter predictors if surrounding blocks aren't the same type
2928
if(b->type & BLOCK_INTRA){
2929
return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2930
+ av_log2(2*FFABS(left->color[1] - b->color[1]))
2931
+ av_log2(2*FFABS(left->color[2] - b->color[2])));
// NOTE(review): dmx/dmy receive the prediction here; any adjustment between this call and
// the return is not visible in this listing
2933
pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2936
return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2937
+ av_log2(2*FFABS(dmy))
2938
+ av_log2(2*b->ref));
2942
/**
 * Computes a rate-distortion score for block (mb_x, mb_y) of one plane.
 * The block is predicted over a 2*block_w square with pred_block(), blended
 * with the values in the scratchpad `pred` using the weights in obmc_edged,
 * compared against the input picture, and the estimated bits of the block
 * and some neighbours are added, weighted by the penalty factor.
 *
 * @param s           codec context
 * @param mb_x        block column
 * @param mb_y        block row
 * @param plane_index plane to evaluate
 * @param obmc_edged  OBMC weights, indexed with obmc_stride per row
 * @return distortion + rate*penalty_factor
 */
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2943
Plane *p= &s->plane[plane_index];
2944
const int block_size = MB_SIZE >> s->block_max_depth;
2945
const int block_w = plane_index ? block_size/2 : block_size;
2946
const int obmc_stride= plane_index ? block_size : 2*block_size;
2947
const int ref_stride= s->current_picture.linesize[plane_index];
2948
uint8_t *dst= s->current_picture.data[plane_index];
2949
uint8_t *src= s-> input_picture.data[plane_index];
2950
DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
// variable-length arrays sized from the picture line size
2951
uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2952
uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
2953
const int b_stride = s->b_width << s->block_max_depth;
2954
const int b_height = s->b_height<< s->block_max_depth;
2955
const int w= p->width;
2956
const int h= p->height;
2959
const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// top-left corner of the 2*block_w square, half a block before the block position
2960
int sx= block_w*mb_x - block_w/2;
2961
int sy= block_w*mb_y - block_w/2;
// part of the square that lies inside the plane
2962
int x0= FFMAX(0,-sx);
2963
int y0= FFMAX(0,-sy);
2964
int x1= FFMIN(block_w*2, w-sx);
2965
int y1= FFMIN(block_w*2, h-sy);
2968
pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2970
for(y=y0; y<y1; y++){
2971
const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2972
const DWTELEM *pred1 = pred + y*obmc_stride;
2973
uint8_t *cur1 = cur + y*ref_stride;
2974
uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2975
for(x=x0; x<x1; x++){
// weighted prediction plus the scratchpad value, shifted down by FRAC_BITS and clipped
// NOTE(review): the store of v is not visible in this listing
2976
int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2977
v = (v + pred1[x]) >> FRAC_BITS;
2978
if(v&(~255)) v= ~(v>>31);
2983
/* copy the regions where obmc[] = (uint8_t)256 */
2984
if(LOG2_OBMC_MAX == 8
2985
&& (mb_x == 0 || mb_x == b_stride-1)
2986
&& (mb_y == 0 || mb_y == b_height-1)){
2995
for(y=y0; y<y1; y++)
2996
memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
3000
/* FIXME rearrange dsputil to fit 32x32 cmp functions */
3001
/* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
3002
/* FIXME cmps overlap but don't cover the wavelet's whole support,
3003
* so improving the score of one block is not strictly guaranteed to
3004
* improve the score of the whole frame, so iterative motion est
3005
* doesn't always converge. */
// NOTE(review): the conditions choosing between the distortion computations below are only
// partly visible in this listing
3006
if(s->avctx->me_cmp == FF_CMP_W97)
3007
distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3008
else if(s->avctx->me_cmp == FF_CMP_W53)
3009
distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
// me_cmp[0] on 16x16 quadrants selected by the bits of i
3013
int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3014
distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
// a single me_cmp[0] call over 2*block_w rows
3019
distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
// rate: bits of the block positions derived from i, plus one more in the second-to-last column
3028
rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3030
if(mb_x == b_stride-2)
3031
rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3033
return distortion + rate*penalty_factor;
3036
/**
 * Computes a rate-distortion score for the group of blocks whose top-left
 * member is (mb_x, mb_y).
 * Areas around the group are rendered into the current picture with
 * add_yblock(), compared against the input picture with me_cmp, and the
 * estimated bits of the group and some neighbours are added, weighted by the
 * penalty factor.
 *
 * @param s           codec context
 * @param mb_x        block column of the top-left block of the group
 * @param mb_y        block row of the top-left block of the group
 * @param plane_index plane to evaluate
 * @return distortion + rate*penalty_factor
 */
static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3038
Plane *p= &s->plane[plane_index];
3039
const int block_size = MB_SIZE >> s->block_max_depth;
3040
const int block_w = plane_index ? block_size/2 : block_size;
3041
const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3042
const int obmc_stride= plane_index ? block_size : 2*block_size;
3043
const int ref_stride= s->current_picture.linesize[plane_index];
3044
uint8_t *dst= s->current_picture.data[plane_index];
3045
uint8_t *src= s-> input_picture.data[plane_index];
// zero-initialised static array passed as add_yblock()'s dst with a dst_stride of 0
3046
static const DWTELEM zero_dst[4096]; //FIXME
3047
const int b_stride = s->b_width << s->block_max_depth;
3048
const int w= p->width;
3049
const int h= p->height;
3052
const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// NOTE(review): the loop header over i is not visible; i%3 and i/3 select the area
3055
int mb_x2= mb_x + (i%3) - 1;
3056
int mb_y2= mb_y + (i/3) - 1;
3057
int x= block_w*mb_x2 + block_w/2;
3058
int y= block_w*mb_y2 + block_w/2;
3060
add_yblock(s, 0, NULL, zero_dst, dst, obmc,
3061
x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3063
//FIXME find a cleaner/simpler way to skip the outside stuff
// copy input samples into dst for rows above and below the plane ...
3064
for(y2= y; y2<0; y2++)
3065
memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3066
for(y2= h; y2<y+block_w; y2++)
3067
memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
// ... and for columns left and right of it
// NOTE(review): the tests guarding the next two loops are not visible in this listing
3069
for(y2= y; y2<y+block_w; y2++)
3070
memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3073
for(y2= y; y2<y+block_w; y2++)
3074
memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
// me_cmp index 1 is used for 8-wide areas and index 0 for 16-wide ones
3077
assert(block_w== 8 || block_w==16);
3078
distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3082
BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3083
int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
// bits of the group as one block of width 2, then of the positions listed in dxy;
// when the four blocks are equal the first four entries (the group itself) are skipped
3091
rate = get_block_bits(s, mb_x, mb_y, 2);
3092
for(i=merged?4:0; i<9; i++){
3093
static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3094
rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3097
return distortion + rate*penalty_factor;
3100
/**
 * Tries a candidate setting for block (mb_x, mb_y) and evaluates it with
 * get_block_rd() on plane 0.
 * NOTE(review): the if/else on `intra` and everything after the
 * get_block_rd() call (comparison with *best_rd, use of `backup`, the return
 * value) are not visible in this listing.
 *
 * @param s          codec context
 * @param mb_x       block column
 * @param mb_y       block row
 * @param p          candidate values: three colours in the lines that set
 *                   block->color, two values p[0], p[1] in the cache lines
 * @param intra      selects the kind of candidate (its test is not visible)
 * @param obmc_edged OBMC weights passed to get_block_rd()
 * @param best_rd    best score so far
 */
static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3101
const int b_stride= s->b_width << s->block_max_depth;
3102
BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3103
BlockNode backup= *block;
3104
int rd, index, value;
3106
assert(mb_x>=0 && mb_y>=0);
3107
assert(mb_x<b_stride);
// colour candidate: store the three colours and mark the block intra
3110
block->color[0] = p[0];
3111
block->color[1] = p[1];
3112
block->color[2] = p[2];
3113
block->type |= BLOCK_INTRA;
// cache lookup keyed on p[0], p[1]; the stored value also mixes in the cache generation
// and block->ref
// NOTE(review): the statement executed on a cache hit is not visible in this listing
3115
index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3116
value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3117
if(s->me_cache[index] == value)
3119
s->me_cache[index]= value;
3123
block->type &= ~BLOCK_INTRA;
3126
rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3138
/* special case for int[2] args we discard afterward, fixes a compilation problem with gcc 2.95 */
3139
/**
 * Convenience wrapper around check_block() for a two-value candidate.
 * Packs p0 and p1 into a local array and calls check_block() with intra = 0.
 *
 * @param s          codec context
 * @param mb_x       block column
 * @param mb_y       block row
 * @param p0         first candidate value
 * @param p1         second candidate value
 * @param obmc_edged OBMC weights passed through to check_block()
 * @param best_rd    best score so far, passed through to check_block()
 * @return whatever check_block() returns
 */
static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
    int candidate[2];

    candidate[0] = p0;
    candidate[1] = p1;

    return check_block(s, mb_x, mb_y, candidate, 0, obmc_edged, best_rd);
}
3144
/**
 * Tries one non-intra candidate for the 2x2 group of blocks whose top-left
 * member is (mb_x, mb_y) and evaluates it with get_4block_rd() on plane 0.
 * NOTE(review): the lines that store p0/p1/ref into the block, compare the
 * score with *best_rd and return are not visible in this listing; the four
 * restoring assignments at the end presumably run only when the candidate is
 * rejected - confirm.
 *
 * @param s       codec context
 * @param mb_x    block column, asserted to be even
 * @param mb_y    block row, asserted to be even
 * @param p0      first candidate value
 * @param p1      second candidate value
 * @param ref     reference index of the candidate (not used in the visible lines)
 * @param best_rd best score so far
 */
static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3145
const int b_stride= s->b_width << s->block_max_depth;
3146
BlockNode *block= &s->block[mb_x + mb_y * b_stride];
// saved copies of the four blocks of the group
3147
BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3148
int rd, index, value;
3150
assert(mb_x>=0 && mb_y>=0);
3151
assert(mb_x<b_stride);
3152
assert(((mb_x|mb_y)&1) == 0);
// cache lookup keyed on p0, p1; the stored value also mixes in the cache generation and block->ref
// NOTE(review): the statement executed on a cache hit is not visible in this listing
3154
index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3155
value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3156
if(s->me_cache[index] == value)
3158
s->me_cache[index]= value;
// clear the intra flag and copy the first block to the other three
3163
block->type &= ~BLOCK_INTRA;
3164
block[1]= block[b_stride]= block[b_stride+1]= *block;
3166
rd= get_4block_rd(s, mb_x, mb_y, 0);
// put back the saved blocks
3173
block[0]= backup[0];
3174
block[1]= backup[1];
3175
block[b_stride]= backup[2];
3176
block[b_stride+1]= backup[3];
3181
static void iterative_me(SnowContext *s){
3182
int pass, mb_x, mb_y;
3183
const int b_width = s->b_width << s->block_max_depth;
3184
const int b_height= s->b_height << s->block_max_depth;
3185
const int b_stride= b_width;
3189
RangeCoder r = s->c;
3190
uint8_t state[sizeof(s->block_state)];
3191
memcpy(state, s->block_state, sizeof(s->block_state));
3192
for(mb_y= 0; mb_y<s->b_height; mb_y++)
3193
for(mb_x= 0; mb_x<s->b_width; mb_x++)
3194
encode_q_branch(s, 0, mb_x, mb_y);
3196
memcpy(s->block_state, state, sizeof(s->block_state));
3199
for(pass=0; pass<25; pass++){
3202
for(mb_y= 0; mb_y<b_height; mb_y++){
3203
for(mb_x= 0; mb_x<b_width; mb_x++){
3204
int dia_change, i, j, ref;
3205
int best_rd= INT_MAX, ref_rd;
3206
BlockNode backup, ref_b;
3207
const int index= mb_x + mb_y * b_stride;
3208
BlockNode *block= &s->block[index];
3209
BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3210
BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3211
BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3212
BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3213
BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3214
BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3215
BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3216
BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3217
const int b_w= (MB_SIZE >> s->block_max_depth);
3218
uint8_t obmc_edged[b_w*2][b_w*2];
3220
if(pass && (block->type & BLOCK_OPT))
3222
block->type |= BLOCK_OPT;
3226
if(!s->me_cache_generation)
3227
memset(s->me_cache, 0, sizeof(s->me_cache));
3228
s->me_cache_generation += 1<<22;
3233
memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3235
for(y=0; y<b_w*2; y++)
3236
memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3237
if(mb_x==b_stride-1)
3238
for(y=0; y<b_w*2; y++)
3239
memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3241
for(x=0; x<b_w*2; x++)
3242
obmc_edged[0][x] += obmc_edged[b_w-1][x];
3243
for(y=1; y<b_w; y++)
3244
memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3246
if(mb_y==b_height-1){
3247
for(x=0; x<b_w*2; x++)
3248
obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3249
for(y=b_w; y<b_w*2-1; y++)
3250
memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3254
//skip stuff outside the picture
3255
if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3257
uint8_t *src= s-> input_picture.data[0];
3258
uint8_t *dst= s->current_picture.data[0];
3259
const int stride= s->current_picture.linesize[0];
3260
const int block_w= MB_SIZE >> s->block_max_depth;
3261
const int sx= block_w*mb_x - block_w/2;
3262
const int sy= block_w*mb_y - block_w/2;
3263
const int w= s->plane[0].width;
3264
const int h= s->plane[0].height;
3268
memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3269
for(y=h; y<sy+block_w*2; y++)
3270
memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3272
for(y=sy; y<sy+block_w*2; y++)
3273
memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3275
if(sx+block_w*2 > w){
3276
for(y=sy; y<sy+block_w*2; y++)
3277
memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3281
// intra(black) = neighbors' contribution to the current block
3283
color[i]= get_dc(s, mb_x, mb_y, i);
3285
// get previous score (can't be cached due to OBMC)
3286
if(pass > 0 && (block->type&BLOCK_INTRA)){
3287
int color0[3]= {block->color[0], block->color[1], block->color[2]};
3288
check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3290
check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3294
for(ref=0; ref < s->ref_frames; ref++){
3295
int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3296
if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3301
check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3302
check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3304
check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3306
check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3308
check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3310
check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3313
//FIXME avoid subpel interpol / round to nearest integer
3316
for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3318
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3319
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3320
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3321
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3327
static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3330
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3332
//FIXME or try the standard 2 pass qpel or similar
3334
mvr[0][0]= block->mx;
3335
mvr[0][1]= block->my;
3336
if(ref_rd > best_rd){
3344
check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3345
//FIXME RD style color selection
3347
if(!same_block(block, &backup)){
3348
if(tb ) tb ->type &= ~BLOCK_OPT;
3349
if(lb ) lb ->type &= ~BLOCK_OPT;
3350
if(rb ) rb ->type &= ~BLOCK_OPT;
3351
if(bb ) bb ->type &= ~BLOCK_OPT;
3352
if(tlb) tlb->type &= ~BLOCK_OPT;
3353
if(trb) trb->type &= ~BLOCK_OPT;
3354
if(blb) blb->type &= ~BLOCK_OPT;
3355
if(brb) brb->type &= ~BLOCK_OPT;
3360
av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3365
if(s->block_max_depth == 1){
3367
for(mb_y= 0; mb_y<b_height; mb_y+=2){
3368
for(mb_x= 0; mb_x<b_width; mb_x+=2){
3370
int best_rd, init_rd;
3371
const int index= mb_x + mb_y * b_stride;
3374
b[0]= &s->block[index];
3376
b[2]= b[0]+b_stride;
3378
if(same_block(b[0], b[1]) &&
3379
same_block(b[0], b[2]) &&
3380
same_block(b[0], b[3]))
3383
if(!s->me_cache_generation)
3384
memset(s->me_cache, 0, sizeof(s->me_cache));
3385
s->me_cache_generation += 1<<22;
3387
init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3389
//FIXME more multiref search?
3390
check_4block_inter(s, mb_x, mb_y,
3391
(b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3392
(b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3395
if(!(b[i]->type&BLOCK_INTRA))
3396
check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3398
if(init_rd != best_rd)
3402
av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3406
/*
 * Quantize the coefficients of subband b in place.
 *
 * src/stride address the coefficient plane; b supplies width, height and level.
 * The step size qmul is looked up in qexp[] from s->qlog + b->qlog, clipped to
 * [0, QROOT*16]. Nothing is done when s->qlog == LOSSLESS_QLOG.
 * bias: a non-zero value selects a rounding offset of 0, zero selects (3*qmul)>>3.
 */
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3407
const int level= b->level;
3408
const int w= b->width;
3409
const int h= b->height;
3410
const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3411
const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3412
int x,y, thres1, thres2;
3415
// lossless mode: coefficients are left untouched
if(s->qlog == LOSSLESS_QLOG) return;
3417
// from here on 'bias' holds the rounding offset, not the caller's flag
bias= bias ? 0 : (3*qmul)>>3;
3418
thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3424
// first variant: division without adding the rounding offset
// NOTE(review): the condition choosing between the two variants and the x/y loops are on lines not shown here
int i= src[x + y*stride];
3426
// single unsigned compare used as a range test; thres2 is assigned on a line not shown (presumably 2*thres1 -- TODO confirm)
if((unsigned)(i+thres1) > thres2){
3429
i/= qmul; //FIXME optimize
3430
src[x + y*stride]= i;
3434
i/= qmul; //FIXME optimize
3435
// second of the two stores in this variant writes the negated magnitude
src[x + y*stride]= -i;
3438
// presumably the else-branch of the range test: small coefficients become 0
src[x + y*stride]= 0;
3444
// second variant: same structure, but the rounding offset 'bias' is added before dividing
int i= src[x + y*stride];
3446
if((unsigned)(i+thres1) > thres2){
3449
i= (i + bias) / qmul; //FIXME optimize
3450
src[x + y*stride]= i;
3454
i= (i + bias) / qmul; //FIXME optimize
3455
src[x + y*stride]= -i;
3458
src[x + y*stride]= 0;
3462
// only a disabled timer is visible inside this branch
if(level+1 == s->spatial_decomposition_count){
3463
// STOP_TIMER("quantize")
3467
/*
 * Dequantize rows start_y .. end_y-1 of subband b inside the slice buffer sb.
 *
 * The multiplier qmul is derived exactly as in quantize(); qadd is the
 * reconstruction offset (s->qbias*qmul)>>QBIAS_SHIFT.
 * Nothing is done when s->qlog == LOSSLESS_QLOG.
 * NOTE(review): src and stride are only referenced by a commented-out line in the visible code.
 */
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3468
const int w= b->width;
3469
const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3470
const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3471
const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3475
if(s->qlog == LOSSLESS_QLOG) return;
3477
for(y=start_y; y<end_y; y++){
3478
// DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3479
// row y of the band is found via the band's line stride and x/y offsets into the slice buffer
DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3483
// two reconstruction forms; the sign test selecting between them is on lines not shown here
line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3485
line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3489
// timing is only reported for bands wider than 200 coefficients
if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3490
STOP_TIMER("dquant")
3494
/*
 * Dequantize subband b in place in the plane addressed by src/stride.
 *
 * Uses the same qmul derivation as quantize() and the reconstruction offset
 * qadd = (s->qbias*qmul)>>QBIAS_SHIFT. Nothing is done when s->qlog == LOSSLESS_QLOG.
 */
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3495
const int w= b->width;
3496
const int h= b->height;
3497
const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3498
const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3499
const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3503
if(s->qlog == LOSSLESS_QLOG) return;
3507
// the x/y loops and the sign test around the two stores below are on lines not shown here
int i= src[x + y*stride];
3509
src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3511
src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3515
// timing is only reported for bands wider than 200 coefficients
if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3516
STOP_TIMER("dquant")
3520
/*
 * Replace every coefficient of subband b by its difference from a prediction
 * formed from already-present neighbours (left, top, top-right / top-left).
 *
 * Three predictor forms are visible below; the conditions that choose between
 * them are on lines not shown here (presumably based on x and use_median -- TODO confirm).
 * NOTE(review): no use of 'inverse' is visible in the lines shown.
 */
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3521
const int w= b->width;
3522
const int h= b->height;
3525
// rows and columns are visited in reverse, so the left/top neighbours read below have not been modified yet
for(y=h-1; y>=0; y--){
3526
for(x=w-1; x>=0; x--){
3527
int i= x + y*stride;
3531
// form 1: median of left, top and top-right; falls back to left only on the first row or last column
if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3532
else src[i] -= src[i - 1];
3534
// form 2: median of left, top and the gradient left+top-topleft; left only on the first row
if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3535
else src[i] -= src[i - 1];
3538
// form 3: predict from the coefficient directly above (no prediction on the first row)
if(y) src[i] -= src[i - stride];
3544
/*
 * Undo decorrelate() for rows start_y .. end_y-1 of subband b, operating on
 * lines fetched from the slice buffer sb: each coefficient gets its prediction
 * added back.
 *
 * 'prev' (declared on a line not shown) is read as the row above 'line'.
 * The conditions selecting among the three predictor forms are not visible here.
 * NOTE(review): src and stride are only referenced by a commented-out line in the visible code.
 */
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3545
const int w= b->width;
3550
DWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3554
// fetch row start_y-1 before entering the loop (the guard around this statement is not shown)
line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3556
for(y=start_y; y<end_y; y++){
3558
// line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3559
line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3563
// form 1: median of left, top and top-right; left only on the first row or last column
if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3564
else line[x] += line[x - 1];
3566
// form 2: median of left, top and the gradient left+top-topleft; left only on the first row
if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3567
else line[x] += line[x - 1];
3570
// form 3: add the coefficient directly above
if(y) line[x] += prev[x];
3575
// STOP_TIMER("correlate")
3578
/*
 * Undo decorrelate() on subband b in place: each coefficient gets the same
 * neighbour-based prediction added back.
 *
 * The x/y loops and the conditions that choose among the three predictor forms
 * are on lines not shown here.
 */
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3579
const int w= b->width;
3580
const int h= b->height;
3585
int i= x + y*stride;
3589
// form 1: median of left, top and top-right; left only on the first row or last column
if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3590
else src[i] += src[i - 1];
3592
// form 2: median of left, top and the gradient left+top-topleft; left only on the first row
if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3593
else src[i] += src[i - 1];
3596
// form 3: add the coefficient directly above
if(y) src[i] += src[i - stride];
3602
/*
 * Write the frame header through the range coder s->c.
 *
 * Order of the visible writes: keyframe flag, the global stream parameters
 * (version .. max_ref_frames-1), the per-band qlog values, then five values
 * coded as differences against the s->last_* fields, which are updated at the end.
 * NOTE(review): some braces are on lines not shown, so the exact extent of the
 * keyframe-only part cannot be confirmed from this view.
 */
static void encode_header(SnowContext *s){
3603
int plane_index, level, orientation;
3606
// the keyframe flag uses its own context state, reset to MID_STATE for every header
memset(kstate, MID_STATE, sizeof(kstate));
3608
put_rac(&s->c, kstate, s->keyframe);
3609
if(s->keyframe || s->always_reset){
3611
// reset the delta-coding predictors (only part of the chained assignment is visible here)
s->last_spatial_decomposition_type=
3615
s->last_block_max_depth= 0;
3618
put_symbol(&s->c, s->header_state, s->version, 0);
3619
put_rac(&s->c, s->header_state, s->always_reset);
3620
put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3621
put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3622
put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3623
put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3624
put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3625
put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3626
put_rac(&s->c, s->header_state, s->spatial_scalability);
3627
// put_rac(&s->c, s->header_state, s->rate_scalability);
3628
// stored minus one
put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3630
// only planes 0 and 1 are written, and orientation 2 is skipped;
// decode_header() fills the omitted entries from plane 1 / orientation 1
for(plane_index=0; plane_index<2; plane_index++){
3631
for(level=0; level<s->spatial_decomposition_count; level++){
3632
for(orientation=level ? 1:0; orientation<4; orientation++){
3633
if(orientation==2) continue;
3634
put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3639
// per-frame parameters, coded as signed differences from the previous header
put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3640
put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3641
put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3642
put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3643
put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3645
// remember what was sent so the next header can be coded relative to it
s->last_spatial_decomposition_type= s->spatial_decomposition_type;
3646
s->last_qlog = s->qlog;
3647
s->last_qbias = s->qbias;
3648
s->last_mv_scale = s->mv_scale;
3649
s->last_block_max_depth = s->block_max_depth;
3652
/*
 * Read the frame header from the range coder s->c, mirroring encode_header().
 *
 * Fills the stream parameters and per-band qlog values of s, then applies the
 * delta-coded per-frame values to the current fields.
 * Declared to return int; the return statements are on lines not shown in this view.
 */
static int decode_header(SnowContext *s){
3653
int plane_index, level, orientation;
3656
// the keyframe flag uses its own context state, reset to MID_STATE for every header
memset(kstate, MID_STATE, sizeof(kstate));
3658
s->keyframe= get_rac(&s->c, kstate);
3659
if(s->keyframe || s->always_reset){
3661
// reset the accumulators the deltas below are added to (chained assignment only partly visible)
s->spatial_decomposition_type=
3665
s->block_max_depth= 0;
3668
s->version= get_symbol(&s->c, s->header_state, 0);
3670
// NOTE(review): message has no trailing newline; the version test guarding it is not shown
av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3673
s->always_reset= get_rac(&s->c, s->header_state);
3674
s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3675
s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3676
s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3677
s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3678
s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3679
s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3680
s->spatial_scalability= get_rac(&s->c, s->header_state);
3681
// s->rate_scalability= get_rac(&s->c, s->header_state);
3682
// stored minus one in the bitstream
s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3684
for(plane_index=0; plane_index<3; plane_index++){
3685
for(level=0; level<s->spatial_decomposition_count; level++){
3686
for(orientation=level ? 1:0; orientation<4; orientation++){
3688
// plane 2 reuses plane 1's value, orientation 2 reuses orientation 1's; everything else is read from the stream
if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3689
else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3690
else q= get_symbol(&s->c, s->header_state, 1);
3691
s->plane[plane_index].band[level][orientation].qlog= q;
3697
// per-frame parameters arrive as signed deltas and are accumulated
s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3698
if(s->spatial_decomposition_type > 2){
3699
// NOTE(review): message has no trailing newline
av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3703
s->qlog += get_symbol(&s->c, s->header_state, 1);
3704
s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3705
s->qbias += get_symbol(&s->c, s->header_state, 1);
3706
s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3707
// only depths 0 and 1 are accepted; an invalid value is reported and reset to 0
if(s->block_max_depth > 1 || s->block_max_depth < 0){
3708
// NOTE(review): "too large" is also printed for negative values
av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3709
s->block_max_depth= 0;
3716
/*
 * Runs QROOT iterations in which v grows by a factor of 2^(1/QROOT) each time.
 * The declaration of v and the statement that stores it are on lines not shown
 * here (presumably it fills the qexp[] table used by the quantizers -- TODO confirm).
 */
static void init_qexp(void){
3720
for(i=0; i<QROOT; i++){
3722
v *= pow(2, 1.0 / QROOT);
3726
/*
 * Initialisation of the SnowContext stored in avctx->priv_data.
 *
 * Visible steps: DSP setup and motion-compensation function table overrides,
 * default decomposition parameters, allocation of the DWT buffer, per-plane and
 * per-subband geometry, the scale_mv_ref table, and a buffer for mconly_picture.
 * Declared to return int; the return statements are on lines not shown in this view.
 */
static int common_init(AVCodecContext *avctx){
3727
SnowContext *s = avctx->priv_data;
3729
int level, orientation, plane_index, dec;
3734
dsputil_init(&s->dsp, avctx);
3737
s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3738
s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3739
s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3740
s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3741
s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3742
s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3761
#define mcfh(dx,dy)\
3762
s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3763
s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3764
mc_block_hpel ## dx ## dy ## 16;\
3765
s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3766
s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3767
mc_block_hpel ## dx ## dy ## 8;
3777
// defaults: 5 decomposition levels, wavelet type taken from avctx->prediction_method
dec= s->spatial_decomposition_count= 5;
3778
s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3780
s->chroma_h_shift= 1; //FIXME XXX
3781
s->chroma_v_shift= 1;
3783
// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3785
width= s->avctx->width;
3786
height= s->avctx->height;
3788
// NOTE(review): no check of this allocation's result is visible in the lines shown
s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3790
// mv_scale 2 with CODEC_FLAG_QPEL, else 4; block_max_depth 1 with CODEC_FLAG_4MV, else 0
s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3791
s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3793
for(plane_index=0; plane_index<3; plane_index++){
3794
int w= s->avctx->width;
3795
int h= s->avctx->height;
3798
// the guard around these shifts is not shown (presumably applied to the chroma planes only -- TODO confirm)
w>>= s->chroma_h_shift;
3799
h>>= s->chroma_v_shift;
3801
s->plane[plane_index].width = w;
3802
s->plane[plane_index].height= h;
3803
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3804
// walk from the last level index down to 0, setting up every subband of this plane
for(level=s->spatial_decomposition_count-1; level>=0; level--){
3805
for(orientation=level ? 1 : 0; orientation<4; orientation++){
3806
SubBand *b= &s->plane[plane_index].band[level][orientation];
3808
// every band points into the shared DWT buffer; offsets are added below
b->buf= s->spatial_dwt_buffer;
3810
b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3811
// width rounds up when bit 0 of orientation is clear, height when orientation is 0 or 1
b->width = (w + !(orientation&1))>>1;
3812
b->height= (h + !(orientation>1))>>1;
3814
b->stride_line = 1 << (s->spatial_decomposition_count - level);
3815
b->buf_x_offset = 0;
3816
b->buf_y_offset = 0;
3820
// the orientation tests guarding the following offset adjustments are on lines not shown
b->buf_x_offset = (w+1)>>1;
3823
b->buf += b->stride>>1;
3824
b->buf_y_offset = b->stride_line >> 1;
3828
// parent is the same orientation one level index lower (the guard for level 0 is not shown)
b->parent= &s->plane[plane_index].band[level-1][orientation];
3829
// NOTE(review): no check of this allocation's result is visible in the lines shown
b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3836
// scale_mv_ref[i][j] = 256*(i+1)/(j+1), using integer division
for(i=0; i<MAX_REF_FRAMES; i++)
3837
for(j=0; j<MAX_REF_FRAMES; j++)
3838
scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3842
width= s->width= avctx->width;
3843
height= s->height= avctx->height;
3845
assert(width && height);
3847
// NOTE(review): the return value of get_buffer() is not used on this line
s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3852
/*
 * Convert a qscale value to the codec's logarithmic quantizer scale:
 * rint(QROOT * log2(qscale / FF_QP2LAMBDA)) + 61*QROOT/8.
 * The division is carried out in float, the logarithms in double.
 */
static int qscale2qlog(int qscale){
3853
return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3854
+ 61*QROOT/8; //<64 >60
3857
/*
 * Single-pass rate control step: derive a complexity figure from the plane-0
 * DWT coefficients, hand it to ff_rate_estimate_qscale() and update
 * pict->quality, s->lambda and s->qlog from the result.
 *
 * Declared to return int; the return statements are on lines not shown
 * (the caller stores the result in a variable named delta_qlog and tests it against INT_MIN).
 */
static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3859
/* estimate the frame's complexity as a sum of weighted dwt coefs.
3860
* FIXME we know exact mv bits at this point,
3861
* but ratecontrol isn't set up to include them. */
3862
uint32_t coef_sum= 0;
3863
int level, orientation, delta_qlog;
3865
for(level=0; level<s->spatial_decomposition_count; level++){
3866
for(orientation=level ? 1 : 0; orientation<4; orientation++){
3867
SubBand *b= &s->plane[0].band[level][orientation];
3868
DWTELEM *buf= b->buf;
3869
const int w= b->width;
3870
const int h= b->height;
3871
const int stride= b->stride;
3872
// weighting uses a fixed base of 2*QROOT plus the band's qlog, not the frame's s->qlog
const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3873
const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3874
// 16.16 fixed-point reciprocal of qmul
const int qdiv= (1<<16)/qmul;
3877
// the band is decorrelated for measuring and restored by correlate() below
// (the guard around both calls, if any, is on lines not shown)
decorrelate(s, b, buf, stride, 1, 0);
3880
coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3882
correlate(s, b, buf, stride, 1, 0);
3886
/* ugly, ratecontrol just takes a sqrt again */
3887
coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3888
assert(coef_sum < INT_MAX);
3890
// I frames report the sum as mb_var_sum, other frames as mc_mb_var_sum; the other field is zeroed
if(pict->pict_type == I_TYPE){
3891
s->m.current_picture.mb_var_sum= coef_sum;
3892
s->m.current_picture.mc_mb_var_sum= 0;
3894
s->m.current_picture.mc_mb_var_sum= coef_sum;
3895
s->m.current_picture.mb_var_sum= 0;
3898
pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3899
// a negative quality signals failure; the handling statement is on a line not shown
if (pict->quality < 0)
3901
s->lambda= pict->quality * 3/2;
3902
delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3903
s->qlog+= delta_qlog;
3907
/*
 * Derive a default qlog for every subband of plane p.
 *
 * For each band the DWT buffer is cleared, a single coefficient of value
 * 256*256 is placed at the band's centre, and the inverse DWT is run over the
 * whole plane. b->qlog is then computed from 'error', which is accumulated
 * from the reconstructed samples on lines not shown here.
 */
static void calculate_vissual_weight(SnowContext *s, Plane *p){
3908
int width = p->width;
3909
int height= p->height;
3910
int level, orientation, x, y;
3912
for(level=0; level<s->spatial_decomposition_count; level++){
3913
for(orientation=level ? 1 : 0; orientation<4; orientation++){
3914
SubBand *b= &p->band[level][orientation];
3915
DWTELEM *buf= b->buf;
3918
// NOTE(review): sizeof(int) here, while other memset calls on this buffer in this file use sizeof(DWTELEM)
memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3919
// unit impulse (scaled by 256*256) in the middle of the band
buf[b->width/2 + b->height/2*b->stride]= 256*256;
3920
ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3921
for(y=0; y<height; y++){
3922
for(x=0; x<width; x++){
3923
int64_t d= s->spatial_dwt_buffer[x + y*width];
3928
// log base 2^(1/QROOT) of 352256/sqrt(error), rounded by adding 0.5 before truncation
b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3929
// av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3934
// Encoder initialisation for the SnowContext stored in avctx->priv_data.
//
// Visible steps: refuse to run unless experimental compliance is allowed, reject
// the 9/7 wavelet combined with lossless settings, allocate the motion-estimation
// scratch buffers, set up rate control and per-band weights, validate the pixel
// format, request the input picture buffer and, for ME_ITER, the per-reference
// motion vector / score tables.
// Declared to return int; the return statements are on lines not shown in this view.
static int encode_init(AVCodecContext *avctx)
3936
SnowContext *s = avctx->priv_data;
3939
if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3940
av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3941
"use vstrict=-2 / -strict -2 to use it anyway\n");
3945
// fixed quantizer with global_quality 0 is treated as a lossless request here
if(avctx->prediction_method == DWT_97
3946
&& (avctx->flags & CODEC_FLAG_QSCALE)
3947
&& avctx->global_quality == 0){
3948
av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
3958
s->m.flags = avctx->flags;
3959
s->m.bit_rate= avctx->bit_rate;
3961
// NOTE(review): no checks of these allocation results are visible in the lines shown
s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3962
s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3963
s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3964
s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3965
h263_encode_init(&s->m); //mv_penalty
3967
// clamp the requested reference count to 1 .. MAX_REF_FRAMES
s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
3969
if(avctx->flags&CODEC_FLAG_PASS1){
3970
if(!avctx->stats_out)
3971
avctx->stats_out = av_mallocz(256);
3973
// rate control is initialised for second-pass encoding and whenever no fixed quantizer is requested
if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
3974
if(ff_rate_control_init(&s->m) < 0)
3977
// true only when neither CODEC_FLAG_QSCALE nor CODEC_FLAG_PASS2 is set
s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
3979
for(plane_index=0; plane_index<3; plane_index++){
3980
calculate_vissual_weight(s, &s->plane[plane_index]);
3984
avctx->coded_frame= &s->current_picture;
3985
// YUV420P is the only case label that is not commented out
switch(avctx->pix_fmt){
3986
// case PIX_FMT_YUV444P:
3987
// case PIX_FMT_YUV422P:
3988
case PIX_FMT_YUV420P:
3990
// case PIX_FMT_YUV411P:
3991
// case PIX_FMT_YUV410P:
3992
s->colorspace_type= 0;
3994
/* case PIX_FMT_RGB32:
3998
av_log(avctx, AV_LOG_ERROR, "format not supported\n");
4001
// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4002
s->chroma_h_shift= 1;
4003
s->chroma_v_shift= 1;
4005
ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4006
ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4008
// NOTE(review): the return value of get_buffer() is not used on this line
s->avctx->get_buffer(s->avctx, &s->input_picture);
4010
if(s->avctx->me_method == ME_ITER){
4012
// one entry per block at the deepest split: b_width*b_height scaled by 4^block_max_depth
int size= s->b_width * s->b_height << 2*s->block_max_depth;
4013
for(i=0; i<s->max_ref_frames; i++){
4014
s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4015
s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4022
/*
 * Prepare the picture buffers for a new frame.
 *
 * Pads the edges of the current picture (if it has data), rotates it into
 * last_picture[0] while shifting the older references back by one slot, then
 * requests a fresh buffer for the new current picture.
 * Declared to return int; the return statements are on lines not shown in this view.
 */
static int frame_start(SnowContext *s){
4024
int w= s->avctx->width; //FIXME round up to x16 ?
4025
int h= s->avctx->height;
4027
if(s->current_picture.data[0]){
4028
// planes 1 and 2 use half the width, height and edge size of plane 0
draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4029
draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4030
draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4033
// the oldest reference slot is saved and becomes the new current_picture after the shift
tmp= s->last_picture[s->max_ref_frames-1];
4034
memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4035
s->last_picture[0]= s->current_picture;
4036
s->current_picture= tmp;
4042
// scan the references that hold data; the action taken after a keyframe is on lines not shown
for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4043
if(i && s->last_picture[i-1].key_frame)
4048
s->current_picture.reference= 1;
4049
if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4050
av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4054
s->current_picture.key_frame= s->keyframe;
4059
/*
 * Encode one frame.
 *
 * avctx: codec context whose priv_data is the SnowContext.
 * buf, buf_size: output buffer handed to the range encoder.
 * data: the input AVFrame.
 * The last visible statement returns ff_rac_terminate(c); other return
 * statements (error paths) are on lines not shown in this view.
 */
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4060
SnowContext *s = avctx->priv_data;
4061
RangeCoder * const c= &s->c;
4062
AVFrame *pict = data;
4063
const int width= s->avctx->width;
4064
const int height= s->avctx->height;
4065
int level, orientation, plane_index, i, y;
4066
// backups of the coder context states, restored if encoding is restarted after rate control
uint8_t rc_header_bak[sizeof(s->header_state)];
4067
uint8_t rc_block_bak[sizeof(s->block_state)];
4069
ff_init_range_encoder(c, buf, buf_size);
4070
ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4074
// copy the caller's picture row by row into s->input_picture (the enclosing plane loop is not shown)
for(y=0; y<(height>>shift); y++)
4075
memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4076
&pict->data[i][y * pict->linesize[i]],
4079
s->new_picture = *pict;
4081
s->m.picture_number= avctx->frame_number;
4082
// second pass: the picture type comes from the rate-control entry for this frame
if(avctx->flags&CODEC_FLAG_PASS2){
4084
pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4085
s->keyframe= pict->pict_type==FF_I_TYPE;
4086
if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4087
pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4088
if (pict->quality < 0)
4092
// otherwise: keyframe on every gop_size-th frame, or always when gop_size is 0
s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4094
pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4097
// single-pass rate control starts the very first frame from a fixed quality
if(s->pass1_rc && avctx->frame_number == 0)
4098
pict->quality= 2*FF_QP2LAMBDA;
4100
s->qlog= qscale2qlog(pict->quality);
4101
s->lambda = pict->quality * 3/2;
4103
// a negative qlog, or quality 0 with a fixed quantizer, selects lossless coding
if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4104
s->qlog= LOSSLESS_QLOG;
4106
}//else keep previous frame's qlog until after motion est
4110
s->m.current_picture_ptr= &s->m.current_picture;
4111
// for P frames, populate the embedded MpegEncContext s->m with pictures, geometry and ME settings
if(pict->pict_type == P_TYPE){
4112
int block_width = (width +15)>>4;
4113
int block_height= (height+15)>>4;
4114
int stride= s->current_picture.linesize[0];
4116
assert(s->current_picture.data[0]);
4117
assert(s->last_picture[0].data[0]);
4119
s->m.avctx= s->avctx;
4120
s->m.current_picture.data[0]= s->current_picture.data[0];
4121
s->m. last_picture.data[0]= s->last_picture[0].data[0];
4122
s->m. new_picture.data[0]= s-> input_picture.data[0];
4123
s->m. last_picture_ptr= &s->m. last_picture;
4125
s->m. last_picture.linesize[0]=
4126
s->m. new_picture.linesize[0]=
4127
s->m.current_picture.linesize[0]= stride;
4128
s->m.uvlinesize= s->current_picture.linesize[1];
4130
s->m.height= height;
4131
s->m.mb_width = block_width;
4132
s->m.mb_height= block_height;
4133
s->m.mb_stride= s->m.mb_width+1;
4134
s->m.b8_stride= 2*s->m.mb_width+1;
4136
s->m.pict_type= pict->pict_type;
4137
s->m.me_method= s->avctx->me_method;
4138
s->m.me.scene_change_score=0;
4139
s->m.flags= s->avctx->flags;
4140
s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4141
s->m.out_format= FMT_H263;
4142
s->m.unrestricted_mv= 1;
4144
s->m.lambda = s->lambda;
4145
s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4146
s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4148
s->m.dsp= s->dsp; //move
4154
// snapshot the context states so they can be restored if the frame is re-encoded below
memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4155
memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4160
s->m.pict_type = pict->pict_type;
4161
s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
4164
// bit accounting: bytes written so far count as misc bits, bytes written by encode_blocks() as mv bits
s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4165
encode_blocks(s, 1);
4166
s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4168
for(plane_index=0; plane_index<3; plane_index++){
4169
Plane *p= &s->plane[plane_index];
4173
// int bits= put_bits_count(&s->c.pb);
4175
if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4177
if(pict->data[plane_index]) //FIXME gray hack
4180
// load the input samples into the DWT buffer, scaled up by FRAC_BITS
s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4183
predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
4186
// scene change detected on a P frame outside the second pass: restart the range encoder and code an I frame instead
// (the first operand of this condition is on a line not shown)
&& pict->pict_type == P_TYPE
4187
&& !(avctx->flags&CODEC_FLAG_PASS2)
4188
&& s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4189
ff_init_range_encoder(c, buf, buf_size);
4190
ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4191
pict->pict_type= FF_I_TYPE;
4193
s->current_picture.key_frame=1;
4197
if(s->qlog == LOSSLESS_QLOG){
4200
// lossless: drop the FRAC_BITS fractional bits again, with rounding
s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4205
ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4207
// single-pass rate control runs once per frame, on plane 0 after its forward DWT
if(s->pass1_rc && plane_index==0){
4208
int delta_qlog = ratecontrol_1pass(s, pict);
4209
if (delta_qlog <= INT_MIN)
4212
//reordering qlog in the bitstream would eliminate this reset
4213
ff_init_range_encoder(c, buf, buf_size);
4214
memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4215
memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4217
encode_blocks(s, 0);
4221
// per subband: quantize, decorrelate, entropy-code, then correlate again to restore the band
// (guards around the decorrelate/correlate calls, if any, are on lines not shown)
for(level=0; level<s->spatial_decomposition_count; level++){
4222
for(orientation=level ? 1 : 0; orientation<4; orientation++){
4223
SubBand *b= &p->band[level][orientation];
4225
quantize(s, b, b->buf, b->stride, s->qbias);
4227
decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
4228
encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
4229
assert(b->parent==NULL || b->parent->stride == b->stride*2);
4231
correlate(s, b, b->buf, b->stride, 1, 0);
4234
// av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
4236
// reconstruction: dequantize every band and run the inverse DWT
for(level=0; level<s->spatial_decomposition_count; level++){
4237
for(orientation=level ? 1 : 0; orientation<4; orientation++){
4238
SubBand *b= &p->band[level][orientation];
4240
dequantize(s, b, b->buf, b->stride);
4244
ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4245
if(s->qlog == LOSSLESS_QLOG){
4248
s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
4253
predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4254
STOP_TIMER("pred-conv")}
4257
// branch taken when the transform path above is skipped (its 'else' is on a line not shown)
if(pict->pict_type == I_TYPE){
4260
s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4261
pict->data[plane_index][y*pict->linesize[plane_index] + x];
4265
memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4266
predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4269
// optional PSNR statistics: 'error' is accumulated from d on lines not shown
if(s->avctx->flags&CODEC_FLAG_PSNR){
4272
if(pict->data[plane_index]) //FIXME gray hack
4275
int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4279
s->avctx->error[plane_index] += error;
4280
s->current_picture.error[plane_index] = error;
4284
// release the last reference slot if it holds a buffer
if(s->last_picture[s->max_ref_frames-1].data[0])
4285
avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4287
// publish per-frame statistics to the picture, the rate-control context and avctx
s->current_picture.coded_picture_number = avctx->frame_number;
4288
s->current_picture.pict_type = pict->pict_type;
4289
s->current_picture.quality = pict->quality;
4290
s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4291
s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4292
s->m.current_picture.display_picture_number =
4293
s->m.current_picture.coded_picture_number = avctx->frame_number;
4294
s->m.current_picture.quality = pict->quality;
4295
s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4297
// the guard around this call and its failure handling are on lines not shown
if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4299
if(avctx->flags&CODEC_FLAG_PASS1)
4300
ff_write_pass1_stats(&s->m);
4301
s->m.last_pict_type = s->m.pict_type;
4302
avctx->frame_bits = s->m.frame_bits;
4303
avctx->mv_bits = s->m.mv_bits;
4304
avctx->misc_bits = s->m.misc_bits;
4305
avctx->p_tex_bits = s->m.p_tex_bits;
4309
return ff_rac_terminate(c);
4312
/*
 * Release the resources held by s: the DWT buffer, the motion-estimation
 * scratch buffers, the block array, the per-reference motion vector / score
 * tables, any reference pictures that still hold a buffer, and every
 * subband's x_coeff array.
 */
static void common_end(SnowContext *s){
4313
int plane_index, level, orientation, i;
4315
av_freep(&s->spatial_dwt_buffer);
4317
av_freep(&s->m.me.scratchpad);
4318
av_freep(&s->m.me.map);
4319
av_freep(&s->m.me.score_map);
4320
av_freep(&s->m.obmc_scratchpad);
4322
av_freep(&s->block);
4324
// all MAX_REF_FRAMES slots are visited, not just the max_ref_frames in use
for(i=0; i<MAX_REF_FRAMES; i++){
4325
av_freep(&s->ref_mvs[i]);
4326
av_freep(&s->ref_scores[i]);
4327
if(s->last_picture[i].data[0])
4328
s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4331
// same band iteration order as the setup loop in common_init()
for(plane_index=0; plane_index<3; plane_index++){
4332
for(level=s->spatial_decomposition_count-1; level>=0; level--){
4333
for(orientation=level ? 1 : 0; orientation<4; orientation++){
4334
SubBand *b= &s->plane[plane_index].band[level][orientation];
4336
av_freep(&b->x_coeff);
4342
/*
 * Encoder teardown. The visible lines fetch the SnowContext and free
 * avctx->stats_out; any further cleanup and the return statement are on
 * lines not shown in this view.
 */
static int encode_end(AVCodecContext *avctx)
4344
SnowContext *s = avctx->priv_data;
4347
av_free(avctx->stats_out);
4352
/*
 * Decoder initialisation: fixes the output pixel format to YUV420P and sets up
 * the slice buffer s->sb over the DWT buffer.
 * Other initialisation calls and the return statement are on lines not shown in this view.
 */
static int decode_init(AVCodecContext *avctx)
4354
SnowContext *s = avctx->priv_data;
4357
avctx->pix_fmt= PIX_FMT_YUV420P;
4361
block_size = MB_SIZE >> s->block_max_depth;
4362
// third argument: block_size + count*(count+3) + 1, with count = spatial_decomposition_count
slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
4367
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4368
SnowContext *s = avctx->priv_data;
4369
RangeCoder * const c= &s->c;
4371
AVFrame *picture = data;
4372
int level, orientation, plane_index;
4374
ff_init_range_decoder(c, buf, buf_size);
4375
ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4377
s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4379
if(!s->block) alloc_blocks(s);
4382
//keyframe flag duplication mess FIXME
4383
if(avctx->debug&FF_DEBUG_PICT_INFO)
4384
av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4388
for(plane_index=0; plane_index<3; plane_index++){
4389
Plane *p= &s->plane[plane_index];
4393
int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4395
if(s->avctx->debug&2048){
4396
memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4397
predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4401
int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4402
s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4408
for(level=0; level<s->spatial_decomposition_count; level++){
4409
for(orientation=level ? 1 : 0; orientation<4; orientation++){
4410
SubBand *b= &p->band[level][orientation];
4411
unpack_coeffs(s, b, b->parent, orientation);
4414
STOP_TIMER("unpack coeffs");
4418
const int mb_h= s->b_height << s->block_max_depth;
4419
const int block_size = MB_SIZE >> s->block_max_depth;
4420
const int block_w = plane_index ? block_size/2 : block_size;
4422
dwt_compose_t cs[MAX_DECOMPOSITIONS];
4427
ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4428
for(mb_y=0; mb_y<=mb_h; mb_y++){
4430
int slice_starty = block_w*mb_y;
4431
int slice_h = block_w*(mb_y+1);
4432
if (!(s->keyframe || s->avctx->debug&512)){
4433
slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4434
slice_h -= (block_w >> 1);
4439
for(level=0; level<s->spatial_decomposition_count; level++){
4440
for(orientation=level ? 1 : 0; orientation<4; orientation++){
4441
SubBand *b= &p->band[level][orientation];
4444
int our_mb_start = mb_y;
4445
int our_mb_end = (mb_y + 1);
4447
start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4448
end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4449
if (!(s->keyframe || s->avctx->debug&512)){
4450
start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4451
end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4453
start_y = FFMIN(b->height, start_y);
4454
end_y = FFMIN(b->height, end_y);
4456
if (start_y != end_y){
4457
if (orientation == 0){
4458
SubBand * correlate_band = &p->band[0][0];
4459
int correlate_end_y = FFMIN(b->height, end_y + 1);
4460
int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4461
decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4462
correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4463
dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
4466
decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4470
STOP_TIMER("decode_subband_slice");
4474
for(; yd<slice_h; yd+=4){
4475
ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4477
STOP_TIMER("idwt slice");}
4480
if(s->qlog == LOSSLESS_QLOG){
4481
for(; yq<slice_h && yq<h; yq++){
4482
DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4484
line[x] <<= FRAC_BITS;
4489
predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
4491
y = FFMIN(p->height, slice_starty);
4492
end_y = FFMIN(p->height, slice_h);
4494
slice_buffer_release(&s->sb, y++);
4497
slice_buffer_flush(&s->sb);
4499
STOP_TIMER("idwt + predict_slices")}
4504
if(s->last_picture[s->max_ref_frames-1].data[0])
4505
avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4507
if(!(s->avctx->debug&2048))
4508
*picture= s->current_picture;
4510
*picture= s->mconly_picture;
4512
*data_size = sizeof(AVFrame);
4514
bytes_read= c->bytestream - c->bytestream_start;
4515
if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4520
/**
 * Release decoder-side resources held in the SnowContext.
 *
 * Only part of the body is visible in this listing: it takes the SnowContext
 * from avctx->priv_data and destroys the slice buffer s->sb.
 * NOTE(review): the return statement and any further cleanup are not visible
 * here -- confirm against the full function.
 *
 * @param avctx codec context whose priv_data is the SnowContext
 */
static int decode_end(AVCodecContext *avctx)
4522
SnowContext *s = avctx->priv_data;
4524
/* Tear down the slice buffer stored in the context. */
slice_buffer_destroy(&s->sb);
4531
/* AVCodec descriptor for the decoder. Only some of the positional initializer
 * entries are visible in this listing. */
AVCodec snow_decoder = {
4535
/* NOTE(review): presumably the private-context size entry -- confirm against the AVCodec layout. */
sizeof(SnowContext),
4540
/* Value 0: both CODEC_CAP_DR1 and CODEC_CAP_DRAW_HORIZ_BAND are commented out.
 * NOTE(review): presumably the capabilities entry -- confirm. */
0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4544
/* The encoder descriptor is compiled only when CONFIG_ENCODERS is defined.
 * Only some of the positional initializer entries are visible in this listing. */
#ifdef CONFIG_ENCODERS
4545
AVCodec snow_encoder = {
4549
/* NOTE(review): presumably the private-context size entry -- confirm against the AVCodec layout. */
sizeof(SnowContext),
4565
int buffer[2][width*height];
4568
s.spatial_decomposition_count=6;
4569
s.spatial_decomposition_type=1;
4571
printf("testing 5/3 DWT\n");
4572
for(i=0; i<width*height; i++)
4573
buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4575
ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4576
ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4578
for(i=0; i<width*height; i++)
4579
if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4581
printf("testing 9/7 DWT\n");
4582
s.spatial_decomposition_type=0;
4583
for(i=0; i<width*height; i++)
4584
buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4586
ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4587
ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4589
for(i=0; i<width*height; i++)
4590
if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4593
printf("testing AC coder\n");
4594
memset(s.header_state, 0, sizeof(s.header_state));
4595
ff_init_range_encoder(&s.c, buffer[0], 256*256);
4596
ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4598
for(i=-256; i<256; i++){
4600
put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4601
STOP_TIMER("put_symbol")
4603
ff_rac_terminate(&s.c);
4605
memset(s.header_state, 0, sizeof(s.header_state));
4606
ff_init_range_decoder(&s.c, buffer[0], 256*256);
4607
ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4609
for(i=-256; i<256; i++){
4612
j= get_symbol(&s.c, s.header_state, 1);
4613
STOP_TIMER("get_symbol")
4614
if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4618
int level, orientation, x, y;
4619
int64_t errors[8][4];
4622
memset(errors, 0, sizeof(errors));
4623
s.spatial_decomposition_count=3;
4624
s.spatial_decomposition_type=0;
4625
for(level=0; level<s.spatial_decomposition_count; level++){
4626
for(orientation=level ? 1 : 0; orientation<4; orientation++){
4627
int w= width >> (s.spatial_decomposition_count-level);
4628
int h= height >> (s.spatial_decomposition_count-level);
4629
int stride= width << (s.spatial_decomposition_count-level);
4630
DWTELEM *buf= buffer[0];
4633
if(orientation&1) buf+=w;
4634
if(orientation>1) buf+=stride>>1;
4636
memset(buffer[0], 0, sizeof(int)*width*height);
4637
buf[w/2 + h/2*stride]= 256*256;
4638
ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4639
for(y=0; y<height; y++){
4640
for(x=0; x<width; x++){
4641
int64_t d= buffer[0][x + y*width];
4643
if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4645
if(FFABS(height/2-y)<9 && level==2) printf("\n");
4647
error= (int)(sqrt(error)+0.5);
4648
errors[level][orientation]= error;
4649
if(g) g=ff_gcd(g, error);
4653
printf("static int const visual_weight[][4]={\n");
4654
for(level=0; level<s.spatial_decomposition_count; level++){
4656
for(orientation=0; orientation<4; orientation++){
4657
printf("%8"PRId64",", errors[level][orientation]/g);
4665
int w= width >> (s.spatial_decomposition_count-level);
4666
int h= height >> (s.spatial_decomposition_count-level);
4667
int stride= width << (s.spatial_decomposition_count-level);
4668
DWTELEM *buf= buffer[0];
4674
memset(buffer[0], 0, sizeof(int)*width*height);
4676
for(y=0; y<height; y++){
4677
for(x=0; x<width; x++){
4678
int tab[4]={0,2,3,1};
4679
buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4682
ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4686
buf[x + y*stride ]=169;
4687
buf[x + y*stride-w]=64;
4690
ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4692
for(y=0; y<height; y++){
4693
for(x=0; x<width; x++){
4694
int64_t d= buffer[0][x + y*width];
4696
if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4698
if(FFABS(height/2-y)<9) printf("\n");