2
* Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4
* This library is free software; you can redistribute it and/or
5
* modify it under the terms of the GNU Lesser General Public
6
* License as published by the Free Software Foundation; either
7
* version 2 of the License, or (at your option) any later version.
9
* This library is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
* Lesser General Public License for more details.
14
* You should have received a copy of the GNU Lesser General Public
15
* License along with this library; if not, write to the Free Software
16
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24
#include "rangecoder.h"
26
#include "mpegvideo.h"
31
/* 256-entry lookup table with values in {-1, 0, 1}.
 * As listed: indices 0, 1 and 255 hold 0, indices 2..127 hold +1 and
 * indices 128..254 hold -1.
 * NOTE(review): presumably indexed by a difference wrapped to 8 bits, so the
 * upper half stands for negative inputs -- confirm against the callers. */
static const int8_t quant3[256]={
32
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
33
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
41
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
49
/* 256-entry lookup table with values in {-1, 0, 1}.
 * As listed: only index 0 holds 0; indices 1..127 hold +1 and indices
 * 128..255 hold -1 (compare quant3, which also maps indices 1 and 255 to 0). */
static const int8_t quant3b[256]={
50
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
59
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
/* 256-entry lookup table with values in {-1, 0, 1}.
 * As listed: indices 0 and 1 hold 0; from index 2 onwards even indices hold
 * +1 and odd indices hold -1.
 * NOTE(review): the alternating layout suggests the index packs a sign in its
 * lowest bit -- confirm against the callers. */
static const int8_t quant3bA[256]={
68
0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
69
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
/* 256-entry lookup table with values in -2..2.
 * As listed: index 0 holds 0, indices 1..3 hold 1, 4..127 hold 2,
 * 128..252 hold -2 and 253..255 hold -1. */
static const int8_t quant5[256]={
86
0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
87
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
95
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
103
/* 256-entry lookup table with values in -3..3.
 * Index 0 holds 0; the lower half (1..127) holds positive levels that grow
 * with the index, the upper half (128..255) holds the negative levels, with
 * the smallest magnitudes next to index 255. */
static const int8_t quant7[256]={
104
0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
107
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
108
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
113
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
118
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
119
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
121
/* 256-entry lookup table with values in -4..4.
 * Index 0 holds 0; the lower half (1..127) holds positive levels that grow
 * with the index, the upper half (128..255) holds the negative levels, with
 * the smallest magnitudes next to index 255. */
static const int8_t quant9[256]={
122
0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
123
3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
124
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
131
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
137
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
139
/* 256-entry lookup table with values in -5..5.
 * Index 0 holds 0; the lower half (1..127) holds positive levels that grow
 * with the index, the upper half (128..255) holds the negative levels, with
 * the smallest magnitudes next to index 255. */
static const int8_t quant11[256]={
140
0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
141
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
142
4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
143
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
149
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
154
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
155
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
157
/* 256-entry lookup table with values in -6..6.
 * Index 0 holds 0; the lower half (1..127) holds positive levels that grow
 * with the index, the upper half (128..255) holds the negative levels, with
 * the smallest magnitudes next to index 255. */
static const int8_t quant13[256]={
158
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
159
4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
160
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161
5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
162
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
167
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
171
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
172
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
177
/* 32x32 weight window stored row-major (32 rows of 32 entries).
 * Weights are 0 along the border and peak at 64 in the four centre entries;
 * rows and columns are mirror-symmetric.
 * NOTE(review): obmc32 is defined three times in this listing; the
 * preprocessor lines that select one variant are not visible here. */
static const uint8_t obmc32[1024]={
178
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
180
0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
181
0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
182
0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
183
0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
184
0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
185
0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
186
0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
187
0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188
0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
189
0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
190
0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
191
0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
192
0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
193
0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
194
0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
195
0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
196
0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
197
0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
198
0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
199
0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
200
0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
201
0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
202
0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
203
0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
204
0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
205
0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
206
0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
207
0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
208
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
209
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
212
/* 16x16 weight window stored row-major (16 rows of 16 entries).
 * Weights fall to 0 or 1 on the border and peak at 62 in the four centre
 * entries; rows and columns are mirror-symmetric.
 * NOTE(review): obmc16 is defined three times in this listing; the
 * preprocessor lines that select one variant are not visible here. */
static const uint8_t obmc16[256]={
213
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
214
0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215
0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
216
0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
217
0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
218
0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
219
1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
220
1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
221
1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
222
1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
223
0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
224
0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
225
0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
226
0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
227
0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
228
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
232
/* 32x32 weight window stored row-major (32 rows of 32 entries).
 * Every entry is a multiple of 4; weights peak at 240 in the four centre
 * entries and fall to 0 in the corners. Rows and columns are
 * mirror-symmetric.
 * NOTE(review): obmc32 is defined three times in this listing; the
 * preprocessor lines that select one variant are not visible here. */
static const uint8_t obmc32[1024]={
233
0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
234
0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
235
0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
236
0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
237
4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
238
4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
239
4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
240
4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
241
4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
242
4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
243
4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
244
4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
245
8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
246
8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
247
8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
248
8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
249
8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250
8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
251
8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
252
8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
253
4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
254
4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
255
4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
256
4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
257
4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
258
4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
259
4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
260
4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
261
0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
262
0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
263
0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
264
0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
267
/* 16x16 weight window stored row-major (16 rows of 16 entries).
 * Every entry is a multiple of 4; weights peak at 224 in the four centre
 * entries and fall to 0 in the corners. Rows and columns are
 * mirror-symmetric.
 * NOTE(review): obmc16 is defined three times in this listing; the
 * preprocessor lines that select one variant are not visible here. */
static const uint8_t obmc16[256]={
268
0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
269
4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
270
4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
271
8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
272
8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
273
12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
274
12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
275
16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
276
16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277
12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
278
12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
279
8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
280
8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
281
4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
282
4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
283
0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
287
/* 32x32 weight window stored row-major (32 rows of 32 entries).
 * Weights are 0 along the border and peak at 64 in the four centre entries;
 * rows and columns are mirror-symmetric.
 * NOTE(review): obmc32 is defined three times in this listing; the
 * preprocessor lines that select one variant are not visible here. */
static const uint8_t obmc32[1024]={
288
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
289
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
290
0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
291
0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
292
0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
293
0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
294
0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
295
0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
296
0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
297
0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298
0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
299
0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
300
0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
301
0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
302
0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
303
0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
304
0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
305
0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
306
0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
307
0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
308
0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
309
0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
310
0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
311
0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
312
0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
313
0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
314
0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
315
0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
316
0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
317
0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
318
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
319
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
322
/* 16x16 weight window stored row-major (16 rows of 16 entries).
 * Weights fall to 0 or 1 on the border and peak at 63 in the four centre
 * entries; rows and columns are mirror-symmetric.
 * NOTE(review): obmc16 is defined three times in this listing; the
 * preprocessor lines that select one variant are not visible here. */
static const uint8_t obmc16[256]={
323
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
324
0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325
0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
326
0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
327
0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
328
1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
329
1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
330
0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
331
0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
332
1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
333
1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
334
0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
335
0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
336
0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
337
0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
338
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
344
/* 8x8 weight window stored row-major (8 rows of 8 entries).
 * Each entry is the product of the row factor and column factor taken from
 * {1, 3, 5, 7, 7, 5, 3, 1}, times 4 (e.g. 4 = 4*1*1, 196 = 4*7*7), so the
 * table is separable and mirror-symmetric. */
static const uint8_t obmc8[64]={
345
4, 12, 20, 28, 28, 20, 12, 4,
346
12, 36, 60, 84, 84, 60, 36, 12,
347
20, 60,100,140,140,100, 60, 20,
348
28, 84,140,196,196,140, 84, 28,
349
28, 84,140,196,196,140, 84, 28,
350
20, 60,100,140,140,100, 60, 20,
351
12, 36, 60, 84, 84, 60, 36, 12,
352
4, 12, 20, 28, 28, 20, 12, 4,
357
/* 16-entry weight table (4x4 by analogy with the larger obmc tables).
 * NOTE(review): the initializer rows are not visible in this listing. */
static const uint8_t obmc4[16]={
365
/* Pointers to the four weight tables, ordered from the largest window
 * (index 0: obmc32) to the smallest (index 3: obmc4).
 * NOTE(review): presumably indexed by a block-size level -- confirm against
 * the callers. */
static const uint8_t *obmc_tab[4]={
366
obmc32, obmc16, obmc8, obmc4
369
/* File-scope square table with one entry per pair of reference-frame indices.
 * NOTE(review): presumably holds motion-vector scale factors between two
 * reference frames -- its initialisation and use are not visible here. */
static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
371
/* Per-block node record.
 * NOTE(review): most members and the closing brace are not visible in this
 * listing; only the flag defines and the `level` member below are. */
typedef struct BlockNode{
377
//#define TYPE_SPLIT 1
378
// Flag value marking a block as intra.
// NOTE(review): the member that carries this flag is not visible here.
#define BLOCK_INTRA 1
380
//#define TYPE_NOCOLOR 4
381
uint8_t level; //FIXME merge into type?
384
/* Constant default BlockNode whose colour components are all set to 128.
 * NOTE(review): any further initialisers are not visible in this listing. */
static const BlockNode null_block= { //FIXME add border maybe
385
.color= {128,128,128},
393
/* Block size as a power of two: MB_SIZE expands to 1<<4, i.e. 16. */
#define LOG2_MB_SIZE 4
394
#define MB_SIZE (1<<LOG2_MB_SIZE)
396
/* Record type referenced by SubBand::x_coeff.
 * NOTE(review): its members are not visible in this listing; the name
 * suggests a position paired with a coefficient -- confirm. */
typedef struct x_and_coeff{
401
/* Description of one wavelet subband.
 * NOTE(review): only some members are visible in this listing. */
typedef struct SubBand{
406
int qlog; ///< log(qscale)/log[2^(1/6)]
410
int stride_line; ///< Stride measured in lines, not pixels.
411
x_and_coeff * x_coeff;
412
struct SubBand *parent; // link to another SubBand; NOTE(review): presumably the same orientation one level coarser -- confirm
413
uint8_t state[/*7*2*/ 7 + 512][32]; // 519 rows of 32 state bytes; NOTE(review): presumably range-coder contexts -- confirm
416
/* One image plane.
 * NOTE(review): other members are not visible in this listing. */
typedef struct Plane{
419
SubBand band[MAX_DECOMPOSITIONS][4]; // four SubBand slots for each of up to MAX_DECOMPOSITIONS levels
422
/* Top-level state of the codec.
 * NOTE(review): only some members are visible in this listing; comments
 * below describe just what the declarations themselves show. */
typedef struct SnowContext{
423
// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
425
AVCodecContext *avctx;
429
AVFrame input_picture; ///< new_picture with the internal linesizes
430
AVFrame current_picture;
431
AVFrame last_picture[MAX_REF_FRAMES]; // one frame slot per reference index
432
AVFrame mconly_picture;
433
// uint8_t q_context[16];
434
uint8_t header_state[32];
435
uint8_t block_state[128 + 32*128];
439
int spatial_decomposition_type;
440
int temporal_decomposition_type;
441
int spatial_decomposition_count;
442
int temporal_decomposition_count;
445
int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; // per reference: pointer to an array of int16_t pairs
446
uint32_t *ref_scores[MAX_REF_FRAMES]; // per reference: pointer to an array of 32-bit scores
447
DWTELEM *spatial_dwt_buffer;
451
int spatial_scalability;
458
#define QBIAS_SHIFT 3
462
Plane plane[MAX_PLANES];
464
#define ME_CACHE_SIZE 1024
465
int me_cache[ME_CACHE_SIZE]; // fixed-size cache of ME_CACHE_SIZE ints
466
int me_cache_generation;
469
MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
480
/* Fetch the buffer bound to line_num: use the cached pointer in
 * slice_buf->line[] when it is non-NULL, otherwise call
 * slice_buffer_load_line() to bind one.
 * Both macro arguments are evaluated more than once, so they must be free
 * of side effects. */
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
481
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
483
/* Forward declaration; the definition lies outside this listing. */
static void iterative_me(SnowContext *s);
485
/**
 * Set up a slice_buffer.
 *
 * Records base_buffer and the geometry (line_count, line_width), allocates a
 * zero-filled table of line_count line pointers, and fills a stack with
 * max_allocated_lines buffers of line_width DWTELEMs each. The stack top is
 * left pointing at the last allocated buffer.
 *
 * NOTE(review): none of the visible lines check the av_mallocz()/av_malloc()
 * results for NULL -- confirm how allocation failure is meant to be handled.
 * NOTE(review): the braces and the declaration of `i` are not visible in
 * this listing.
 */
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
489
buf->base_buffer = base_buffer;
490
buf->line_count = line_count;
491
buf->line_width = line_width;
492
buf->data_count = max_allocated_lines;
493
buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
494
buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
496
for (i = 0; i < max_allocated_lines; i++)
498
buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
501
// every buffer starts out spare, so the top of the stack is the last slot
buf->data_stack_top = max_allocated_lines - 1;
504
/**
 * Bind a spare buffer from the data stack to line number `line`.
 *
 * Pops the top buffer off buf->data_stack and stores it in buf->line[line].
 * The assert requires that at least one spare buffer is left on the stack.
 *
 * NOTE(review): the condition guarding the early `return buf->line[line]`,
 * any use of `offset`, and the final return statement are not visible in
 * this listing.
 */
static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
509
// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
511
assert(buf->data_stack_top >= 0);
512
// assert(!buf->line[line]);
514
return buf->line[line];
516
offset = buf->line_width * line;
517
// pop one spare buffer and attach it to the requested line
buffer = buf->data_stack[buf->data_stack_top];
518
buf->data_stack_top--;
519
buf->line[line] = buffer;
521
// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
526
/**
 * Unbind the buffer attached to line number `line`.
 *
 * Asserts that `line` is in range and currently has a buffer, pushes that
 * buffer back onto buf->data_stack and clears buf->line[line].
 *
 * NOTE(review): `offset` is computed here but no use of it is visible in
 * this listing.
 */
static void slice_buffer_release(slice_buffer * buf, int line)
531
assert(line >= 0 && line < buf->line_count);
532
assert(buf->line[line]);
534
offset = buf->line_width * line;
535
buffer = buf->line[line];
536
// push the buffer back so a later load can reuse it
buf->data_stack_top++;
537
buf->data_stack[buf->data_stack_top] = buffer;
538
buf->line[line] = NULL;
540
// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
543
/**
 * Release the buffers of all lines of the slice buffer.
 *
 * NOTE(review): slice_buffer_release() asserts that the line has a buffer,
 * so a test on buf->line[i] presumably guards the call below on a line that
 * is not visible in this listing -- confirm.
 */
static void slice_buffer_flush(slice_buffer * buf)
546
for (i = 0; i < buf->line_count; i++)
550
// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
551
slice_buffer_release(buf, i);
556
/**
 * Tear down a slice buffer.
 *
 * Flushes it first, then frees each of the data_count buffers held in
 * buf->data_stack (highest index first), the stack array itself and
 * finally the line-pointer table.
 */
static void slice_buffer_destroy(slice_buffer * buf)
559
slice_buffer_flush(buf);
561
for (i = buf->data_count - 1; i >= 0; i--)
563
assert(buf->data_stack[i]);
564
av_freep(&buf->data_stack[i]);
566
assert(buf->data_stack);
567
av_freep(&buf->data_stack);
569
av_freep(&buf->line);
573
// Avoid a name clash on SGI IRIX
// NOTE(review): the directive this remark introduces is not visible in this listing.
576
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
577
// File-scope table with QROOT byte entries; NOTE(review): its initialisation is not visible here.
static uint8_t qexp[QROOT];
579
/**
 * Map v into the range 0..m.
 *
 * The loop runs while v, compared as unsigned, exceeds m; because of the
 * unsigned cast a negative v also satisfies the test when m is non-negative.
 * NOTE(review): the loop body and the return statement are not visible in
 * this listing; the name suggests reflection at the range ends -- confirm.
 */
static inline int mirror(int v, int m){
580
while((unsigned)v > (unsigned)m){
587
/**
 * Encode the integer v with the range coder c using the adaptive states in
 * `state`.
 *
 * Layout of the state array as used by the visible calls:
 *   state[0]       flag bit (written as 0 in the sequences below and as 1
 *                  in the last visible call)
 *   state[1..10]   run of 1 bits terminated by a 0 bit, driven by
 *                  e = av_log2(a)
 *   state[11..21]  sign bit (v < 0)
 *   state[22..31]  the bits of a below its top bit, most significant first
 *
 * NOTE(review): the same write sequence appears three times with different
 * state indexing (split at el, unclamped, and FFMIN-clamped). These look
 * like alternative implementations selected by preprocessor lines that are
 * not visible in this listing, as are the declaration of `a`, the branch
 * conditions and several loop headers.
 */
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
592
const int e= av_log2(a);
594
const int el= FFMIN(e, 10);
595
put_rac(c, state+0, 0);
598
put_rac(c, state+1+i, 1); //1..10
601
put_rac(c, state+1+9, 1); //1..10
603
put_rac(c, state+1+FFMIN(i,9), 0);
605
for(i=e-1; i>=el; i--){
606
put_rac(c, state+22+9, (a>>i)&1); //22..31
609
put_rac(c, state+22+i, (a>>i)&1); //22..31
613
put_rac(c, state+11 + el, v < 0); //11..21
616
put_rac(c, state+0, 0);
619
put_rac(c, state+1+i, 1); //1..10
621
put_rac(c, state+1+i, 0);
623
for(i=e-1; i>=0; i--){
624
put_rac(c, state+22+i, (a>>i)&1); //22..31
628
put_rac(c, state+11 + e, v < 0); //11..21
631
put_rac(c, state+1+FFMIN(i,9), 1); //1..10
633
put_rac(c, state+1+FFMIN(i,9), 0);
635
for(i=e-1; i>=0; i--){
636
put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
640
put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
644
put_rac(c, state+0, 1);
648
/**
 * Decode an integer from the range coder c using the adaptive states in
 * `state`.
 *
 * Visible steps: read the flag in state[0]; read bits from state[1..10]
 * (index clamped with FFMIN(e,9)) while they are 1; read e further bits from
 * state[22..31] (index clamped at 9), doubling `a` and adding each bit; when
 * is_signed is set, read a sign bit from state[11..21] (index clamped at 10).
 *
 * NOTE(review): the statements executed for the flag and the sign, the loop
 * body that advances `e`, the initial value of `a` and the return
 * statements are not visible in this listing.
 */
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
649
if(get_rac(c, state+0))
654
while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
659
for(i=e-1; i>=0; i--){
660
a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
663
if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
670
/**
 * Encode v with the range coder c, starting from an expected magnitude of
 * about 2^log2.
 *
 * r starts at 1<<log2 (or 1 when log2 is negative). The visible calls write
 * 1 bits and then a 0 bit through state[4+log2], followed by the low log2
 * bits of v, most significant first, through state[31-i].
 *
 * NOTE(review): the loop that compares v with r and updates v, r and log2
 * is not visible in this listing.
 */
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
672
int r= log2>=0 ? 1<<log2 : 1;
678
put_rac(c, state+4+log2, 1);
683
put_rac(c, state+4+log2, 0);
685
for(i=log2-1; i>=0; i--){
686
put_rac(c, state+31-i, (v>>i)&1);
690
/**
 * Decode a value written by put_symbol2() with the same initial log2.
 *
 * r starts at 1<<log2 (or 1 when log2 is negative). Bits are read from
 * state[4+log2] while they are 1; afterwards log2 bits are read from
 * state[31-i] and each is added to v at bit position i.
 *
 * NOTE(review): the body of the while loop, the initial value of v and the
 * return statement are not visible in this listing.
 */
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
692
int r= log2>=0 ? 1<<log2 : 1;
697
while(get_rac(c, state+4+log2)){
703
for(i=log2-1; i>=0; i--){
704
v+= get_rac(c, state+31-i)<<i;
710
/**
 * One lifting step along a line of samples.
 *
 * Each output is the matching src sample plus (or, when `inverse` is set,
 * minus) (mul*neighbours + add) >> shift, where the neighbours come from
 * ref. Interior samples use ref[i] + ref[i+1]; the two edge assignments use
 * 2*ref[...] instead, i.e. the single available neighbour counted twice.
 * dst, src and ref are walked with their own step sizes.
 *
 * NOTE(review): the interior loop header, the tests on mirror_left and
 * mirror_right that presumably guard the edge assignments, and any pointer
 * adjustments between them are not visible in this listing.
 */
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
711
const int mirror_left= !highpass;
712
const int mirror_right= (width&1) ^ highpass;
713
const int w= (width>>1) - 1 + (highpass & width);
716
// add the prediction in the forward direction, subtract it when inverting
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
718
dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
724
dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
728
dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
733
/**
 * Lifting step with the same signature and edge layout as lift(), but with
 * the neighbour sum weighted by 3: r = 3*(ref[i] + ref[i+1]) for interior
 * samples and r = 3*2*ref[w] at the right edge. The output is src plus or
 * minus (r + add) >> shift, as selected by `inverse` through LIFT.
 *
 * NOTE(review): lines between the computation of r and its use, the left
 * edge computation of r, the loop header and the mirror tests are not
 * visible in this listing, so r may be modified further before it is used.
 */
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
734
const int mirror_left= !highpass;
735
const int mirror_right= (width&1) ^ highpass;
736
const int w= (width>>1) - 1 + (highpass & width);
743
dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
749
int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
752
dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
756
int r= 3*2*ref[w*ref_step];
759
dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
765
/**
 * Lifting step with the same signature and edge layout as lift(), using the
 * LIFTS formula in which the update also depends on the sample itself.
 *
 * Inverse direction: src - ((ref - 4*src) >> shift).
 * Forward direction: (64*src + 4*ref + 8 + (5<<27)) / 80 - (1<<23).
 * (5<<27)/80 equals 1<<23, so the added bias cancels after the division.
 * NOTE(review): the bias presumably keeps the dividend non-negative so the
 * integer division rounds consistently -- confirm.
 *
 * NOTE(review): the loop header and the mirror tests are not visible in
 * this listing.
 */
static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
766
const int mirror_left= !highpass;
767
const int mirror_right= (width&1) ^ highpass;
768
const int w= (width>>1) - 1 + (highpass & width);
772
#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
774
dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
780
dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
784
dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
790
/**
 * Generic in-place lifting step on one line of `width` samples.
 *
 * For every second sample starting at `start`, a weighted sum of taps is
 * accumulated in 64 bits: tap i is dst[x + 2*i - n + 1] multiplied by
 * coeffs[i]. A tap index at or beyond `width` is reflected to
 * 2*width - x2 - 2. The rounded result (sum + half) >> shift is added to
 * dst[x], or subtracted when `inverse` is set.
 *
 * NOTE(review): the tap loop header (presumably i = 0..n-1), the
 * declaration of `sum` and the handling of negative tap indices that
 * precedes the `else if` are not visible in this listing.
 */
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
793
for(x=start; x<width; x+=2){
797
int x2= x + 2*i - n + 1;
799
else if(x2>=width) x2= 2*width-x2-2;
800
sum += coeffs[i]*(int64_t)dst[x2];
802
if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
803
else dst[x] += (sum + (1<<shift)/2)>>shift;
807
/**
 * Vertical counterpart of inplace_lift() on a width x height area with the
 * given row stride.
 *
 * For every second row starting at `start` and every column x, a weighted
 * sum of taps is accumulated in 64 bits: tap i is the sample in the same
 * column at row y + 2*i - n + 1, multiplied by coeffs[i]. A row index at or
 * beyond `height` is reflected to 2*height - y2 - 2. The rounded result
 * (sum + half) >> shift is added to the sample, or subtracted when
 * `inverse` is set.
 *
 * NOTE(review): the tap loop header, the declaration of `sum` and the
 * handling of negative row indices that precedes the `else if` are not
 * visible in this listing.
 */
static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
809
for(y=start; y<height; y+=2){
810
for(x=0; x<width; x++){
814
int y2= y + 2*i - n + 1;
816
else if(y2>=height) y2= 2*height-y2-2;
817
sum += coeffs[i]*(int64_t)dst[x + y2*stride];
819
if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
820
else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
829
/* Alternative tap sets for the generic lifting passes (inplace_lift /
 * inplace_liftV). Each COEFFSn expands to a compound-literal int array that
 * is passed as the `coeffs` argument of pass n.
 * NOTE(review): the #elif/#else/#endif lines separating the alternatives and
 * the matching N1..N4, SHIFT1..SHIFT4, LX0/LX1 and SCALEX definitions are
 * not visible in this listing, so which set is active cannot be told from
 * here. */
#if 0 // more accurate 9/7
832
#define COEFFS1 (int[]){-25987,-25987}
835
#define COEFFS2 (int[]){-27777,-27777}
838
#define COEFFS3 (int[]){28931,28931}
841
#define COEFFS4 (int[]){14533,14533}
845
#define COEFFS1 (int[]){1,-9,-9,1}
848
#define COEFFS2 (int[]){-1,5,5,-1}
861
#define COEFFS1 (int[]){1,1}
864
#define COEFFS2 (int[]){-1,-1}
877
#define COEFFS2 (int[]){-1,-1}
880
#define COEFFS3 (int[]){-1,-1}
883
#define COEFFS4 (int[]){-5,29,29,-5}
888
#define COEFFS1 (int[]){-203,-203}
891
#define COEFFS2 (int[]){-217,-217}
894
#define COEFFS3 (int[]){113,113}
897
#define COEFFS4 (int[]){227,227}
905
#define COEFFS2 (int[]){-1,-1}
908
#define COEFFS3 (int[]){-1,-1}
911
#define COEFFS4 (int[]){3,3}
915
#define COEFFS1 (int[]){1,-9,-9,1}
918
#define COEFFS2 (int[]){1,1}
928
#define COEFFS1 (int[]){1,-9,-9,1}
931
#define COEFFS2 (int[]){-1,5,5,-1}
939
/**
 * Forward horizontal transform of one line using the generic lifting steps.
 *
 * Runs four in-place lifting passes (COEFFS1..COEFFS4, starting
 * alternately at LX1 and LX0), then de-interleaves the line through `temp`:
 * the odd-indexed samples go to temp[w2 ...] with w2 = (width+1)/2, and
 * temp is copied back over b.
 *
 * NOTE(review): the declaration of `temp`, the line that stores the
 * even-indexed samples and the handling of an odd width are not visible in
 * this listing.
 * NOTE(review): the memcpy size uses sizeof(int) although b holds DWTELEM;
 * this is only right while DWTELEM is int-sized -- sizeof(*b) would be
 * independent of that.
 */
static void horizontal_decomposeX(DWTELEM *b, int width){
941
const int width2= width>>1;
942
const int w2= (width+1)>>1;
945
inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
946
inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
947
inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
948
inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
950
for(x=0; x<width2; x++){
952
temp[x+w2]= b[2*x + 1];
956
memcpy(b, temp, width*sizeof(int));
959
/*
 * Inverse horizontal transform of one row for the generic "X" wavelet.
 *
 * Copies the row into `temp`, re-interleaves it (odd output positions are
 * taken from temp[w2 + x]) and then undoes the four lifting passes in the
 * reverse order COEFFS4..COEFFS1 with inverse=1.
 *
 * NOTE(review): the declaration of `temp` and the restore of the
 * even-indexed samples are not present in this excerpt.
 */
static void horizontal_composeX(DWTELEM *b, int width){
961
const int width2= width>>1;
963
const int w2= (width+1)>>1;
965
// NOTE(review): size is computed with sizeof(int) although the elements
// are DWTELEM; correct only if DWTELEM is int-sized -- confirm.
memcpy(temp, b, width*sizeof(int));
966
for(x=0; x<width2; x++){
968
b[2*x + 1]= temp[x+w2];
973
inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
974
inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
975
inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
976
inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
979
/*
 * One level of the forward 2-D "X" wavelet transform, in place.
 *
 * Steps, in order: multiply every sample by SCALEX, apply
 * horizontal_decomposeX() to each row, then run the four vertical lifting
 * passes (COEFFS1..COEFFS4) over the whole width x height area.
 */
static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
982
for(y=0; y<height; y++){
983
for(x=0; x<width; x++){
984
buffer[y*stride + x] *= SCALEX;
988
for(y=0; y<height; y++){
989
horizontal_decomposeX(buffer + y*stride, width);
992
inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
993
inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
994
inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
995
inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
998
/*
 * One level of the inverse 2-D "X" wavelet transform, in place.
 *
 * Mirrors spatial_decomposeX(): the vertical lifting passes are undone in
 * the order COEFFS4..COEFFS1 (inverse=1), every row is passed through
 * horizontal_composeX(), and finally every sample is divided by SCALEX.
 */
static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
1001
inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1002
inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1003
inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1004
inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1006
for(y=0; y<height; y++){
1007
horizontal_composeX(buffer + y*stride, width);
1010
for(y=0; y<height; y++){
1011
for(x=0; x<width; x++){
1012
buffer[y*stride + x] /= SCALEX;
1017
/*
 * Forward horizontal 5/3 integer transform of one row.
 *
 * The row is de-interleaved into the variable-length scratch array `temp`
 * (odd-indexed samples land at temp[w2 + x]) and the two lift() calls at
 * the end write the result back into b, with the second half starting at
 * b + w2.
 *
 * NOTE(review): both a hand-written A1..A4 loop and the lift() calls are
 * visible; presumably one of the two is disabled by a preprocessor
 * conditional that this excerpt does not show -- confirm.
 */
static void horizontal_decompose53i(DWTELEM *b, int width){
1018
DWTELEM temp[width];
1019
const int width2= width>>1;
1021
const int w2= (width+1)>>1;
1023
for(x=0; x<width2; x++){
1025
temp[x+w2]= b[2*x + 1];
1039
for(x=1; x+1<width2; x+=2){
1043
A2 += (A1 + A3 + 2)>>2;
1047
A1= temp[x+1+width2];
1050
A4 += (A1 + A3 + 2)>>2;
1056
A2 += (A1 + A3 + 2)>>2;
1061
lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1062
lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1066
/*
 * Forward 5/3 vertical lifting step "H0" on three rows: for each of the
 * `width` columns, subtracts (b0 + b2) >> 1 from b1. Only b1 is written.
 */
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1069
for(i=0; i<width; i++){
1070
b1[i] -= (b0[i] + b2[i])>>1;
1074
/*
 * Forward 5/3 vertical lifting step "L0" on three rows: for each of the
 * `width` columns, adds (b0 + b2 + 2) >> 2 to b1 (the +2 rounds the
 * division by 4). Only b1 is written.
 */
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1077
for(i=0; i<width; i++){
1078
b1[i] += (b0[i] + b2[i] + 2)>>2;
1082
/*
 * One level of the forward 2-D 5/3 transform, processed two rows at a time.
 *
 * b0..b3 form a sliding window of row pointers; row indices are passed
 * through mirror(idx, height-1) before being turned into addresses. The
 * loop starts at y=-2 so the window is primed before the first real row.
 * Every "y+k < (unsigned)height" test is evaluated in unsigned arithmetic,
 * so it rejects negative y+k as well as values >= height.
 *
 * NOTE(review): the code that advances b0/b1 at the end of each iteration
 * is not present in this excerpt.
 */
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1084
DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1085
DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1087
for(y=-2; y<height; y+=2){
1088
DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1089
DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1092
// horizontal pass on the two rows that enter the window
if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1093
if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1094
STOP_TIMER("horizontal_decompose53i")}
1097
// vertical lifting: H0 updates row y+1, then L0 updates row y
if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1098
if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1099
STOP_TIMER("vertical_decompose53i*")}
1106
/*
 * Forward horizontal 9/7 integer transform of one row.
 *
 * Four lifting passes are chained through the variable-length scratch
 * array `temp`, using the W_A*, W_B*, W_C* and W_D* constants in that
 * order. The first two read b and write temp; the last two read temp and
 * write b, with the second half of each array starting at offset w2.
 */
static void horizontal_decompose97i(DWTELEM *b, int width){
1107
DWTELEM temp[width];
1108
const int w2= (width+1)>>1;
1110
lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1111
liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1112
lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1113
lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1117
/*
 * Forward 9/7 vertical lifting step "H0": for each column, subtracts
 * (W_AM*(b0 + b2) + W_AO) >> W_AS from b1. Only b1 is written.
 */
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1120
for(i=0; i<width; i++){
1121
b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1125
/*
 * Forward 9/7 vertical lifting step "H1": for each column, adds a
 * W_C*-weighted term built from b0 + b2 to b1. Only b1 is written.
 *
 * NOTE(review): two formulations are visible (a direct W_CM multiply and
 * one that starts from r = 3*(b0+b2)); presumably they are alternatives
 * selected by a preprocessor conditional not shown here, and the second
 * form has further lines missing between the two visible ones -- confirm.
 */
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1128
for(i=0; i<width; i++){
1130
b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1132
int r= 3*(b0[i] + b2[i]);
1135
b1[i] += (r+W_CO)>>W_CS;
1140
/*
 * Forward 9/7 vertical lifting step "L0": for each column, updates b1
 * from b0 + b2 using the W_B* constants. Only b1 is written.
 *
 * NOTE(review): two formulations are visible (a shift-based update and a
 * division-based one with fixed offsets 5<<27 and 1<<23); presumably they
 * are alternatives selected by a preprocessor conditional not shown in
 * this excerpt -- confirm.
 */
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1143
for(i=0; i<width; i++){
1145
b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1147
b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1152
/*
 * Forward 9/7 vertical lifting step "L1": for each column, adds
 * (W_DM*(b0 + b2) + W_DO) >> W_DS to b1. Only b1 is written.
 */
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1155
for(i=0; i<width; i++){
1156
b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1160
/*
 * One level of the forward 2-D 9/7 transform, processed two rows at a time.
 *
 * b0..b5 form a sliding window of six row pointers; row indices go through
 * mirror(idx, height-1) before being turned into addresses. The loop
 * starts at y=-4 so the window is primed before the first real row. Each
 * "y+k < (unsigned)height" test is evaluated in unsigned arithmetic, so it
 * rejects negative y+k as well as values >= height.
 *
 * NOTE(review): the code that advances b0..b3 at the end of each iteration
 * is not present in this excerpt.
 */
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1162
DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1163
DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1164
DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1165
DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1167
for(y=-4; y<height; y+=2){
1168
DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1169
DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1172
// horizontal pass on the two rows that enter the window
if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1173
if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1175
STOP_TIMER("horizontal_decompose97i")
1179
// four vertical lifting steps, applied to rows y+3, y+2, y+1 and y
if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1180
if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1181
if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1182
if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1185
STOP_TIMER("vertical_decompose97i")
1195
/*
 * Forward multi-level 2-D wavelet transform of `buffer`, in place.
 *
 * For level = 0 .. decomposition_count-1 the transform chosen by `type`
 * (DWT_97, DWT_53 or DWT_X) is applied to an area of
 * (width>>level) x (height>>level) samples with the row stride widened to
 * stride<<level.
 *
 * NOTE(review): the switch statement header around the case labels is not
 * present in this excerpt.
 */
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1198
for(level=0; level<decomposition_count; level++){
1200
case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1201
case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1202
case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1207
/*
 * Inverse horizontal 5/3 integer transform of one row.
 *
 * The two lift() calls (last argument 1) read b and fill the
 * variable-length scratch array `temp`; the final loop re-interleaves it
 * into b, with odd output positions taken from temp[w2 + x].
 *
 * NOTE(review): both a hand-written A1..A4 loop and the lift() calls are
 * visible; presumably one of the two is disabled by a preprocessor
 * conditional that this excerpt does not show -- confirm. The restore of
 * the even-indexed samples is not visible either.
 */
static void horizontal_compose53i(DWTELEM *b, int width){
1208
DWTELEM temp[width];
1209
const int width2= width>>1;
1210
const int w2= (width+1)>>1;
1222
for(x=1; x+1<width2; x+=2){
1226
A2 += (A1 + A3 + 2)>>2;
1230
A1= temp[x+1+width2];
1233
A4 += (A1 + A3 + 2)>>2;
1239
A2 += (A1 + A3 + 2)>>2;
1243
lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1244
lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1246
for(x=0; x<width2; x++){
1248
b[2*x + 1]= temp[x+w2];
1254
/*
 * Inverse 5/3 vertical lifting step "H0": for each column, adds
 * (b0 + b2) >> 1 back to b1 (the exact reverse of the subtraction in
 * vertical_decompose53iH0). Only b1 is written.
 */
static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1257
for(i=0; i<width; i++){
1258
b1[i] += (b0[i] + b2[i])>>1;
1262
/*
 * Inverse 5/3 vertical lifting step "L0": for each column, subtracts
 * (b0 + b2 + 2) >> 2 from b1 (the exact reverse of the addition in
 * vertical_decompose53iL0). Only b1 is written.
 */
static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1265
for(i=0; i<width; i++){
1266
b1[i] -= (b0[i] + b2[i] + 2)>>2;
1270
/*
 * Prepares the 5/3 inverse-transform state `cs` for slice-buffer operation:
 * b0 and b1 are set to the slice-buffer lines for the row indices
 * mirror(-2, height-1) and mirror(-1, height-1), each scaled by
 * stride_line.
 *
 * NOTE(review): the initialisation of cs->y is not present in this excerpt.
 */
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1271
cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1272
cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1276
/*
 * Prepares the 5/3 inverse-transform state `cs` for a flat buffer: b0 and
 * b1 point at the rows mirror(-2, height-1) and mirror(-1, height-1) of
 * `buffer`.
 *
 * NOTE(review): the initialisation of cs->y is not present in this excerpt.
 */
static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1277
cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1278
cs->b1 = buffer + mirror(-1 , height-1)*stride;
1282
/*
 * One step of the incremental inverse 5/3 transform on a slice buffer.
 *
 * Takes the two carried-over rows from cs (b0, b1), fetches the next two
 * (b2, b3) from the slice buffer, undoes the vertical lifting (L0 on row
 * y+1, then H0 on row y) and then runs the horizontal inverse on rows y-1
 * and y. Each "< (unsigned)height" test is evaluated in unsigned
 * arithmetic and so also rejects negative row numbers.
 *
 * NOTE(review): the load of y from cs and the write-back of cs->b0,
 * cs->b1 and cs->y are not present in this excerpt.
 */
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1285
DWTELEM *b0= cs->b0;
1286
DWTELEM *b1= cs->b1;
1287
DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1288
DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1291
if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1292
if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1293
STOP_TIMER("vertical_compose53i*")}
1296
if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1297
if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1298
STOP_TIMER("horizontal_compose53i")}
1305
/*
 * One step of the incremental inverse 5/3 transform on a flat buffer.
 *
 * Same sequence as the slice-buffer variant, with rows addressed as
 * buffer + mirror(idx, height-1)*stride: vertical L0 on row y+1, vertical
 * H0 on row y, then the horizontal inverse on rows y-1 and y. Each
 * "< (unsigned)height" test is evaluated in unsigned arithmetic and so
 * also rejects negative row numbers.
 *
 * NOTE(review): the load of y from cs and the write-back of cs->b0,
 * cs->b1 and cs->y are not present in this excerpt.
 */
static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1307
DWTELEM *b0= cs->b0;
1308
DWTELEM *b1= cs->b1;
1309
DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1310
DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1313
if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1314
if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1315
STOP_TIMER("vertical_compose53i*")}
1318
if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1319
if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1320
STOP_TIMER("horizontal_compose53i")}
1327
/*
 * Whole-plane inverse 5/3 transform for one level: initialises a local
 * compose state `cs` and keeps calling spatial_compose53i_dy() until cs.y
 * has moved past `height`.
 *
 * NOTE(review): the declaration of `cs` is not present in this excerpt.
 */
static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1329
spatial_compose53i_init(&cs, buffer, height, stride);
1330
while(cs.y <= height)
1331
spatial_compose53i_dy(&cs, buffer, width, height, stride);
1335
/*
 * Inverse horizontal 9/7 integer transform of one row (C implementation).
 *
 * Undoes the four lifting passes in the order D, C, B, A (last argument 1
 * on every call). The first two passes read b and fill the variable-length
 * scratch array `temp`; the last two read temp and write b, the final pass
 * writing starting at b+1.
 */
void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
1336
DWTELEM temp[width];
1337
const int w2= (width+1)>>1;
1339
lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1340
lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1341
liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1342
lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1345
/*
 * Inverse 9/7 vertical lifting step "H0": for each column, adds
 * (W_AM*(b0 + b2) + W_AO) >> W_AS back to b1. Only b1 is written.
 */
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1348
for(i=0; i<width; i++){
1349
b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1353
/*
 * Inverse 9/7 vertical lifting step "H1": for each column, subtracts a
 * W_C*-weighted term built from b0 + b2 from b1. Only b1 is written.
 *
 * NOTE(review): two formulations are visible (a direct W_CM multiply and
 * one that starts from r = 3*(b0+b2)); presumably they are alternatives
 * selected by a preprocessor conditional not shown here, and the second
 * form has further lines missing between the two visible ones -- confirm.
 */
static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1356
for(i=0; i<width; i++){
1358
b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1360
int r= 3*(b0[i] + b2[i]);
1363
b1[i] -= (r+W_CO)>>W_CS;
1368
/*
 * Inverse 9/7 vertical lifting step "L0": for each column, adds a
 * W_B*-weighted term built from b0 + b2 to b1. Only b1 is written.
 *
 * NOTE(review): two formulations are visible (with and without the extra
 * 4*b1 term); presumably they are alternatives selected by a preprocessor
 * conditional not shown in this excerpt -- confirm.
 */
static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1371
for(i=0; i<width; i++){
1373
b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1375
b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1380
/*
 * Inverse 9/7 vertical lifting step "L1": for each column, subtracts
 * (W_DM*(b0 + b2) + W_DO) >> W_DS from b1. Only b1 is written.
 */
static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1383
for(i=0; i<width; i++){
1384
b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1388
/*
 * Fused inverse 9/7 vertical lifting over six rows (C implementation).
 *
 * For each column the four steps are applied back to back in one loop:
 * the D step on b4 (from b3, b5), the C step on b3 (from b2, b4), the
 * B step on b2 (from b1, b3) and the A step on b1 (from b0, b2). These are
 * the same expressions as in vertical_compose97iL1/H1/L0/H0.
 *
 * NOTE(review): the C and B steps each appear in two formulations,
 * presumably alternatives selected by preprocessor conditionals that this
 * excerpt does not show -- confirm.
 */
void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1391
for(i=0; i<width; i++){
1395
b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1397
b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1399
r= 3*(b2[i] + b4[i]);
1402
b3[i] -= (r+W_CO)>>W_CS;
1405
b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1407
b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1409
b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1413
/*
 * Prepares the 9/7 inverse-transform state `cs` for slice-buffer operation:
 * b0..b3 are set to the slice-buffer lines for the row indices
 * mirror(-4 .. -1, height-1), each scaled by stride_line.
 *
 * NOTE(review): the initialisation of cs->y is not present in this excerpt.
 */
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1414
cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1415
cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1416
cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1417
cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1421
/*
 * Prepares the 9/7 inverse-transform state `cs` for a flat buffer: b0..b3
 * point at the rows mirror(-4 .. -1, height-1) of `buffer`.
 *
 * NOTE(review): the initialisation of cs->y is not present in this excerpt.
 */
static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1422
cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1423
cs->b1 = buffer + mirror(-3 , height-1)*stride;
1424
cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1425
cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1429
/*
 * One step of the incremental inverse 9/7 transform on a slice buffer.
 *
 * Takes the four carried-over rows from cs (b0..b3) and fetches the next
 * two (b4, b5) from the slice buffer. When all six rows are interior
 * (y>0 && y+4<height) the fused dsp->vertical_compose97i() is called; the
 * four individually bounds-checked steps below it handle the picture
 * edges. Afterwards the horizontal inverse is run on rows y-1 and y via
 * dsp->horizontal_compose97i().
 *
 * NOTE(review): the `else` joining the fused call and the four single
 * steps, the load of y from cs and the write-back of the cs fields are
 * not present in this excerpt.
 */
static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1432
DWTELEM *b0= cs->b0;
1433
DWTELEM *b1= cs->b1;
1434
DWTELEM *b2= cs->b2;
1435
DWTELEM *b3= cs->b3;
1436
DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1437
DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1440
if(y>0 && y+4<height){
1441
dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1443
// unsigned comparison: also false when y+k is negative
if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1444
if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1445
if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1446
if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1449
STOP_TIMER("vertical_compose97i")}}
1452
if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1453
if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1454
// timing report is limited to wide rows
if(width>400 && y+0<(unsigned)height){
1455
STOP_TIMER("horizontal_compose97i")}}
1464
/*
 * One step of the incremental inverse 9/7 transform on a flat buffer.
 *
 * Rows are addressed as buffer + mirror(idx, height-1)*stride. The four
 * vertical lifting steps are undone one by one (L1 on row y+3, H1 on y+2,
 * L0 on y+1, H0 on y), each guarded by an unsigned bounds test that also
 * rejects negative row numbers. The horizontal inverse is then run on
 * rows y-1 and y.
 *
 * NOTE(review): the load of y from cs and the write-back of the cs fields
 * are not present in this excerpt.
 */
static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1466
DWTELEM *b0= cs->b0;
1467
DWTELEM *b1= cs->b1;
1468
DWTELEM *b2= cs->b2;
1469
DWTELEM *b3= cs->b3;
1470
DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1471
DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1474
if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1475
if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1476
if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1477
if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1479
STOP_TIMER("vertical_compose97i")}}
1482
if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1483
if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1484
// timing report is limited to wide rows
if(width>400 && b0 <= b2){
1485
STOP_TIMER("horizontal_compose97i")}}
1494
/*
 * Whole-plane inverse 9/7 transform for one level: initialises a local
 * compose state `cs` and keeps calling spatial_compose97i_dy() until cs.y
 * has moved past `height`.
 *
 * NOTE(review): the declaration of `cs` is not present in this excerpt.
 */
static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1496
spatial_compose97i_init(&cs, buffer, height, stride);
1497
while(cs.y <= height)
1498
spatial_compose97i_dy(&cs, buffer, width, height, stride);
1501
/*
 * Initialises one compose state per decomposition level (cs[level]) for
 * the slice-buffer inverse transform, from the coarsest level down to 0.
 * Each level sees height>>level rows and a line stride of
 * stride_line<<level.
 *
 * DWT_X has no slice-buffer implementation: that case only logs an error
 * and leaves cs[level] untouched.
 */
static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1503
for(level=decomposition_count-1; level>=0; level--){
1505
case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1506
case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1507
/* not slicified yet */
1508
case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1509
av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1514
/*
 * Initialises one compose state per decomposition level (cs[level]) for
 * the flat-buffer inverse transform, from the coarsest level down to 0.
 *
 * For DWT_X no state is prepared: the whole level is inverse-transformed
 * immediately by spatial_composeX().
 */
static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1516
for(level=decomposition_count-1; level>=0; level--){
1518
case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1519
case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1520
/* not slicified yet */
1521
case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1526
/*
 * Advances the flat-buffer inverse transform far enough to cover output
 * row `y`.
 *
 * Levels are visited from coarsest to finest; each level is stepped with
 * its *_dy() function while its row counter cs[level].y is at most
 * min((y>>level) + support, height>>level).
 *
 * NOTE(review): `type==1` is a bare constant; presumably 1 is the value of
 * DWT_53 -- confirm against the DWT_* definitions. The switch header and
 * the `break`s after each case are not present in this excerpt.
 */
static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1527
const int support = type==1 ? 3 : 5;
1531
for(level=decomposition_count-1; level>=0; level--){
1532
while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1534
case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1536
case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1544
/*
 * Advances the slice-buffer inverse transform far enough to cover output
 * row `y`.
 *
 * Levels are visited from coarsest to finest; each level is stepped with
 * its *_dy_buffered() function while its row counter cs[level].y is at
 * most min((y>>level) + support, height>>level). Only the 9/7 path
 * receives the DSPContext.
 *
 * NOTE(review): `type==1` is a bare constant; presumably 1 is the value of
 * DWT_53 -- confirm against the DWT_* definitions. The switch header and
 * the `break`s after each case are not present in this excerpt.
 */
static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1545
const int support = type==1 ? 3 : 5;
1549
for(level=decomposition_count-1; level>=0; level--){
1550
while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1552
case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1554
case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1562
/*
 * Full inverse multi-level 2-D wavelet transform of `buffer`, in place.
 *
 * Two paths are visible: a loop that calls spatial_composeX() for every
 * level from coarsest to finest, and a sliced path that initialises one
 * compose state per level and then calls ff_spatial_idwt_slice() for
 * y = 0, 4, 8, ... below `height`.
 *
 * NOTE(review): the condition choosing between the two paths (presumably
 * type == DWT_X) is not present in this excerpt -- confirm.
 */
static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1565
for(level=decomposition_count-1; level>=0; level--)
1566
spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1568
dwt_compose_t cs[MAX_DECOMPOSITIONS];
1570
ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1571
for(y=0; y<height; y+=4)
1572
ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1576
/*
 * Encodes one subband with context modelling plus zero-run coding.
 *
 * Two passes over the coefficients are visible. Both gather the same
 * neighbourhood for a coefficient v: left (l), top-left (lt), top (t),
 * top-right (rt) and, when inside the parent band, the parent coefficient
 * p read from parent[px + py*2*stride].
 *  - The first pass appends run lengths to runs[] inside the branch where
 *    all of l, lt, t, rt and p are zero.
 *  - The number of runs (max_index) is then written with put_symbol2()
 *    using b->state[30].
 *  - The second pass emits a significance bit (!!v) under a context
 *    derived from the neighbour magnitudes when any neighbour is non-zero,
 *    run lengths via b->state[1] otherwise, and for non-zero coefficients
 *    the magnitude ABS(v)-1 followed by the sign bit (v<0).
 *
 * An error is logged when fewer than w*40 bytes remain in the output
 * buffer.
 *
 * NOTE(review): loop headers, the x/y edge guards around the neighbour
 * reads, the run bookkeeping and the return statements are not present in
 * this excerpt; the `ll` neighbour is commented out of every expression
 * that uses it.
 */
static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1577
const int w= b->width;
1578
const int h= b->height;
1590
int /*ll=0, */l=0, lt=0, t=0, rt=0;
1591
v= src[x + y*stride];
1594
t= src[x + (y-1)*stride];
1596
lt= src[x - 1 + (y-1)*stride];
1599
rt= src[x + 1 + (y-1)*stride];
1603
l= src[x - 1 + y*stride];
1605
if(orientation==1) ll= src[y + (x-2)*stride];
1606
else ll= src[x - 2 + y*stride];
1612
if(px<b->parent->width && py<b->parent->height)
1613
p= parent[px + py*2*stride];
1615
// true only when every neighbour and the parent are zero
if(!(/*ll|*/l|lt|t|rt|p)){
1617
runs[run_index++]= run;
1625
max_index= run_index;
1626
runs[run_index++]= run;
1628
run= runs[run_index++];
1630
put_symbol2(&s->c, b->state[30], max_index, 0);
1631
if(run_index <= max_index)
1632
put_symbol2(&s->c, b->state[1], run, 3);
1635
// fail when less than w*40 bytes of output space are left
if(s->c.bytestream_end - s->c.bytestream < w*40){
1636
av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1641
int /*ll=0, */l=0, lt=0, t=0, rt=0;
1642
v= src[x + y*stride];
1645
t= src[x + (y-1)*stride];
1647
lt= src[x - 1 + (y-1)*stride];
1650
rt= src[x + 1 + (y-1)*stride];
1654
l= src[x - 1 + y*stride];
1656
if(orientation==1) ll= src[y + (x-2)*stride];
1657
else ll= src[x - 2 + y*stride];
1663
if(px<b->parent->width && py<b->parent->height)
1664
p= parent[px + py*2*stride];
1666
if(/*ll|*/l|lt|t|rt|p){
1667
// context = log2 of a weighted sum of neighbour magnitudes
int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1669
put_rac(&s->c, &b->state[0][context], !!v);
1672
run= runs[run_index++];
1674
if(run_index <= max_index)
1675
put_symbol2(&s->c, b->state[1], run, 3);
1683
int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1684
// l2/t2 pack magnitude and sign of the left/top neighbours
int l2= 2*ABS(l) + (l<0);
1685
int t2= 2*ABS(t) + (t<0);
1687
put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1688
// sign bit, with a context chosen from the quantised l2 and t2
put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1696
/*
 * Subband encoder entry point: forwards all arguments to
 * encode_subband_c0run() and returns its result. The other coder variants
 * named below are commented out.
 */
static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1697
// encode_subband_qtree(s, b, src, parent, stride, orientation);
1698
// encode_subband_z0run(s, b, src, parent, stride, orientation);
1699
return encode_subband_c0run(s, b, src, parent, stride, orientation);
1700
// encode_subband_dzr(s, b, src, parent, stride, orientation);
1703
/*
 * Decodes the coefficients of one subband from the range coder into the
 * list b->x_coeff of (x, coeff) entries.
 *
 * Visible structure:
 *  - the number of runs is read with b->state[30] and each run length with
 *    b->state[1];
 *  - prev_xc and parent_xc walk through previously written entries and the
 *    parent band's entries to supply the neighbour values lt, t, rt and
 *    the parent value p;
 *  - when any neighbour is non-zero a significance bit is read under a
 *    context computed from the neighbour values; for significant
 *    coefficients v is built as 2*(magnitude) plus one extra bit read with
 *    get_rac(), i.e. the low bit of v is that extra bit;
 *  - an entry with x = w+1 is written as an end marker.
 *
 * NOTE(review): loop headers, the stores into xc, the updates of l/t/lt/rt
 * and the handling of a NULL parent are not present in this excerpt; the
 * visible `parent_xc->x` reads presumably sit under an `if(parent_xc)`
 * guard -- confirm.
 */
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1704
const int w= b->width;
1705
const int h= b->height;
1710
x_and_coeff *xc= b->x_coeff;
1711
x_and_coeff *prev_xc= NULL;
1712
x_and_coeff *prev2_xc= xc;
1713
x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1714
x_and_coeff *prev_parent_xc= parent_xc;
1716
runs= get_symbol2(&s->c, b->state[30], 0);
1717
if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1722
int lt=0, t=0, rt=0;
1724
if(y && prev_xc->x == 0){
1736
if(prev_xc->x == x + 1)
1742
if(x>>1 > parent_xc->x){
1745
// the parent band has half the horizontal resolution, hence x>>1
if(x>>1 == parent_xc->x){
1746
p= parent_xc->coeff;
1749
if(/*ll|*/l|lt|t|rt|p){
1750
int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1752
v=get_rac(&s->c, &b->state[0][context]);
1754
v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1755
v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1762
if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1764
v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1765
v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1774
if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1775
else max_run= FFMIN(run, w-x-1);
1777
max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1783
(xc++)->x= w+1; //end marker
1789
while(parent_xc->x != parent->width+1)
1792
prev_parent_xc= parent_xc;
1794
parent_xc= prev_parent_xc;
1799
(xc++)->x= w+1; //end marker
1803
/*
 * Dequantises rows start_y .. h-1 of subband `b` from its (x, coeff) list
 * into the slice buffer.
 *
 * For every row the destination line is cleared, then each list entry is
 * expanded: the magnitude v>>1 is scaled by qmul, biased by qadd and
 * shifted down by QEXPSHIFT, and the low bit of v selects the sign.
 * save_state[0] carries the list position from one slice to the next.
 *
 * NOTE(review): the inner while loop header, the early-return body of the
 * first `if` and the START_TIMER counterpart are not present in this
 * excerpt.
 */
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1804
const int w= b->width;
1806
const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1807
int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1808
int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1813
if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1818
/* If we are on the second or later slice, restore our index. */
1820
new_index = save_state[0];
1823
for(y=start_y; y<h; y++){
1826
DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1827
memset(line, 0, b->width*sizeof(DWTELEM));
1828
v = b->x_coeff[new_index].coeff;
1829
x = b->x_coeff[new_index++].x;
1832
register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1833
// u is 0 for even v and -1 for odd v
register int u= -(v&1);
1834
// (t^u)-u leaves t unchanged when u==0 and negates it when u==-1
line[x] = (t^u) - u;
1836
v = b->x_coeff[new_index].coeff;
1837
x = b->x_coeff[new_index++].x;
1840
if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1841
STOP_TIMER("decode_subband")
1844
/* Save our variables for the next slice. */
1845
save_state[0] = new_index;
1850
/*
 * Resets every adaptive probability state of the coder to MID_STATE:
 * the per-band state tables of all three planes, plus the header and
 * block state tables. Orientation 0 is only visited at level 0; every
 * other level starts at orientation 1.
 */
static void reset_contexts(SnowContext *s){
1851
int plane_index, level, orientation;
1853
for(plane_index=0; plane_index<3; plane_index++){
1854
for(level=0; level<s->spatial_decomposition_count; level++){
1855
for(orientation=level ? 1:0; orientation<4; orientation++){
1856
memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1860
memset(s->header_state, MID_STATE, sizeof(s->header_state));
1861
memset(s->block_state, MID_STATE, sizeof(s->block_state));
1864
/*
 * Allocates the zero-initialised BlockNode array s->block.
 *
 * w and h are the picture dimensions in units of 1<<LOG2_MB_SIZE pixels,
 * rounded up: -((-n) >> k) is a ceiling division by 2^k when >> on a
 * negative int is an arithmetic shift. The array holds
 * w*h << (2*block_max_depth) nodes.
 *
 * NOTE(review): no check of the av_mallocz() result is visible, and the
 * size expression is not guarded against overflow; the return statement
 * is not present in this excerpt.
 */
static int alloc_blocks(SnowContext *s){
1865
int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1866
int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1871
s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1875
/*
 * Saves d's bytestream and bytestream_start pointers on entry and writes
 * them back at the end, so that d keeps its own output position.
 *
 * NOTE(review): the statement between save and restore is not present in
 * this excerpt; presumably it copies *s into *d -- confirm.
 */
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1876
uint8_t *bytestream= d->bytestream;
1877
uint8_t *bytestream_start= d->bytestream_start;
1879
d->bytestream= bytestream;
1880
d->bytestream_start= bytestream_start;
1883
//near copy & paste from dsputil, FIXME
1884
/*
 * Walks a w x w block of bytes starting at `pix`, with rows `line_size`
 * bytes apart.
 *
 * NOTE(review): the accumulation statement and the return are not present
 * in this excerpt; from the name and the call sites the result is
 * presumably the sum of the pixel values -- confirm.
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
1889
for (i = 0; i < w; i++) {
1890
for (j = 0; j < w; j++) {
1894
// skip from the end of this row to the start of the next one
pix += line_size - w;
1899
//near copy & paste from dsputil, FIXME
1900
/*
 * Walks a w x w block of bytes starting at `pix`, with rows `line_size`
 * bytes apart, using the lookup table squareTbl offset by 256.
 *
 * NOTE(review): the accumulation statement and the return are not present
 * in this excerpt; presumably the result is the sum of squared pixel
 * values -- confirm.
 */
static int pix_norm1(uint8_t * pix, int line_size, int w)
1903
uint32_t *sq = squareTbl + 256;
1906
for (i = 0; i < w; i++) {
1907
for (j = 0; j < w; j ++) {
1911
// skip from the end of this row to the start of the next one
pix += line_size - w;
1916
/*
 * Writes one BlockNode value into every finest-level cell covered by the
 * block at (x, y) of tree depth `level`.
 *
 * The block grid is w = b_width << block_max_depth cells wide; a block at
 * depth `level` covers a square of (1 << rem_depth) cells per side whose
 * top-left cell is `index`.
 *
 * NOTE(review): the construction of `block` from l, cb, cr, mx, my, ref
 * and type is not present in this excerpt.
 */
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1917
const int w= s->b_width << s->block_max_depth;
1918
const int rem_depth= s->block_max_depth - level;
1919
const int index= (x + y*w) << rem_depth;
1920
const int block_w= 1<<rem_depth;
1933
for(j=0; j<block_w; j++){
1934
for(i=0; i<block_w; i++){
1935
s->block[index + i + j*w]= block;
1940
/*
 * Points the motion-estimation context at the planes to compare: for each
 * plane i, c->src[0][i] is set to src[i] and c->ref[0][i] to ref[i] plus
 * a per-plane byte offset. The two chroma offsets are
 * (y*c->uvstride + x) >> 1.
 *
 * NOTE(review): the luma entry of offset[] and the loop header over i are
 * not present in this excerpt; ref2 and ref_index are not used in any
 * visible line.
 */
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1941
const int offset[3]= {
1943
((y*c->uvstride + x)>>1),
1944
((y*c->uvstride + x)>>1),
1948
c->src[0][i]= src [i];
1949
c->ref[0][i]= ref [i] + offset[i];
1954
/*
 * Predicts a motion vector as the component-wise median of the left, top
 * and top-right neighbours and stores it in *mx / *my.
 *
 * With a single reference frame the neighbours' vectors are used as they
 * are. Otherwise each neighbour's vector is first multiplied by
 * scale_mv_ref[ref][neighbour->ref] and reduced with (v + 128) >> 8.
 * Note that `a * b + 128 >>8` parses as ((a*b) + 128) >> 8.
 *
 * NOTE(review): the `else` between the two branches is not present in
 * this excerpt.
 */
static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1955
BlockNode *left, BlockNode *top, BlockNode *tr){
1956
if(s->ref_frames == 1){
1957
*mx = mid_pred(left->mx, top->mx, tr->mx);
1958
*my = mid_pred(left->my, top->my, tr->my);
1960
const int *scale = scale_mv_ref[ref];
1961
*mx = mid_pred(left->mx * scale[left->ref] + 128 >>8,
1962
top ->mx * scale[top ->ref] + 128 >>8,
1963
tr ->mx * scale[tr ->ref] + 128 >>8);
1964
*my = mid_pred(left->my * scale[left->ref] + 128 >>8,
1965
top ->my * scale[top ->ref] + 128 >>8,
1966
tr ->my * scale[tr ->ref] + 128 >>8);
1973
// Named slots of the motion-vector predictor array P used by the motion
// search in encode_q_branch().
#define P_TOPRIGHT P[3]
1974
#define P_MEDIAN P[4]
1976
#define FLAG_QPEL 1 //must be 1
1978
/*
 * Rate-distortion decision for one node of the block quadtree, at depth
 * `level` and block coordinates (x, y).
 *
 * Visible stages:
 *  1. Locate the neighbouring blocks (left, top, right, bottom, tl, tr),
 *     using null_block outside the picture, and derive the coding contexts
 *     from them.
 *  2. Set up the motion-estimation context (penalty factors, search range,
 *     clipped predictors) and run a motion search for every reference
 *     frame, keeping a candidate whose score is lower than `score`.
 *  3. Trial-encode the inter candidate into p_buffer/p_state and add its
 *     bit cost, weighted by lambda2, to `score`.
 *  4. Compute the intra candidate: the block means l, cb, cr and the luma
 *     squared error around the mean (iscore); trial-encode it into
 *     i_buffer/i_state and add its weighted bit cost.
 *  5. Below the maximum depth, write a "split" flag and recurse into the
 *     four children to get score2.
 *  6. Copy the winning trial encoding back into the main bytestream at
 *     pbbak, store the block with set_blocks() and adopt the matching
 *     state table.
 *
 * NOTE(review): many lines (declarations, else branches, the initial value
 * of `score`, returns) are not present in this excerpt, so the exact
 * conditions joining these stages cannot be confirmed here.
 */
static int encode_q_branch(SnowContext *s, int level, int x, int y){
1979
uint8_t p_buffer[1024];
1980
uint8_t i_buffer[1024];
1981
uint8_t p_state[sizeof(s->block_state)];
1982
uint8_t i_state[sizeof(s->block_state)];
1984
uint8_t *pbbak= s->c.bytestream;
1985
uint8_t *pbbak_start= s->c.bytestream_start;
1986
int score, score2, iscore, i_len, p_len, block_s, sum;
1987
const int w= s->b_width << s->block_max_depth;
1988
const int h= s->b_height << s->block_max_depth;
1989
const int rem_depth= s->block_max_depth - level;
1990
const int index= (x + y*w) << rem_depth;
1991
const int block_w= 1<<(LOG2_MB_SIZE - level);
1992
int trx= (x+1)<<rem_depth;
1993
int try= (y+1)<<rem_depth;
1994
BlockNode *left = x ? &s->block[index-1] : &null_block;
1995
BlockNode *top = y ? &s->block[index-w] : &null_block;
1996
BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1997
BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1998
BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1999
BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2000
// colour predictors are taken from the left neighbour
int pl = left->color[0];
2001
int pcb= left->color[1];
2002
int pcr= left->color[2];
2006
const int stride= s->current_picture.linesize[0];
2007
const int uvstride= s->current_picture.linesize[1];
2008
uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2009
s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2010
s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2012
int16_t last_mv[3][2];
2013
int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2014
const int shift= 1+qpel;
2015
MotionEstContext *c= &s->m.me;
2016
int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2017
int mx_context= av_log2(2*ABS(left->mx - top->mx));
2018
int my_context= av_log2(2*ABS(left->my - top->my));
2019
int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2020
int ref, best_ref, ref_score, ref_mx, ref_my;
2022
assert(sizeof(s->block_state) >= 256);
2024
set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2028
// clip predictors / edge ?
2030
P_LEFT[0]= left->mx;
2031
P_LEFT[1]= left->my;
2034
P_TOPRIGHT[0]= tr->mx;
2035
P_TOPRIGHT[1]= tr->my;
2037
last_mv[0][0]= s->block[index].mx;
2038
last_mv[0][1]= s->block[index].my;
2039
last_mv[1][0]= right->mx;
2040
last_mv[1][1]= right->my;
2041
last_mv[2][0]= bottom->mx;
2042
last_mv[2][1]= bottom->my;
2049
assert(s->m.me. stride == stride);
2050
assert(s->m.me.uvstride == uvstride);
2052
c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2053
c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2054
c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2055
c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2057
// search window relative to this block, extending 16-2 pixels past each picture edge
c->xmin = - x*block_w - 16+2;
2058
c->ymin = - y*block_w - 16+2;
2059
c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2060
c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2062
// clamp the predictors to the search window (scaled by `shift`)
if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2063
if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2064
if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2065
if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2066
if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2067
if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2068
if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2070
P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2071
P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2074
c->pred_x= P_LEFT[0];
2075
c->pred_y= P_LEFT[1];
2077
c->pred_x = P_MEDIAN[0];
2078
c->pred_y = P_MEDIAN[1];
2083
// motion search against every reference frame
for(ref=0; ref<s->ref_frames; ref++){
2084
init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2086
ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2087
(1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2089
assert(ref_mx >= c->xmin);
2090
assert(ref_mx <= c->xmax);
2091
assert(ref_my >= c->ymin);
2092
assert(ref_my <= c->ymax);
2094
ref_score= s->m.me.sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2095
ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2096
// cost term that grows with the reference index
ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2097
if(s->ref_mvs[ref]){
2098
s->ref_mvs[ref][index][0]= ref_mx;
2099
s->ref_mvs[ref][index][1]= ref_my;
2100
s->ref_scores[ref][index]= ref_score;
2102
if(score > ref_score){
2109
//FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2113
// trial-encode the inter candidate into the scratch buffer p_buffer
pc.bytestream_start=
2114
pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2115
memcpy(p_state, s->block_state, sizeof(s->block_state));
2117
if(level!=s->block_max_depth)
2118
put_rac(&pc, &p_state[4 + s_context], 1);
2119
put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2120
if(s->ref_frames > 1)
2121
put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2122
pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2123
put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2124
put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2125
p_len= pc.bytestream - pc.bytestream_start;
2126
// rate term: bytes written, outstanding bytes and the change in coder range
score += (s->lambda2*(p_len*8
2127
+ (pc.outstanding_count - s->c.outstanding_count)*8
2128
+ (-av_log2(pc.range) + av_log2(s->c.range))
2129
))>>FF_LAMBDA_SHIFT;
2131
block_s= block_w*block_w;
2132
sum = pix_sum(current_data[0], stride, block_w);
2133
// rounded mean of the luma block
l= (sum + block_s/2)/block_s;
2134
// pix_norm1 - 2*l*sum + l*l*N; equals the squared error around l if pix_norm1 is the sum of squares
iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2136
block_s= block_w*block_w>>2;
2137
sum = pix_sum(current_data[1], uvstride, block_w>>1);
2138
cb= (sum + block_s/2)/block_s;
2139
// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2140
sum = pix_sum(current_data[2], uvstride, block_w>>1);
2141
cr= (sum + block_s/2)/block_s;
2142
// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2145
// trial-encode the intra candidate into the scratch buffer i_buffer
ic.bytestream_start=
2146
ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2147
memcpy(i_state, s->block_state, sizeof(s->block_state));
2148
if(level!=s->block_max_depth)
2149
put_rac(&ic, &i_state[4 + s_context], 1);
2150
put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2151
put_symbol(&ic, &i_state[32], l-pl , 1);
2152
put_symbol(&ic, &i_state[64], cb-pcb, 1);
2153
put_symbol(&ic, &i_state[96], cr-pcr, 1);
2154
i_len= ic.bytestream - ic.bytestream_start;
2155
iscore += (s->lambda2*(i_len*8
2156
+ (ic.outstanding_count - s->c.outstanding_count)*8
2157
+ (-av_log2(ic.range) + av_log2(s->c.range))
2158
))>>FF_LAMBDA_SHIFT;
2160
// assert(score==256*256*256*64-1);
2161
assert(iscore < 255*255*256 + s->lambda2*10);
2162
assert(iscore >= 0);
2163
assert(l>=0 && l<=255);
2164
assert(pl>=0 && pl<=255);
2167
int varc= iscore >> 8;
2168
int vard= score >> 8;
2169
if (vard <= 64 || vard < varc)
2170
c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2172
c->scene_change_score+= s->m.qscale;
2175
// below the maximum depth: try splitting into four children
if(level!=s->block_max_depth){
2176
put_rac(&s->c, &s->block_state[4 + s_context], 0);
2177
score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2178
score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2179
score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2180
score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2181
score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2183
if(score2 < score && score2 < iscore)
2188
pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2189
// intra chosen: copy its trial bytes to where this node's output started
memcpy(pbbak, i_buffer, i_len);
2191
s->c.bytestream_start= pbbak_start;
2192
s->c.bytestream= pbbak + i_len;
2193
set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2194
memcpy(s->block_state, i_state, sizeof(s->block_state));
2197
// inter chosen: copy its trial bytes to where this node's output started
memcpy(pbbak, p_buffer, p_len);
2199
s->c.bytestream_start= pbbak_start;
2200
s->c.bytestream= pbbak + p_len;
2201
set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2202
memcpy(s->block_state, p_state, sizeof(s->block_state));
2207
/**
 * Test whether two block nodes carry the same prediction parameters.
 *
 * When both nodes have BLOCK_INTRA set, only the three color components are
 * compared. Otherwise the motion vector (mx, my), the reference index and the
 * BLOCK_INTRA bit of the type must all match.
 *
 * The differences are OR-ed together so that a single test against zero
 * decides equality without extra branches.
 *
 * @param a first block node
 * @param b second block node
 * @return 1 if the nodes are equivalent for prediction purposes, 0 otherwise
 */
static always_inline int same_block(BlockNode *a, BlockNode *b){
    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
    }else{
        return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
    }
}
2215
/*
 * Write the block-tree node at (x, y) on the given level to the range coder
 * s->c, using the block parameters already stored in s->block.
 *
 * Below the maximum depth a flag is coded in context 4 + s_context:
 * 1 when the node and its three neighbours b+1, b+w, b+w+1 compare equal via
 * same_block(), 0 followed by the four children on level+1 otherwise.
 * For the node itself an intra/inter flag is coded; intra nodes then code the
 * three colors as differences to the left neighbour, inter nodes code the
 * reference index (only if s->ref_frames > 1) and the motion vector as a
 * difference to the pred_mv() predictor.
 *
 * NOTE(review): this listing is fragmentary - the bare numeric lines are
 * extraction debris and several short lines (declarations such as pmx/pmy,
 * else branches, returns, closing braces) are absent.
 */
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2216
const int w= s->b_width << s->block_max_depth;
2217
const int rem_depth= s->block_max_depth - level;
2218
const int index= (x + y*w) << rem_depth;
2219
int trx= (x+1)<<rem_depth;
2220
BlockNode *b= &s->block[index];
2221
// Neighbours outside the picture fall back to null_block (left/top) or to
// the nearest available neighbour (tl -> left, tr -> tl).
BlockNode *left = x ? &s->block[index-1] : &null_block;
2222
BlockNode *top = y ? &s->block[index-w] : &null_block;
2223
BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2224
BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2225
// Color predictors are taken from the left neighbour.
int pl = left->color[0];
2226
int pcb= left->color[1];
2227
int pcr= left->color[2];
2229
int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2230
int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 16*!!b->ref;
2231
int my_context= av_log2(2*ABS(left->my - top->my)) + 16*!!b->ref;
2232
int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2235
// NOTE(review): presumably guarded by a condition on a line that is not
// visible here - confirm against the full file.
set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2239
if(level!=s->block_max_depth){
2240
if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2241
put_rac(&s->c, &s->block_state[4 + s_context], 1);
2243
put_rac(&s->c, &s->block_state[4 + s_context], 0);
2244
encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2245
encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2246
encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2247
encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2251
if(b->type & BLOCK_INTRA){
2252
pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2253
// Only bit 0 of the neighbour types selects the context, so other type
// flags do not disturb it.
put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2254
put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2255
put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2256
put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2257
set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2259
pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2260
put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2261
if(s->ref_frames > 1)
2262
put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2263
put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2264
put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2265
set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2269
/*
 * Read the block-tree node at (x, y) on the given level from the range coder
 * s->c and store the result with set_blocks().
 *
 * A node is decoded as a single block when level equals s->block_max_depth or
 * when the flag read in context 4 + s_context is set; otherwise the four
 * children on level+1 are decoded recursively. Colors are predicted from the
 * left neighbour, the motion vector from pred_mv() (initialised with the
 * median of the left, top and top-right vectors).
 *
 * NOTE(review): this listing is fragmentary - the bare numeric lines are
 * extraction debris and several short lines (declarations of type/ref,
 * if/else lines, closing braces) are absent.
 * NOTE(review): no range check of the decoded ref against s->ref_frames is
 * visible here, although pred_block() later indexes s->last_picture[] with
 * it - confirm that a check exists in the full file.
 */
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2270
const int w= s->b_width << s->block_max_depth;
2271
const int rem_depth= s->block_max_depth - level;
2272
const int index= (x + y*w) << rem_depth;
2273
int trx= (x+1)<<rem_depth;
2274
// Same neighbour selection as in encode_q_branch2().
BlockNode *left = x ? &s->block[index-1] : &null_block;
2275
BlockNode *top = y ? &s->block[index-w] : &null_block;
2276
BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2277
BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2278
int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2281
// NOTE(review): presumably guarded by a condition on a line that is not
// visible here - confirm against the full file.
set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2285
// Leaf: either the maximum depth is reached or the coded flag is 1.
if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2287
int l = left->color[0];
2288
int cb= left->color[1];
2289
int cr= left->color[2];
2290
int mx= mid_pred(left->mx, top->mx, tr->mx);
2291
int my= mid_pred(left->my, top->my, tr->my);
2293
int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2294
// The second term is multiplied by 0 and therefore does not contribute.
int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2295
int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2297
type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2300
pred_mv(s, &mx, &my, 0, left, top, tr);
2301
l += get_symbol(&s->c, &s->block_state[32], 1);
2302
cb+= get_symbol(&s->c, &s->block_state[64], 1);
2303
cr+= get_symbol(&s->c, &s->block_state[96], 1);
2305
if(s->ref_frames > 1)
2306
ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2307
pred_mv(s, &mx, &my, ref, left, top, tr);
2308
mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2309
my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2311
set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2313
decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2314
decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2315
decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2316
decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2320
/*
 * Encode the block trees of the current frame.
 *
 * With me_method == ME_ITER the root nodes are written with
 * encode_q_branch2(), otherwise with encode_q_branch(). Before a root node is
 * written the remaining space in the range coder buffer is compared against
 * w*MB_SIZE*MB_SIZE*3 and "encoded frame too large" is logged if it is less.
 *
 * NOTE(review): this listing is fragmentary - the loops, the statement
 * executed for ME_ITER on non-keyframes and the error return are on lines
 * that are not visible here.
 */
static void encode_blocks(SnowContext *s){
2325
if(s->avctx->me_method == ME_ITER && !s->keyframe)
2329
if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2330
av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2334
if(s->avctx->me_method == ME_ITER)
2335
encode_q_branch2(s, 0, x, y);
2337
encode_q_branch (s, 0, x, y);
2342
/*
 * Decode the block trees of the current frame by calling decode_q_branch()
 * on level 0 for each root position (x, y).
 *
 * NOTE(review): the loop lines and their bounds are not visible in this
 * fragmentary listing.
 */
static void decode_blocks(SnowContext *s){
2349
decode_q_branch(s, 0, x, y);
2354
/*
 * Sub-pel motion compensation of a b_w x b_h block in two separable passes.
 *
 * The first pass loops over b_h+5 rows (the extra rows feed the vertical
 * filter), the second pass loops over b_h rows and reads six vertically
 * adjacent samples from tmp. In both passes the half-pel value
 *     am = 20*(a2+a3) - 5*(a1+a4) + (a0+a5)
 * is blended linearly with the nearer full-pel sample (a2 for an offset below
 * 8, a3 otherwise) according to the fractional offset dx resp. dy; the result
 * is rounded, shifted down by 8 and clipped to 0..255.
 *
 * dx and dy are the fractional offsets; pred_block() passes values in 0..15.
 *
 * NOTE(review): this listing is fragmentary - the bare numeric lines are
 * extraction debris and short lines (sample loads of the first pass, stores,
 * pointer increments, closing braces) are absent.
 */
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2357
for(y=0; y < b_h+5; y++){
2358
for(x=0; x < b_w; x++){
2365
// int am= 9*(a1+a2) - (a0+a3);
2366
int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2367
// int am= 18*(a2+a3) - 2*(a1+a4);
2368
// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2369
// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2371
// if(b_w==16) am= 8*(a1+a2);
2373
if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2374
else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2376
/* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
// Clip: a negative value becomes 0, a value above 255 becomes -1, which is
// 255 once stored in a uint8_t (relies on an arithmetic right shift).
if(am&(~255)) am= ~(am>>31);
2381
/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2382
else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2383
else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2384
else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2389
// Rewind tmp by the b_h+5 rows covered by the first pass.
tmp -= (b_h+5)*stride;
2391
for(y=0; y < b_h; y++){
2392
for(x=0; x < b_w; x++){
2393
int a0= tmp[x + 0*stride];
2394
int a1= tmp[x + 1*stride];
2395
int a2= tmp[x + 2*stride];
2396
int a3= tmp[x + 3*stride];
2397
int a4= tmp[x + 4*stride];
2398
int a5= tmp[x + 5*stride];
2399
int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2400
// int am= 18*(a2+a3) - 2*(a1+a4);
2401
/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2402
int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2404
// if(b_w==16) am= 8*(a1+a2);
2406
if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2407
else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2409
if(am&(~255)) am= ~(am>>31);
2412
/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2413
else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2414
else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2415
else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2420
STOP_TIMER("mc_block")
2423
/*
 * Generates a function mc_block_hpel<dx><dy><b_w>() that runs mc_block() on a
 * b_w x b_w block with the fixed fractional offsets dx and dy. The source
 * pointer is moved back by 2 columns and 2 rows before the call, and the
 * temporary buffer is a variable-length array of stride*(b_w+5) bytes on the
 * stack. The h parameter is not used in the visible lines.
 *
 * NOTE(review): this listing is fragmentary - the bare numeric lines are
 * extraction debris and the end of the macro is not visible.
 */
#define mca(dx,dy,b_w)\
2424
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2425
uint8_t tmp[stride*(b_w+5)];\
2427
mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2439
/*
 * Write the prediction of one block into dst.
 *
 * Intra blocks are filled with the constant block->color[plane_index]; the
 * unrolled variants store 32, 16, 8 or 4 bytes per row as 32-bit words, the
 * last variant is a generic per-pixel loop.
 * Inter blocks are motion compensated from s->last_picture[block->ref]: the
 * vector is scaled by s->mv_scale (doubled for plane 0), its low 4 bits give
 * the fractional offsets dx/dy. If the block lies too close to the picture
 * border, ff_emulated_edge_mc() is called with tmp + MB_SIZE as destination.
 * mc_block() is used for fractional offsets that are not multiples of 4 and
 * for block shapes that are neither square nor 2:1 with a power-of-two width;
 * the remaining cases go through s->dsp.put_h264_qpel_pixels_tab.
 *
 * @param dst   destination, rows are stride bytes apart
 * @param tmp   scratch buffer for edge emulation and mc_block()
 * @param sx,sy block position in the plane
 * @param w,h   plane dimensions used for the border test
 *
 * NOTE(review): the 32-bit stores assume dst is suitably aligned and rely on
 * type punning through uint32_t - confirm for all callers.
 * NOTE(review): this listing is fragmentary - the bare numeric lines are
 * extraction debris and short lines (width selection, sx/sy updates, else
 * branches, closing braces) are absent.
 */
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2440
if(block->type & BLOCK_INTRA){
2442
const int color = block->color[plane_index];
2443
// The color byte replicated into all four bytes of a 32-bit word.
const int color4= color*0x01010101;
2445
// 32 bytes per row
for(y=0; y < b_h; y++){
2446
*(uint32_t*)&dst[0 + y*stride]= color4;
2447
*(uint32_t*)&dst[4 + y*stride]= color4;
2448
*(uint32_t*)&dst[8 + y*stride]= color4;
2449
*(uint32_t*)&dst[12+ y*stride]= color4;
2450
*(uint32_t*)&dst[16+ y*stride]= color4;
2451
*(uint32_t*)&dst[20+ y*stride]= color4;
2452
*(uint32_t*)&dst[24+ y*stride]= color4;
2453
*(uint32_t*)&dst[28+ y*stride]= color4;
2456
// 16 bytes per row
for(y=0; y < b_h; y++){
2457
*(uint32_t*)&dst[0 + y*stride]= color4;
2458
*(uint32_t*)&dst[4 + y*stride]= color4;
2459
*(uint32_t*)&dst[8 + y*stride]= color4;
2460
*(uint32_t*)&dst[12+ y*stride]= color4;
2463
// 8 bytes per row
for(y=0; y < b_h; y++){
2464
*(uint32_t*)&dst[0 + y*stride]= color4;
2465
*(uint32_t*)&dst[4 + y*stride]= color4;
2468
// 4 bytes per row
for(y=0; y < b_h; y++){
2469
*(uint32_t*)&dst[0 + y*stride]= color4;
2472
// generic width
for(y=0; y < b_h; y++){
2473
for(x=0; x < b_w; x++){
2474
dst[x + y*stride]= color;
2479
uint8_t *src= s->last_picture[block->ref].data[plane_index];
2480
const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2481
int mx= block->mx*scale;
2482
int my= block->my*scale;
2483
const int dx= mx&15;
2484
const int dy= my&15;
2485
const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2488
src += sx + sy*stride;
2489
// The unsigned casts make negative positions fail the test as well.
if( (unsigned)sx >= w - b_w - 4
2490
|| (unsigned)sy >= h - b_h - 4){
2491
ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2494
// assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2495
// assert(!(b_w&(b_w-1)));
2496
assert(b_w>1 && b_h>1);
2497
assert(tab_index>=0 && tab_index<4 || b_w==32);
2498
if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
2499
mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2502
// Two 16-wide calls per 16 rows, i.e. a 32 pixel wide block.
for(y=0; y<b_h; y+=16){
2503
s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2504
s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2507
s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2508
else if(b_w==2*b_h){
2509
// Wide block: two calls side by side, b_h columns apart.
s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2510
s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2513
// Remaining shape: two calls stacked vertically, b_w rows apart.
s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2514
s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2519
/*
 * Inner loop of overlapped block motion compensation on a slice buffer.
 *
 * For every pixel of the b_w x b_h area the four predictions block[0..3] are
 * weighted with the four quadrants of the obmc window (obmc1..obmc4 pair with
 * block[3]..block[0]) and summed. The sum is scaled to FRAC_BITS fractional
 * bits. The visible statements then either add the slice-buffer value, round,
 * clip to 0..255 and store the result in dst8, or subtract the value from the
 * slice-buffer line.
 *
 * NOTE(review): which of the two is executed presumably depends on the add
 * parameter; the selecting lines are not visible in this fragmentary listing
 * (bare numeric lines are extraction debris, short lines are absent).
 */
void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2520
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2523
for(y=0; y<b_h; y++){
2524
//FIXME ugly misuse of obmc_stride
2525
// obmc_stride doubles as the window height here: obmc2 is the right
// half of the row, obmc3/obmc4 are the same columns obmc_stride/2 rows down.
uint8_t *obmc1= obmc + y*obmc_stride;
2526
uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2527
uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2528
uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2529
dst = slice_buffer_get_line(sb, src_y + y);
2530
for(x=0; x<b_w; x++){
2531
int v= obmc1[x] * block[3][x + y*src_stride]
2532
+obmc2[x] * block[2][x + y*src_stride]
2533
+obmc3[x] * block[1][x + y*src_stride]
2534
+obmc4[x] * block[0][x + y*src_stride];
2536
v <<= 8 - LOG2_OBMC_MAX;
2538
v += 1<<(7 - FRAC_BITS);
2539
v >>= 8 - FRAC_BITS;
2542
v += dst[x + src_x];
2543
v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2544
// Clip to 0..255 (a value above 255 becomes -1, i.e. 255 as uint8_t).
if(v&(~255)) v= ~(v>>31);
2545
dst8[x + y*src_stride] = v;
2547
dst[x + src_x] -= v;
2553
/*
 * Overlapped block motion compensation for one block position, operating on a
 * slice buffer.
 *
 * The four block nodes around the position (lt, rt, lb, rb) are predicted
 * with pred_block(); same_block() is used to detect nodes that share their
 * parameters. The weighted combination is delegated to
 * s->dsp.inner_add_yblock().
 * The function returns early when the block area is empty (b_w or b_h <= 0).
 *
 * NOTE(review): tmp is a variable-length array of src_stride*7*MB_SIZE bytes
 * on the stack.
 * NOTE(review): this listing is fragmentary - the bare numeric lines are
 * extraction debris and short lines (declarations, border adjustments, the
 * statements inside most branches, closing braces) are absent.
 */
//FIXME name cleanup (b_w, block_w, b_width stuff)
2554
static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2555
DWTELEM * dst = NULL;
2556
const int b_width = s->b_width << s->block_max_depth;
2557
const int b_height= s->b_height << s->block_max_depth;
2558
const int b_stride= b_width;
2559
BlockNode *lt= &s->block[b_x + b_y*b_stride];
2560
BlockNode *rt= lt+1;
2561
BlockNode *lb= lt+b_stride;
2562
BlockNode *rb= lb+1;
2564
int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2565
uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2572
}else if(b_x + 1 >= b_width){
2579
}else if(b_y + 1 >= b_height){
2584
if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2588
}else if(src_x + b_w > w){
2592
obmc -= src_y*obmc_stride;
2595
}else if(src_y + b_h> h){
2599
if(b_w<=0 || b_h<=0) return;
2601
assert(src_stride > 2*MB_SIZE + 5);
2602
// old_dst += src_x + src_y*dst_stride;
2603
dst8+= src_x + src_y*src_stride;
2604
// src += src_x + src_y*src_stride;
2606
ptmp= tmp + 3*tmp_step;
2609
pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2611
if(same_block(lt, rt)){
2616
pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2619
if(same_block(lt, lb)){
2621
}else if(same_block(rt, lb)){
2626
pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2629
if(same_block(lt, rb) ){
2631
}else if(same_block(rt, rb)){
2633
}else if(same_block(lb, rb)){
2637
pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2640
// NOTE(review): the four per-quadrant loops below access dst, which is
// initialised to NULL above and not visibly reassigned; presumably this
// section is compiled out in the full file - confirm.
for(y=0; y<b_h; y++){
2641
for(x=0; x<b_w; x++){
2642
int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2643
if(add) dst[x + y*dst_stride] += v;
2644
else dst[x + y*dst_stride] -= v;
2647
for(y=0; y<b_h; y++){
2648
uint8_t *obmc2= obmc + (obmc_stride>>1);
2649
for(x=0; x<b_w; x++){
2650
int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2651
if(add) dst[x + y*dst_stride] += v;
2652
else dst[x + y*dst_stride] -= v;
2655
for(y=0; y<b_h; y++){
2656
uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2657
for(x=0; x<b_w; x++){
2658
int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2659
if(add) dst[x + y*dst_stride] += v;
2660
else dst[x + y*dst_stride] -= v;
2663
for(y=0; y<b_h; y++){
2664
uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2665
uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2666
for(x=0; x<b_w; x++){
2667
int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2668
if(add) dst[x + y*dst_stride] += v;
2669
else dst[x + y*dst_stride] -= v;
2677
s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2678
STOP_TIMER("Inner add y block")
2683
/*
 * Overlapped block motion compensation for one block position, operating on a
 * plain DWTELEM buffer.
 *
 * The four block nodes around the position (lt, rt, lb, rb) are predicted
 * with pred_block(); same_block() is used to detect nodes that share their
 * parameters. In the final loop the four predictions are weighted with the
 * four quadrants of the obmc window (obmc1..obmc4 pair with
 * block[3]..block[0]), summed and scaled to FRAC_BITS fractional bits; the
 * visible statements then either add dst, round, clip to 0..255 and store
 * into dst8, or subtract the value from dst.
 * The function returns early when the block area is empty (b_w or b_h <= 0).
 *
 * NOTE(review): tmp is a variable-length array of src_stride*7*MB_SIZE bytes
 * on the stack.
 * NOTE(review): this listing is fragmentary - the bare numeric lines are
 * extraction debris and short lines (declarations, border adjustments, the
 * statements inside most branches, the add/else selection, closing braces)
 * are absent.
 */
//FIXME name cleanup (b_w, block_w, b_width stuff)
2684
static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2685
const int b_width = s->b_width << s->block_max_depth;
2686
const int b_height= s->b_height << s->block_max_depth;
2687
const int b_stride= b_width;
2688
BlockNode *lt= &s->block[b_x + b_y*b_stride];
2689
BlockNode *rt= lt+1;
2690
BlockNode *lb= lt+b_stride;
2691
BlockNode *rb= lb+1;
2693
int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2694
uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2701
}else if(b_x + 1 >= b_width){
2708
}else if(b_y + 1 >= b_height){
2713
if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2719
}else if(src_x + b_w > w){
2723
obmc -= src_y*obmc_stride;
2726
dst -= src_y*dst_stride;
2728
}else if(src_y + b_h> h){
2732
if(b_w<=0 || b_h<=0) return;
2734
assert(src_stride > 2*MB_SIZE + 5);
2736
dst += src_x + src_y*dst_stride;
2737
dst8+= src_x + src_y*src_stride;
2738
// src += src_x + src_y*src_stride;
2740
ptmp= tmp + 3*tmp_step;
2743
pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2745
if(same_block(lt, rt)){
2750
pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2753
if(same_block(lt, lb)){
2755
}else if(same_block(rt, lb)){
2760
pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2763
if(same_block(lt, rb) ){
2765
}else if(same_block(rt, rb)){
2767
}else if(same_block(lb, rb)){
2771
pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2774
// NOTE(review): the four per-quadrant loops below and the combined loop
// at the end both apply the weighted predictions to dst; presumably only one
// of the two variants is compiled in the full file - confirm.
for(y=0; y<b_h; y++){
2775
for(x=0; x<b_w; x++){
2776
int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2777
if(add) dst[x + y*dst_stride] += v;
2778
else dst[x + y*dst_stride] -= v;
2781
for(y=0; y<b_h; y++){
2782
uint8_t *obmc2= obmc + (obmc_stride>>1);
2783
for(x=0; x<b_w; x++){
2784
int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2785
if(add) dst[x + y*dst_stride] += v;
2786
else dst[x + y*dst_stride] -= v;
2789
for(y=0; y<b_h; y++){
2790
uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2791
for(x=0; x<b_w; x++){
2792
int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2793
if(add) dst[x + y*dst_stride] += v;
2794
else dst[x + y*dst_stride] -= v;
2797
for(y=0; y<b_h; y++){
2798
uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2799
uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2800
for(x=0; x<b_w; x++){
2801
int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2802
if(add) dst[x + y*dst_stride] += v;
2803
else dst[x + y*dst_stride] -= v;
2807
for(y=0; y<b_h; y++){
2808
//FIXME ugly misuse of obmc_stride
2809
uint8_t *obmc1= obmc + y*obmc_stride;
2810
uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2811
uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2812
uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2813
for(x=0; x<b_w; x++){
2814
int v= obmc1[x] * block[3][x + y*src_stride]
2815
+obmc2[x] * block[2][x + y*src_stride]
2816
+obmc3[x] * block[1][x + y*src_stride]
2817
+obmc4[x] * block[0][x + y*src_stride];
2819
v <<= 8 - LOG2_OBMC_MAX;
2821
v += 1<<(7 - FRAC_BITS);
2822
v >>= 8 - FRAC_BITS;
2825
v += dst[x + y*dst_stride];
2826
v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2827
// Clip to 0..255 (a value above 255 becomes -1, i.e. 255 as uint8_t).
if(v&(~255)) v= ~(v>>31);
2828
dst8[x + y*src_stride] = v;
2830
dst[x + y*dst_stride] -= v;
2837
/*
 * Apply the prediction for one row of blocks (mb_y) of a plane, working on
 * slice-buffer lines.
 *
 * For keyframes (or when debug bit 512 is set) no motion compensation is
 * done: the visible statements either convert the lines to 8 bit with a
 * +128 offset, rounding and clipping into the current picture, or subtract
 * 128<<FRAC_BITS from the lines. Otherwise add_yblock_buffered() is called
 * for mb_x = 0..mb_w inclusive; block positions are shifted by half a block
 * (block_w/2) to the upper left.
 *
 * NOTE(review): this listing is fragmentary - the bare numeric lines are
 * extraction debris and short lines (declarations of w/h/x/y, the add/else
 * selection, inner loops, some call arguments, closing braces) are absent.
 */
static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2838
Plane *p= &s->plane[plane_index];
2839
const int mb_w= s->b_width << s->block_max_depth;
2840
const int mb_h= s->b_height << s->block_max_depth;
2842
int block_size = MB_SIZE >> s->block_max_depth;
2843
// Planes other than 0 use half the block size.
int block_w = plane_index ? block_size/2 : block_size;
2844
const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2845
int obmc_stride= plane_index ? block_size : 2*block_size;
2846
int ref_stride= s->current_picture.linesize[plane_index];
2847
uint8_t *dst8= s->current_picture.data[plane_index];
2852
if(s->keyframe || (s->avctx->debug&512)){
2857
for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2859
// DWTELEM * line = slice_buffer_get_line(sb, y);
2860
DWTELEM * line = sb->line[y];
2863
// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2864
int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2866
if(v&(~255)) v= ~(v>>31);
2867
dst8[x + y*ref_stride]= v;
2871
for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2873
// DWTELEM * line = slice_buffer_get_line(sb, y);
2874
DWTELEM * line = sb->line[y];
2877
line[x] -= 128 << FRAC_BITS;
2878
// buf[x + y*w]-= 128<<FRAC_BITS;
2886
// Inclusive upper bound: one more block position than blocks per row.
for(mb_x=0; mb_x<=mb_w; mb_x++){
2889
add_yblock_buffered(s, sb, old_buffer, dst8, obmc,
2890
block_w*mb_x - block_w/2,
2891
block_w*mb_y - block_w/2,
2894
w, ref_stride, obmc_stride,
2898
STOP_TIMER("add_yblock")
2901
STOP_TIMER("predict_slice")
2904
/*
 * Apply the prediction for one row of blocks (mb_y) of a plane, working on
 * the plain coefficient buffer buf.
 *
 * For keyframes (or when debug bit 512 is set) no motion compensation is
 * done: the visible statements either convert buf to 8 bit with a +128
 * offset, rounding and clipping into the current picture, or subtract
 * 128<<FRAC_BITS from buf. Otherwise add_yblock() is called for
 * mb_x = 0..mb_w inclusive; block positions are shifted by half a block
 * (block_w/2) to the upper left.
 *
 * NOTE(review): this listing is fragmentary - the bare numeric lines are
 * extraction debris and short lines (declarations of w/h/x/y, the add/else
 * selection, inner loops, some call arguments, closing braces) are absent.
 */
static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2905
Plane *p= &s->plane[plane_index];
2906
const int mb_w= s->b_width << s->block_max_depth;
2907
const int mb_h= s->b_height << s->block_max_depth;
2909
int block_size = MB_SIZE >> s->block_max_depth;
2910
// Planes other than 0 use half the block size.
int block_w = plane_index ? block_size/2 : block_size;
2911
const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2912
const int obmc_stride= plane_index ? block_size : 2*block_size;
2913
int ref_stride= s->current_picture.linesize[plane_index];
2914
uint8_t *dst8= s->current_picture.data[plane_index];
2919
if(s->keyframe || (s->avctx->debug&512)){
2924
for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2926
int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2928
if(v&(~255)) v= ~(v>>31);
2929
dst8[x + y*ref_stride]= v;
2933
for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2935
buf[x + y*w]-= 128<<FRAC_BITS;
2943
// Inclusive upper bound: one more block position than blocks per row.
for(mb_x=0; mb_x<=mb_w; mb_x++){
2946
add_yblock(s, buf, dst8, obmc,
2947
block_w*mb_x - block_w/2,
2948
block_w*mb_y - block_w/2,
2951
w, ref_stride, obmc_stride,
2953
add, 1, plane_index);
2955
STOP_TIMER("add_yblock")
2958
STOP_TIMER("predict_slice")
2961
/**
 * Apply the block prediction to a whole plane by processing every row of
 * blocks with predict_slice().
 *
 * The loop bound is inclusive (mb_h + 1 rows): predict_slice() places blocks
 * half a block above their nominal position, so one extra row is needed to
 * cover the bottom of the plane.
 *
 * @param s           codec context
 * @param buf         coefficient buffer of the plane
 * @param plane_index index of the plane to process
 * @param add         passed through to predict_slice()
 */
static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
    const int mb_h= s->b_height << s->block_max_depth;
    int mb_y;

    for(mb_y=0; mb_y<=mb_h; mb_y++)
        predict_slice(s, buf, plane_index, add, mb_y);
}
2968
/*
 * Estimate the intra color of block (mb_x, mb_y) for one plane.
 *
 * The block is temporarily turned into an intra block with color 0 and the
 * overlapped prediction of the surrounding area is accumulated into a scratch
 * buffer via add_yblock(). For every pixel inside the picture the remaining
 * difference to the input picture is weighted with the obmc weight of this
 * block; the result is the ratio of the accumulated sums ab and aa, rounded
 * and clipped to 0..255.
 *
 * NOTE(review): the block is saved in backup, but the line restoring it is
 * not visible in this fragmentary listing (bare numeric lines are extraction
 * debris; short lines such as loop headers, declarations and closing braces
 * are absent).
 * NOTE(review): no guard against aa == 0 is visible before the division.
 */
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2970
Plane *p= &s->plane[plane_index];
2971
const int block_size = MB_SIZE >> s->block_max_depth;
2972
const int block_w = plane_index ? block_size/2 : block_size;
2973
const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2974
const int obmc_stride= plane_index ? block_size : 2*block_size;
2975
const int ref_stride= s->current_picture.linesize[plane_index];
2976
uint8_t *src= s-> input_picture.data[plane_index];
2977
DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2978
const int b_stride = s->b_width << s->block_max_depth;
2979
const int w= p->width;
2980
const int h= p->height;
2981
int index= mb_x + mb_y*b_stride;
2982
BlockNode *b= &s->block[index];
2983
BlockNode backup= *b;
2987
b->type|= BLOCK_INTRA;
2988
b->color[plane_index]= 0;
2989
memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
2992
// i selects one of the four block positions overlapping this block.
int mb_x2= mb_x + (i &1) - 1;
2993
int mb_y2= mb_y + (i>>1) - 1;
2994
int x= block_w*mb_x2 + block_w/2;
2995
int y= block_w*mb_y2 + block_w/2;
2997
add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2998
x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
3000
for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
3001
for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
3002
int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
3003
int obmc_v= obmc[index];
3005
// At picture borders the weight of the window part that falls outside
// the picture is added to the weight inside.
if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
3006
if(x<0) obmc_v += obmc[index + block_w];
3007
if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
3008
if(x+block_w>w) obmc_v += obmc[index - block_w];
3009
//FIXME precalc this or simplify it somehow else
3011
d = -dst[index] + (1<<(FRAC_BITS-1));
3013
ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
3014
aa += obmc_v * obmc_v; //FIXME precalculate this
3020
return clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldn't need clipping
3023
/*
 * Approximate the number of bits needed to code the block at (x, y).
 *
 * Intra blocks: 3 + 2 * sum over the three colors of
 * av_log2(2*|left color - block color|).
 * Other blocks: 2 * (1 + av_log2(2*|dmx|) + av_log2(2*|dmy|) + av_log2(2*ref)),
 * where pred_mv() is called on dmx/dmy beforehand.
 *
 * @param w width of the block in block units, used to locate the top-right
 *          neighbour
 *
 * NOTE(review): the neighbour pointers are formed before the range check on
 * x and y; they are not dereferenced in the visible lines before that check.
 * NOTE(review): this listing is fragmentary - the bare numeric lines are
 * extraction debris and short lines (the value returned for out-of-range
 * coordinates, the computation of dmx/dmy, most of a table comment, else and
 * closing braces) are absent.
 */
static inline int get_block_bits(SnowContext *s, int x, int y, int w){
3024
const int b_stride = s->b_width << s->block_max_depth;
3025
const int b_height = s->b_height<< s->block_max_depth;
3026
int index= x + y*b_stride;
3027
BlockNode *b = &s->block[index];
3028
BlockNode *left = x ? &s->block[index-1] : &null_block;
3029
BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
3030
BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
3031
BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
3033
// int mx_context= av_log2(2*ABS(left->mx - top->mx));
3034
// int my_context= av_log2(2*ABS(left->my - top->my));
3036
if(x<0 || x>=b_stride || y>=b_height)
3043
00001XXXX 15-30 8-15
3045
//FIXME try accurate rate
3046
//FIXME intra and inter predictors if surrounding blocks aren't the same type
3047
if(b->type & BLOCK_INTRA){
3048
return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
3049
+ av_log2(2*ABS(left->color[1] - b->color[1]))
3050
+ av_log2(2*ABS(left->color[2] - b->color[2])));
3052
pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
3055
return 2*(1 + av_log2(2*ABS(dmx)) //FIXME kill the 2* can be merged in lambda
3056
+ av_log2(2*ABS(dmy))
3057
+ av_log2(2*b->ref));
3061
/*
 * Rate-distortion cost of the block at (mb_x, mb_y) with its current
 * parameters.
 *
 * The block is predicted over its full overlapped support (2*block_w square)
 * into cur, weighted with obmc_edged and combined with the contribution of
 * the other blocks taken from the scratchpad (pred). The result is clipped
 * to 0..255. The distortion against the input picture is measured with the
 * wavelet comparison functions for FF_CMP_W97 / FF_CMP_W53 or with
 * s->dsp.me_cmp otherwise. The rate is collected with get_block_bits() for
 * the block and neighbours whose prediction depends on it.
 *
 * @return distortion + rate*penalty_factor
 *
 * NOTE(review): cur and tmp are variable-length arrays sized by ref_stride
 * on the stack.
 * NOTE(review): this listing is fragmentary - the bare numeric lines are
 * extraction debris and short lines (declarations, stores, loop headers,
 * else branches, closing braces) are absent.
 */
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
3062
Plane *p= &s->plane[plane_index];
3063
const int block_size = MB_SIZE >> s->block_max_depth;
3064
const int block_w = plane_index ? block_size/2 : block_size;
3065
const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3066
const int obmc_stride= plane_index ? block_size : 2*block_size;
3067
const int ref_stride= s->current_picture.linesize[plane_index];
3068
uint8_t *dst= s->current_picture.data[plane_index];
3069
uint8_t *src= s-> input_picture.data[plane_index];
3070
DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
3071
uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
3072
uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
3073
const int b_stride = s->b_width << s->block_max_depth;
3074
const int b_height = s->b_height<< s->block_max_depth;
3075
const int w= p->width;
3076
const int h= p->height;
3079
const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3080
// Top-left corner of the overlapped support and the part of it that
// lies inside the picture ([x0,x1) x [y0,y1), relative to sx/sy).
int sx= block_w*mb_x - block_w/2;
3081
int sy= block_w*mb_y - block_w/2;
3082
int x0= FFMAX(0,-sx);
3083
int y0= FFMAX(0,-sy);
3084
int x1= FFMIN(block_w*2, w-sx);
3085
int y1= FFMIN(block_w*2, h-sy);
3088
pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
3090
for(y=y0; y<y1; y++){
3091
const uint8_t *obmc1= obmc_edged + y*obmc_stride;
3092
const DWTELEM *pred1 = pred + y*obmc_stride;
3093
uint8_t *cur1 = cur + y*ref_stride;
3094
uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
3095
for(x=x0; x<x1; x++){
3096
int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3097
v = (v + pred1[x]) >> FRAC_BITS;
3098
if(v&(~255)) v= ~(v>>31);
3103
/* copy the regions where obmc[] = (uint8_t)256 */
3104
if(LOG2_OBMC_MAX == 8
3105
&& (mb_x == 0 || mb_x == b_stride-1)
3106
&& (mb_y == 0 || mb_y == b_height-1)){
3115
for(y=y0; y<y1; y++)
3116
memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
3120
/* FIXME rearrange dsputil to fit 32x32 cmp functions */
3121
/* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
3122
/* FIXME cmps overlap but don't cover the wavelet's whole support,
3123
* so improving the score of one block is not strictly guaranteed to
3124
* improve the score of the whole frame, so iterative motion est
3125
* doesn't always converge. */
3126
if(s->avctx->me_cmp == FF_CMP_W97)
3127
distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3128
else if(s->avctx->me_cmp == FF_CMP_W53)
3129
distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3133
// Four 16x16 comparisons cover a 32x32 area.
int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3134
distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3139
distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
3148
rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3150
if(mb_x == b_stride-2)
3151
rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3153
return distortion + rate*penalty_factor;
3156
/*
 * Rate-distortion cost of the 2x2 group of blocks whose top-left block is
 * (mb_x, mb_y).
 *
 * The 3x3 block positions around the group are rendered into the current
 * picture with add_yblock() on top of an all-zero coefficient buffer and
 * compared against the input picture with s->dsp.me_cmp. Parts of a block
 * position that lie outside the picture are filled from the input picture
 * first. The rate is the cost of the group coded as one block of width 2
 * plus the cost of neighbouring blocks; when the four blocks are not
 * identical the four individual blocks are counted as well.
 *
 * @return distortion + rate*penalty_factor
 *
 * NOTE(review): zero_dst is const but is passed to the non-const dst
 * parameter of add_yblock(); with add=1 the visible add path only reads dst
 * - confirm that no write can reach it.
 * NOTE(review): this listing is fragmentary - the bare numeric lines are
 * extraction debris and short lines (declarations, loop headers, the
 * conditions guarding the border copies, closing braces) are absent.
 */
static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3158
Plane *p= &s->plane[plane_index];
3159
const int block_size = MB_SIZE >> s->block_max_depth;
3160
const int block_w = plane_index ? block_size/2 : block_size;
3161
const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3162
const int obmc_stride= plane_index ? block_size : 2*block_size;
3163
const int ref_stride= s->current_picture.linesize[plane_index];
3164
uint8_t *dst= s->current_picture.data[plane_index];
3165
uint8_t *src= s-> input_picture.data[plane_index];
3166
const static DWTELEM zero_dst[4096]; //FIXME
3167
const int b_stride = s->b_width << s->block_max_depth;
3168
const int b_height = s->b_height<< s->block_max_depth;
3169
const int w= p->width;
3170
const int h= p->height;
3173
const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3176
// i selects one position of the 3x3 neighbourhood.
int mb_x2= mb_x + (i%3) - 1;
3177
int mb_y2= mb_y + (i/3) - 1;
3178
int x= block_w*mb_x2 + block_w/2;
3179
int y= block_w*mb_y2 + block_w/2;
3181
add_yblock(s, zero_dst, dst, obmc,
3182
x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3184
//FIXME find a cleaner/simpler way to skip the outside stuff
3185
// Rows above and below the picture are copied from the input so that
// they compare equal.
for(y2= y; y2<0; y2++)
3186
memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3187
for(y2= h; y2<y+block_w; y2++)
3188
memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3190
// NOTE(review): a length of -x is only valid for x<0; presumably guarded
// by a condition on a line that is not visible here.
for(y2= y; y2<y+block_w; y2++)
3191
memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3194
for(y2= y; y2<y+block_w; y2++)
3195
memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3198
assert(block_w== 8 || block_w==16);
3199
distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3203
BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3204
int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3212
rate = get_block_bits(s, mb_x, mb_y, 2);
3213
// Entries 0..3 of dxy are the four blocks of the group and are skipped
// when the group is merged; entries 4..8 are neighbouring blocks.
for(i=merged?4:0; i<9; i++){
3214
static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3215
rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3218
return distortion + rate*penalty_factor;
3221
/*
 * Try a candidate parameter set p for the block at (mb_x, mb_y) and evaluate
 * it with get_block_rd() on plane 0.
 *
 * The visible statements cover two cases: storing p[0..2] as colors and
 * setting BLOCK_INTRA, or looking the candidate (p[0], p[1]) up in
 * s->me_cache, recording it there and clearing BLOCK_INTRA. The original
 * block is saved in backup beforehand.
 *
 * NOTE(review): presumably the intra parameter selects between the two
 * cases, and the result is compared against *best_rd with the block restored
 * from backup when it is not better - those lines are not visible in this
 * fragmentary listing (bare numeric lines are extraction debris; short lines
 * are absent). Confirm against the full file.
 */
static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3222
const int b_stride= s->b_width << s->block_max_depth;
3223
BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3224
BlockNode backup= *block;
3225
int rd, index, value;
3227
assert(mb_x>=0 && mb_y>=0);
3228
assert(mb_x<b_stride);
3231
block->color[0] = p[0];
3232
block->color[1] = p[1];
3233
block->color[2] = p[2];
3234
block->type |= BLOCK_INTRA;
3236
// Cache slot and tag for this candidate; the tag also contains the
// current cache generation and the block's reference index.
index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3237
value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3238
if(s->me_cache[index] == value)
3240
s->me_cache[index]= value;
3244
block->type &= ~BLOCK_INTRA;
3247
rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3259
/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3260
/**
 * Convenience wrapper around check_block() for a candidate given as two
 * separate integers.
 *
 * The two values are packed into a local array and forwarded with intra = 0.
 * NOTE(review): check_block() declares its parameter as int p[3] while only
 * two elements are provided here; this is fine only as long as check_block()
 * does not read p[2] when intra is 0 - confirm against check_block().
 *
 * @param p0,p1      candidate values, forwarded as p[0] and p[1]
 * @param obmc_edged window weights forwarded to check_block()
 * @param best_rd    forwarded to check_block()
 * @return the value returned by check_block()
 */
static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
    int p[2] = {p0, p1};
    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
}
3265
/*
 * Try the candidate (p0, p1) for the 2x2 group of blocks whose top-left
 * block is (mb_x, mb_y) and evaluate it with get_4block_rd() on plane 0.
 *
 * The four blocks are saved in backup[]. The candidate is looked up in
 * s->me_cache and recorded there, BLOCK_INTRA is cleared on the first block
 * and the first block is copied to the other three. The last visible
 * statements restore all four blocks from backup[].
 * mb_x and mb_y must both be even (asserted).
 *
 * NOTE(review): presumably p0/p1 are stored as the motion vector and ref as
 * the reference index before the evaluation, and the restore only happens
 * when the result is not better than *best_rd - those lines are not visible
 * in this fragmentary listing (bare numeric lines are extraction debris;
 * short lines are absent). Confirm against the full file.
 */
static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3266
const int b_stride= s->b_width << s->block_max_depth;
3267
BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3268
BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3269
int rd, index, value;
3271
assert(mb_x>=0 && mb_y>=0);
3272
assert(mb_x<b_stride);
3273
assert(((mb_x|mb_y)&1) == 0);
3275
// Cache slot and tag for this candidate; the tag also contains the
// current cache generation and the block's reference index.
index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3276
value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3277
if(s->me_cache[index] == value)
3279
s->me_cache[index]= value;
3284
block->type &= ~BLOCK_INTRA;
3285
block[1]= block[b_stride]= block[b_stride+1]= *block;
3287
rd= get_4block_rd(s, mb_x, mb_y, 0);
3294
block[0]= backup[0];
3295
block[1]= backup[1];
3296
block[b_stride]= backup[2];
3297
block[b_stride+1]= backup[3];
3302
/**
 * Iterative motion estimation over the block grid of s.
 *
 * Visible stages:
 *  - a sweep calling encode_q_branch() on every top-level block, with
 *    s->block_state saved before and restored after;
 *  - up to 25 passes over all blocks at the finest block size, each testing
 *    intra and inter candidates through check_block()/check_block_inter();
 *  - when block_max_depth == 1, a final sweep over 2x2 block groups using
 *    check_4block_inter().
 */
static void iterative_me(SnowContext *s){
3303
int pass, mb_x, mb_y;
3304
// grid dimensions in units of the smallest block
const int b_width = s->b_width << s->block_max_depth;
3305
const int b_height= s->b_height << s->block_max_depth;
3306
const int b_stride= b_width;
3310
// snapshot the range coder and the block_state contexts before the encode_q_branch() sweep
RangeCoder r = s->c;
3311
uint8_t state[sizeof(s->block_state)];
3312
memcpy(state, s->block_state, sizeof(s->block_state));
3313
for(mb_y= 0; mb_y<s->b_height; mb_y++)
3314
for(mb_x= 0; mb_x<s->b_width; mb_x++)
3315
encode_q_branch(s, 0, mb_x, mb_y);
3317
memcpy(s->block_state, state, sizeof(s->block_state));
3320
// refinement passes (at most 25)
for(pass=0; pass<25; pass++){
3323
for(mb_y= 0; mb_y<b_height; mb_y++){
3324
for(mb_x= 0; mb_x<b_width; mb_x++){
3325
int dia_change, i, j, ref;
3326
int best_rd= INT_MAX, ref_rd;
3327
BlockNode backup, ref_b;
3328
const int index= mb_x + mb_y * b_stride;
3329
BlockNode *block= &s->block[index];
3330
// pointers to the eight neighbouring blocks; NULL where the neighbour lies outside the grid
BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3331
BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3332
BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3333
BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3334
BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3335
BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3336
BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3337
BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3338
const int b_w= (MB_SIZE >> s->block_max_depth);
3339
// per-block copy of the OBMC window (2*b_w square), adjusted below for blocks on the grid border
uint8_t obmc_edged[b_w*2][b_w*2];
3341
// BLOCK_OPT is tested on passes after the first and set on the block right after
if(pass && (block->type & BLOCK_OPT))
3343
block->type |= BLOCK_OPT;
3347
// start a new me_cache generation; the cache is cleared when the generation counter is 0
if(!s->me_cache_generation)
3348
memset(s->me_cache, 0, sizeof(s->me_cache));
3349
s->me_cache_generation += 1<<22;
3354
memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3356
// fold the window weights inward on the sides where the block touches the grid border
for(y=0; y<b_w*2; y++)
3357
memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3358
if(mb_x==b_stride-1)
3359
for(y=0; y<b_w*2; y++)
3360
memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3362
for(x=0; x<b_w*2; x++)
3363
obmc_edged[0][x] += obmc_edged[b_w-1][x];
3364
for(y=1; y<b_w; y++)
3365
memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3367
if(mb_y==b_height-1){
3368
for(x=0; x<b_w*2; x++)
3369
obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3370
for(y=b_w; y<b_w*2-1; y++)
3371
memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3375
//skip stuff outside the picture
3376
if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3378
uint8_t *src= s-> input_picture.data[0];
3379
uint8_t *dst= s->current_picture.data[0];
3380
const int stride= s->current_picture.linesize[0];
3381
const int block_w= MB_SIZE >> s->block_max_depth;
3382
// top-left corner of the 2*block_w window centred on this block (may be negative)
const int sx= block_w*mb_x - block_w/2;
3383
const int sy= block_w*mb_y - block_w/2;
3384
const int w= s->plane[0].width;
3385
const int h= s->plane[0].height;
3389
// copy input-picture pixels into current_picture for the parts of the window handled by the loops below
memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3390
for(y=h; y<sy+block_w*2; y++)
3391
memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3393
for(y=sy; y<sy+block_w*2; y++)
3394
memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3396
if(sx+block_w*2 > w){
3397
for(y=sy; y<sy+block_w*2; y++)
3398
memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3402
// intra(black) = neighbors' contribution to the current block
3404
color[i]= get_dc(s, mb_x, mb_y, i);
3406
// get previous score (cannot be cached due to OBMC)
3407
if(pass > 0 && (block->type&BLOCK_INTRA)){
3408
int color0[3]= {block->color[0], block->color[1], block->color[2]};
3409
check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3411
check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3415
// try each reference frame
for(ref=0; ref < s->ref_frames; ref++){
3416
// mvr points at this block's entry in the per-reference vector array, so mvr[k] addresses the block k entries away
int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3417
if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3422
// candidates: stored vector, zero vector, then the vectors stored for the top/left/right/bottom blocks
check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3423
check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3425
check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3427
check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3429
check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3431
check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3434
//FIXME avoid subpel interpol / round to nearest integer
3437
// diamond-shaped search around the current vector in steps of 4, up to max(dia_size, 1) rings
for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3439
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3440
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3441
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3442
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3448
// the eight unit offsets around the current vector
static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3451
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3453
//FIXME or try the standard 2 pass qpel or similar
3455
// remember the vector found for this reference
mvr[0][0]= block->mx;
3456
mvr[0][1]= block->my;
3457
if(ref_rd > best_rd){
3465
// intra candidate using the colour obtained from get_dc()
check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3466
//FIXME RD style color selection
3468
// the block changed: clear BLOCK_OPT on all existing neighbours
if(!same_block(block, &backup)){
3469
if(tb ) tb ->type &= ~BLOCK_OPT;
3470
if(lb ) lb ->type &= ~BLOCK_OPT;
3471
if(rb ) rb ->type &= ~BLOCK_OPT;
3472
if(bb ) bb ->type &= ~BLOCK_OPT;
3473
if(tlb) tlb->type &= ~BLOCK_OPT;
3474
if(trb) trb->type &= ~BLOCK_OPT;
3475
if(blb) blb->type &= ~BLOCK_OPT;
3476
if(brb) brb->type &= ~BLOCK_OPT;
3481
av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3486
// extra sweep over 2x2 block groups, only when block_max_depth is 1
if(s->block_max_depth == 1){
3488
for(mb_y= 0; mb_y<b_height; mb_y+=2){
3489
for(mb_x= 0; mb_x<b_width; mb_x+=2){
3491
int best_rd, init_rd;
3492
const int index= mb_x + mb_y * b_stride;
3495
b[0]= &s->block[index];
3497
b[2]= b[0]+b_stride;
3499
// test whether the four blocks of the group are already identical
if(same_block(b[0], b[1]) &&
3500
same_block(b[0], b[2]) &&
3501
same_block(b[0], b[3]))
3504
if(!s->me_cache_generation)
3505
memset(s->me_cache, 0, sizeof(s->me_cache));
3506
s->me_cache_generation += 1<<22;
3508
init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3510
//FIXME more multiref search?
3511
// candidate: rounded average of the four vectors, with reference 0
check_4block_inter(s, mb_x, mb_y,
3512
(b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3513
(b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3516
// candidates: the vector/reference of each non-intra block in the group
if(!(b[i]->type&BLOCK_INTRA))
3517
check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3519
if(init_rd != best_rd)
3523
av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3527
/**
 * Quantize the coefficients of subband b in place.
 *
 * The step qmul is derived from s->qlog + b->qlog (clipped to
 * [0, QROOT*16]) via the qexp table. Nothing is done when s->qlog is
 * LOSSLESS_QLOG.
 *
 * @param src    coefficient buffer, addressed as src[x + y*stride]
 * @param stride distance between rows, in elements
 * @param bias   if nonzero the rounding offset is 0, otherwise (3*qmul)>>3
 */
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3528
const int level= b->level;
3529
const int w= b->width;
3530
const int h= b->height;
3531
const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3532
const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3533
int x,y, thres1, thres2;
3536
if(s->qlog == LOSSLESS_QLOG) return;
3538
bias= bias ? 0 : (3*qmul)>>3;
3539
thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3545
// variant without rounding offset: divide by qmul directly
int i= src[x + y*stride];
3547
// single unsigned compare used as a range test on i against the thresholds
if((unsigned)(i+thres1) > thres2){
3550
i/= qmul; //FIXME optimize
3551
src[x + y*stride]= i;
3555
i/= qmul; //FIXME optimize
3556
src[x + y*stride]= -i;
3559
src[x + y*stride]= 0;
3565
// variant with rounding offset: add bias before dividing by qmul
int i= src[x + y*stride];
3567
if((unsigned)(i+thres1) > thres2){
3570
i= (i + bias) / qmul; //FIXME optimize
3571
src[x + y*stride]= i;
3575
i= (i + bias) / qmul; //FIXME optimize
3576
src[x + y*stride]= -i;
3579
src[x + y*stride]= 0;
3583
if(level+1 == s->spatial_decomposition_count){
3584
// STOP_TIMER("quantize")
3588
/**
 * Dequantize rows [start_y, end_y) of subband b, working on lines fetched
 * from the slice buffer sb.
 *
 * Each value is scaled by qmul with qadd added before the right shift by
 * QEXPSHIFT; the visible statements handle the two signs separately so the
 * result is symmetric around zero. Nothing is done when s->qlog is
 * LOSSLESS_QLOG.
 */
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3589
const int w= b->width;
3590
const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3591
const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3592
// reconstruction offset derived from s->qbias
const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3596
if(s->qlog == LOSSLESS_QLOG) return;
3598
for(y=start_y; y<end_y; y++){
3599
// DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3600
// slice-buffer line for subband row y: row index scaled by stride_line plus the band's y offset, then shifted by its x offset
DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3604
line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3606
line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3610
if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3611
STOP_TIMER("dquant")
3615
/**
 * Dequantize subband b in place in a flat buffer.
 *
 * Uses the same qmul/qadd derivation and the same two sign-specific
 * formulas as dequantize_slice_buffered(), but addresses coefficients as
 * src[x + y*stride]. Nothing is done when s->qlog is LOSSLESS_QLOG.
 */
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3616
const int w= b->width;
3617
const int h= b->height;
3618
const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3619
const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3620
const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3624
if(s->qlog == LOSSLESS_QLOG) return;
3628
int i= src[x + y*stride];
3630
src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3632
src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3636
if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3637
STOP_TIMER("dquant")
3641
/**
 * Subtract a spatial prediction from every coefficient of subband b, in
 * place.
 *
 * The visible statements show three predictor forms built from the left
 * (src[i-1]) and upper-row (src[i-stride]) neighbours, two of them using
 * mid_pred(). The inverse operation is correlate().
 */
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3642
const int w= b->width;
3643
const int h= b->height;
3646
// walk from the last row/column backwards, so the left and upper neighbours read below have not been modified yet
for(y=h-1; y>=0; y--){
3647
for(x=w-1; x>=0; x--){
3648
int i= x + y*stride;
3652
// median of left, top and top-right; plain left neighbour on the first row or last column
if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3653
else src[i] -= src[i - 1];
3655
// median of left, top and (left + top - top-left); plain left neighbour on the first row
if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3656
else src[i] -= src[i - 1];
3659
// top neighbour only
if(y) src[i] -= src[i - stride];
3665
/**
 * Add the spatial prediction back to rows [start_y, end_y) of subband b,
 * working on lines fetched from the slice buffer sb.
 *
 * Uses the same three predictor forms as correlate(), with prev standing
 * for the previous row's line.
 */
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3666
const int w= b->width;
3675
// line for row start_y-1 is fetched before the loop
line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3677
for(y=start_y; y<end_y; y++){
3679
// line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3680
line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3684
if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3685
else line[x] += line[x - 1];
3687
if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3688
else line[x] += line[x - 1];
3691
if(y) line[x] += prev[x];
3696
// STOP_TIMER("correlate")
3699
/**
 * Add the spatial prediction back to every coefficient of subband b, in
 * place in a flat buffer; the counterpart of decorrelate().
 *
 * The three visible predictor forms mirror those of decorrelate(), with
 * += in place of -=.
 */
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3700
const int w= b->width;
3701
const int h= b->height;
3706
int i= x + y*stride;
3710
// median of left, top and top-right; plain left neighbour on the first row or last column
if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3711
else src[i] += src[i - 1];
3713
// median of left, top and (left + top - top-left); plain left neighbour on the first row
if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3714
else src[i] += src[i - 1];
3717
// top neighbour only
if(y) src[i] += src[i - stride];
3723
/**
 * Write the frame header to the range coder s->c.
 *
 * The keyframe flag is coded with a freshly initialised kstate context.
 * The field order visible here matches the read order in decode_header().
 */
static void encode_header(SnowContext *s){
3724
int plane_index, level, orientation;
3727
memset(kstate, MID_STATE, sizeof(kstate));
3729
put_rac(&s->c, kstate, s->keyframe);
3730
if(s->keyframe || s->always_reset)
3733
put_symbol(&s->c, s->header_state, s->version, 0);
3734
put_rac(&s->c, s->header_state, s->always_reset);
3735
put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3736
put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3737
put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3738
put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3739
put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3740
put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3741
put_rac(&s->c, s->header_state, s->spatial_scalability);
3742
// put_rac(&s->c, s->header_state, s->rate_scalability);
3743
// stored minus one; decode_header() adds the one back
put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3745
// per-band qlog: only planes 0 and 1 are written and orientation 2 is skipped;
// decode_header() fills plane 2 from plane 1 and orientation 2 from orientation 1
for(plane_index=0; plane_index<2; plane_index++){
3746
for(level=0; level<s->spatial_decomposition_count; level++){
3747
for(orientation=level ? 1:0; orientation<4; orientation++){
3748
if(orientation==2) continue;
3749
put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3754
put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3755
put_symbol(&s->c, s->header_state, s->qlog, 1);
3756
put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3757
put_symbol(&s->c, s->header_state, s->qbias, 1);
3758
put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
3761
/**
 * Read the frame header from the range coder s->c into s.
 *
 * Mirrors encode_header(). Unsupported version, spatial_decomposition_type
 * and block_max_depth values are reported through av_log().
 */
static int decode_header(SnowContext *s){
3762
int plane_index, level, orientation;
3765
memset(kstate, MID_STATE, sizeof(kstate));
3767
s->keyframe= get_rac(&s->c, kstate);
3768
if(s->keyframe || s->always_reset)
3771
s->version= get_symbol(&s->c, s->header_state, 0);
3773
// NOTE(review): this log string has no trailing "\n" -- confirm whether that is intended
av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3776
s->always_reset= get_rac(&s->c, s->header_state);
3777
s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3778
s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3779
s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3780
s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3781
s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3782
s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3783
s->spatial_scalability= get_rac(&s->c, s->header_state);
3784
// s->rate_scalability= get_rac(&s->c, s->header_state);
3785
// the stream stores max_ref_frames-1
s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3787
// per-band qlog: plane 2 reuses plane 1, orientation 2 reuses orientation 1, everything else is read from the stream
for(plane_index=0; plane_index<3; plane_index++){
3788
for(level=0; level<s->spatial_decomposition_count; level++){
3789
for(orientation=level ? 1:0; orientation<4; orientation++){
3791
if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3792
else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3793
else q= get_symbol(&s->c, s->header_state, 1);
3794
s->plane[plane_index].band[level][orientation].qlog= q;
3800
s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3801
if(s->spatial_decomposition_type > 2){
3802
// NOTE(review): no trailing "\n" in this log string either
av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3806
s->qlog= get_symbol(&s->c, s->header_state, 1);
3807
s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3808
s->qbias= get_symbol(&s->c, s->header_state, 1);
3809
s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3810
// only 0 and 1 are accepted; an out-of-range value is reported and replaced by 0
if(s->block_max_depth > 1 || s->block_max_depth < 0){
3811
// NOTE(review): the message says "too large" but this branch also covers negative values; no trailing "\n"
av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3812
s->block_max_depth= 0;
3819
/**
 * Initialisation helper that iterates QROOT times, multiplying a running
 * value v by 2^(1/QROOT) on each step.
 * Presumably this fills the qexp table used by the quantizer functions --
 * the store itself is not visible here; TODO confirm.
 */
static void init_qexp(void){
3823
for(i=0; i<QROOT; i++){
3825
v *= pow(2, 1.0 / QROOT);
3829
/**
 * Initialisation shared by the encoder and the decoder.
 *
 * Sets up the DSP function tables, default header fields (decomposition
 * count, chroma shifts, mv_scale, block_max_depth), allocates the spatial
 * DWT buffer, lays out the subbands of all three planes inside that buffer,
 * fills scale_mv_ref and requests the mconly_picture buffer.
 */
static int common_init(AVCodecContext *avctx){
3830
SnowContext *s = avctx->priv_data;
3832
int level, orientation, plane_index, dec;
3837
dsputil_init(&s->dsp, avctx);
3840
s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3841
s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3842
s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3843
s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3844
s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3845
s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3864
#define mcfh(dx,dy)\
3865
s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3866
s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3867
mc_block_hpel ## dx ## dy ## 16;\
3868
s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3869
s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3870
mc_block_hpel ## dx ## dy ## 8;
3880
// fixed number of spatial decomposition levels
dec= s->spatial_decomposition_count= 5;
3881
s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3883
s->chroma_h_shift= 1; //FIXME XXX
3884
s->chroma_v_shift= 1;
3886
// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3888
width= s->avctx->width;
3889
height= s->avctx->height;
3891
// NOTE(review): no NULL check on this allocation is visible here -- confirm
s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3893
s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3894
s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3896
// describe every subband of every plane; all bands point into spatial_dwt_buffer
for(plane_index=0; plane_index<3; plane_index++){
3897
int w= s->avctx->width;
3898
int h= s->avctx->height;
3901
w>>= s->chroma_h_shift;
3902
h>>= s->chroma_v_shift;
3904
s->plane[plane_index].width = w;
3905
s->plane[plane_index].height= h;
3906
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3907
for(level=s->spatial_decomposition_count-1; level>=0; level--){
3908
for(orientation=level ? 1 : 0; orientation<4; orientation++){
3909
SubBand *b= &s->plane[plane_index].band[level][orientation];
3911
b->buf= s->spatial_dwt_buffer;
3913
// row stride in the shared buffer doubles for every level below the top decomposition count
b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3914
// half-size with rounding up unless the corresponding orientation bit selects the other half
b->width = (w + !(orientation&1))>>1;
3915
b->height= (h + !(orientation>1))>>1;
3917
b->stride_line = 1 << (s->spatial_decomposition_count - level);
3918
b->buf_x_offset = 0;
3919
b->buf_y_offset = 0;
3923
b->buf_x_offset = (w+1)>>1;
3926
b->buf += b->stride>>1;
3927
b->buf_y_offset = b->stride_line >> 1;
3931
b->parent= &s->plane[plane_index].band[level-1][orientation];
3932
// NOTE(review): no NULL check on this allocation is visible here -- confirm
b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3939
// scale_mv_ref[i][j] = 256*(i+1)/(j+1), integer division
for(i=0; i<MAX_REF_FRAMES; i++)
3940
for(j=0; j<MAX_REF_FRAMES; j++)
3941
scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3945
width= s->width= avctx->width;
3946
height= s->height= avctx->height;
3948
assert(width && height);
3950
// NOTE(review): the return value of get_buffer() is not used on this line
s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3955
/**
 * Convert a quality scale value to a qlog value.
 *
 * Computes QROOT * log2(qscale / FF_QP2LAMBDA), rounded with rint(), plus
 * the constant offset 61*QROOT/8.
 *
 * @param qscale quality value in FF_QP2LAMBDA units
 * @return the corresponding qlog
 */
static int qscale2qlog(int qscale){
3956
return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3957
+ 61*QROOT/8; //<64 >60
3960
/**
 * Single-pass rate control step.
 *
 * Estimates the complexity of the frame from the luma (plane 0) subbands,
 * stores it in the MpegEncContext picture statistics, then asks
 * ff_rate_estimate_qscale() for a quality and derives s->lambda and
 * s->qlog from it.
 */
static void ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3962
/* estimate the frame's complexity as a sum of weighted dwt coefs.
3963
* FIXME we know exact mv bits at this point,
3964
* but ratecontrol isn't set up to include them. */
3965
uint32_t coef_sum= 0;
3966
int level, orientation;
3968
for(level=0; level<s->spatial_decomposition_count; level++){
3969
for(orientation=level ? 1 : 0; orientation<4; orientation++){
3970
SubBand *b= &s->plane[0].band[level][orientation];
3971
DWTELEM *buf= b->buf;
3972
const int w= b->width;
3973
const int h= b->height;
3974
const int stride= b->stride;
3975
// weighting uses a fixed base of 2*QROOT plus the band's own qlog, not s->qlog
const int qlog= clip(2*QROOT + b->qlog, 0, QROOT*16);
3976
const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3977
// 16.16 fixed-point reciprocal of qmul
const int qdiv= (1<<16)/qmul;
3980
// temporarily decorrelate the band; correlate() below undoes it after summing
decorrelate(s, b, buf, stride, 1, 0);
3983
coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3985
correlate(s, b, buf, stride, 1, 0);
3989
/* ugly, ratecontrol just takes a sqrt again */
3990
coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3991
assert(coef_sum < INT_MAX);
3993
// the sum goes into mb_var_sum for I frames and into mc_mb_var_sum otherwise; the other field is zeroed
if(pict->pict_type == I_TYPE){
3994
s->m.current_picture.mb_var_sum= coef_sum;
3995
s->m.current_picture.mc_mb_var_sum= 0;
3997
s->m.current_picture.mc_mb_var_sum= coef_sum;
3998
s->m.current_picture.mb_var_sum= 0;
4001
pict->quality= ff_rate_estimate_qscale(&s->m, 1);
4002
s->lambda= pict->quality * 3/2;
4003
s->qlog= qscale2qlog(pict->quality);
4006
/**
 * Derive the qlog of every subband of plane p from the response of the
 * inverse DWT to a single coefficient.
 *
 * For each band: the DWT buffer is zeroed, one coefficient near the centre
 * of the band is set to 256*256, the inverse DWT is run, an error value is
 * accumulated over the resulting image, and b->qlog is computed from it.
 */
static void calculate_vissual_weight(SnowContext *s, Plane *p){
4007
int width = p->width;
4008
int height= p->height;
4009
int level, orientation, x, y;
4011
for(level=0; level<s->spatial_decomposition_count; level++){
4012
for(orientation=level ? 1 : 0; orientation<4; orientation++){
4013
SubBand *b= &p->band[level][orientation];
4014
DWTELEM *buf= b->buf;
4017
// NOTE(review): sized with sizeof(int) while the buffer is allocated as DWTELEM elsewhere -- confirm the two match
memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
4018
// single impulse in the middle of the band
buf[b->width/2 + b->height/2*b->stride]= 256*256;
4019
ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
4020
for(y=0; y<height; y++){
4021
for(x=0; x<width; x++){
4022
int64_t d= s->spatial_dwt_buffer[x + y*width];
4027
// log(...)/log(2^(1/QROOT)) expresses the ratio as a logarithm in base 2^(1/QROOT); +0.5 rounds before the int cast
b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
4028
// av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
4033
/**
 * Encoder initialisation.
 *
 * Visible steps: reject use unless strict_std_compliance allows
 * experimental codecs; reject the 9/7 wavelet with QSCALE and
 * global_quality 0; allocate motion-estimation scratch buffers; clamp
 * max_ref_frames to [1, MAX_REF_FRAMES]; set up rate control and the
 * pass-1 stats buffer when requested; compute per-band weights for the
 * three planes; check the pixel format; set the compare functions; request
 * the input picture buffer; and, for ME_ITER, allocate the per-reference
 * vector and score arrays.
 */
static int encode_init(AVCodecContext *avctx)
4035
SnowContext *s = avctx->priv_data;
4038
if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
4039
av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
4040
"use vstrict=-2 / -strict -2 to use it anyway\n");
4044
if(avctx->prediction_method == DWT_97
4045
&& (avctx->flags & CODEC_FLAG_QSCALE)
4046
&& avctx->global_quality == 0){
4047
av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
4057
s->m.flags = avctx->flags;
4058
s->m.bit_rate= avctx->bit_rate;
4060
// NOTE(review): none of the allocations below is followed by a visible NULL check -- confirm
s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4061
s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4062
s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4063
s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4064
h263_encode_init(&s->m); //mv_penalty
4066
// clamp the requested reference count to [1, MAX_REF_FRAMES]
s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
4068
if(avctx->flags&CODEC_FLAG_PASS1){
4069
if(!avctx->stats_out)
4070
avctx->stats_out = av_mallocz(256);
4072
// rate control is initialised for pass 2 and whenever QSCALE is not set
if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4073
if(ff_rate_control_init(&s->m) < 0)
4076
// pass1_rc is set only when neither QSCALE nor PASS2 is set
s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4078
for(plane_index=0; plane_index<3; plane_index++){
4079
calculate_vissual_weight(s, &s->plane[plane_index]);
4083
avctx->coded_frame= &s->current_picture;
4084
// only PIX_FMT_YUV420P has an active case label; the other formats are commented out
switch(avctx->pix_fmt){
4085
// case PIX_FMT_YUV444P:
4086
// case PIX_FMT_YUV422P:
4087
case PIX_FMT_YUV420P:
4089
// case PIX_FMT_YUV411P:
4090
// case PIX_FMT_YUV410P:
4091
s->colorspace_type= 0;
4093
/* case PIX_FMT_RGBA32:
4097
av_log(avctx, AV_LOG_ERROR, "format not supported\n");
4100
// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4101
s->chroma_h_shift= 1;
4102
s->chroma_v_shift= 1;
4104
ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4105
ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4107
s->avctx->get_buffer(s->avctx, &s->input_picture);
4109
if(s->avctx->me_method == ME_ITER){
4111
int size= s->b_width * s->b_height << 2*s->block_max_depth;
4112
for(i=0; i<s->max_ref_frames; i++){
4113
s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4114
s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4121
// Prepare the picture buffers for a new frame.
//
// If a current picture exists its three planes get their edges drawn.
// The reference list s->last_picture[] is then shifted by one entry: the
// oldest entry is moved into s->current_picture (via tmp) and the previous
// current picture becomes last_picture[0]. Finally a new buffer is requested
// for current_picture and its key_frame flag is set from s->keyframe.
// A get_buffer() failure is logged.
static int frame_start(SnowContext *s){
4123
int w= s->avctx->width; //FIXME round up to x16 ?
4124
int h= s->avctx->height;
4126
if(s->current_picture.data[0]){
4127
// planes 1 and 2 use half the width, height and edge size of plane 0
draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4128
draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4129
draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4132
// rotate the reference list by one position
tmp= s->last_picture[s->max_ref_frames-1];
4133
memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4134
s->last_picture[0]= s->current_picture;
4135
s->current_picture= tmp;
4141
// scan the references that have data, testing whether the entry before position i is a keyframe
for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4142
if(i && s->last_picture[i-1].key_frame)
4147
s->current_picture.reference= 1;
4148
if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4149
av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4153
s->current_picture.key_frame= s->keyframe;
4158
// Encode one frame into buf.
//
// Visible stages: initialise the range encoder; copy the caller's picture
// into s->input_picture; choose keyframe/picture type and quality (from the
// two-pass rate control entry, or from gop_size); derive qlog and lambda; for
// P frames fill in the MpegEncContext fields used by motion estimation; then
// for each of the three planes run prediction, forward DWT, quantisation,
// subband coding, and the matching reconstruction (dequantise, inverse DWT,
// prediction add-back); finally update statistics and rate control.
//
// data is the input AVFrame.
// Returns the value of ff_rac_terminate() on the range coder.
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4159
SnowContext *s = avctx->priv_data;
4160
RangeCoder * const c= &s->c;
4161
AVFrame *pict = data;
4162
const int width= s->avctx->width;
4163
const int height= s->avctx->height;
4164
int level, orientation, plane_index, i, y;
4166
ff_init_range_encoder(c, buf, buf_size);
4167
ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4171
// copy the input picture row by row into s->input_picture
for(y=0; y<(height>>shift); y++)
4172
memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4173
&pict->data[i][y * pict->linesize[i]],
4176
s->new_picture = *pict;
4178
s->m.picture_number= avctx->frame_number;
4179
// pass 2: the picture type comes from the rate control entry for this frame
if(avctx->flags&CODEC_FLAG_PASS2){
4181
pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4182
s->keyframe= pict->pict_type==FF_I_TYPE;
4183
if(!(avctx->flags&CODEC_FLAG_QSCALE))
4184
pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4186
// otherwise: keyframe when gop_size is 0 or at every multiple of gop_size
s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4188
pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4191
// first frame with single-pass rate control gets a fixed starting quality
if(s->pass1_rc && avctx->frame_number == 0)
4192
pict->quality= 2*FF_QP2LAMBDA;
4194
s->qlog= qscale2qlog(pict->quality);
4195
s->lambda = pict->quality * 3/2;
4197
// negative qlog, or quality 0 with QSCALE set, selects lossless mode
if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4198
s->qlog= LOSSLESS_QLOG;
4200
}//else keep previous frame's qlog until after motion est
4204
s->m.current_picture_ptr= &s->m.current_picture;
4205
// P frame: populate the MpegEncContext s->m with the fields motion estimation reads
if(pict->pict_type == P_TYPE){
4206
// size in 16x16 units, rounded up
int block_width = (width +15)>>4;
4207
int block_height= (height+15)>>4;
4208
int stride= s->current_picture.linesize[0];
4210
assert(s->current_picture.data[0]);
4211
assert(s->last_picture[0].data[0]);
4213
s->m.avctx= s->avctx;
4214
s->m.current_picture.data[0]= s->current_picture.data[0];
4215
s->m. last_picture.data[0]= s->last_picture[0].data[0];
4216
s->m. new_picture.data[0]= s-> input_picture.data[0];
4217
s->m. last_picture_ptr= &s->m. last_picture;
4219
s->m. last_picture.linesize[0]=
4220
s->m. new_picture.linesize[0]=
4221
s->m.current_picture.linesize[0]= stride;
4222
s->m.uvlinesize= s->current_picture.linesize[1];
4224
s->m.height= height;
4225
s->m.mb_width = block_width;
4226
s->m.mb_height= block_height;
4227
s->m.mb_stride= s->m.mb_width+1;
4228
s->m.b8_stride= 2*s->m.mb_width+1;
4230
s->m.pict_type= pict->pict_type;
4231
s->m.me_method= s->avctx->me_method;
4232
s->m.me.scene_change_score=0;
4233
s->m.flags= s->avctx->flags;
4234
s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4235
s->m.out_format= FMT_H263;
4236
s->m.unrestricted_mv= 1;
4238
s->m.lambda = s->lambda;
4239
s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4240
s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4242
s->m.dsp= s->dsp; //move
4249
s->m.pict_type = pict->pict_type;
4250
s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
4253
// bit accounting: bytes written so far count as misc bits; what is written between this and the next measurement counts as mv bits
s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4255
s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4257
for(plane_index=0; plane_index<3; plane_index++){
4258
Plane *p= &s->plane[plane_index];
4262
// int bits= put_bits_count(&s->c.pb);
4264
if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4266
if(pict->data[plane_index]) //FIXME gray hack
4269
// load the plane into the DWT buffer, scaled up by FRAC_BITS
s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4272
predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
4275
// scene change detected on a P frame (not in pass 2): restart the range encoder and switch to an I frame
&& pict->pict_type == P_TYPE
4276
&& !(avctx->flags&CODEC_FLAG_PASS2)
4277
&& s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4278
ff_init_range_encoder(c, buf, buf_size);
4279
ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4280
pict->pict_type= FF_I_TYPE;
4282
s->current_picture.key_frame=1;
4287
if(s->qlog == LOSSLESS_QLOG){
4290
// lossless: drop the FRAC_BITS fraction again, with rounding
s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4295
ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4297
// single-pass rate control runs once, after the luma DWT
if(s->pass1_rc && plane_index==0)
4298
ratecontrol_1pass(s, pict);
4300
for(level=0; level<s->spatial_decomposition_count; level++){
4301
for(orientation=level ? 1 : 0; orientation<4; orientation++){
4302
SubBand *b= &p->band[level][orientation];
4304
quantize(s, b, b->buf, b->stride, s->qbias);
4306
decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
4307
encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
4308
assert(b->parent==NULL || b->parent->stride == b->stride*2);
4310
correlate(s, b, b->buf, b->stride, 1, 0);
4313
// av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
4315
// reconstruction path: dequantise every band, then inverse DWT
for(level=0; level<s->spatial_decomposition_count; level++){
4316
for(orientation=level ? 1 : 0; orientation<4; orientation++){
4317
SubBand *b= &p->band[level][orientation];
4319
dequantize(s, b, b->buf, b->stride);
4323
ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4324
if(s->qlog == LOSSLESS_QLOG){
4327
s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
4332
predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4333
STOP_TIMER("pred-conv")}
4336
if(pict->pict_type == I_TYPE){
4339
// copy the input pixels straight into the current picture
s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4340
pict->data[plane_index][y*pict->linesize[plane_index] + x];
4344
// zero residual, so predict_plane() reconstructs from prediction alone
memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4345
predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4348
// optional PSNR statistics: per-pixel difference between reconstruction and input
if(s->avctx->flags&CODEC_FLAG_PSNR){
4351
if(pict->data[plane_index]) //FIXME gray hack
4354
int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4358
s->avctx->error[plane_index] += error;
4359
s->current_picture.error[plane_index] = error;
4363
// release the oldest reference if it holds a buffer
if(s->last_picture[s->max_ref_frames-1].data[0])
4364
avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4366
s->current_picture.coded_picture_number = avctx->frame_number;
4367
s->current_picture.pict_type = pict->pict_type;
4368
s->current_picture.quality = pict->quality;
4369
s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4370
// texture bits are whatever is left after misc and mv bits
s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4371
s->m.current_picture.display_picture_number =
4372
s->m.current_picture.coded_picture_number = avctx->frame_number;
4373
s->m.current_picture.quality = pict->quality;
4374
s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4376
ff_rate_estimate_qscale(&s->m, 0);
4377
if(avctx->flags&CODEC_FLAG_PASS1)
4378
ff_write_pass1_stats(&s->m);
4379
s->m.last_pict_type = s->m.pict_type;
4383
return ff_rac_terminate(c);
4386
// Release the resources held by s: the DWT buffer, the motion-estimation
// scratch buffers, the block array, the per-reference vector/score arrays,
// any reference pictures that still hold data, and every subband's x_coeff
// array.
static void common_end(SnowContext *s){
4387
int plane_index, level, orientation, i;
4389
av_freep(&s->spatial_dwt_buffer);
4391
av_freep(&s->m.me.scratchpad);
4392
av_freep(&s->m.me.map);
4393
av_freep(&s->m.me.score_map);
4394
av_freep(&s->m.obmc_scratchpad);
4396
av_freep(&s->block);
4398
// iterates over all MAX_REF_FRAMES slots, not just max_ref_frames
for(i=0; i<MAX_REF_FRAMES; i++){
4399
av_freep(&s->ref_mvs[i]);
4400
av_freep(&s->ref_scores[i]);
4401
if(s->last_picture[i].data[0])
4402
s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4405
// same band iteration order as the allocation loop in common_init()
for(plane_index=0; plane_index<3; plane_index++){
4406
for(level=s->spatial_decomposition_count-1; level>=0; level--){
4407
for(orientation=level ? 1 : 0; orientation<4; orientation++){
4408
SubBand *b= &s->plane[plane_index].band[level][orientation];
4410
av_freep(&b->x_coeff);
4416
// Encoder teardown. The visible statement frees avctx->stats_out, the
// buffer encode_init() allocates when CODEC_FLAG_PASS1 is set.
static int encode_end(AVCodecContext *avctx)
4418
SnowContext *s = avctx->priv_data;
4421
av_free(avctx->stats_out);
4426
// Decoder initialisation: sets the output pixel format to PIX_FMT_YUV420P
// and initialises the slice buffer s->sb.
static int decode_init(AVCodecContext *avctx)
4428
SnowContext *s = avctx->priv_data;
4431
avctx->pix_fmt= PIX_FMT_YUV420P;
4435
block_size = MB_SIZE >> s->block_max_depth;
4436
// the slice buffer is sized from plane 0; its third argument grows with the block size and the decomposition count
slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
4441
/**
 * Decode one frame from a packet.
 *
 * Visible flow: set up the range decoder on the input bytes, then for each of
 * the three planes unpack the subband coefficients and walk the plane in
 * horizontal slices, decoding subband lines, running the buffered inverse DWT
 * and the slice prediction, and releasing finished lines from the slice
 * buffer. Finally the oldest reference picture is released and the output
 * frame is written through `data`.
 *
 * @param avctx     codec context; priv_data holds the SnowContext
 * @param data      output, treated as an AVFrame
 * @param data_size output, set to sizeof(AVFrame)
 * @param buf       input packet bytes
 * @param buf_size  number of bytes in buf
 *
 * NOTE(review): several declarations (w, h, x, y, yd, yq, mb_y, extra,
 * start_y, end_y, bytes_read), loop headers, closing braces, at least one
 * `else` and the return statement are not visible in this chunk - confirm
 * against the complete file before relying on the structure shown here.
 */
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4442
SnowContext *s = avctx->priv_data;
4443
RangeCoder * const c= &s->c;
4445
AVFrame *picture = data;
4446
int level, orientation, plane_index;
4448
/* attach the range decoder to the input packet */
ff_init_range_decoder(c, buf, buf_size);
4449
ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4451
s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4453
/* allocate the block array on first use */
if(!s->block) alloc_blocks(s);
4456
//keyframe flag duplication mess FIXME
4457
/* optional per-picture debug output (note: emitted at AV_LOG_ERROR level) */
if(avctx->debug&FF_DEBUG_PICT_INFO)
4458
av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4462
/* process each of the three planes in turn */
for(plane_index=0; plane_index<3; plane_index++){
4463
Plane *p= &s->plane[plane_index];
4467
int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4469
/* debug bit 2048: run prediction on a zeroed DWT buffer and copy the
 * resulting samples of current_picture into mconly_picture */
if(s->avctx->debug&2048){
4470
memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4471
predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4475
int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4476
s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4482
/* unpack the coefficients of every subband; orientation 0 exists only at level 0 */
for(level=0; level<s->spatial_decomposition_count; level++){
4483
for(orientation=level ? 1 : 0; orientation<4; orientation++){
4484
SubBand *b= &p->band[level][orientation];
4485
unpack_coeffs(s, b, b->parent, orientation);
4488
STOP_TIMER("unpack coeffs");
4492
const int mb_h= s->b_height << s->block_max_depth;
4493
const int block_size = MB_SIZE >> s->block_max_depth;
4494
/* planes other than plane 0 use half the block size */
const int block_w = plane_index ? block_size/2 : block_size;
4496
dwt_compose_t cs[MAX_DECOMPOSITIONS];
4501
ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4502
/* one iteration per block row; the bound is inclusive (mb_y == mb_h is processed too) */
for(mb_y=0; mb_y<=mb_h; mb_y++){
4504
int slice_starty = block_w*mb_y;
4505
int slice_h = block_w*(mb_y+1);
4506
/* unless this is a keyframe or debug bit 512 is set, move the slice
 * window up by half a block (start clamped at 0) */
if (!(s->keyframe || s->avctx->debug&512)){
4507
slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4508
slice_h -= (block_w >> 1);
4513
/* decode the subband lines needed for this slice */
for(level=0; level<s->spatial_decomposition_count; level++){
4514
for(orientation=level ? 1 : 0; orientation<4; orientation++){
4515
SubBand *b= &p->band[level][orientation];
4518
int our_mb_start = mb_y;
4519
int our_mb_end = (mb_y + 1);
4521
/* map the block-row range to a line range in this subband: scale down by
 * (decomposition count - level) and add that same amount plus `extra` */
start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4522
end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4523
/* same half-block shift as for the slice window, scaled to this level */
if (!(s->keyframe || s->avctx->debug&512)){
4524
start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4525
end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4527
/* never run past the last line of the subband */
start_y = FFMIN(b->height, start_y);
4528
end_y = FFMIN(b->height, end_y);
4530
/* nothing to decode when the clamped range is empty */
if (start_y != end_y){
4531
/* orientation 0: band[0][0] is decoded and correlated over a range shifted
 * by one line (clamped to the band height), then dequantized over
 * [start_y, end_y) */
if (orientation == 0){
4532
SubBand * correlate_band = &p->band[0][0];
4533
int correlate_end_y = FFMIN(b->height, end_y + 1);
4534
int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4535
decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4536
correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4537
dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
4540
decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4544
STOP_TIMER("decode_subband_slice");
4548
/* advance the buffered inverse DWT in steps of 4 lines up to the slice end */
for(; yd<slice_h; yd+=4){
4549
ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4551
STOP_TIMER("idwt slice");}
4554
/* lossless mode: scale the reconstructed lines up by FRAC_BITS */
if(s->qlog == LOSSLESS_QLOG){
4555
for(; yq<slice_h && yq<h; yq++){
4556
DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4558
line[x] <<= FRAC_BITS;
4563
predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
4565
/* release slice-buffer lines, with the range clamped to the plane height */
y = FFMIN(p->height, slice_starty);
4566
end_y = FFMIN(p->height, slice_h);
4568
slice_buffer_release(&s->sb, y++);
4571
slice_buffer_flush(&s->sb);
4573
STOP_TIMER("idwt + predict_slices")}
4578
/* release the last reference slot if it still holds a buffer */
if(s->last_picture[s->max_ref_frames-1].data[0])
4579
avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4581
/* output current_picture unless debug bit 2048 is set.
 * NOTE(review): an `else` before the mconly_picture assignment is presumably
 * missing from this chunk - confirm against the complete file. */
if(!(s->avctx->debug&2048))
4582
*picture= s->current_picture;
4584
*picture= s->mconly_picture;
4586
*data_size = sizeof(AVFrame);
4588
/* number of input bytes the range decoder has advanced over */
bytes_read= c->bytestream - c->bytestream_start;
4589
if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4594
/**
 * Decoder close routine (going by its name).
 *
 * The visible lines fetch the SnowContext from avctx->priv_data and destroy
 * the slice buffer s->sb.
 *
 * NOTE(review): the function braces, any further cleanup and the return
 * statement are not visible in this chunk - confirm against the complete file.
 */
static int decode_end(AVCodecContext *avctx)
4596
SnowContext *s = avctx->priv_data;
4598
slice_buffer_destroy(&s->sb);
4605
/**
 * Codec descriptor for the Snow decoder.
 *
 * NOTE(review): only two initializer entries are visible in this chunk; which
 * AVCodec fields they land in depends on the entries that are not shown -
 * confirm against the complete file.
 */
AVCodec snow_decoder = {
4609
/* size of the private context (SnowContext) */
sizeof(SnowContext),
4614
/* evaluates to 0: CODEC_CAP_DR1 and CODEC_CAP_DRAW_HORIZ_BAND are commented out */
0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4618
/* The encoder descriptor is only compiled when CONFIG_ENCODERS is defined. */
#ifdef CONFIG_ENCODERS
4619
/**
 * Codec descriptor for the Snow encoder.
 *
 * NOTE(review): only one initializer entry is visible in this chunk; the
 * remaining entries and the matching #endif are not shown - confirm against
 * the complete file.
 */
AVCodec snow_encoder = {
4623
/* size of the private context (SnowContext) */
sizeof(SnowContext),
4639
int buffer[2][width*height];
4642
s.spatial_decomposition_count=6;
4643
s.spatial_decomposition_type=1;
4645
printf("testing 5/3 DWT\n");
4646
for(i=0; i<width*height; i++)
4647
buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4649
ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4650
ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4652
for(i=0; i<width*height; i++)
4653
if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4655
printf("testing 9/7 DWT\n");
4656
s.spatial_decomposition_type=0;
4657
for(i=0; i<width*height; i++)
4658
buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4660
ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4661
ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4663
for(i=0; i<width*height; i++)
4664
if(ABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4667
printf("testing AC coder\n");
4668
memset(s.header_state, 0, sizeof(s.header_state));
4669
ff_init_range_encoder(&s.c, buffer[0], 256*256);
4670
ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4672
for(i=-256; i<256; i++){
4674
put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
4675
STOP_TIMER("put_symbol")
4677
ff_rac_terminate(&s.c);
4679
memset(s.header_state, 0, sizeof(s.header_state));
4680
ff_init_range_decoder(&s.c, buffer[0], 256*256);
4681
ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4683
for(i=-256; i<256; i++){
4686
j= get_symbol(&s.c, s.header_state, 1);
4687
STOP_TIMER("get_symbol")
4688
if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
4692
int level, orientation, x, y;
4693
int64_t errors[8][4];
4696
memset(errors, 0, sizeof(errors));
4697
s.spatial_decomposition_count=3;
4698
s.spatial_decomposition_type=0;
4699
for(level=0; level<s.spatial_decomposition_count; level++){
4700
for(orientation=level ? 1 : 0; orientation<4; orientation++){
4701
int w= width >> (s.spatial_decomposition_count-level);
4702
int h= height >> (s.spatial_decomposition_count-level);
4703
int stride= width << (s.spatial_decomposition_count-level);
4704
DWTELEM *buf= buffer[0];
4707
if(orientation&1) buf+=w;
4708
if(orientation>1) buf+=stride>>1;
4710
memset(buffer[0], 0, sizeof(int)*width*height);
4711
buf[w/2 + h/2*stride]= 256*256;
4712
ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4713
for(y=0; y<height; y++){
4714
for(x=0; x<width; x++){
4715
int64_t d= buffer[0][x + y*width];
4717
if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
4719
if(ABS(height/2-y)<9 && level==2) printf("\n");
4721
error= (int)(sqrt(error)+0.5);
4722
errors[level][orientation]= error;
4723
if(g) g=ff_gcd(g, error);
4727
printf("static int const visual_weight[][4]={\n");
4728
for(level=0; level<s.spatial_decomposition_count; level++){
4730
for(orientation=0; orientation<4; orientation++){
4731
printf("%8lld,", errors[level][orientation]/g);
4739
int w= width >> (s.spatial_decomposition_count-level);
4740
int h= height >> (s.spatial_decomposition_count-level);
4741
int stride= width << (s.spatial_decomposition_count-level);
4742
DWTELEM *buf= buffer[0];
4748
memset(buffer[0], 0, sizeof(int)*width*height);
4750
for(y=0; y<height; y++){
4751
for(x=0; x<width; x++){
4752
int tab[4]={0,2,3,1};
4753
buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4756
ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4760
buf[x + y*stride ]=169;
4761
buf[x + y*stride-w]=64;
4764
ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4766
for(y=0; y<height; y++){
4767
for(x=0; x<width; x++){
4768
int64_t d= buffer[0][x + y*width];
4770
if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
4772
if(ABS(height/2-y)<9) printf("\n");