4
4
* Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
6
* This library is free software; you can redistribute it and/or
6
* This file is part of FFmpeg.
8
* FFmpeg is free software; you can redistribute it and/or
7
9
* modify it under the terms of the GNU Lesser General Public
8
10
* License as published by the Free Software Foundation; either
9
* version 2 of the License, or (at your option) any later version.
11
* version 2.1 of the License, or (at your option) any later version.
11
* This library is distributed in the hope that it will be useful,
13
* FFmpeg is distributed in the hope that it will be useful,
12
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
16
* Lesser General Public License for more details.
16
18
* You should have received a copy of the GNU Lesser General Public
17
* License along with this library; if not, write to the Free Software
18
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
* License along with FFmpeg; if not, write to the Free Software
20
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
26
#define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
24
#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
27
#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
29
"pshufw $0x0E," #a ", " #b " \n\t"\
31
"pshufw $0x01," #a ", " #b " \n\t"\
27
34
#define SPREADW(a) \
28
"punpcklwd " #a ", " #a " \n\t"\
29
"punpcklwd " #a ", " #a " \n\t"
35
"punpcklwd " #a ", " #a " \n\t"\
36
"punpcklwd " #a ", " #a " \n\t"
30
37
#define PMAXW(a,b) \
31
"psubusw " #a ", " #b " \n\t"\
32
"paddw " #a ", " #b " \n\t"
38
"psubusw " #a ", " #b " \n\t"\
39
"paddw " #a ", " #b " \n\t"
41
"movq " #a ", " #b " \n\t"\
42
"psrlq $32, " #a " \n\t"\
44
"movq " #a ", " #b " \n\t"\
45
"psrlq $16, " #a " \n\t"\
35
50
static int RENAME(dct_quantize)(MpegEncContext *s,
36
51
DCTELEM *block, int n,
37
52
int qscale, int *overflow)
39
int level=0, last_non_zero_p1, q; //=0 is cuz gcc says uninitalized ...
54
long last_non_zero_p1;
55
int level=0, q; //=0 is cuz gcc says uninitalized ...
40
56
const uint16_t *qmat, *bias;
41
__align8 int16_t temp_block[64];
57
DECLARE_ALIGNED_8(int16_t, temp_block[64]);
43
59
assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly?
46
ff_fdct_mmx (block); //cant be anything else ...
62
RENAMEl(ff_fdct) (block); //cant be anything else ...
65
s->denoise_dct(s, block);
55
74
if (!s->h263_aic) {
59
: "=d" (level), "=a"(dummy)
60
: "a" ((block[0]>>2) + q), "c" (inverse[q<<1])
78
: "=d" (level), "=a"(dummy)
79
: "a" ((block[0]>>2) + q), "c" (ff_inverse[q<<1])
64
"xorl %%edx, %%edx \n\t"
66
"movzwl %%ax, %%eax \n\t"
68
: "a" ((block[0]>>2) + q), "c" (q<<1)
83
"xorl %%edx, %%edx \n\t"
85
"movzwl %%ax, %%eax \n\t"
87
: "a" ((block[0]>>2) + q), "c" (q<<1)
73
92
/* For AIC we skip quant/dequant of INTRADC */
74
93
level = (block[0] + 4)>>3;
76
95
block[0]=0; //avoid fake overflow
77
96
// temp_block[0] = (block[0] + (q >> 1)) / q;
78
97
last_non_zero_p1 = 1;
79
bias = s->q_intra_matrix16_bias[qscale];
80
qmat = s->q_intra_matrix16[qscale];
98
bias = s->q_intra_matrix16[qscale][1];
99
qmat = s->q_intra_matrix16[qscale][0];
82
101
last_non_zero_p1 = 0;
83
bias = s->q_inter_matrix16_bias[qscale];
84
qmat = s->q_inter_matrix16[qscale];
102
bias = s->q_inter_matrix16[qscale][1];
103
qmat = s->q_inter_matrix16[qscale][0];
87
if(s->out_format == FMT_H263 && s->mpeg_quant==0){
106
if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
90
"movd %%eax, %%mm3 \n\t" // last_non_zero_p1
109
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
92
"pxor %%mm7, %%mm7 \n\t" // 0
93
"pxor %%mm4, %%mm4 \n\t" // 0
94
"movq (%2), %%mm5 \n\t" // qmat[0]
95
"pxor %%mm6, %%mm6 \n\t"
96
"psubw (%3), %%mm6 \n\t" // -bias[0]
97
"movl $-128, %%eax \n\t"
100
"pxor %%mm1, %%mm1 \n\t" // 0
101
"movq (%1, %%eax), %%mm0 \n\t" // block[i]
102
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
103
"pxor %%mm1, %%mm0 \n\t"
104
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
105
"psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
106
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
107
"por %%mm0, %%mm4 \n\t"
108
"pxor %%mm1, %%mm0 \n\t"
109
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
110
"movq %%mm0, (%5, %%eax) \n\t"
111
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
112
"movq (%4, %%eax), %%mm1 \n\t"
113
"movq %%mm7, (%1, %%eax) \n\t" // 0
114
"pandn %%mm1, %%mm0 \n\t"
116
"addl $8, %%eax \n\t"
118
"movq %%mm3, %%mm0 \n\t"
119
"psrlq $32, %%mm3 \n\t"
121
"movq %%mm3, %%mm0 \n\t"
122
"psrlq $16, %%mm3 \n\t"
124
"movd %%mm3, %%eax \n\t"
125
"movzbl %%al, %%eax \n\t" // last_non_zero_p1
126
: "+a" (last_non_zero_p1)
111
"pxor %%mm7, %%mm7 \n\t" // 0
112
"pxor %%mm4, %%mm4 \n\t" // 0
113
"movq (%2), %%mm5 \n\t" // qmat[0]
114
"pxor %%mm6, %%mm6 \n\t"
115
"psubw (%3), %%mm6 \n\t" // -bias[0]
116
"mov $-128, %%"REG_a" \n\t"
119
"pxor %%mm1, %%mm1 \n\t" // 0
120
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
121
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
122
"pxor %%mm1, %%mm0 \n\t"
123
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
124
"psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
125
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
126
"por %%mm0, %%mm4 \n\t"
127
"pxor %%mm1, %%mm0 \n\t"
128
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
129
"movq %%mm0, (%5, %%"REG_a") \n\t"
130
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
131
"movq (%4, %%"REG_a"), %%mm1 \n\t"
132
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
133
"pandn %%mm1, %%mm0 \n\t"
135
"add $8, %%"REG_a" \n\t"
138
"movd %%mm3, %%"REG_a" \n\t"
139
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
140
: "+a" (last_non_zero_p1)
127
141
: "r" (block+64), "r" (qmat), "r" (bias),
128
142
"r" (inv_zigzag_direct16+64), "r" (temp_block+64)
130
144
// note the asm is split cuz gcc doesnt like that many operands ...
132
"movd %1, %%mm1 \n\t" // max_qcoeff
134
"psubusw %%mm1, %%mm4 \n\t"
135
"packuswb %%mm4, %%mm4 \n\t"
136
"movd %%mm4, %0 \n\t" // *overflow
146
"movd %1, %%mm1 \n\t" // max_qcoeff
148
"psubusw %%mm1, %%mm4 \n\t"
149
"packuswb %%mm4, %%mm4 \n\t"
150
"movd %%mm4, %0 \n\t" // *overflow
137
151
: "=g" (*overflow)
138
152
: "g" (s->max_qcoeff)
140
154
}else{ // FMT_H263
142
"movd %%eax, %%mm3 \n\t" // last_non_zero_p1
156
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
144
"pxor %%mm7, %%mm7 \n\t" // 0
145
"pxor %%mm4, %%mm4 \n\t" // 0
146
"movl $-128, %%eax \n\t"
149
"pxor %%mm1, %%mm1 \n\t" // 0
150
"movq (%1, %%eax), %%mm0 \n\t" // block[i]
151
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
152
"pxor %%mm1, %%mm0 \n\t"
153
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
154
"movq (%3, %%eax), %%mm6 \n\t" // bias[0]
155
"paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
156
"movq (%2, %%eax), %%mm5 \n\t" // qmat[i]
157
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
158
"por %%mm0, %%mm4 \n\t"
159
"pxor %%mm1, %%mm0 \n\t"
160
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
161
"movq %%mm0, (%5, %%eax) \n\t"
162
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
163
"movq (%4, %%eax), %%mm1 \n\t"
164
"movq %%mm7, (%1, %%eax) \n\t" // 0
165
"pandn %%mm1, %%mm0 \n\t"
167
"addl $8, %%eax \n\t"
169
"movq %%mm3, %%mm0 \n\t"
170
"psrlq $32, %%mm3 \n\t"
172
"movq %%mm3, %%mm0 \n\t"
173
"psrlq $16, %%mm3 \n\t"
175
"movd %%mm3, %%eax \n\t"
176
"movzbl %%al, %%eax \n\t" // last_non_zero_p1
177
: "+a" (last_non_zero_p1)
158
"pxor %%mm7, %%mm7 \n\t" // 0
159
"pxor %%mm4, %%mm4 \n\t" // 0
160
"mov $-128, %%"REG_a" \n\t"
163
"pxor %%mm1, %%mm1 \n\t" // 0
164
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
165
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
166
"pxor %%mm1, %%mm0 \n\t"
167
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
168
"movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0]
169
"paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
170
"movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i]
171
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
172
"por %%mm0, %%mm4 \n\t"
173
"pxor %%mm1, %%mm0 \n\t"
174
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
175
"movq %%mm0, (%5, %%"REG_a") \n\t"
176
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
177
"movq (%4, %%"REG_a"), %%mm1 \n\t"
178
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
179
"pandn %%mm1, %%mm0 \n\t"
181
"add $8, %%"REG_a" \n\t"
184
"movd %%mm3, %%"REG_a" \n\t"
185
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
186
: "+a" (last_non_zero_p1)
178
187
: "r" (block+64), "r" (qmat+64), "r" (bias+64),
179
188
"r" (inv_zigzag_direct16+64), "r" (temp_block+64)
181
190
// note the asm is split cuz gcc doesnt like that many operands ...
183
"movd %1, %%mm1 \n\t" // max_qcoeff
185
"psubusw %%mm1, %%mm4 \n\t"
186
"packuswb %%mm4, %%mm4 \n\t"
187
"movd %%mm4, %0 \n\t" // *overflow
192
"movd %1, %%mm1 \n\t" // max_qcoeff
194
"psubusw %%mm1, %%mm4 \n\t"
195
"packuswb %%mm4, %%mm4 \n\t"
196
"movd %%mm4, %0 \n\t" // *overflow
188
197
: "=g" (*overflow)
189
198
: "g" (s->max_qcoeff)
196
205
if(s->dsp.idct_permutation_type == FF_SIMPLE_IDCT_PERM){
197
206
if(last_non_zero_p1 <= 1) goto end;
198
block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08];
199
block[0x20] = temp_block[0x10];
207
block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08];
208
block[0x20] = temp_block[0x10];
200
209
if(last_non_zero_p1 <= 4) goto end;
201
block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02];
202
block[0x09] = temp_block[0x03];
210
block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02];
211
block[0x09] = temp_block[0x03];
203
212
if(last_non_zero_p1 <= 7) goto end;
204
block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11];
205
block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20];
213
block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11];
214
block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20];
206
215
if(last_non_zero_p1 <= 11) goto end;
207
block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12];
208
block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04];
209
block[0x0C] = temp_block[0x05];
216
block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12];
217
block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04];
218
block[0x0C] = temp_block[0x05];
210
219
if(last_non_zero_p1 <= 16) goto end;
211
block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13];
212
block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21];
213
block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30];
214
block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22];
220
block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13];
221
block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21];
222
block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30];
223
block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22];
215
224
if(last_non_zero_p1 <= 24) goto end;
216
block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14];
217
block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06];
218
block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E];
219
block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C];
225
block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14];
226
block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06];
227
block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E];
228
block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C];
220
229
if(last_non_zero_p1 <= 32) goto end;
221
block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A];
222
block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38];
223
block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32];
224
block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24];
230
block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A];
231
block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38];
232
block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32];
233
block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24];
225
234
if(last_non_zero_p1 <= 40) goto end;
226
block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16];
227
block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17];
228
block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25];
229
block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33];
235
block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16];
236
block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17];
237
block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25];
238
block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33];
230
239
if(last_non_zero_p1 <= 48) goto end;
231
block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
232
block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D];
233
block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
234
block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E];
240
block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
241
block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D];
242
block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
243
block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E];
235
244
if(last_non_zero_p1 <= 56) goto end;
236
block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C];
237
block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36];
238
block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37];
245
block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C];
246
block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36];
247
block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37];
239
248
block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
240
249
}else if(s->dsp.idct_permutation_type == FF_LIBMPEG2_IDCT_PERM){
241
250
if(last_non_zero_p1 <= 1) goto end;
242
block[0x04] = temp_block[0x01];
243
block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
251
block[0x04] = temp_block[0x01];
252
block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
244
253
if(last_non_zero_p1 <= 4) goto end;
245
block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02];
246
block[0x05] = temp_block[0x03];
254
block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02];
255
block[0x05] = temp_block[0x03];
247
256
if(last_non_zero_p1 <= 7) goto end;
248
block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11];
249
block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
257
block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11];
258
block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
250
259
if(last_non_zero_p1 <= 11) goto end;
251
block[0x1C] = temp_block[0x19];
252
block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B];
253
block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05];
260
block[0x1C] = temp_block[0x19];
261
block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B];
262
block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05];
254
263
if(last_non_zero_p1 <= 16) goto end;
255
block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13];
256
block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21];
257
block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
258
block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22];
264
block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13];
265
block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21];
266
block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
267
block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22];
259
268
if(last_non_zero_p1 <= 24) goto end;
260
block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14];
261
block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06];
262
block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E];
263
block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C];
269
block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14];
270
block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06];
271
block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E];
272
block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C];
264
273
if(last_non_zero_p1 <= 32) goto end;
265
block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A];
266
block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38];
267
block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32];
268
block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24];
274
block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A];
275
block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38];
276
block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32];
277
block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24];
269
278
if(last_non_zero_p1 <= 40) goto end;
270
block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16];
271
block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
272
block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25];
273
block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33];
279
block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16];
280
block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
281
block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25];
282
block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33];
274
283
if(last_non_zero_p1 <= 48) goto end;
275
block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B];
276
block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D];
277
block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
278
block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E];
284
block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B];
285
block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D];
286
block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
287
block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E];
279
288
if(last_non_zero_p1 <= 56) goto end;
280
block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C];
281
block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36];
282
block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
289
block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C];
290
block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36];
291
block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
283
292
block[0x3B] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
285
294
if(last_non_zero_p1 <= 1) goto end;
286
block[0x01] = temp_block[0x01];
287
block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
295
block[0x01] = temp_block[0x01];
296
block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
288
297
if(last_non_zero_p1 <= 4) goto end;
289
block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02];
290
block[0x03] = temp_block[0x03];
298
block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02];
299
block[0x03] = temp_block[0x03];
291
300
if(last_non_zero_p1 <= 7) goto end;
292
block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11];
293
block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
301
block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11];
302
block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
294
303
if(last_non_zero_p1 <= 11) goto end;
295
block[0x19] = temp_block[0x19];
296
block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B];
297
block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05];
304
block[0x19] = temp_block[0x19];
305
block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B];
306
block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05];
298
307
if(last_non_zero_p1 <= 16) goto end;
299
block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13];
300
block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21];
301
block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
302
block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22];
308
block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13];
309
block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21];
310
block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
311
block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22];
303
312
if(last_non_zero_p1 <= 24) goto end;
304
block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14];
305
block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06];
306
block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E];
307
block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C];
313
block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14];
314
block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06];
315
block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E];
316
block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C];
308
317
if(last_non_zero_p1 <= 32) goto end;
309
block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A];
310
block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38];
311
block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32];
312
block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24];
318
block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A];
319
block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38];
320
block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32];
321
block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24];
313
322
if(last_non_zero_p1 <= 40) goto end;
314
block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16];
315
block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
316
block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25];
317
block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33];
323
block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16];
324
block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
325
block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25];
326
block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33];
318
327
if(last_non_zero_p1 <= 48) goto end;
319
block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
320
block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D];
321
block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
322
block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E];
328
block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
329
block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D];
330
block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
331
block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E];
323
332
if(last_non_zero_p1 <= 56) goto end;
324
block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C];
325
block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36];
326
block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
333
block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C];
334
block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36];
335
block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
327
336
block[0x3E] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];