3
void j_rev_dct_ARM(DCTBLOCK data)
5
With DCTBLOCK being a pointer to an array of 64 'signed shorts'
7
Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)
9
Permission is hereby granted, free of charge, to any person obtaining a copy
10
of this software and associated documentation files (the "Software"), to deal
11
in the Software without restriction, including without limitation the rights
12
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
copies of the Software, and to permit persons to whom the Software is
14
furnished to do so, subject to the following conditions:
16
The above copyright notice and this permission notice shall be included in
17
all copies or substantial portions of the Software.
19
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
23
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
@ Fixed-point multipliers. Each FIX_<a>_<b> is the real constant a.b scaled by
@ 2^13 and rounded to an integer (e.g. 0.298631336 * 8192 ~= 2446); the FIX_M_
@ names hold the negated constants.
#define FIX_0_298631336 2446
28
#define FIX_0_541196100 4433
29
#define FIX_0_765366865 6270
30
#define FIX_1_175875602 9633
31
#define FIX_1_501321110 12299
32
#define FIX_2_053119869 16819
33
#define FIX_3_072711026 25172
34
#define FIX_M_0_390180644 -3196
35
#define FIX_M_0_899976223 -7373
36
#define FIX_M_1_847759065 -15137
37
#define FIX_M_1_961570560 -16069
38
#define FIX_M_2_562915447 -20995
39
#define FIX_0xFFFF 0xFFFF
41
@ Byte offsets of the same constants inside the word table that the code
@ addresses through r11 (one 4-byte .word per entry, in the order listed here).
#define FIX_0_298631336_ID 0
42
#define FIX_0_541196100_ID 4
43
#define FIX_0_765366865_ID 8
44
#define FIX_1_175875602_ID 12
45
#define FIX_1_501321110_ID 16
46
#define FIX_2_053119869_ID 20
47
#define FIX_3_072711026_ID 24
48
#define FIX_M_0_390180644_ID 28
49
#define FIX_M_0_899976223_ID 32
50
#define FIX_M_1_847759065_ID 36
51
#define FIX_M_1_961570560_ID 40
52
#define FIX_M_2_562915447_ID 44
53
#define FIX_0xFFFF_ID 48
59
@ Function entry: push r4-r12 and lr (ten words), reserve one more stack word
@ to keep the incoming block pointer (r0), then set up the row pass registers.
stmdb sp!, { r4 - r12, lr } @ all callee saved regs
61
sub sp, sp, #4 @ reserve some space on the stack
62
str r0, [ sp ] @ save the DCT pointer to the stack
64
mov lr, r0 @ lr = pointer to the current row
65
mov r12, #8 @ r12 = row-counter
66
@ In ARM state pc reads as the address of the current instruction plus 8,
@ hence the -8 in the displacement below.
add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
68
@ Row pass: lr points at the current row and r11 at the constants table.
@ The even inputs d0,d2,d4,d6 are read from byte offsets 0,2,4,6 of the row
@ and the odd inputs d1,d3,d5,d7 from byte offsets 8,10,12,14.
ldrsh r0, [lr, # 0] @ r0 = 'd0'
69
ldrsh r1, [lr, # 8] @ r1 = 'd1'
71
@ Optimization for rows that have all items except the first set to 0
72
@ (this works as the DCTELEMS are always 4-byte aligned)
80
beq end_of_row_loop @ nothing to be done as ALL of them are '0'
84
ldrsh r2, [lr, # 2] @ r2 = 'd2'
85
ldrsh r4, [lr, # 4] @ r4 = 'd4'
86
ldrsh r6, [lr, # 6] @ r6 = 'd6'
88
@ Even part: build tmp10..tmp13 from d0, d2, d4, d6. The sums/differences of
@ d0 and d4 are shifted left by 13 to match the 2^13 scale of the products.
ldr r3, [r11, #FIX_0_541196100_ID]
90
ldr r5, [r11, #FIX_M_1_847759065_ID]
91
mul r7, r3, r7 @ r7 = z1
92
ldr r3, [r11, #FIX_0_765366865_ID]
93
mla r6, r5, r6, r7 @ r6 = tmp2
94
add r5, r0, r4 @ r5 = tmp0
95
mla r2, r3, r2, r7 @ r2 = tmp3
96
sub r3, r0, r4 @ r3 = tmp1
98
add r0, r2, r5, lsl #13 @ r0 = tmp10
99
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
100
add r4, r6, r3, lsl #13 @ r4 = tmp11
101
rsb r3, r6, r3, lsl #13 @ r3 = tmp12
103
@ The even-part results are parked on the stack so r0-r10 are free for the
@ odd part.
stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
105
@ Odd part: build tmp0..tmp3 from d1, d3, d5, d7. Constant loads are
@ interleaved with the multiplies, alternating between r9 and r10.
ldrsh r3, [lr, #10] @ r3 = 'd3'
106
ldrsh r5, [lr, #12] @ r5 = 'd5'
107
ldrsh r7, [lr, #14] @ r7 = 'd7'
109
add r0, r3, r5 @ r0 = 'z2'
110
add r2, r1, r7 @ r2 = 'z1'
111
add r4, r3, r7 @ r4 = 'z3'
112
add r6, r1, r5 @ r6 = 'z4'
113
ldr r9, [r11, #FIX_1_175875602_ID]
114
add r8, r4, r6 @ r8 = z3 + z4
115
ldr r10, [r11, #FIX_M_0_899976223_ID]
116
mul r8, r9, r8 @ r8 = 'z5'
117
ldr r9, [r11, #FIX_M_2_562915447_ID]
118
mul r2, r10, r2 @ r2 = 'z1'
119
ldr r10, [r11, #FIX_M_1_961570560_ID]
120
mul r0, r9, r0 @ r0 = 'z2'
121
ldr r9, [r11, #FIX_M_0_390180644_ID]
122
mla r4, r10, r4, r8 @ r4 = 'z3'
123
ldr r10, [r11, #FIX_0_298631336_ID]
124
mla r6, r9, r6, r8 @ r6 = 'z4'
125
ldr r9, [r11, #FIX_2_053119869_ID]
126
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
127
ldr r10, [r11, #FIX_3_072711026_ID]
128
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
129
ldr r9, [r11, #FIX_1_501321110_ID]
130
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
131
add r7, r7, r4 @ r7 = tmp0
132
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
133
add r5, r5, r6 @ r5 = tmp1
134
add r3, r3, r4 @ r3 = tmp2
135
add r1, r1, r6 @ r1 = tmp3
137
@ The four words are reloaded in ascending register order, so the values that
@ were stored from r3 and r4 (tmp12, tmp11) come back in r4 and r6.
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
138
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
140
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
146
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
152
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
158
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
164
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
170
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
176
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
182
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
192
beq start_column_loop
195
ldr r1, [r11, #FIX_0xFFFF_ID]
198
add r0, r0, r0, lsl #16
211
@ Start of column loop
@ Column pass: lr points at the top of the current column. Consecutive
@ column elements are 16 bytes apart, so d<k> lives at byte offset (2*k)*8.
215
ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0'
216
ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2'
217
ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4'
218
ldrsh r6, [lr, #(12*8)] @ r6 = 'd6'
220
@ Even part: build tmp10..tmp13 from d0, d2, d4, d6 (same arithmetic as the
@ row pass, but tmp11 ends up in r4 and tmp12 in r6).
ldr r3, [r11, #FIX_0_541196100_ID]
222
ldr r5, [r11, #FIX_M_1_847759065_ID]
223
mul r1, r3, r1 @ r1 = z1
224
ldr r3, [r11, #FIX_0_765366865_ID]
225
mla r6, r5, r6, r1 @ r6 = tmp2
226
add r5, r0, r4 @ r5 = tmp0
227
mla r2, r3, r2, r1 @ r2 = tmp3
228
sub r3, r0, r4 @ r3 = tmp1
230
add r0, r2, r5, lsl #13 @ r0 = tmp10
231
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
232
add r4, r6, r3, lsl #13 @ r4 = tmp11
233
rsb r6, r6, r3, lsl #13 @ r6 = tmp12
235
ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1'
236
ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3'
237
ldrsh r5, [lr, #(10*8)] @ r5 = 'd5'
238
ldrsh r7, [lr, #(14*8)] @ r7 = 'd7'
240
@ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
246
stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12
248
@ Odd part: build tmp0..tmp3 from d1, d3, d5, d7.
add r0, r3, r5 @ r0 = 'z2'
249
add r2, r1, r7 @ r2 = 'z1'
250
add r4, r3, r7 @ r4 = 'z3'
251
add r6, r1, r5 @ r6 = 'z4'
252
ldr r9, [r11, #FIX_1_175875602_ID]
254
ldr r10, [r11, #FIX_M_0_899976223_ID]
255
mul r8, r9, r8 @ r8 = 'z5'
256
ldr r9, [r11, #FIX_M_2_562915447_ID]
257
mul r2, r10, r2 @ r2 = 'z1'
258
ldr r10, [r11, #FIX_M_1_961570560_ID]
259
mul r0, r9, r0 @ r0 = 'z2'
260
ldr r9, [r11, #FIX_M_0_390180644_ID]
261
mla r4, r10, r4, r8 @ r4 = 'z3'
262
ldr r10, [r11, #FIX_0_298631336_ID]
263
mla r6, r9, r6, r8 @ r6 = 'z4'
264
ldr r9, [r11, #FIX_2_053119869_ID]
265
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
266
ldr r10, [r11, #FIX_3_072711026_ID]
267
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
268
ldr r9, [r11, #FIX_1_501321110_ID]
269
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
270
add r7, r7, r4 @ r7 = tmp0
271
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
272
add r5, r5, r6 @ r5 = tmp1
273
add r3, r3, r4 @ r3 = tmp2
274
add r1, r1, r6 @ r1 = tmp3
276
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
277
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
279
@ Each result is written back as a halfword through r8; the sum/difference
@ pairs land on mirrored rows of the column (0/7, 1/6, 2/5, 3/4).
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
283
strh r8, [lr, #( 0*8)]
285
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
289
strh r8, [lr, #(14*8)]
291
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
295
strh r8, [lr, #( 2*8)]
297
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
301
strh r8, [lr, #(12*8)]
303
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
307
strh r8, [lr, #( 4*8)]
309
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
313
strh r8, [lr, #(10*8)]
315
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
319
strh r8, [lr, #( 6*8)]
321
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
325
strh r8, [lr, #( 8*8)]
334
@ Second store sequence: the same register is written to both mirrored rows
@ of each pair, i.e. the sum and the difference are identical here.
@ NOTE(review): presumably the empty-odd-column path mentioned above, where
@ tmp0..tmp3 are all zero -- confirm against the branch that selects it.
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
335
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
338
strh r0, [lr, #( 0*8)]
339
strh r0, [lr, #(14*8)]
341
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
342
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
345
strh r4, [lr, #( 2*8)]
346
strh r4, [lr, #(12*8)]
348
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
349
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
352
strh r6, [lr, #( 4*8)]
353
strh r6, [lr, #(10*8)]
355
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
356
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
359
strh r2, [lr, #( 6*8)]
360
strh r2, [lr, #( 8*8)]
370
@ Counterpart of the entry stmdb: pops the ten saved words; the value that was
@ pushed from lr is loaded straight into pc, which performs the return.
ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return
374
@ Constants table read through r11. The entry order must stay in step with
@ the FIX_*_ID byte offsets: entry number k is addressed at offset 4*k.
.word FIX_0_298631336
375
.word FIX_0_541196100
376
.word FIX_0_765366865
377
.word FIX_1_175875602
378
.word FIX_1_501321110
379
.word FIX_2_053119869
380
.word FIX_3_072711026
381
.word FIX_M_0_390180644
382
.word FIX_M_0_899976223
383
.word FIX_M_1_847759065
384
.word FIX_M_1_961570560
385
.word FIX_M_2_562915447