51
51
#define FIX_M_1_961570560_ID 40
52
52
#define FIX_M_2_562915447_ID 44
53
53
#define FIX_0xFFFF_ID 48
59
stmdb sp!, { r4 - r12, lr } @ all callee saved regs
61
sub sp, sp, #4 @ reserve some space on the stack
62
str r0, [ sp ] @ save the DCT pointer to the stack
64
mov lr, r0 @ lr = pointer to the current row
65
mov r12, #8 @ r12 = row-counter
66
add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
59
stmdb sp!, { r4 - r12, lr } @ all callee saved regs
61
sub sp, sp, #4 @ reserve some space on the stack
62
str r0, [ sp ] @ save the DCT pointer to the stack
64
mov lr, r0 @ lr = pointer to the current row
65
mov r12, #8 @ r12 = row-counter
66
add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
68
ldrsh r0, [lr, # 0] @ r0 = 'd0'
69
ldrsh r1, [lr, # 8] @ r1 = 'd1'
71
@ Optimization for rows that have all items except the first set to 0
72
@ (this works as the DCTELEMS are always 4-byte aligned)
80
beq end_of_row_loop @ nothing to be done as ALL of them are '0'
84
ldrsh r2, [lr, # 2] @ r2 = 'd2'
85
ldrsh r4, [lr, # 4] @ r4 = 'd4'
86
ldrsh r6, [lr, # 6] @ r6 = 'd6'
88
ldr r3, [r11, #FIX_0_541196100_ID]
90
ldr r5, [r11, #FIX_M_1_847759065_ID]
91
mul r7, r3, r7 @ r7 = z1
92
ldr r3, [r11, #FIX_0_765366865_ID]
93
mla r6, r5, r6, r7 @ r6 = tmp2
94
add r5, r0, r4 @ r5 = tmp0
95
mla r2, r3, r2, r7 @ r2 = tmp3
96
sub r3, r0, r4 @ r3 = tmp1
98
add r0, r2, r5, lsl #13 @ r0 = tmp10
99
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
100
add r4, r6, r3, lsl #13 @ r4 = tmp11
101
rsb r3, r6, r3, lsl #13 @ r3 = tmp12
103
stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
105
ldrsh r3, [lr, #10] @ r3 = 'd3'
106
ldrsh r5, [lr, #12] @ r5 = 'd5'
107
ldrsh r7, [lr, #14] @ r7 = 'd7'
109
add r0, r3, r5 @ r0 = 'z2'
110
add r2, r1, r7 @ r2 = 'z1'
111
add r4, r3, r7 @ r4 = 'z3'
112
add r6, r1, r5 @ r6 = 'z4'
113
ldr r9, [r11, #FIX_1_175875602_ID]
114
add r8, r4, r6 @ r8 = z3 + z4
115
ldr r10, [r11, #FIX_M_0_899976223_ID]
116
mul r8, r9, r8 @ r8 = 'z5'
117
ldr r9, [r11, #FIX_M_2_562915447_ID]
118
mul r2, r10, r2 @ r2 = 'z1'
119
ldr r10, [r11, #FIX_M_1_961570560_ID]
120
mul r0, r9, r0 @ r0 = 'z2'
121
ldr r9, [r11, #FIX_M_0_390180644_ID]
122
mla r4, r10, r4, r8 @ r4 = 'z3'
123
ldr r10, [r11, #FIX_0_298631336_ID]
124
mla r6, r9, r6, r8 @ r6 = 'z4'
125
ldr r9, [r11, #FIX_2_053119869_ID]
126
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
127
ldr r10, [r11, #FIX_3_072711026_ID]
128
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
129
ldr r9, [r11, #FIX_1_501321110_ID]
130
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
131
add r7, r7, r4 @ r7 = tmp0
132
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
133
add r5, r5, r6 @ r5 = tmp1
134
add r3, r3, r4 @ r3 = tmp2
135
add r1, r1, r6 @ r1 = tmp3
137
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
138
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
140
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
146
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
152
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
158
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
164
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
170
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
176
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
182
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
192
beq start_column_loop
68
ldrsh r0, [lr, # 0] @ r0 = 'd0'
69
ldrsh r1, [lr, # 8] @ r1 = 'd1'
71
@ Optimization for rows that have all items except the first set to 0
72
@ (this works as the DCTELEMS are always 4-byte aligned)
80
beq end_of_row_loop @ nothing to be done as ALL of them are '0'
84
ldrsh r2, [lr, # 2] @ r2 = 'd2'
85
ldrsh r4, [lr, # 4] @ r4 = 'd4'
86
ldrsh r6, [lr, # 6] @ r6 = 'd6'
88
ldr r3, [r11, #FIX_0_541196100_ID]
90
ldr r5, [r11, #FIX_M_1_847759065_ID]
91
mul r7, r3, r7 @ r7 = z1
92
ldr r3, [r11, #FIX_0_765366865_ID]
93
mla r6, r5, r6, r7 @ r6 = tmp2
94
add r5, r0, r4 @ r5 = tmp0
95
mla r2, r3, r2, r7 @ r2 = tmp3
96
sub r3, r0, r4 @ r3 = tmp1
98
add r0, r2, r5, lsl #13 @ r0 = tmp10
99
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
100
add r4, r6, r3, lsl #13 @ r4 = tmp11
101
rsb r3, r6, r3, lsl #13 @ r3 = tmp12
103
stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
105
ldrsh r3, [lr, #10] @ r3 = 'd3'
106
ldrsh r5, [lr, #12] @ r5 = 'd5'
107
ldrsh r7, [lr, #14] @ r7 = 'd7'
109
add r0, r3, r5 @ r0 = 'z2'
110
add r2, r1, r7 @ r2 = 'z1'
111
add r4, r3, r7 @ r4 = 'z3'
112
add r6, r1, r5 @ r6 = 'z4'
113
ldr r9, [r11, #FIX_1_175875602_ID]
114
add r8, r4, r6 @ r8 = z3 + z4
115
ldr r10, [r11, #FIX_M_0_899976223_ID]
116
mul r8, r9, r8 @ r8 = 'z5'
117
ldr r9, [r11, #FIX_M_2_562915447_ID]
118
mul r2, r10, r2 @ r2 = 'z1'
119
ldr r10, [r11, #FIX_M_1_961570560_ID]
120
mul r0, r9, r0 @ r0 = 'z2'
121
ldr r9, [r11, #FIX_M_0_390180644_ID]
122
mla r4, r10, r4, r8 @ r4 = 'z3'
123
ldr r10, [r11, #FIX_0_298631336_ID]
124
mla r6, r9, r6, r8 @ r6 = 'z4'
125
ldr r9, [r11, #FIX_2_053119869_ID]
126
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
127
ldr r10, [r11, #FIX_3_072711026_ID]
128
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
129
ldr r9, [r11, #FIX_1_501321110_ID]
130
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
131
add r7, r7, r4 @ r7 = tmp0
132
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
133
add r5, r5, r6 @ r5 = tmp1
134
add r3, r3, r4 @ r3 = tmp2
135
add r1, r1, r6 @ r1 = tmp3
137
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
138
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
140
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
146
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
152
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
158
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
164
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
170
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
176
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
182
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
192
beq start_column_loop
195
ldr r1, [r11, #FIX_0xFFFF_ID]
198
add r0, r0, r0, lsl #16
195
ldr r1, [r11, #FIX_0xFFFF_ID]
198
add r0, r0, r0, lsl #16
210
210
start_column_loop:
211
@ Start of column loop
211
@ Start of column loop
215
ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0'
216
ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2'
217
ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4'
218
ldrsh r6, [lr, #(12*8)] @ r6 = 'd6'
220
ldr r3, [r11, #FIX_0_541196100_ID]
222
ldr r5, [r11, #FIX_M_1_847759065_ID]
223
mul r1, r3, r1 @ r1 = z1
224
ldr r3, [r11, #FIX_0_765366865_ID]
225
mla r6, r5, r6, r1 @ r6 = tmp2
226
add r5, r0, r4 @ r5 = tmp0
227
mla r2, r3, r2, r1 @ r2 = tmp3
228
sub r3, r0, r4 @ r3 = tmp1
230
add r0, r2, r5, lsl #13 @ r0 = tmp10
231
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
232
add r4, r6, r3, lsl #13 @ r4 = tmp11
233
rsb r6, r6, r3, lsl #13 @ r6 = tmp12
235
ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1'
236
ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3'
237
ldrsh r5, [lr, #(10*8)] @ r5 = 'd5'
238
ldrsh r7, [lr, #(14*8)] @ r7 = 'd7'
240
@ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
246
stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12 (in this pass r4 = tmp11, r6 = tmp12)
248
add r0, r3, r5 @ r0 = 'z2'
249
add r2, r1, r7 @ r2 = 'z1'
250
add r4, r3, r7 @ r4 = 'z3'
251
add r6, r1, r5 @ r6 = 'z4'
252
ldr r9, [r11, #FIX_1_175875602_ID]
254
ldr r10, [r11, #FIX_M_0_899976223_ID]
255
mul r8, r9, r8 @ r8 = 'z5'
256
ldr r9, [r11, #FIX_M_2_562915447_ID]
257
mul r2, r10, r2 @ r2 = 'z1'
258
ldr r10, [r11, #FIX_M_1_961570560_ID]
259
mul r0, r9, r0 @ r0 = 'z2'
260
ldr r9, [r11, #FIX_M_0_390180644_ID]
261
mla r4, r10, r4, r8 @ r4 = 'z3'
262
ldr r10, [r11, #FIX_0_298631336_ID]
263
mla r6, r9, r6, r8 @ r6 = 'z4'
264
ldr r9, [r11, #FIX_2_053119869_ID]
265
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
266
ldr r10, [r11, #FIX_3_072711026_ID]
267
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
268
ldr r9, [r11, #FIX_1_501321110_ID]
269
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
270
add r7, r7, r4 @ r7 = tmp0
271
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
272
add r5, r5, r6 @ r5 = tmp1
273
add r3, r3, r4 @ r3 = tmp2
274
add r1, r1, r6 @ r1 = tmp3
276
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
277
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
279
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
283
strh r8, [lr, #( 0*8)]
285
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
289
strh r8, [lr, #(14*8)]
291
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
295
strh r8, [lr, #( 2*8)]
297
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
301
strh r8, [lr, #(12*8)]
303
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
307
strh r8, [lr, #( 4*8)]
309
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
313
strh r8, [lr, #(10*8)]
315
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
319
strh r8, [lr, #( 6*8)]
321
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
325
strh r8, [lr, #( 8*8)]
215
ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0'
216
ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2'
217
ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4'
218
ldrsh r6, [lr, #(12*8)] @ r6 = 'd6'
220
ldr r3, [r11, #FIX_0_541196100_ID]
222
ldr r5, [r11, #FIX_M_1_847759065_ID]
223
mul r1, r3, r1 @ r1 = z1
224
ldr r3, [r11, #FIX_0_765366865_ID]
225
mla r6, r5, r6, r1 @ r6 = tmp2
226
add r5, r0, r4 @ r5 = tmp0
227
mla r2, r3, r2, r1 @ r2 = tmp3
228
sub r3, r0, r4 @ r3 = tmp1
230
add r0, r2, r5, lsl #13 @ r0 = tmp10
231
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
232
add r4, r6, r3, lsl #13 @ r4 = tmp11
233
rsb r6, r6, r3, lsl #13 @ r6 = tmp12
235
ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1'
236
ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3'
237
ldrsh r5, [lr, #(10*8)] @ r5 = 'd5'
238
ldrsh r7, [lr, #(14*8)] @ r7 = 'd7'
240
@ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
246
stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12 (in this pass r4 = tmp11, r6 = tmp12)
248
add r0, r3, r5 @ r0 = 'z2'
249
add r2, r1, r7 @ r2 = 'z1'
250
add r4, r3, r7 @ r4 = 'z3'
251
add r6, r1, r5 @ r6 = 'z4'
252
ldr r9, [r11, #FIX_1_175875602_ID]
254
ldr r10, [r11, #FIX_M_0_899976223_ID]
255
mul r8, r9, r8 @ r8 = 'z5'
256
ldr r9, [r11, #FIX_M_2_562915447_ID]
257
mul r2, r10, r2 @ r2 = 'z1'
258
ldr r10, [r11, #FIX_M_1_961570560_ID]
259
mul r0, r9, r0 @ r0 = 'z2'
260
ldr r9, [r11, #FIX_M_0_390180644_ID]
261
mla r4, r10, r4, r8 @ r4 = 'z3'
262
ldr r10, [r11, #FIX_0_298631336_ID]
263
mla r6, r9, r6, r8 @ r6 = 'z4'
264
ldr r9, [r11, #FIX_2_053119869_ID]
265
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
266
ldr r10, [r11, #FIX_3_072711026_ID]
267
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
268
ldr r9, [r11, #FIX_1_501321110_ID]
269
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
270
add r7, r7, r4 @ r7 = tmp0
271
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
272
add r5, r5, r6 @ r5 = tmp1
273
add r3, r3, r4 @ r3 = tmp2
274
add r1, r1, r6 @ r1 = tmp3
276
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
277
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
279
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
283
strh r8, [lr, #( 0*8)]
285
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
289
strh r8, [lr, #(14*8)]
291
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
295
strh r8, [lr, #( 2*8)]
297
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
301
strh r8, [lr, #(12*8)]
303
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
307
strh r8, [lr, #( 4*8)]
309
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
313
strh r8, [lr, #(10*8)]
315
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
319
strh r8, [lr, #( 6*8)]
321
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
325
strh r8, [lr, #( 8*8)]
333
333
empty_odd_column:
334
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
335
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
338
strh r0, [lr, #( 0*8)]
339
strh r0, [lr, #(14*8)]
341
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
342
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
345
strh r4, [lr, #( 2*8)]
346
strh r4, [lr, #(12*8)]
348
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
349
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
352
strh r6, [lr, #( 4*8)]
353
strh r6, [lr, #(10*8)]
355
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
356
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
359
strh r2, [lr, #( 6*8)]
360
strh r2, [lr, #( 8*8)]
370
ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return
334
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
335
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
338
strh r0, [lr, #( 0*8)]
339
strh r0, [lr, #(14*8)]
341
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
342
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
345
strh r4, [lr, #( 2*8)]
346
strh r4, [lr, #(12*8)]
348
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
349
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
352
strh r6, [lr, #( 4*8)]
353
strh r6, [lr, #(10*8)]
355
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
356
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
359
strh r2, [lr, #( 6*8)]
360
strh r2, [lr, #( 8*8)]
370
ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return
374
.word FIX_0_298631336
375
.word FIX_0_541196100
376
.word FIX_0_765366865
377
.word FIX_1_175875602
378
.word FIX_1_501321110
379
.word FIX_2_053119869
380
.word FIX_3_072711026
381
.word FIX_M_0_390180644
382
.word FIX_M_0_899976223
383
.word FIX_M_1_847759065
384
.word FIX_M_1_961570560
385
.word FIX_M_2_562915447
374
.word FIX_0_298631336
375
.word FIX_0_541196100
376
.word FIX_0_765366865
377
.word FIX_1_175875602
378
.word FIX_1_501321110
379
.word FIX_2_053119869
380
.word FIX_3_072711026
381
.word FIX_M_0_390180644
382
.word FIX_M_0_899976223
383
.word FIX_M_1_847759065
384
.word FIX_M_1_961570560
385
.word FIX_M_2_562915447