2
* Optimization of some functions from mpegvideo.c for armv5te
3
* Copyright (c) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
5
* This file is part of FFmpeg.
7
* FFmpeg is free software; you can redistribute it and/or
8
* modify it under the terms of the GNU Lesser General Public
9
* License as published by the Free Software Foundation; either
10
* version 2.1 of the License, or (at your option) any later version.
12
* FFmpeg is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
* Lesser General Public License for more details.
17
* You should have received a copy of the GNU Lesser General Public
18
* License along with FFmpeg; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26
* Specially optimized version of dct_unquantize_h263_helper_c; it
27
* requires the block to be at least 8-byte aligned, and may process
28
* more elements than requested. But it is guaranteed to never
29
* process more than 64 elements provided that the count argument is <= 64,
30
* so it is safe. This function is optimized for a common distribution
31
* of values for nCoeffs (they are mostly a multiple of 8 plus one or
32
* two extra elements). So this function processes data as 8 elements
33
* per loop iteration and contains optional 2 elements processing at the end.
36
* Inner loop should take 6 cycles per element on arm926ej-s (Nokia 770)
38
function ff_dct_unquantize_h263_armv5te, export=1
47
rsbs r9, ip, r4, asr #16
50
smlatbne r9, r4, r1, r9
52
rsbs lr, ip, r5, asr #16
55
smlatbne lr, r5, r1, lr
57
rsbs r8, ip, r4, asl #16
60
smlabbne r4, r4, r1, r8
62
rsbs r8, ip, r5, asl #16
65
smlabbne r5, r5, r1, r8
72
rsbs r9, ip, r6, asr #16
75
smlatbne r9, r6, r1, r9
77
rsbs lr, ip, r7, asr #16
80
smlatbne lr, r7, r1, lr
82
rsbs r8, ip, r6, asl #16
85
smlabbne r6, r6, r1, r8
87
rsbs r8, ip, r7, asl #16
90
smlabbne r7, r7, r1, r8
98
ldrgtd r4, [r0, #0] /* load data early to avoid load/use pipeline stall */
109
smlabbne r9, r9, r1, r8
113
smlabbne lr, lr, r1, r8