2
* ARM NEON optimised DSP functions
3
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
5
* This file is part of Libav.
7
* Libav is free software; you can redistribute it and/or
8
* modify it under the terms of the GNU Lesser General Public
9
* License as published by the Free Software Foundation; either
10
* version 2.1 of the License, or (at your option) any later version.
12
* Libav is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
* Lesser General Public License for more details.
17
* You should have received a copy of the GNU Lesser General Public
18
* License along with Libav; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25
#include "libavutil/attributes.h"
26
#include "libavutil/cpu.h"
27
#include "libavutil/aarch64/cpu.h"
28
#include "libavcodec/h264qpel.h"
30
void ff_put_h264_qpel16_mc00_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
31
void ff_put_h264_qpel16_mc10_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
32
void ff_put_h264_qpel16_mc20_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
33
void ff_put_h264_qpel16_mc30_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
34
void ff_put_h264_qpel16_mc01_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
35
void ff_put_h264_qpel16_mc11_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
36
void ff_put_h264_qpel16_mc21_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
37
void ff_put_h264_qpel16_mc31_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
38
void ff_put_h264_qpel16_mc02_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
39
void ff_put_h264_qpel16_mc12_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
40
void ff_put_h264_qpel16_mc22_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
41
void ff_put_h264_qpel16_mc32_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
42
void ff_put_h264_qpel16_mc03_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
43
void ff_put_h264_qpel16_mc13_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
44
void ff_put_h264_qpel16_mc23_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
45
void ff_put_h264_qpel16_mc33_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
47
void ff_put_h264_qpel8_mc00_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
48
void ff_put_h264_qpel8_mc10_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
49
void ff_put_h264_qpel8_mc20_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
50
void ff_put_h264_qpel8_mc30_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
51
void ff_put_h264_qpel8_mc01_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
52
void ff_put_h264_qpel8_mc11_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
53
void ff_put_h264_qpel8_mc21_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
54
void ff_put_h264_qpel8_mc31_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
55
void ff_put_h264_qpel8_mc02_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
56
void ff_put_h264_qpel8_mc12_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
57
void ff_put_h264_qpel8_mc22_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
58
void ff_put_h264_qpel8_mc32_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
59
void ff_put_h264_qpel8_mc03_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
60
void ff_put_h264_qpel8_mc13_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
61
void ff_put_h264_qpel8_mc23_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
62
void ff_put_h264_qpel8_mc33_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
64
void ff_avg_h264_qpel16_mc00_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
65
void ff_avg_h264_qpel16_mc10_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
66
void ff_avg_h264_qpel16_mc20_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
67
void ff_avg_h264_qpel16_mc30_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
68
void ff_avg_h264_qpel16_mc01_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
69
void ff_avg_h264_qpel16_mc11_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
70
void ff_avg_h264_qpel16_mc21_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
71
void ff_avg_h264_qpel16_mc31_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
72
void ff_avg_h264_qpel16_mc02_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
73
void ff_avg_h264_qpel16_mc12_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
74
void ff_avg_h264_qpel16_mc22_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
75
void ff_avg_h264_qpel16_mc32_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
76
void ff_avg_h264_qpel16_mc03_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
77
void ff_avg_h264_qpel16_mc13_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
78
void ff_avg_h264_qpel16_mc23_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
79
void ff_avg_h264_qpel16_mc33_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
81
void ff_avg_h264_qpel8_mc00_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
82
void ff_avg_h264_qpel8_mc10_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
83
void ff_avg_h264_qpel8_mc20_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
84
void ff_avg_h264_qpel8_mc30_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
85
void ff_avg_h264_qpel8_mc01_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
86
void ff_avg_h264_qpel8_mc11_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
87
void ff_avg_h264_qpel8_mc21_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
88
void ff_avg_h264_qpel8_mc31_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
89
void ff_avg_h264_qpel8_mc02_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
90
void ff_avg_h264_qpel8_mc12_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
91
void ff_avg_h264_qpel8_mc22_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
92
void ff_avg_h264_qpel8_mc32_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
93
void ff_avg_h264_qpel8_mc03_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
94
void ff_avg_h264_qpel8_mc13_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
95
void ff_avg_h264_qpel8_mc23_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
96
void ff_avg_h264_qpel8_mc33_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
98
av_cold void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth)
100
const int high_bit_depth = bit_depth > 8;
101
int cpu_flags = av_get_cpu_flags();
103
if (have_neon(cpu_flags) && !high_bit_depth) {
104
c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon;
105
c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon;
106
c->put_h264_qpel_pixels_tab[0][ 2] = ff_put_h264_qpel16_mc20_neon;
107
c->put_h264_qpel_pixels_tab[0][ 3] = ff_put_h264_qpel16_mc30_neon;
108
c->put_h264_qpel_pixels_tab[0][ 4] = ff_put_h264_qpel16_mc01_neon;
109
c->put_h264_qpel_pixels_tab[0][ 5] = ff_put_h264_qpel16_mc11_neon;
110
c->put_h264_qpel_pixels_tab[0][ 6] = ff_put_h264_qpel16_mc21_neon;
111
c->put_h264_qpel_pixels_tab[0][ 7] = ff_put_h264_qpel16_mc31_neon;
112
c->put_h264_qpel_pixels_tab[0][ 8] = ff_put_h264_qpel16_mc02_neon;
113
c->put_h264_qpel_pixels_tab[0][ 9] = ff_put_h264_qpel16_mc12_neon;
114
c->put_h264_qpel_pixels_tab[0][10] = ff_put_h264_qpel16_mc22_neon;
115
c->put_h264_qpel_pixels_tab[0][11] = ff_put_h264_qpel16_mc32_neon;
116
c->put_h264_qpel_pixels_tab[0][12] = ff_put_h264_qpel16_mc03_neon;
117
c->put_h264_qpel_pixels_tab[0][13] = ff_put_h264_qpel16_mc13_neon;
118
c->put_h264_qpel_pixels_tab[0][14] = ff_put_h264_qpel16_mc23_neon;
119
c->put_h264_qpel_pixels_tab[0][15] = ff_put_h264_qpel16_mc33_neon;
121
c->put_h264_qpel_pixels_tab[1][ 0] = ff_put_h264_qpel8_mc00_neon;
122
c->put_h264_qpel_pixels_tab[1][ 1] = ff_put_h264_qpel8_mc10_neon;
123
c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_neon;
124
c->put_h264_qpel_pixels_tab[1][ 3] = ff_put_h264_qpel8_mc30_neon;
125
c->put_h264_qpel_pixels_tab[1][ 4] = ff_put_h264_qpel8_mc01_neon;
126
c->put_h264_qpel_pixels_tab[1][ 5] = ff_put_h264_qpel8_mc11_neon;
127
c->put_h264_qpel_pixels_tab[1][ 6] = ff_put_h264_qpel8_mc21_neon;
128
c->put_h264_qpel_pixels_tab[1][ 7] = ff_put_h264_qpel8_mc31_neon;
129
c->put_h264_qpel_pixels_tab[1][ 8] = ff_put_h264_qpel8_mc02_neon;
130
c->put_h264_qpel_pixels_tab[1][ 9] = ff_put_h264_qpel8_mc12_neon;
131
c->put_h264_qpel_pixels_tab[1][10] = ff_put_h264_qpel8_mc22_neon;
132
c->put_h264_qpel_pixels_tab[1][11] = ff_put_h264_qpel8_mc32_neon;
133
c->put_h264_qpel_pixels_tab[1][12] = ff_put_h264_qpel8_mc03_neon;
134
c->put_h264_qpel_pixels_tab[1][13] = ff_put_h264_qpel8_mc13_neon;
135
c->put_h264_qpel_pixels_tab[1][14] = ff_put_h264_qpel8_mc23_neon;
136
c->put_h264_qpel_pixels_tab[1][15] = ff_put_h264_qpel8_mc33_neon;
138
c->avg_h264_qpel_pixels_tab[0][ 0] = ff_avg_h264_qpel16_mc00_neon;
139
c->avg_h264_qpel_pixels_tab[0][ 1] = ff_avg_h264_qpel16_mc10_neon;
140
c->avg_h264_qpel_pixels_tab[0][ 2] = ff_avg_h264_qpel16_mc20_neon;
141
c->avg_h264_qpel_pixels_tab[0][ 3] = ff_avg_h264_qpel16_mc30_neon;
142
c->avg_h264_qpel_pixels_tab[0][ 4] = ff_avg_h264_qpel16_mc01_neon;
143
c->avg_h264_qpel_pixels_tab[0][ 5] = ff_avg_h264_qpel16_mc11_neon;
144
c->avg_h264_qpel_pixels_tab[0][ 6] = ff_avg_h264_qpel16_mc21_neon;
145
c->avg_h264_qpel_pixels_tab[0][ 7] = ff_avg_h264_qpel16_mc31_neon;
146
c->avg_h264_qpel_pixels_tab[0][ 8] = ff_avg_h264_qpel16_mc02_neon;
147
c->avg_h264_qpel_pixels_tab[0][ 9] = ff_avg_h264_qpel16_mc12_neon;
148
c->avg_h264_qpel_pixels_tab[0][10] = ff_avg_h264_qpel16_mc22_neon;
149
c->avg_h264_qpel_pixels_tab[0][11] = ff_avg_h264_qpel16_mc32_neon;
150
c->avg_h264_qpel_pixels_tab[0][12] = ff_avg_h264_qpel16_mc03_neon;
151
c->avg_h264_qpel_pixels_tab[0][13] = ff_avg_h264_qpel16_mc13_neon;
152
c->avg_h264_qpel_pixels_tab[0][14] = ff_avg_h264_qpel16_mc23_neon;
153
c->avg_h264_qpel_pixels_tab[0][15] = ff_avg_h264_qpel16_mc33_neon;
155
c->avg_h264_qpel_pixels_tab[1][ 0] = ff_avg_h264_qpel8_mc00_neon;
156
c->avg_h264_qpel_pixels_tab[1][ 1] = ff_avg_h264_qpel8_mc10_neon;
157
c->avg_h264_qpel_pixels_tab[1][ 2] = ff_avg_h264_qpel8_mc20_neon;
158
c->avg_h264_qpel_pixels_tab[1][ 3] = ff_avg_h264_qpel8_mc30_neon;
159
c->avg_h264_qpel_pixels_tab[1][ 4] = ff_avg_h264_qpel8_mc01_neon;
160
c->avg_h264_qpel_pixels_tab[1][ 5] = ff_avg_h264_qpel8_mc11_neon;
161
c->avg_h264_qpel_pixels_tab[1][ 6] = ff_avg_h264_qpel8_mc21_neon;
162
c->avg_h264_qpel_pixels_tab[1][ 7] = ff_avg_h264_qpel8_mc31_neon;
163
c->avg_h264_qpel_pixels_tab[1][ 8] = ff_avg_h264_qpel8_mc02_neon;
164
c->avg_h264_qpel_pixels_tab[1][ 9] = ff_avg_h264_qpel8_mc12_neon;
165
c->avg_h264_qpel_pixels_tab[1][10] = ff_avg_h264_qpel8_mc22_neon;
166
c->avg_h264_qpel_pixels_tab[1][11] = ff_avg_h264_qpel8_mc32_neon;
167
c->avg_h264_qpel_pixels_tab[1][12] = ff_avg_h264_qpel8_mc03_neon;
168
c->avg_h264_qpel_pixels_tab[1][13] = ff_avg_h264_qpel8_mc13_neon;
169
c->avg_h264_qpel_pixels_tab[1][14] = ff_avg_h264_qpel8_mc23_neon;
170
c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;