~ubuntu-branches/ubuntu/utopic/libav/utopic-proposed

« back to all changes in this revision

Viewing changes to libavcodec/aarch64/h264cmc_neon.S

  • Committer: Package Import Robot
  • Author(s): Reinhard Tartler, Rico Tzschichholz
  • Date: 2014-08-30 11:02:45 UTC
  • mfrom: (1.3.47 sid)
  • Revision ID: package-import@ubuntu.com-20140830110245-io3dg7q85wfr7125
Tags: 6:11~beta1-2
[ Reinhard Tartler ]
* Make libavcodec-dev depend on libavresample-dev

[ Rico Tzschichholz ]
* Some fixes and leftovers from soname bumps

Show diffs side-by-side

added added

removed removed

Lines of Context:
95
95
        b.gt            1b
96
96
        ret
97
97
 
98
 
2:      tst             w6,  w6
99
 
        add             w12, w12, w6
 
98
2:      adds            w12, w12, w6
100
99
        dup             v0.8B, w4
 
100
        b.eq            5f
 
101
        tst             w6,  w6
101
102
        dup             v1.8B, w12
102
103
        b.eq            4f
103
104
 
161
162
        st1             {v17.8B}, [x0], x2
162
163
        b.gt            4b
163
164
        ret
 
165
 
 
166
5:      ld1             {v4.8B}, [x1], x2
 
167
        ld1             {v5.8B}, [x1], x2
 
168
        prfm            pldl1strm, [x1]
 
169
        subs            w3,  w3,  #2
 
170
        umull           v16.8H, v4.8B, v0.8B
 
171
        umull           v17.8H, v5.8B, v0.8B
 
172
        prfm            pldl1strm, [x1, x2]
 
173
  .ifc \codec,h264
 
174
        rshrn           v16.8B, v16.8H, #6
 
175
        rshrn           v17.8B, v17.8H, #6
 
176
  .else
 
177
        add             v16.8H, v16.8H, v22.8H
 
178
        add             v17.8H, v17.8H, v22.8H
 
179
        shrn            v16.8B, v16.8H, #6
 
180
        shrn            v17.8B, v17.8H, #6
 
181
  .endif
 
182
  .ifc \type,avg
 
183
        ld1             {v20.8B}, [x8], x2
 
184
        ld1             {v21.8B}, [x8], x2
 
185
        urhadd          v16.8B, v16.8B, v20.8B
 
186
        urhadd          v17.8B, v17.8B, v21.8B
 
187
  .endif
 
188
        st1             {v16.8B}, [x0], x2
 
189
        st1             {v17.8B}, [x0], x2
 
190
        b.gt            5b
 
191
        ret
164
192
endfunc
165
193
.endm
166
194
 
238
266
        b.gt            1b
239
267
        ret
240
268
 
241
 
2:      tst             w6,  w6
242
 
        add             w12, w12, w6
 
269
2:      adds            w12, w12, w6
243
270
        dup             v30.8B, w4
 
271
        b.eq            5f
 
272
        tst             w6,  w6
244
273
        dup             v31.8B, w12
245
274
        trn1            v0.2S,  v30.2S, v31.2S
246
275
        trn2            v1.2S,  v30.2S, v31.2S
303
332
        st1             {v16.S}[1], [x0], x2
304
333
        b.gt            4b
305
334
        ret
 
335
 
 
336
5:      ld1             {v4.S}[0], [x1], x2
 
337
        ld1             {v4.S}[1], [x1], x2
 
338
        umull           v18.8H, v4.8B,  v30.8B
 
339
        subs            w3,  w3,  #2
 
340
        prfm            pldl1strm, [x1]
 
341
  .ifc \codec,h264
 
342
        rshrn           v16.8B, v18.8H, #6
 
343
  .else
 
344
        add             v18.8H, v18.8H, v22.8H
 
345
        shrn            v16.8B, v18.8H, #6
 
346
  .endif
 
347
  .ifc \type,avg
 
348
        ld1             {v20.S}[0], [x8], x2
 
349
        ld1             {v20.S}[1], [x8], x2
 
350
        urhadd          v16.8B, v16.8B, v20.8B
 
351
  .endif
 
352
        prfm            pldl1strm, [x1]
 
353
        st1             {v16.S}[0], [x0], x2
 
354
        st1             {v16.S}[1], [x0], x2
 
355
        b.gt            5b
 
356
        ret
306
357
endfunc
307
358
.endm
308
359