/*
 * motion_comp_altivec.c
 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 * See http://libmpeg2.sourceforge.net/ for updates.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <inttypes.h>

#include "attributes.h"
#include "mpeg2_internal.h"
37
typedef vector signed char vector_s8_t;
38
typedef vector unsigned char vector_u8_t;
39
typedef vector signed short vector_s16_t;
40
typedef vector unsigned short vector_u16_t;
41
typedef vector signed int vector_s32_t;
42
typedef vector unsigned int vector_u32_t;
44
#ifndef COFFEE_BREAK /* Workarounds for gcc suckage */
46
static inline vector_u8_t my_vec_ld (int const A, const uint8_t * const B)
48
return vec_ld (A, (uint8_t *)B);
51
#define vec_ld my_vec_ld
53
static inline vector_u8_t my_vec_and (vector_u8_t const A, vector_u8_t const B)
55
return vec_and (A, B);
58
#define vec_and my_vec_and
60
static inline vector_u8_t my_vec_avg (vector_u8_t const A, vector_u8_t const B)
62
return vec_avg (A, B);
65
#define vec_avg my_vec_avg
69
static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref,
70
const int stride, int height)
72
vector_u8_t perm, ref0, ref1, tmp;
74
perm = vec_lvsl (0, ref);
76
height = (height >> 1) - 1;
78
ref0 = vec_ld (0, ref);
79
ref1 = vec_ld (15, ref);
81
tmp = vec_perm (ref0, ref1, perm);
84
ref0 = vec_ld (0, ref);
85
ref1 = vec_ld (15, ref);
87
vec_st (tmp, 0, dest);
88
tmp = vec_perm (ref0, ref1, perm);
90
ref0 = vec_ld (0, ref);
91
ref1 = vec_ld (15, ref);
93
vec_st (tmp, stride, dest);
95
tmp = vec_perm (ref0, ref1, perm);
98
ref0 = vec_ld (0, ref);
99
ref1 = vec_ld (15, ref);
100
vec_st (tmp, 0, dest);
101
tmp = vec_perm (ref0, ref1, perm);
102
vec_st (tmp, stride, dest);
105
static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref,
106
const int stride, int height)
108
vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1;
110
tmp0 = vec_lvsl (0, ref);
111
tmp0 = vec_mergeh (tmp0, tmp0);
112
perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
113
tmp1 = vec_lvsl (stride, ref);
114
tmp1 = vec_mergeh (tmp1, tmp1);
115
perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
117
height = (height >> 1) - 1;
119
ref0 = vec_ld (0, ref);
120
ref1 = vec_ld (7, ref);
122
tmp0 = vec_perm (ref0, ref1, perm0);
125
ref0 = vec_ld (0, ref);
126
ref1 = vec_ld (7, ref);
128
vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
129
vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
131
tmp1 = vec_perm (ref0, ref1, perm1);
133
ref0 = vec_ld (0, ref);
134
ref1 = vec_ld (7, ref);
136
vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
137
vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
139
tmp0 = vec_perm (ref0, ref1, perm0);
142
ref0 = vec_ld (0, ref);
143
ref1 = vec_ld (7, ref);
144
vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
145
vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
147
tmp1 = vec_perm (ref0, ref1, perm1);
148
vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
149
vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
152
static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref,
153
const int stride, int height)
155
vector_u8_t permA, permB, ref0, ref1, tmp;
157
permA = vec_lvsl (0, ref);
158
permB = vec_add (permA, vec_splat_u8 (1));
160
height = (height >> 1) - 1;
162
ref0 = vec_ld (0, ref);
163
ref1 = vec_ld (16, ref);
165
tmp = vec_avg (vec_perm (ref0, ref1, permA),
166
vec_perm (ref0, ref1, permB));
169
ref0 = vec_ld (0, ref);
170
ref1 = vec_ld (16, ref);
172
vec_st (tmp, 0, dest);
173
tmp = vec_avg (vec_perm (ref0, ref1, permA),
174
vec_perm (ref0, ref1, permB));
176
ref0 = vec_ld (0, ref);
177
ref1 = vec_ld (16, ref);
179
vec_st (tmp, stride, dest);
181
tmp = vec_avg (vec_perm (ref0, ref1, permA),
182
vec_perm (ref0, ref1, permB));
185
ref0 = vec_ld (0, ref);
186
ref1 = vec_ld (16, ref);
187
vec_st (tmp, 0, dest);
188
tmp = vec_avg (vec_perm (ref0, ref1, permA),
189
vec_perm (ref0, ref1, permB));
190
vec_st (tmp, stride, dest);
193
static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref,
194
const int stride, int height)
196
vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
198
ones = vec_splat_u8 (1);
199
tmp0 = vec_lvsl (0, ref);
200
tmp0 = vec_mergeh (tmp0, tmp0);
201
perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
202
perm0B = vec_add (perm0A, ones);
203
tmp1 = vec_lvsl (stride, ref);
204
tmp1 = vec_mergeh (tmp1, tmp1);
205
perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
206
perm1B = vec_add (perm1A, ones);
208
height = (height >> 1) - 1;
210
ref0 = vec_ld (0, ref);
211
ref1 = vec_ld (8, ref);
213
tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
214
vec_perm (ref0, ref1, perm0B));
217
ref0 = vec_ld (0, ref);
218
ref1 = vec_ld (8, ref);
220
vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
221
vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
223
tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
224
vec_perm (ref0, ref1, perm1B));
226
ref0 = vec_ld (0, ref);
227
ref1 = vec_ld (8, ref);
229
vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
230
vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
232
tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
233
vec_perm (ref0, ref1, perm0B));
236
ref0 = vec_ld (0, ref);
237
ref1 = vec_ld (8, ref);
238
vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
239
vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
241
tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
242
vec_perm (ref0, ref1, perm1B));
243
vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
244
vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
247
static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref,
248
const int stride, int height)
250
vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;
252
perm = vec_lvsl (0, ref);
254
height = (height >> 1) - 1;
256
ref0 = vec_ld (0, ref);
257
ref1 = vec_ld (15, ref);
259
tmp0 = vec_perm (ref0, ref1, perm);
260
ref0 = vec_ld (0, ref);
261
ref1 = vec_ld (15, ref);
263
tmp1 = vec_perm (ref0, ref1, perm);
264
tmp = vec_avg (tmp0, tmp1);
267
ref0 = vec_ld (0, ref);
268
ref1 = vec_ld (15, ref);
270
vec_st (tmp, 0, dest);
271
tmp0 = vec_perm (ref0, ref1, perm);
272
tmp = vec_avg (tmp0, tmp1);
274
ref0 = vec_ld (0, ref);
275
ref1 = vec_ld (15, ref);
277
vec_st (tmp, stride, dest);
279
tmp1 = vec_perm (ref0, ref1, perm);
280
tmp = vec_avg (tmp0, tmp1);
283
ref0 = vec_ld (0, ref);
284
ref1 = vec_ld (15, ref);
285
vec_st (tmp, 0, dest);
286
tmp0 = vec_perm (ref0, ref1, perm);
287
tmp = vec_avg (tmp0, tmp1);
288
vec_st (tmp, stride, dest);
291
static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref,
292
const int stride, int height)
294
vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1;
296
tmp0 = vec_lvsl (0, ref);
297
tmp0 = vec_mergeh (tmp0, tmp0);
298
perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
299
tmp1 = vec_lvsl (stride, ref);
300
tmp1 = vec_mergeh (tmp1, tmp1);
301
perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
303
height = (height >> 1) - 1;
305
ref0 = vec_ld (0, ref);
306
ref1 = vec_ld (7, ref);
308
tmp0 = vec_perm (ref0, ref1, perm0);
309
ref0 = vec_ld (0, ref);
310
ref1 = vec_ld (7, ref);
312
tmp1 = vec_perm (ref0, ref1, perm1);
313
tmp = vec_avg (tmp0, tmp1);
316
ref0 = vec_ld (0, ref);
317
ref1 = vec_ld (7, ref);
319
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
320
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
322
tmp0 = vec_perm (ref0, ref1, perm0);
323
tmp = vec_avg (tmp0, tmp1);
325
ref0 = vec_ld (0, ref);
326
ref1 = vec_ld (7, ref);
328
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
329
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
331
tmp1 = vec_perm (ref0, ref1, perm1);
332
tmp = vec_avg (tmp0, tmp1);
335
ref0 = vec_ld (0, ref);
336
ref1 = vec_ld (7, ref);
337
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
338
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
340
tmp0 = vec_perm (ref0, ref1, perm0);
341
tmp = vec_avg (tmp0, tmp1);
342
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
343
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
346
static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
347
const int stride, int height)
349
vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
352
ones = vec_splat_u8 (1);
353
permA = vec_lvsl (0, ref);
354
permB = vec_add (permA, ones);
356
height = (height >> 1) - 1;
358
ref0 = vec_ld (0, ref);
359
ref1 = vec_ld (16, ref);
361
A = vec_perm (ref0, ref1, permA);
362
B = vec_perm (ref0, ref1, permB);
363
avg0 = vec_avg (A, B);
364
xor0 = vec_xor (A, B);
366
ref0 = vec_ld (0, ref);
367
ref1 = vec_ld (16, ref);
369
A = vec_perm (ref0, ref1, permA);
370
B = vec_perm (ref0, ref1, permB);
371
avg1 = vec_avg (A, B);
372
xor1 = vec_xor (A, B);
373
tmp = vec_sub (vec_avg (avg0, avg1),
374
vec_and (vec_and (ones, vec_or (xor0, xor1)),
375
vec_xor (avg0, avg1)));
378
ref0 = vec_ld (0, ref);
379
ref1 = vec_ld (16, ref);
381
vec_st (tmp, 0, dest);
382
A = vec_perm (ref0, ref1, permA);
383
B = vec_perm (ref0, ref1, permB);
384
avg0 = vec_avg (A, B);
385
xor0 = vec_xor (A, B);
386
tmp = vec_sub (vec_avg (avg0, avg1),
387
vec_and (vec_and (ones, vec_or (xor0, xor1)),
388
vec_xor (avg0, avg1)));
390
ref0 = vec_ld (0, ref);
391
ref1 = vec_ld (16, ref);
393
vec_st (tmp, stride, dest);
395
A = vec_perm (ref0, ref1, permA);
396
B = vec_perm (ref0, ref1, permB);
397
avg1 = vec_avg (A, B);
398
xor1 = vec_xor (A, B);
399
tmp = vec_sub (vec_avg (avg0, avg1),
400
vec_and (vec_and (ones, vec_or (xor0, xor1)),
401
vec_xor (avg0, avg1)));
404
ref0 = vec_ld (0, ref);
405
ref1 = vec_ld (16, ref);
406
vec_st (tmp, 0, dest);
407
A = vec_perm (ref0, ref1, permA);
408
B = vec_perm (ref0, ref1, permB);
409
avg0 = vec_avg (A, B);
410
xor0 = vec_xor (A, B);
411
tmp = vec_sub (vec_avg (avg0, avg1),
412
vec_and (vec_and (ones, vec_or (xor0, xor1)),
413
vec_xor (avg0, avg1)));
414
vec_st (tmp, stride, dest);
417
static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
418
const int stride, int height)
420
vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
421
vector_u8_t avg0, avg1, xor0, xor1, tmp, ones;
423
ones = vec_splat_u8 (1);
424
perm0A = vec_lvsl (0, ref);
425
perm0A = vec_mergeh (perm0A, perm0A);
426
perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
427
perm0B = vec_add (perm0A, ones);
428
perm1A = vec_lvsl (stride, ref);
429
perm1A = vec_mergeh (perm1A, perm1A);
430
perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
431
perm1B = vec_add (perm1A, ones);
433
height = (height >> 1) - 1;
435
ref0 = vec_ld (0, ref);
436
ref1 = vec_ld (8, ref);
438
A = vec_perm (ref0, ref1, perm0A);
439
B = vec_perm (ref0, ref1, perm0B);
440
avg0 = vec_avg (A, B);
441
xor0 = vec_xor (A, B);
443
ref0 = vec_ld (0, ref);
444
ref1 = vec_ld (8, ref);
446
A = vec_perm (ref0, ref1, perm1A);
447
B = vec_perm (ref0, ref1, perm1B);
448
avg1 = vec_avg (A, B);
449
xor1 = vec_xor (A, B);
450
tmp = vec_sub (vec_avg (avg0, avg1),
451
vec_and (vec_and (ones, vec_or (xor0, xor1)),
452
vec_xor (avg0, avg1)));
455
ref0 = vec_ld (0, ref);
456
ref1 = vec_ld (8, ref);
458
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
459
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
461
A = vec_perm (ref0, ref1, perm0A);
462
B = vec_perm (ref0, ref1, perm0B);
463
avg0 = vec_avg (A, B);
464
xor0 = vec_xor (A, B);
465
tmp = vec_sub (vec_avg (avg0, avg1),
466
vec_and (vec_and (ones, vec_or (xor0, xor1)),
467
vec_xor (avg0, avg1)));
469
ref0 = vec_ld (0, ref);
470
ref1 = vec_ld (8, ref);
472
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
473
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
475
A = vec_perm (ref0, ref1, perm1A);
476
B = vec_perm (ref0, ref1, perm1B);
477
avg1 = vec_avg (A, B);
478
xor1 = vec_xor (A, B);
479
tmp = vec_sub (vec_avg (avg0, avg1),
480
vec_and (vec_and (ones, vec_or (xor0, xor1)),
481
vec_xor (avg0, avg1)));
484
ref0 = vec_ld (0, ref);
485
ref1 = vec_ld (8, ref);
486
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
487
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
489
A = vec_perm (ref0, ref1, perm0A);
490
B = vec_perm (ref0, ref1, perm0B);
491
avg0 = vec_avg (A, B);
492
xor0 = vec_xor (A, B);
493
tmp = vec_sub (vec_avg (avg0, avg1),
494
vec_and (vec_and (ones, vec_or (xor0, xor1)),
495
vec_xor (avg0, avg1)));
496
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
497
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
501
#if 0
/* Disabled reference implementation of the diagonal half-pel 8-wide "put"
 * predictor: widens pixels to 16 bits and computes (A+B+C+D+2)>>2 directly.
 * Kept for documentation of the rounding the avg/xor trick above must
 * reproduce; compiling it would redefine MC_put_xy_8_altivec. */
static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
                                 const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones;
    vector_u16_t splat2, temp;

    ones = vec_splat_u8 (1);
    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, ones);

    zero = vec_splat_u8 (0);
    splat2 = vec_splat_u16 (2);

    do {
        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (8, ref);
        ref += stride;
        A = vec_perm (ref0, ref1, permA);
        B = vec_perm (ref0, ref1, permB);
        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (8, ref);
        C = vec_perm (ref0, ref1, permA);
        D = vec_perm (ref0, ref1, permB);

        /* widen to u16, sum the four neighbors, add 2 and shift right 2 */
        temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A),
                                 (vector_u16_t)vec_mergeh (zero, B)),
                        vec_add ((vector_u16_t)vec_mergeh (zero, C),
                                 (vector_u16_t)vec_mergeh (zero, D)));
        temp = vec_sr (vec_add (temp, splat2), splat2);
        tmp = vec_pack (temp, temp);

        vec_st (tmp, 0, dest);
        dest += stride;
        tmp = vec_avg (vec_perm (ref0, ref1, permA),
                       vec_perm (ref0, ref1, permB));
    } while (--height);
}
#endif
540
static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref,
541
const int stride, int height)
543
vector_u8_t perm, ref0, ref1, tmp, prev;
545
perm = vec_lvsl (0, ref);
547
height = (height >> 1) - 1;
549
ref0 = vec_ld (0, ref);
550
ref1 = vec_ld (15, ref);
552
prev = vec_ld (0, dest);
553
tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
556
ref0 = vec_ld (0, ref);
557
ref1 = vec_ld (15, ref);
559
prev = vec_ld (stride, dest);
560
vec_st (tmp, 0, dest);
561
tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
563
ref0 = vec_ld (0, ref);
564
ref1 = vec_ld (15, ref);
566
prev = vec_ld (2*stride, dest);
567
vec_st (tmp, stride, dest);
569
tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
572
ref0 = vec_ld (0, ref);
573
ref1 = vec_ld (15, ref);
574
prev = vec_ld (stride, dest);
575
vec_st (tmp, 0, dest);
576
tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
577
vec_st (tmp, stride, dest);
580
static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref,
581
const int stride, int height)
583
vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev;
585
tmp0 = vec_lvsl (0, ref);
586
tmp0 = vec_mergeh (tmp0, tmp0);
587
perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
588
tmp1 = vec_lvsl (stride, ref);
589
tmp1 = vec_mergeh (tmp1, tmp1);
590
perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
592
height = (height >> 1) - 1;
594
ref0 = vec_ld (0, ref);
595
ref1 = vec_ld (7, ref);
597
prev = vec_ld (0, dest);
598
tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
601
ref0 = vec_ld (0, ref);
602
ref1 = vec_ld (7, ref);
604
prev = vec_ld (stride, dest);
605
vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
606
vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
608
tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
610
ref0 = vec_ld (0, ref);
611
ref1 = vec_ld (7, ref);
613
prev = vec_ld (stride, dest);
614
vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
615
vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
617
tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
620
ref0 = vec_ld (0, ref);
621
ref1 = vec_ld (7, ref);
622
prev = vec_ld (stride, dest);
623
vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
624
vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
626
tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
627
vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
628
vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
631
static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref,
632
const int stride, int height)
634
vector_u8_t permA, permB, ref0, ref1, tmp, prev;
636
permA = vec_lvsl (0, ref);
637
permB = vec_add (permA, vec_splat_u8 (1));
639
height = (height >> 1) - 1;
641
ref0 = vec_ld (0, ref);
642
ref1 = vec_ld (16, ref);
643
prev = vec_ld (0, dest);
645
tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
646
vec_perm (ref0, ref1, permB)));
649
ref0 = vec_ld (0, ref);
650
ref1 = vec_ld (16, ref);
652
prev = vec_ld (stride, dest);
653
vec_st (tmp, 0, dest);
654
tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
655
vec_perm (ref0, ref1, permB)));
657
ref0 = vec_ld (0, ref);
658
ref1 = vec_ld (16, ref);
660
prev = vec_ld (2*stride, dest);
661
vec_st (tmp, stride, dest);
663
tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
664
vec_perm (ref0, ref1, permB)));
667
ref0 = vec_ld (0, ref);
668
ref1 = vec_ld (16, ref);
669
prev = vec_ld (stride, dest);
670
vec_st (tmp, 0, dest);
671
tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
672
vec_perm (ref0, ref1, permB)));
673
vec_st (tmp, stride, dest);
676
static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref,
677
const int stride, int height)
679
vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
682
ones = vec_splat_u8 (1);
683
tmp0 = vec_lvsl (0, ref);
684
tmp0 = vec_mergeh (tmp0, tmp0);
685
perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
686
perm0B = vec_add (perm0A, ones);
687
tmp1 = vec_lvsl (stride, ref);
688
tmp1 = vec_mergeh (tmp1, tmp1);
689
perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
690
perm1B = vec_add (perm1A, ones);
692
height = (height >> 1) - 1;
694
ref0 = vec_ld (0, ref);
695
ref1 = vec_ld (8, ref);
696
prev = vec_ld (0, dest);
698
tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
699
vec_perm (ref0, ref1, perm0B)));
702
ref0 = vec_ld (0, ref);
703
ref1 = vec_ld (8, ref);
705
prev = vec_ld (stride, dest);
706
vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
707
vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
709
tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
710
vec_perm (ref0, ref1, perm1B)));
712
ref0 = vec_ld (0, ref);
713
ref1 = vec_ld (8, ref);
715
prev = vec_ld (stride, dest);
716
vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
717
vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
719
tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
720
vec_perm (ref0, ref1, perm0B)));
723
ref0 = vec_ld (0, ref);
724
ref1 = vec_ld (8, ref);
725
prev = vec_ld (stride, dest);
726
vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
727
vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
729
tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
730
vec_perm (ref0, ref1, perm1B)));
731
vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
732
vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
735
static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref,
736
const int stride, int height)
738
vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;
740
perm = vec_lvsl (0, ref);
742
height = (height >> 1) - 1;
744
ref0 = vec_ld (0, ref);
745
ref1 = vec_ld (15, ref);
747
tmp0 = vec_perm (ref0, ref1, perm);
748
ref0 = vec_ld (0, ref);
749
ref1 = vec_ld (15, ref);
751
prev = vec_ld (0, dest);
752
tmp1 = vec_perm (ref0, ref1, perm);
753
tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
756
ref0 = vec_ld (0, ref);
757
ref1 = vec_ld (15, ref);
759
prev = vec_ld (stride, dest);
760
vec_st (tmp, 0, dest);
761
tmp0 = vec_perm (ref0, ref1, perm);
762
tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
764
ref0 = vec_ld (0, ref);
765
ref1 = vec_ld (15, ref);
767
prev = vec_ld (2*stride, dest);
768
vec_st (tmp, stride, dest);
770
tmp1 = vec_perm (ref0, ref1, perm);
771
tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
774
ref0 = vec_ld (0, ref);
775
ref1 = vec_ld (15, ref);
776
prev = vec_ld (stride, dest);
777
vec_st (tmp, 0, dest);
778
tmp0 = vec_perm (ref0, ref1, perm);
779
tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
780
vec_st (tmp, stride, dest);
783
static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref,
784
const int stride, int height)
786
vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;
788
tmp0 = vec_lvsl (0, ref);
789
tmp0 = vec_mergeh (tmp0, tmp0);
790
perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
791
tmp1 = vec_lvsl (stride, ref);
792
tmp1 = vec_mergeh (tmp1, tmp1);
793
perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
795
height = (height >> 1) - 1;
797
ref0 = vec_ld (0, ref);
798
ref1 = vec_ld (7, ref);
800
tmp0 = vec_perm (ref0, ref1, perm0);
801
ref0 = vec_ld (0, ref);
802
ref1 = vec_ld (7, ref);
804
prev = vec_ld (0, dest);
805
tmp1 = vec_perm (ref0, ref1, perm1);
806
tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
809
ref0 = vec_ld (0, ref);
810
ref1 = vec_ld (7, ref);
812
prev = vec_ld (stride, dest);
813
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
814
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
816
tmp0 = vec_perm (ref0, ref1, perm0);
817
tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
819
ref0 = vec_ld (0, ref);
820
ref1 = vec_ld (7, ref);
822
prev = vec_ld (stride, dest);
823
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
824
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
826
tmp1 = vec_perm (ref0, ref1, perm1);
827
tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
830
ref0 = vec_ld (0, ref);
831
ref1 = vec_ld (7, ref);
832
prev = vec_ld (stride, dest);
833
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
834
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
836
tmp0 = vec_perm (ref0, ref1, perm0);
837
tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
838
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
839
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
842
static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
843
const int stride, int height)
845
vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
846
vector_u8_t ones, prev;
848
ones = vec_splat_u8 (1);
849
permA = vec_lvsl (0, ref);
850
permB = vec_add (permA, ones);
852
height = (height >> 1) - 1;
854
ref0 = vec_ld (0, ref);
855
ref1 = vec_ld (16, ref);
857
A = vec_perm (ref0, ref1, permA);
858
B = vec_perm (ref0, ref1, permB);
859
avg0 = vec_avg (A, B);
860
xor0 = vec_xor (A, B);
862
ref0 = vec_ld (0, ref);
863
ref1 = vec_ld (16, ref);
865
prev = vec_ld (0, dest);
866
A = vec_perm (ref0, ref1, permA);
867
B = vec_perm (ref0, ref1, permB);
868
avg1 = vec_avg (A, B);
869
xor1 = vec_xor (A, B);
870
tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
871
vec_and (vec_and (ones, vec_or (xor0, xor1)),
872
vec_xor (avg0, avg1))));
875
ref0 = vec_ld (0, ref);
876
ref1 = vec_ld (16, ref);
878
prev = vec_ld (stride, dest);
879
vec_st (tmp, 0, dest);
880
A = vec_perm (ref0, ref1, permA);
881
B = vec_perm (ref0, ref1, permB);
882
avg0 = vec_avg (A, B);
883
xor0 = vec_xor (A, B);
885
vec_sub (vec_avg (avg0, avg1),
886
vec_and (vec_and (ones, vec_or (xor0, xor1)),
887
vec_xor (avg0, avg1))));
889
ref0 = vec_ld (0, ref);
890
ref1 = vec_ld (16, ref);
892
prev = vec_ld (2*stride, dest);
893
vec_st (tmp, stride, dest);
895
A = vec_perm (ref0, ref1, permA);
896
B = vec_perm (ref0, ref1, permB);
897
avg1 = vec_avg (A, B);
898
xor1 = vec_xor (A, B);
900
vec_sub (vec_avg (avg0, avg1),
901
vec_and (vec_and (ones, vec_or (xor0, xor1)),
902
vec_xor (avg0, avg1))));
905
ref0 = vec_ld (0, ref);
906
ref1 = vec_ld (16, ref);
907
prev = vec_ld (stride, dest);
908
vec_st (tmp, 0, dest);
909
A = vec_perm (ref0, ref1, permA);
910
B = vec_perm (ref0, ref1, permB);
911
avg0 = vec_avg (A, B);
912
xor0 = vec_xor (A, B);
913
tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
914
vec_and (vec_and (ones, vec_or (xor0, xor1)),
915
vec_xor (avg0, avg1))));
916
vec_st (tmp, stride, dest);
919
static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
920
const int stride, int height)
922
vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
923
vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;
925
ones = vec_splat_u8 (1);
926
perm0A = vec_lvsl (0, ref);
927
perm0A = vec_mergeh (perm0A, perm0A);
928
perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
929
perm0B = vec_add (perm0A, ones);
930
perm1A = vec_lvsl (stride, ref);
931
perm1A = vec_mergeh (perm1A, perm1A);
932
perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
933
perm1B = vec_add (perm1A, ones);
935
height = (height >> 1) - 1;
937
ref0 = vec_ld (0, ref);
938
ref1 = vec_ld (8, ref);
940
A = vec_perm (ref0, ref1, perm0A);
941
B = vec_perm (ref0, ref1, perm0B);
942
avg0 = vec_avg (A, B);
943
xor0 = vec_xor (A, B);
945
ref0 = vec_ld (0, ref);
946
ref1 = vec_ld (8, ref);
948
prev = vec_ld (0, dest);
949
A = vec_perm (ref0, ref1, perm1A);
950
B = vec_perm (ref0, ref1, perm1B);
951
avg1 = vec_avg (A, B);
952
xor1 = vec_xor (A, B);
953
tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
954
vec_and (vec_and (ones, vec_or (xor0, xor1)),
955
vec_xor (avg0, avg1))));
958
ref0 = vec_ld (0, ref);
959
ref1 = vec_ld (8, ref);
961
prev = vec_ld (stride, dest);
962
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
963
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
965
A = vec_perm (ref0, ref1, perm0A);
966
B = vec_perm (ref0, ref1, perm0B);
967
avg0 = vec_avg (A, B);
968
xor0 = vec_xor (A, B);
970
vec_sub (vec_avg (avg0, avg1),
971
vec_and (vec_and (ones, vec_or (xor0, xor1)),
972
vec_xor (avg0, avg1))));
974
ref0 = vec_ld (0, ref);
975
ref1 = vec_ld (8, ref);
977
prev = vec_ld (stride, dest);
978
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
979
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
981
A = vec_perm (ref0, ref1, perm1A);
982
B = vec_perm (ref0, ref1, perm1B);
983
avg1 = vec_avg (A, B);
984
xor1 = vec_xor (A, B);
986
vec_sub (vec_avg (avg0, avg1),
987
vec_and (vec_and (ones, vec_or (xor0, xor1)),
988
vec_xor (avg0, avg1))));
991
ref0 = vec_ld (0, ref);
992
ref1 = vec_ld (8, ref);
993
prev = vec_ld (stride, dest);
994
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
995
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
997
A = vec_perm (ref0, ref1, perm0A);
998
B = vec_perm (ref0, ref1, perm0B);
999
avg0 = vec_avg (A, B);
1000
xor0 = vec_xor (A, B);
1001
tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
1002
vec_and (vec_and (ones, vec_or (xor0, xor1)),
1003
vec_xor (avg0, avg1))));
1004
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1005
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1008
/* Instantiate the motion-compensation dispatch table for the AltiVec
 * routines above (macro from mpeg2_internal.h; presumably expands to the
 * mpeg2_mc_altivec function-table definition -- see that header). */
MPEG2_MC_EXTERN (altivec)