2
* The contents of this file are subject to the Mozilla Public
3
* License Version 1.1 (the "License"); you may not use this file
4
* except in compliance with the License. You may obtain a copy of
5
* the License at http://www.mozilla.org/MPL/
7
* Software distributed under the License is distributed on an "AS
8
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
9
* implied. See the License for the specific language governing
10
* rights and limitations under the License.
12
* The Original Code is a SPARC/VIS optimized multiply and add function
14
* The Initial Developer of the Original Code is Sun Microsystems Inc.
15
* Portions created by Sun Microsystems Inc. are
16
* Copyright (C) 1999-2000 Sun Microsystems Inc. All Rights Reserved.
20
* Alternatively, the contents of this file may be used under the
21
* terms of the GNU General Public License Version 2 or later (the
22
* "GPL"), in which case the provisions of the GPL are applicable
23
* instead of those above. If you wish to allow use of your
24
* version of this file only under the terms of the GPL and not to
25
* allow others to use your version of this file under the MPL,
26
* indicate your decision by deleting the provisions above and
27
* replace them with the notice and other provisions required by
28
* the GPL. If you do not delete the provisions above, a recipient
29
* may use your version of this file under either the MPL or the
31
* $Id: mpv_sparc.c,v 1.2 2000/12/08 00:20:36 nelsonb%netscape.com Exp $
34
#include "vis_proto.h"
36
/***************************************************************/
39
/* Word type used by mul_add()/mul_add_inp() for the array operands, the
 * scalar multiplier `a`, and the return value.
 * NOTE(review): the name implies 32 bits; this holds only where
 * `unsigned int` is 32 bits wide -- confirm for the target ABI. */
typedef unsigned int t_u32;
40
#if defined(__sparcv9)
42
typedef unsigned long t_u64;
44
typedef long long t_s64;
45
typedef unsigned long long t_u64;
49
/***************************************************************/
59
/***************************************************************/
64
/* Mask with the low A_BITS bits set.  mul_add() uses it to split the
 * multiplier into a low part (a & A_MASK) and a high part (a >> A_BITS). */
#define A_MASK ((1 << A_BITS) - 1)
66
/***************************************************************/
68
static t_u64 mask_cnst[] = {
72
/***************************************************************/
75
t_d64 *py = (t_d64*)y; \
76
t_d64 mask = *((t_d64*)mask_cnst); \
77
t_d64 ca = (1u << 31) - 1; \
78
t_d64 da = (t_d64)a; \
82
/***************************************************************/
84
/*
 * MUL_U32_S64_2(i): handle the i-th t_d64 element of py, which is viewed
 * as two 32-bit fields through dy.i32s.i0 and dy.i32s.i1.
 *
 * The element is first combined with `mask` by vis_fxnor(); each field is
 * then subtracted from `ca` and the difference multiplied by `da`, all in
 * t_d64 arithmetic.  The two products go to buff[2*(i)] and buff[2*(i)+1].
 * NOTE(review): the mask/ca combination looks like a bias trick for
 * reading an unsigned 32-bit value through a signed field -- confirm
 * against the definitions of mask_cnst and vis_fxnor.
 *
 * Relies on dy, mask, py, ca, da and buff existing in the expanding scope.
 * The expansion ends without a semicolon; the use site supplies it.
 */
#define MUL_U32_S64_2(i) \
85
dy.d64 = vis_fxnor(mask, py[i]); \
86
buff[2*(i) ] = (ca - (t_d64)dy.i32s.i0) * da; \
87
buff[2*(i)+1] = (ca - (t_d64)dy.i32s.i1) * da
89
/*
 * MUL_U32_S64_2_D(i): two-part-multiplier variant of MUL_U32_S64_2.
 *
 * The i-th t_d64 element of py is combined with `mask` by vis_fxnor();
 * d0 and d1 are `ca` minus the i0 and i1 fields of the result.  Four
 * products, each converted to t_s64, are stored in buff:
 *   buff[4*(i)  ] = d0 * da      buff[4*(i)+1] = d0 * db
 *   buff[4*(i)+2] = d1 * da      buff[4*(i)+3] = d1 * db
 * In mul_add(), da and db are set to (a & A_MASK) and (a >> A_BITS), so
 * each input word yields one low-part and one high-part product.
 *
 * Relies on dy, mask, py, ca, d0, d1, da, db and buff existing in the
 * expanding scope.  The expansion ends without a semicolon; the use site
 * supplies it.
 */
#define MUL_U32_S64_2_D(i) \
90
dy.d64 = vis_fxnor(mask, py[i]); \
91
d0 = ca - (t_d64)dy.i32s.i0; \
92
d1 = ca - (t_d64)dy.i32s.i1; \
93
buff[4*(i) ] = (t_s64)(d0 * da); \
94
buff[4*(i)+1] = (t_s64)(d0 * db); \
95
buff[4*(i)+2] = (t_s64)(d1 * da); \
96
buff[4*(i)+3] = (t_s64)(d1 * db)
98
/***************************************************************/
100
#define ADD_S64_U32(i) \
101
s = buff[i] + x[i] + c; \
105
#define ADD_S64_U32_D(i) \
106
s = buff[2*(i)] +(((t_s64)(buff[2*(i)+1]))<<A_BITS) + x[i] + uc; \
108
uc = ((t_u64)s >> 32)
110
/***************************************************************/
112
#define MUL_U32_S64_8(i) \
114
MUL_U32_S64_2(i+1); \
115
MUL_U32_S64_2(i+2); \
118
#define MUL_U32_S64_D_8(i) \
119
MUL_U32_S64_2_D(i); \
120
MUL_U32_S64_2_D(i+1); \
121
MUL_U32_S64_2_D(i+2); \
124
/***************************************************************/
126
#define ADD_S64_U32_8(i) \
136
#define ADD_S64_U32_D_8(i) \
138
ADD_S64_U32_D(i+1); \
139
ADD_S64_U32_D(i+2); \
140
ADD_S64_U32_D(i+3); \
141
ADD_S64_U32_D(i+4); \
142
ADD_S64_U32_D(i+5); \
143
ADD_S64_U32_D(i+6); \
146
/***************************************************************/
148
t_u32 mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a)
150
if (a < (1 << A_BITS)) {
161
} else if (n == 16) {
177
for (i = 0; i < (n+1)/2; i ++) {
182
for (i = 0; i < n; i ++) {
196
da = (t_d64)(a & A_MASK);
197
db = (t_d64)(a >> A_BITS);
204
} else if (n == 16) {
209
da = (t_d64)(a & A_MASK);
210
db = (t_d64)(a >> A_BITS);
220
DEF_VARS(2*BUFF_SIZE);
224
da = (t_d64)(a & A_MASK);
225
db = (t_d64)(a >> A_BITS);
228
for (i = 0; i < (n+1)/2; i ++) {
233
for (i = 0; i < n; i ++) {
242
/***************************************************************/
244
/*
 * mul_add_inp: convenience wrapper around mul_add().
 *
 * Forwards to mul_add(x, x, y, n, a): the same array `x` is passed as both
 * the first (z) and second (x) argument, and mul_add()'s return value is
 * returned unchanged.
 * NOTE(review): presumably z is mul_add()'s output array, which would make
 * this the in-place form (result written back into x) -- confirm against
 * the mul_add() body.
 */
t_u32 mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a)
246
return mul_add(x, x, y, n, a);
249
/***************************************************************/