3
* Automatically Tuned Linear Algebra Software v3.6.0
4
* (C) Copyright 2000 Peter Soendergaard
6
* Redistribution and use in source and binary forms, with or without
7
* modification, are permitted provided that the following conditions
9
* 1. Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
11
* 2. Redistributions in binary form must reproduce the above copyright
12
* notice, this list of conditions, and the following disclaimer in the
13
* documentation and/or other materials provided with the distribution.
14
* 3. The name of the ATLAS group or the names of its contributors may
15
* not be used to endorse or promote products derived from this
16
* software without specific written permission.
18
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
22
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28
* POSSIBILITY OF SUCH DAMAGE.
35
#define gen_vec_rr(op,reg1,reg2) \
36
__asm__ __volatile__ (#op " %%" #reg1 ", %%" #reg2 \
41
#define gen_vec_mr(op,mem,reg) \
42
__asm__ __volatile__ (#op " %0, %%" #reg \
44
: "m" (((mem)[0])), "m" (((mem)[1])))
46
#define gen_vec_rm(op,reg,mem) \
47
__asm__ __volatile__ (#op " %%" #reg ", %0" \
48
: "=m" (((mem)[0])), "=m" (((mem)[1])) \
52
/* Memory -> register move: forwards to gen_vec_mr with the movq opcode,
 * passing the memory operand first and the register name second. */
#define vec_mov_mr(src,dst) gen_vec_mr(movq,src,dst)
53
/* Register -> memory move: forwards to gen_vec_rm with the movq opcode,
 * passing the register name first and the memory operand second. */
#define vec_mov_rm(src,dst) gen_vec_rm(movq,src,dst)
54
/* Register-register add: forwards to gen_vec_rr with the pfadd opcode.
 * The operands are emitted in the order given (first, then second); the
 * caller in this file reads the sum back from the second register. */
#define vec_add_rr(src,dst) gen_vec_rr(pfadd,src,dst)
63
float testv1[VECLEN],testv2[VECLEN],testv3[VECLEN];
65
for (i=0;i<VECLEN;i++)
72
/* Issue the AMD 3DNow! "femms" instruction before the vector sequence
 * below, so the MMX/x87 state is reset ahead of the MMX-register use. */
__asm__ __volatile__ ("femms");
74
/* Probe sequence: load testv1 into reg0 and testv2 into reg1 (movq),
 * add them with pfadd, then store reg0 back to testv3 (movq).
 * The result is taken from reg0, i.e. the second operand of vec_add_rr.
 * NOTE(review): reg0/reg1 are not defined in the visible source --
 * presumably they name MMX registers; confirm against their definition. */
vec_mov_mr(testv1,reg0);
75
vec_mov_mr(testv2,reg1);
76
vec_add_rr(reg1,reg0);
77
vec_mov_rm(reg0,testv3);
79
/* Issue "femms" again once the vector work is done, leaving the MMX state
 * before the float comparisons on testv3 that follow.
 * NOTE(review): presumably required because those comparisons may use
 * x87 floating point -- confirm for the target compiler. */
__asm__ __volatile__ ("femms");
81
for (i=0;i<VECLEN;i++)
83
if (testv3[i]!=(2*i+2))