4
#error "This kernel requires SSE2"
14
#elif defined(ATL_GAS_x8664)
20
#error "This kernel requires x86 assembly!"
35
# stack byte offsets of the first four arguments below: N=4, alpha=8, X=16, incX=20
36
# void ATL_UAXPY(const int N, const SCALAR alpha, const TYPE *X, const int incX,
37
# TYPE *Y, const int incY)
39
.global ATL_asmdecor(ATL_UAXPY)
40
ATL_asmdecor(ATL_UAXPY):
48
movlpd OFF+8(%esp), alpha
89
prefetchw PFDIST(Y,N,8)
92
prefetchnta PFDIST(X,N,8)
96
# This loop assumes the number of iterations is in Nr