217
217
#if defined(ATL_MULADD) && ATL_mmnreg >= 29
218
static void axpy_16(const int N, const SCALAR alpha, const TYPE *x, TYPE *y)
218
static void axpy_16(ATL_CINT N, const SCALAR alpha, const TYPE *x, TYPE *y)
220
220
* 4 register prefetch on X (assumed to be in L1), 16 register prefetch on
221
221
* Y (L2 or main), with 8-cycle muladd. Unrolled by 16 to ensure multiple
222
222
* cacheline usage for both single and double.
227
227
const TYPE *stX = x + N16 - 32;
228
228
const register TYPE alp = alpha;
229
229
register TYPE m0, m1, m2, m3, m4, m5, m6, m7;