2
(const int M, const int N, const int K, const double alpha, const double *A, const int lda0, const double *B, const int ldb0, const double beta, double *C, const int ldc)
4
register double b0, b1, c0;
11
for (j=0; j != NB; j++)
15
for (k=0; k != KB; k += 2)
19
for (i=0; i != MB; i += 8)
21
pC[i] += pA[i*lda] * b0 + pA[i*lda+1] * b1;
22
pC[i+1] += pA[(i+1)*lda] * b0 + pA[(i+1)*lda+1] * b1;
23
pC[i+2] += pA[(i+2)*lda] * b0 + pA[(i+2)*lda+1] * b1;
24
pC[i+3] += pA[(i+3)*lda] * b0 + pA[(i+3)*lda+1] * b1;
25
pC[i+4] += pA[(i+4)*lda] * b0 + pA[(i+4)*lda+1] * b1;
26
pC[i+5] += pA[(i+5)*lda] * b0 + pA[(i+5)*lda+1] * b1;
27
pC[i+6] += pA[(i+6)*lda] * b0 + pA[(i+6)*lda+1] * b1;
28
pC[i+7] += pA[(i+7)*lda] * b0 + pA[(i+7)*lda+1] * b1;