1146
1124
PetscFunctionReturn(0);
1127
/* ------------------------------------------------------------------------------*/
1129
#define __FUNCT__ "MatMult_SeqMAIJ_9"
1130
int MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1132
Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1133
Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1134
PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1135
int ierr,m = b->AIJ->m,*idx,*ii;
1139
ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1140
ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1145
for (i=0; i<m; i++) {
1157
for (j=0; j<n; j++) {
1158
sum1 += v[jrow]*x[9*idx[jrow]];
1159
sum2 += v[jrow]*x[9*idx[jrow]+1];
1160
sum3 += v[jrow]*x[9*idx[jrow]+2];
1161
sum4 += v[jrow]*x[9*idx[jrow]+3];
1162
sum5 += v[jrow]*x[9*idx[jrow]+4];
1163
sum6 += v[jrow]*x[9*idx[jrow]+5];
1164
sum7 += v[jrow]*x[9*idx[jrow]+6];
1165
sum8 += v[jrow]*x[9*idx[jrow]+7];
1166
sum9 += v[jrow]*x[9*idx[jrow]+8];
1180
PetscLogFlops(18*a->nz - 9*m);
1181
ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1182
ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1183
PetscFunctionReturn(0);
1187
#define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1188
int MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1190
Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1191
Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1192
PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1193
int ierr,m = b->AIJ->m,n,i,*idx;
1196
ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1197
ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1198
ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1200
for (i=0; i<m; i++) {
1201
idx = a->j + a->i[i] ;
1202
v = a->a + a->i[i] ;
1203
n = a->i[i+1] - a->i[i];
1214
y[9*(*idx)] += alpha1*(*v);
1215
y[9*(*idx)+1] += alpha2*(*v);
1216
y[9*(*idx)+2] += alpha3*(*v);
1217
y[9*(*idx)+3] += alpha4*(*v);
1218
y[9*(*idx)+4] += alpha5*(*v);
1219
y[9*(*idx)+5] += alpha6*(*v);
1220
y[9*(*idx)+6] += alpha7*(*v);
1221
y[9*(*idx)+7] += alpha8*(*v);
1222
y[9*(*idx)+8] += alpha9*(*v);
1226
PetscLogFlops(18*a->nz - 9*b->AIJ->n);
1227
ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1228
ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1229
PetscFunctionReturn(0);
1233
#define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1234
int MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1236
Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1237
Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1238
PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1239
int ierr,m = b->AIJ->m,*idx,*ii;
1243
if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1244
ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1245
ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1250
for (i=0; i<m; i++) {
1262
for (j=0; j<n; j++) {
1263
sum1 += v[jrow]*x[9*idx[jrow]];
1264
sum2 += v[jrow]*x[9*idx[jrow]+1];
1265
sum3 += v[jrow]*x[9*idx[jrow]+2];
1266
sum4 += v[jrow]*x[9*idx[jrow]+3];
1267
sum5 += v[jrow]*x[9*idx[jrow]+4];
1268
sum6 += v[jrow]*x[9*idx[jrow]+5];
1269
sum7 += v[jrow]*x[9*idx[jrow]+6];
1270
sum8 += v[jrow]*x[9*idx[jrow]+7];
1271
sum9 += v[jrow]*x[9*idx[jrow]+8];
1285
PetscLogFlops(18*a->nz);
1286
ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1287
ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1288
PetscFunctionReturn(0);
1292
#define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1293
int MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1295
Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1296
Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1297
PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1298
int ierr,m = b->AIJ->m,n,i,*idx;
1301
if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1302
ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1303
ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1304
for (i=0; i<m; i++) {
1305
idx = a->j + a->i[i] ;
1306
v = a->a + a->i[i] ;
1307
n = a->i[i+1] - a->i[i];
1318
y[9*(*idx)] += alpha1*(*v);
1319
y[9*(*idx)+1] += alpha2*(*v);
1320
y[9*(*idx)+2] += alpha3*(*v);
1321
y[9*(*idx)+3] += alpha4*(*v);
1322
y[9*(*idx)+4] += alpha5*(*v);
1323
y[9*(*idx)+5] += alpha6*(*v);
1324
y[9*(*idx)+6] += alpha7*(*v);
1325
y[9*(*idx)+7] += alpha8*(*v);
1326
y[9*(*idx)+8] += alpha9*(*v);
1330
PetscLogFlops(18*a->nz);
1331
ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1332
ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1333
PetscFunctionReturn(0);
1336
/*--------------------------------------------------------------------------------------------*/
1338
#define __FUNCT__ "MatMult_SeqMAIJ_16"
1339
int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1341
Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1342
Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1343
PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1344
PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1345
int ierr,m = b->AIJ->m,*idx,*ii;
1349
ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1350
ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1355
for (i=0; i<m; i++) {
1374
for (j=0; j<n; j++) {
1375
sum1 += v[jrow]*x[16*idx[jrow]];
1376
sum2 += v[jrow]*x[16*idx[jrow]+1];
1377
sum3 += v[jrow]*x[16*idx[jrow]+2];
1378
sum4 += v[jrow]*x[16*idx[jrow]+3];
1379
sum5 += v[jrow]*x[16*idx[jrow]+4];
1380
sum6 += v[jrow]*x[16*idx[jrow]+5];
1381
sum7 += v[jrow]*x[16*idx[jrow]+6];
1382
sum8 += v[jrow]*x[16*idx[jrow]+7];
1383
sum9 += v[jrow]*x[16*idx[jrow]+8];
1384
sum10 += v[jrow]*x[16*idx[jrow]+9];
1385
sum11 += v[jrow]*x[16*idx[jrow]+10];
1386
sum12 += v[jrow]*x[16*idx[jrow]+11];
1387
sum13 += v[jrow]*x[16*idx[jrow]+12];
1388
sum14 += v[jrow]*x[16*idx[jrow]+13];
1389
sum15 += v[jrow]*x[16*idx[jrow]+14];
1390
sum16 += v[jrow]*x[16*idx[jrow]+15];
1411
PetscLogFlops(32*a->nz - 16*m);
1412
ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1413
ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1414
PetscFunctionReturn(0);
1418
#define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1419
int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1421
Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1422
Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1423
PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1424
PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1425
int ierr,m = b->AIJ->m,n,i,*idx;
1428
ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1429
ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1430
ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1432
for (i=0; i<m; i++) {
1433
idx = a->j + a->i[i] ;
1434
v = a->a + a->i[i] ;
1435
n = a->i[i+1] - a->i[i];
1445
alpha10 = x[16*i+9];
1446
alpha11 = x[16*i+10];
1447
alpha12 = x[16*i+11];
1448
alpha13 = x[16*i+12];
1449
alpha14 = x[16*i+13];
1450
alpha15 = x[16*i+14];
1451
alpha16 = x[16*i+15];
1453
y[16*(*idx)] += alpha1*(*v);
1454
y[16*(*idx)+1] += alpha2*(*v);
1455
y[16*(*idx)+2] += alpha3*(*v);
1456
y[16*(*idx)+3] += alpha4*(*v);
1457
y[16*(*idx)+4] += alpha5*(*v);
1458
y[16*(*idx)+5] += alpha6*(*v);
1459
y[16*(*idx)+6] += alpha7*(*v);
1460
y[16*(*idx)+7] += alpha8*(*v);
1461
y[16*(*idx)+8] += alpha9*(*v);
1462
y[16*(*idx)+9] += alpha10*(*v);
1463
y[16*(*idx)+10] += alpha11*(*v);
1464
y[16*(*idx)+11] += alpha12*(*v);
1465
y[16*(*idx)+12] += alpha13*(*v);
1466
y[16*(*idx)+13] += alpha14*(*v);
1467
y[16*(*idx)+14] += alpha15*(*v);
1468
y[16*(*idx)+15] += alpha16*(*v);
1472
PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1473
ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1474
ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1475
PetscFunctionReturn(0);
1479
#define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1480
int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1482
Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1483
Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1484
PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1485
PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1486
int ierr,m = b->AIJ->m,*idx,*ii;
1490
if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1491
ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1492
ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1497
for (i=0; i<m; i++) {
1516
for (j=0; j<n; j++) {
1517
sum1 += v[jrow]*x[16*idx[jrow]];
1518
sum2 += v[jrow]*x[16*idx[jrow]+1];
1519
sum3 += v[jrow]*x[16*idx[jrow]+2];
1520
sum4 += v[jrow]*x[16*idx[jrow]+3];
1521
sum5 += v[jrow]*x[16*idx[jrow]+4];
1522
sum6 += v[jrow]*x[16*idx[jrow]+5];
1523
sum7 += v[jrow]*x[16*idx[jrow]+6];
1524
sum8 += v[jrow]*x[16*idx[jrow]+7];
1525
sum9 += v[jrow]*x[16*idx[jrow]+8];
1526
sum10 += v[jrow]*x[16*idx[jrow]+9];
1527
sum11 += v[jrow]*x[16*idx[jrow]+10];
1528
sum12 += v[jrow]*x[16*idx[jrow]+11];
1529
sum13 += v[jrow]*x[16*idx[jrow]+12];
1530
sum14 += v[jrow]*x[16*idx[jrow]+13];
1531
sum15 += v[jrow]*x[16*idx[jrow]+14];
1532
sum16 += v[jrow]*x[16*idx[jrow]+15];
1545
y[16*i+10] += sum11;
1546
y[16*i+11] += sum12;
1547
y[16*i+12] += sum13;
1548
y[16*i+13] += sum14;
1549
y[16*i+14] += sum15;
1550
y[16*i+15] += sum16;
1553
PetscLogFlops(32*a->nz);
1554
ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1555
ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1556
PetscFunctionReturn(0);
1560
#define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
1561
int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1563
Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1564
Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1565
PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1566
PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1567
int ierr,m = b->AIJ->m,n,i,*idx;
1570
if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1571
ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1572
ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1573
for (i=0; i<m; i++) {
1574
idx = a->j + a->i[i] ;
1575
v = a->a + a->i[i] ;
1576
n = a->i[i+1] - a->i[i];
1586
alpha10 = x[16*i+9];
1587
alpha11 = x[16*i+10];
1588
alpha12 = x[16*i+11];
1589
alpha13 = x[16*i+12];
1590
alpha14 = x[16*i+13];
1591
alpha15 = x[16*i+14];
1592
alpha16 = x[16*i+15];
1594
y[16*(*idx)] += alpha1*(*v);
1595
y[16*(*idx)+1] += alpha2*(*v);
1596
y[16*(*idx)+2] += alpha3*(*v);
1597
y[16*(*idx)+3] += alpha4*(*v);
1598
y[16*(*idx)+4] += alpha5*(*v);
1599
y[16*(*idx)+5] += alpha6*(*v);
1600
y[16*(*idx)+6] += alpha7*(*v);
1601
y[16*(*idx)+7] += alpha8*(*v);
1602
y[16*(*idx)+8] += alpha9*(*v);
1603
y[16*(*idx)+9] += alpha10*(*v);
1604
y[16*(*idx)+10] += alpha11*(*v);
1605
y[16*(*idx)+11] += alpha12*(*v);
1606
y[16*(*idx)+12] += alpha13*(*v);
1607
y[16*(*idx)+13] += alpha14*(*v);
1608
y[16*(*idx)+14] += alpha15*(*v);
1609
y[16*(*idx)+15] += alpha16*(*v);
1613
PetscLogFlops(32*a->nz);
1614
ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1615
ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1616
PetscFunctionReturn(0);
1149
1619
/*===================================================================================*/
1150
1620
#undef __FUNCT__
1151
1621
#define __FUNCT__ "MatMult_MPIMAIJ_dof"