13
13
<a name="line3"> 3: </a><font color="#B22222"> Factorization code for BAIJ format. </font>
14
14
<a name="line4"> 4: </a><font color="#B22222">*/</font>
15
15
<a name="line5"> 5: </a> #include <A href="../../../../../src/mat/impls/baij/seq/baij.h.html">src/mat/impls/baij/seq/baij.h</A>
16
<a name="line6"> 6: </a> #include <A href="../../../../../src/vec/vecimpl.h.html">src/vec/vecimpl.h</A>
17
<a name="line7"> 7: </a> #include <A href="../../../../../src/inline/ilu.h.html">src/inline/ilu.h</A>
18
<a name="line8"> 8: </a><font color="#B22222">/*</font>
19
<a name="line9"> 9: </a><font color="#B22222"> Version for 3-by-3 blocks, using the natural ordering.</font>
20
<a name="line10"> 10: </a><font color="#B22222">*/</font>
21
<a name="line11"> 11: </a><strong><font color="#4169E1"><a name="MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering"></a>int MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering(<A href="../../../../../docs/manualpages/Mat/Mat.html#Mat">Mat</A> A,<A href="../../../../../docs/manualpages/Mat/Mat.html#Mat">Mat</A> *B)</font></strong>
22
<a name="line12"> 12: </a>{
23
<a name="line13"> 13: </a> <A href="../../../../../docs/manualpages/Mat/Mat.html#Mat">Mat</A> C = *B;
24
<a name="line14"> 14: </a> Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
25
<a name="line15"> 15: </a> int ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
26
<a name="line16"> 16: </a> int *ajtmpold,*ajtmp,nz,row;
27
<a name="line17"> 17: </a> int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
28
<a name="line18"> 18: </a> MatScalar *pv,*v,*rtmp,*pc,*w,*x;
29
<a name="line19"> 19: </a> MatScalar p1,p2,p3,p4,m1,m2,m3,m4,m5,m6,m7,m8,m9,x1,x2,x3,x4;
30
<a name="line20"> 20: </a> MatScalar p5,p6,p7,p8,p9,x5,x6,x7,x8,x9;
31
<a name="line21"> 21: </a> MatScalar *ba = b->a,*aa = a->a;
33
<a name="line24"> 24: </a> <A href="../../../../../docs/manualpages/Sys/PetscMalloc.html#PetscMalloc">PetscMalloc</A>(9*(n+1)*<font color="#4169E1">sizeof</font>(MatScalar),&rtmp);
35
<a name="line26"> 26: </a> <font color="#4169E1">for</font> (i=0; i<n; i++) {
36
<a name="line27"> 27: </a> nz = bi[i+1] - bi[i];
37
<a name="line28"> 28: </a> ajtmp = bj + bi[i];
38
<a name="line29"> 29: </a> <font color="#4169E1">for</font> (j=0; j<nz; j++) {
39
<a name="line30"> 30: </a> x = rtmp+9*ajtmp[j];
40
<a name="line31"> 31: </a> x[0] = x[1] = x[2] = x[3] = x[4] = x[5] = x[6] = x[7] = x[8] = 0.0;
41
<a name="line32"> 32: </a> }
42
<a name="line33"> 33: </a> <font color="#B22222">/* load in initial (unfactored row) */</font>
43
<a name="line34"> 34: </a> nz = ai[i+1] - ai[i];
44
<a name="line35"> 35: </a> ajtmpold = aj + ai[i];
45
<a name="line36"> 36: </a> v = aa + 9*ai[i];
46
<a name="line37"> 37: </a> <font color="#4169E1">for</font> (j=0; j<nz; j++) {
47
<a name="line38"> 38: </a> x = rtmp+9*ajtmpold[j];
48
<a name="line39"> 39: </a> x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
49
<a name="line40"> 40: </a> x[4] = v[4]; x[5] = v[5]; x[6] = v[6]; x[7] = v[7]; x[8] = v[8];
50
<a name="line41"> 41: </a> v += 9;
51
<a name="line42"> 42: </a> }
52
<a name="line43"> 43: </a> row = *ajtmp++;
53
<a name="line44"> 44: </a> <font color="#4169E1">while</font> (row < i) {
54
<a name="line45"> 45: </a> pc = rtmp + 9*row;
55
<a name="line46"> 46: </a> p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
56
<a name="line47"> 47: </a> p5 = pc[4]; p6 = pc[5]; p7 = pc[6]; p8 = pc[7]; p9 = pc[8];
57
<a name="line48"> 48: </a> <font color="#4169E1">if</font> (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0 || p5 != 0.0 ||
58
<a name="line49"> 49: </a> p6 != 0.0 || p7 != 0.0 || p8 != 0.0 || p9 != 0.0) {
59
<a name="line50"> 50: </a> pv = ba + 9*diag_offset[row];
60
<a name="line51"> 51: </a> pj = bj + diag_offset[row] + 1;
61
<a name="line52"> 52: </a> x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
62
<a name="line53"> 53: </a> x5 = pv[4]; x6 = pv[5]; x7 = pv[6]; x8 = pv[7]; x9 = pv[8];
63
<a name="line54"> 54: </a> pc[0] = m1 = p1*x1 + p4*x2 + p7*x3;
64
<a name="line55"> 55: </a> pc[1] = m2 = p2*x1 + p5*x2 + p8*x3;
65
<a name="line56"> 56: </a> pc[2] = m3 = p3*x1 + p6*x2 + p9*x3;
67
<a name="line58"> 58: </a> pc[3] = m4 = p1*x4 + p4*x5 + p7*x6;
68
<a name="line59"> 59: </a> pc[4] = m5 = p2*x4 + p5*x5 + p8*x6;
69
<a name="line60"> 60: </a> pc[5] = m6 = p3*x4 + p6*x5 + p9*x6;
71
<a name="line62"> 62: </a> pc[6] = m7 = p1*x7 + p4*x8 + p7*x9;
72
<a name="line63"> 63: </a> pc[7] = m8 = p2*x7 + p5*x8 + p8*x9;
73
<a name="line64"> 64: </a> pc[8] = m9 = p3*x7 + p6*x8 + p9*x9;
75
<a name="line66"> 66: </a> nz = bi[row+1] - diag_offset[row] - 1;
76
<a name="line67"> 67: </a> pv += 9;
77
<a name="line68"> 68: </a> <font color="#4169E1">for</font> (j=0; j<nz; j++) {
78
<a name="line69"> 69: </a> x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
79
<a name="line70"> 70: </a> x5 = pv[4]; x6 = pv[5]; x7 = pv[6]; x8 = pv[7]; x9 = pv[8];
80
<a name="line71"> 71: </a> x = rtmp + 9*pj[j];
81
<a name="line72"> 72: </a> x[0] -= m1*x1 + m4*x2 + m7*x3;
82
<a name="line73"> 73: </a> x[1] -= m2*x1 + m5*x2 + m8*x3;
83
<a name="line74"> 74: </a> x[2] -= m3*x1 + m6*x2 + m9*x3;
84
<a name="line75"> 75: </a>
85
<a name="line76"> 76: </a> x[3] -= m1*x4 + m4*x5 + m7*x6;
86
<a name="line77"> 77: </a> x[4] -= m2*x4 + m5*x5 + m8*x6;
87
<a name="line78"> 78: </a> x[5] -= m3*x4 + m6*x5 + m9*x6;
89
<a name="line80"> 80: </a> x[6] -= m1*x7 + m4*x8 + m7*x9;
90
<a name="line81"> 81: </a> x[7] -= m2*x7 + m5*x8 + m8*x9;
91
<a name="line82"> 82: </a> x[8] -= m3*x7 + m6*x8 + m9*x9;
92
<a name="line83"> 83: </a> pv += 9;
93
<a name="line84"> 84: </a> }
94
<a name="line85"> 85: </a> <A href="../../../../../docs/manualpages/Profiling/PetscLogFlops.html#PetscLogFlops">PetscLogFlops</A>(54*nz+36);
95
<a name="line86"> 86: </a> }
96
<a name="line87"> 87: </a> row = *ajtmp++;
97
<a name="line88"> 88: </a> }
98
<a name="line89"> 89: </a> <font color="#B22222">/* finished row so stick it into b->a */</font>
99
<a name="line90"> 90: </a> pv = ba + 9*bi[i];
100
<a name="line91"> 91: </a> pj = bj + bi[i];
101
<a name="line92"> 92: </a> nz = bi[i+1] - bi[i];
102
<a name="line93"> 93: </a> <font color="#4169E1">for</font> (j=0; j<nz; j++) {
103
<a name="line94"> 94: </a> x = rtmp+9*pj[j];
104
<a name="line95"> 95: </a> pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
105
<a name="line96"> 96: </a> pv[4] = x[4]; pv[5] = x[5]; pv[6] = x[6]; pv[7] = x[7]; pv[8] = x[8];
106
<a name="line97"> 97: </a> pv += 9;
107
<a name="line98"> 98: </a> }
108
<a name="line99"> 99: </a> <font color="#B22222">/* invert diagonal block */</font>
109
<a name="line100">100: </a> w = ba + 9*diag_offset[i];
110
<a name="line101">101: </a> Kernel_A_gets_inverse_A_3(w);
111
<a name="line102">102: </a> }
113
<a name="line104">104: </a> <A href="../../../../../docs/manualpages/Sys/PetscFree.html#PetscFree">PetscFree</A>(rtmp);
114
<a name="line105">105: </a> C->factor = FACTOR_LU;
115
<a name="line106">106: </a> C->assembled = PETSC_TRUE;
116
<a name="line107">107: </a> <A href="../../../../../docs/manualpages/Profiling/PetscLogFlops.html#PetscLogFlops">PetscLogFlops</A>(1.3333*27*b->mbs); <font color="#B22222">/* from inverting diagonal blocks */</font>
117
<a name="line108">108: </a> <font color="#4169E1">return</font>(0);
118
<a name="line109">109: </a>}
16
<a name="line6"> 6: </a> #include <A href="../../../../../src/inline/ilu.h.html">src/inline/ilu.h</A>
17
<a name="line7"> 7: </a><font color="#B22222">/*</font>
18
<a name="line8"> 8: </a><font color="#B22222"> Version for 3-by-3 blocks, using the natural ordering.</font>
19
<a name="line9"> 9: </a><font color="#B22222">*/</font>
20
<a name="line12"> 12: </a><strong><font color="#4169E1"><a name="MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering"></a>int MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering(<A href="../../../../../docs/manualpages/Mat/Mat.html#Mat">Mat</A> A,<A href="../../../../../docs/manualpages/Mat/Mat.html#Mat">Mat</A> *B)</font></strong>
21
<a name="line13"> 13: </a>{
22
<a name="line14"> 14: </a> <A href="../../../../../docs/manualpages/Mat/Mat.html#Mat">Mat</A> C = *B;
23
<a name="line15"> 15: </a> Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
24
<a name="line16"> 16: </a> int ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
25
<a name="line17"> 17: </a> int *ajtmpold,*ajtmp,nz,row;
26
<a name="line18"> 18: </a> int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
27
<a name="line19"> 19: </a> MatScalar *pv,*v,*rtmp,*pc,*w,*x;
28
<a name="line20"> 20: </a> MatScalar p1,p2,p3,p4,m1,m2,m3,m4,m5,m6,m7,m8,m9,x1,x2,x3,x4;
29
<a name="line21"> 21: </a> MatScalar p5,p6,p7,p8,p9,x5,x6,x7,x8,x9;
30
<a name="line22"> 22: </a> MatScalar *ba = b->a,*aa = a->a;
32
<a name="line25"> 25: </a> <A href="../../../../../docs/manualpages/Sys/PetscMalloc.html#PetscMalloc">PetscMalloc</A>(9*(n+1)*<font color="#4169E1">sizeof</font>(MatScalar),&rtmp);
34
<a name="line27"> 27: </a> <font color="#4169E1">for</font> (i=0; i<n; i++) {
35
<a name="line28"> 28: </a> nz = bi[i+1] - bi[i];
36
<a name="line29"> 29: </a> ajtmp = bj + bi[i];
37
<a name="line30"> 30: </a> <font color="#4169E1">for</font> (j=0; j<nz; j++) {
38
<a name="line31"> 31: </a> x = rtmp+9*ajtmp[j];
39
<a name="line32"> 32: </a> x[0] = x[1] = x[2] = x[3] = x[4] = x[5] = x[6] = x[7] = x[8] = 0.0;
40
<a name="line33"> 33: </a> }
41
<a name="line34"> 34: </a> <font color="#B22222">/* load in initial (unfactored row) */</font>
42
<a name="line35"> 35: </a> nz = ai[i+1] - ai[i];
43
<a name="line36"> 36: </a> ajtmpold = aj + ai[i];
44
<a name="line37"> 37: </a> v = aa + 9*ai[i];
45
<a name="line38"> 38: </a> <font color="#4169E1">for</font> (j=0; j<nz; j++) {
46
<a name="line39"> 39: </a> x = rtmp+9*ajtmpold[j];
47
<a name="line40"> 40: </a> x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
48
<a name="line41"> 41: </a> x[4] = v[4]; x[5] = v[5]; x[6] = v[6]; x[7] = v[7]; x[8] = v[8];
49
<a name="line42"> 42: </a> v += 9;
50
<a name="line43"> 43: </a> }
51
<a name="line44"> 44: </a> row = *ajtmp++;
52
<a name="line45"> 45: </a> <font color="#4169E1">while</font> (row < i) {
53
<a name="line46"> 46: </a> pc = rtmp + 9*row;
54
<a name="line47"> 47: </a> p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
55
<a name="line48"> 48: </a> p5 = pc[4]; p6 = pc[5]; p7 = pc[6]; p8 = pc[7]; p9 = pc[8];
56
<a name="line49"> 49: </a> <font color="#4169E1">if</font> (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0 || p5 != 0.0 ||
57
<a name="line50"> 50: </a> p6 != 0.0 || p7 != 0.0 || p8 != 0.0 || p9 != 0.0) {
58
<a name="line51"> 51: </a> pv = ba + 9*diag_offset[row];
59
<a name="line52"> 52: </a> pj = bj + diag_offset[row] + 1;
60
<a name="line53"> 53: </a> x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
61
<a name="line54"> 54: </a> x5 = pv[4]; x6 = pv[5]; x7 = pv[6]; x8 = pv[7]; x9 = pv[8];
62
<a name="line55"> 55: </a> pc[0] = m1 = p1*x1 + p4*x2 + p7*x3;
63
<a name="line56"> 56: </a> pc[1] = m2 = p2*x1 + p5*x2 + p8*x3;
64
<a name="line57"> 57: </a> pc[2] = m3 = p3*x1 + p6*x2 + p9*x3;
66
<a name="line59"> 59: </a> pc[3] = m4 = p1*x4 + p4*x5 + p7*x6;
67
<a name="line60"> 60: </a> pc[4] = m5 = p2*x4 + p5*x5 + p8*x6;
68
<a name="line61"> 61: </a> pc[5] = m6 = p3*x4 + p6*x5 + p9*x6;
70
<a name="line63"> 63: </a> pc[6] = m7 = p1*x7 + p4*x8 + p7*x9;
71
<a name="line64"> 64: </a> pc[7] = m8 = p2*x7 + p5*x8 + p8*x9;
72
<a name="line65"> 65: </a> pc[8] = m9 = p3*x7 + p6*x8 + p9*x9;
74
<a name="line67"> 67: </a> nz = bi[row+1] - diag_offset[row] - 1;
75
<a name="line68"> 68: </a> pv += 9;
76
<a name="line69"> 69: </a> <font color="#4169E1">for</font> (j=0; j<nz; j++) {
77
<a name="line70"> 70: </a> x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
78
<a name="line71"> 71: </a> x5 = pv[4]; x6 = pv[5]; x7 = pv[6]; x8 = pv[7]; x9 = pv[8];
79
<a name="line72"> 72: </a> x = rtmp + 9*pj[j];
80
<a name="line73"> 73: </a> x[0] -= m1*x1 + m4*x2 + m7*x3;
81
<a name="line74"> 74: </a> x[1] -= m2*x1 + m5*x2 + m8*x3;
82
<a name="line75"> 75: </a> x[2] -= m3*x1 + m6*x2 + m9*x3;
83
<a name="line76"> 76: </a>
84
<a name="line77"> 77: </a> x[3] -= m1*x4 + m4*x5 + m7*x6;
85
<a name="line78"> 78: </a> x[4] -= m2*x4 + m5*x5 + m8*x6;
86
<a name="line79"> 79: </a> x[5] -= m3*x4 + m6*x5 + m9*x6;
88
<a name="line81"> 81: </a> x[6] -= m1*x7 + m4*x8 + m7*x9;
89
<a name="line82"> 82: </a> x[7] -= m2*x7 + m5*x8 + m8*x9;
90
<a name="line83"> 83: </a> x[8] -= m3*x7 + m6*x8 + m9*x9;
91
<a name="line84"> 84: </a> pv += 9;
92
<a name="line85"> 85: </a> }
93
<a name="line86"> 86: </a> <A href="../../../../../docs/manualpages/Profiling/PetscLogFlops.html#PetscLogFlops">PetscLogFlops</A>(54*nz+36);
94
<a name="line87"> 87: </a> }
95
<a name="line88"> 88: </a> row = *ajtmp++;
96
<a name="line89"> 89: </a> }
97
<a name="line90"> 90: </a> <font color="#B22222">/* finished row so stick it into b->a */</font>
98
<a name="line91"> 91: </a> pv = ba + 9*bi[i];
99
<a name="line92"> 92: </a> pj = bj + bi[i];
100
<a name="line93"> 93: </a> nz = bi[i+1] - bi[i];
101
<a name="line94"> 94: </a> <font color="#4169E1">for</font> (j=0; j<nz; j++) {
102
<a name="line95"> 95: </a> x = rtmp+9*pj[j];
103
<a name="line96"> 96: </a> pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
104
<a name="line97"> 97: </a> pv[4] = x[4]; pv[5] = x[5]; pv[6] = x[6]; pv[7] = x[7]; pv[8] = x[8];
105
<a name="line98"> 98: </a> pv += 9;
106
<a name="line99"> 99: </a> }
107
<a name="line100">100: </a> <font color="#B22222">/* invert diagonal block */</font>
108
<a name="line101">101: </a> w = ba + 9*diag_offset[i];
109
<a name="line102">102: </a> Kernel_A_gets_inverse_A_3(w);
110
<a name="line103">103: </a> }
112
<a name="line105">105: </a> <A href="../../../../../docs/manualpages/Sys/PetscFree.html#PetscFree">PetscFree</A>(rtmp);
113
<a name="line106">106: </a> C->factor = FACTOR_LU;
114
<a name="line107">107: </a> C->assembled = PETSC_TRUE;
115
<a name="line108">108: </a> <A href="../../../../../docs/manualpages/Profiling/PetscLogFlops.html#PetscLogFlops">PetscLogFlops</A>(1.3333*27*b->mbs); <font color="#B22222">/* from inverting diagonal blocks */</font>
116
<a name="line109">109: </a> <font color="#4169E1">return</font>(0);
117
<a name="line110">110: </a>}