~ubuntu-branches/ubuntu/utopic/brian/utopic

« back to all changes in this revision

Viewing changes to dev/ideas/morphology/Remy/spatialstateupdater_linear.py

  • Committer: Package Import Robot
  • Author(s): Yaroslav Halchenko
  • Date: 2014-07-30 11:29:44 UTC
  • mfrom: (6.2.1 sid)
  • Revision ID: package-import@ubuntu.com-20140730112944-x43rsnf5hack4mhx
Tags: 1.4.1-2
* Forgotten upload to unstable
* debian/control
  - policy boost to 3.9.5
  - updated Vcs- fields given migration to anonscm.d.o and provided -b
    debian

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
import pycuda.driver as cuda
 
2
import pycuda.autoinit
 
3
from pycuda.compiler import SourceModule
 
4
 
 
5
from morphology import Morphology
 
6
from brian.stdunits import *
 
7
from brian.units import *
 
8
from brian.reset import NoReset
 
9
from brian.stateupdater import StateUpdater
 
10
from gpustateupdater import *
 
11
from brian.inspection import *
 
12
from brian.optimiser import *
 
13
from itertools import count
 
14
from brian.neurongroup import NeuronGroup
 
15
from scipy.linalg import solve_banded
 
16
from numpy import zeros, ones, isscalar, diag_indices
 
17
from numpy.linalg import solve
 
18
from brian.clock import guess_clock
 
19
from brian.equations import Equations
 
20
import functools
 
21
import warnings
 
22
from math import ceil, log
 
23
from scipy import weave
 
24
from time import time
 
25
import trace
 
26
 
 
27
try:
 
28
    import sympy
 
29
    use_sympy = True
 
30
except:
 
31
    warnings.warn('sympy not installed: some features in SpatialNeuron will not be available')
 
32
    use_sympy = False
 
33
 
 
34
__all__ = ['LinearSpatialStateUpdater']
 
35
 
 
36
class LinearSpatialStateUpdater(StateUpdater):
 
37
        """
 
38
        State updater for compartmental models.
 
39
 
 
40
        """
 
41
        def __init__(self, neuron, clock=None):
 
42
                self.eqs = neuron._eqs
 
43
                self.neuron = neuron
 
44
                self._isprepared = False
 
45
                self.first_step = True
 
46
                self._state_updater=neuron._state_updater # to update the currents
 
47
                self.first_test_gtot=True
 
48
                self.callcount=0
 
49
                
 
50
 
 
51
        def prepare_branch(self, morphology, mid_diameter,ante=0):
 
52
                '''
 
53
                1) fill neuron.branches and neuron.index with information about the morphology of the neuron
 
54
                2) change some wrong values in Aplus and Aminus. Indeed these were correct only if the neuron is linear.
 
55
                        Knowledge of the morphology gives correct values
 
56
                3) fill neuron.bc (boundary conditions)
 
57
                '''
 
58
                branch = morphology.branch()
 
59
                i=branch._origin
 
60
                j= i + len(branch) - 2
 
61
                endpoint = j + 1
 
62
                self.neuron.index[i:endpoint+1] = self.neuron.BPcount
 
63
                children_number = 0
 
64
                
 
65
                for x in (morphology.children):#parent of segment n isnt always n-1 at branch points. We need to change Aplus and Aminus
 
66
                        #gc = 2 * msiemens/cm**2
 
67
                        startpoint = x._origin
 
68
                        mid_diameter[startpoint] = .5*(self.neuron.diameter[endpoint]+self.neuron.diameter[startpoint])
 
69
                        self.Aminus[startpoint]=mid_diameter[startpoint]**2/(4*self.neuron.diameter[startpoint]*self.neuron.length[startpoint]**2*self.neuron.Ri)
 
70
                        if endpoint>0:
 
71
                                self.Aplus[startpoint]=mid_diameter[startpoint]**2/(4*self.neuron.diameter[endpoint]*self.neuron.length[endpoint]**2*self.neuron.Ri)
 
72
                        #else :
 
73
                                #self.Aplus[startpoint]=gc
 
74
                                #self.Aminus[startpoint]=gc
 
75
                                
 
76
                        
 
77
                        children_number+=1
 
78
                
 
79
                
 
80
                pointType = self.neuron.bc[endpoint]
 
81
                hasChild = (children_number>0)
 
82
                if (not hasChild) and (pointType == 1):
 
83
                        self.neuron.bc[endpoint] = self.neuron.bc_type  
 
84
                
 
85
                pointType = self.neuron.bc[endpoint]
 
86
                index_ante = self.neuron.index[ante]
 
87
                self.neuron.branches.append((i,j,endpoint,ante,index_ante,pointType))
 
88
                self.neuron.BPcount += 1
 
89
 
 
90
                if (j-i+2)>1:   #j-i+2 = len(branch)
 
91
                        self.neuron.long_branches_count +=1
 
92
                        #initialize ab
 
93
                        self.ab[0,i:j]= self.Aplus[i+1:j+1]
 
94
                        self.ab0[i:j] = self.Aplus[i+1:j+1]
 
95
                        self.ab[2,i+1:j+1]= self.Aminus[i+1:j+1]
 
96
                        self.ab2[i+1:j+1] = self.Aminus[i+1:j+1]
 
97
                        
 
98
                        #initialize bL
 
99
                        VL0 = 1 * volt
 
100
                        self.bL[i] = (- VL0 * self.Aminus[i])
 
101
                        
 
102
                        #initialize bR
 
103
                        VR0 = 1 * volt
 
104
                        self.bR[j] = (- VR0 * self.Aplus[j+1])
 
105
        
 
106
                for x in (morphology.children):
 
107
                        self.prepare_branch(x,mid_diameter,endpoint)
 
108
                
 
109
        def prepare(self):
 
110
                '''
 
111
                From Hines 1984 paper, discrete formula is:
 
112
                A_plus*V(i+1)-(A_plus+A_minus)*V(i)+A_minus*V(i-1)=Cm/dt*(V(i,t+dt)-V(i,t))+gtot(i)*V(i)-I0(i)
 
113
       
 
114
                A_plus: i->i+1
 
115
                A_minus: i->i-1
 
116
                
 
117
        This gives the following tridiagonal system:
 
118
        A_plus*V(i+1)-(Cm/dt+gtot(i)+A_plus+A_minus)*V(i)+A_minus*V(i-1)=-Cm/dt*V(i,t)-I0(i)
 
119
        
 
120
        Boundaries, one simple possibility (sealed ends):
 
121
        -(Cm/dt+gtot(n)+A_minus)*V(n)+A_minus*V(n-1)=-Cm/dt*V(n,t)-I0(n)
 
122
        A_plus*V(1)-(Cm/dt+gtot(0)+A_plus)*V(0)=-Cm/dt*V(0,t)-I0(0)
 
123
        '''
 
124
                mid_diameter = zeros(len(self.neuron)) # mid(i) : (i-1) <-> i
 
125
                mid_diameter[1:] = .5*(self.neuron.diameter[:-1]+self.neuron.diameter[1:])
 
126
                
 
127
                self.Aplus = zeros(len(self.neuron)) # A+ i -> j = Aplus(j)
 
128
                self.Aminus = zeros(len(self.neuron)) # A- i <- j = Aminus(j)
 
129
                self.Aplus[1]= mid_diameter[1]**2/(4*self.neuron.diameter[1]*self.neuron.length[1]**2*self.neuron.Ri)
 
130
                self.Aplus[2:]=mid_diameter[2:]**2/(4*self.neuron.diameter[1:-1]*self.neuron.length[1:-1]**2*self.neuron.Ri)
 
131
                self.Aminus[1:]=mid_diameter[1:]**2/(4*self.neuron.diameter[1:]*self.neuron.length[1:]**2*self.neuron.Ri) 
 
132
                
 
133
                self.neuron.index = zeros(len(self.neuron),int) # gives the index of the branch containing the current compartment
 
134
                self.neuron.branches = [] # (i,j,bp,ante,ante_index,pointType)
 
135
                # i is the first compartment
 
136
                # bp is the last, a branch point
 
137
                # j is the end of the "inner branch". j = bp-1
 
138
                # ante is the branch point to which i is connected
 
139
                
 
140
                self.neuron.BPcount = 0 # number of branch points (or branches). = len(self.neuron.branches)
 
141
                self.neuron.long_branches_count = 0 # number of branches with len(branch) > 1
 
142
                
 
143
                #self.vL = cuda.pagelocked_zeros((len(self.neuron)),numpy.float64)
 
144
                #self.vR = cuda.pagelocked_zeros((len(self.neuron)),numpy.float64)
 
145
                #self.d = cuda.pagelocked_zeros((len(self.neuron)),numpy.float64)
 
146
                
 
147
                self.bL = cuda.pagelocked_zeros((len(self.neuron)),numpy.float64)
 
148
                self.bR = cuda.pagelocked_zeros((len(self.neuron)),numpy.float64)
 
149
                #self.bd = cuda.pagelocked_zeros((len(self.neuron)),numpy.float64)
 
150
                self.ab = zeros((3,len(self.neuron)))
 
151
                self.ab0 = zeros(len(self.neuron))
 
152
                self.ab1 = cuda.pagelocked_zeros((len(self.neuron)),numpy.float64)
 
153
                self.ab2 = zeros(len(self.neuron))
 
154
                self.ab1_base = zeros(len(self.neuron))
 
155
                #self.res = cuda.pagelocked_zeros((3 * len(self.neuron)),numpy.float64)
 
156
                
 
157
                self.mTrunc = 0 # used to truncate vL and vR
 
158
                self.delta_list = zeros(len(self.neuron)) #used to find mTrunc
 
159
                
 
160
                # prepare_branch : fill neuron.index, neuron.branches, changes Aplus & Aminus
 
161
                self.prepare_branch(self.neuron.morphology, mid_diameter,0)
 
162
                
 
163
                # linear system P V = B used to deal with the voltage at branch points and take boundary conditions into account.
 
164
                self.P = zeros((self.neuron.BPcount,self.neuron.BPcount))
 
165
                self.B = zeros(self.neuron.BPcount)
 
166
                self.solution_bp = zeros(self.neuron.BPcount)
 
167
                
 
168
                self.gtot = zeros(len(self.neuron))
 
169
                self.I0 = zeros(len(self.neuron))
 
170
                self.i_list = []
 
171
                self.j_list = []
 
172
                self.i_list_bis = []
 
173
                self.j_list_bis = []
 
174
                new_tridiag = True
 
175
                self.bp_list = []
 
176
                self.pointType_list = []
 
177
                self.pointTypeAnte_list = []
 
178
                self.index_ante_list0 = []
 
179
                self.index_ante_list1 = []
 
180
                self.index_ante_list2 = []
 
181
                self.ante_list = []
 
182
                self.post_list = []
 
183
                self.ante_list_idx = []
 
184
                self.post_list_idx = []
 
185
                self.id = []
 
186
                self.test_list = []
 
187
                temp = zeros(self.neuron.BPcount)
 
188
                self.ind0 = []
 
189
                self.ind_bctype_0 = []
 
190
                for index,(i,j,bp,ante,index_ante,pointType) in enumerate(self.neuron.branches) :
 
191
                        self.i_list.append(i)
 
192
                        self.j_list.append(j)
 
193
                        if new_tridiag:
 
194
                                self.i_list_bis.append(i)
 
195
                                ii = i
 
196
                        else:
 
197
                                ii = self.i_list[-1]
 
198
                        if j-ii+1>2:
 
199
                                self.j_list_bis.append(j)
 
200
                                new_tridiag = True
 
201
                        else :
 
202
                                new_tridiag = False
 
203
                        self.bp_list.append(bp)
 
204
                        self.pointType_list.append(max(1,pointType))
 
205
                        self.pointTypeAnte_list.append(max(1,self.neuron.bc[ante]))
 
206
                        temp[index] = index_ante
 
207
                        self.id.append(index)
 
208
                        if (j-i+2>1):
 
209
                                self.test_list.append(1)
 
210
                        else :
 
211
                                self.test_list.append(0)
 
212
                        for x in xrange(j-i+2):
 
213
                                self.ante_list.append(ante)
 
214
                                self.post_list.append(bp)
 
215
                                self.ante_list_idx.append(index_ante)
 
216
                                self.post_list_idx.append(index)
 
217
                        if index_ante == 0 and index != 0:
 
218
                                self.ind0.append(index)
 
219
                        if pointType==0 :
 
220
                                self.ind_bctype_0.append(bp)
 
221
                
 
222
                self.ante_arr = numpy.array(self.ante_list_idx)
 
223
                self.post_arr = numpy.array(self.post_list_idx)
 
224
                
 
225
                self.index_ante_list1, self.ind1 = numpy.unique(temp,return_index=True)
 
226
                self.ind1 = numpy.sort(self.ind1)
 
227
                self.index_ante_list1 = temp[self.ind1]
 
228
                self.index_ante_list1 = list(self.index_ante_list1)
 
229
                self.ind2 = []
 
230
                for x in xrange(self.neuron.BPcount):
 
231
                        self.ind2.append(x)
 
232
                self.ind2 = numpy.delete(self.ind2,self.ind1,None) 
 
233
                self.ind2 = numpy.setdiff1d(self.ind2, self.ind0, assume_unique=True)
 
234
                self.index_ante_list2 = temp[self.ind2]
 
235
                self.index_ante_list2 = list(self.index_ante_list2)
 
236
                
 
237
                self.index_ante_list = list(temp)
 
238
                self.Aminus_bp = self.Aminus[self.bp_list]
 
239
                self.Aminus_bp [:] *= self.pointType_list[:]
 
240
                self.Aplus_i = self.Aplus[self.i_list]
 
241
                self.Aplus_i[:] *= self.pointTypeAnte_list[:]
 
242
                #--------------------------------------------------------GPU------------------------
 
243
                n = len(self.neuron)
 
244
                
 
245
                mod = SourceModule("""
 
246
        __global__ void updateAB_gtot(double *ab1, double *ab1_base, double *gtot)
 
247
        { 
 
248
          int idx = threadIdx.x + blockIdx.x * blockDim.x;
 
249
          
 
250
          ab1[idx] = ab1_base[idx] - gtot[idx];
 
251
          ab1[idx+gridDim.x] = ab1_base[idx] - gtot[idx];
 
252
          ab1[idx+2*gridDim.x] = ab1_base[idx] - gtot[idx];
 
253
        }
 
254
        
 
255
        __global__ void updateBD(double *bd, double *Cm, double dt,double *v, double *I0)
 
256
        { 
 
257
          int idx = threadIdx.x + blockIdx.x * blockDim.x;
 
258
          
 
259
          bd[idx] = - Cm[idx] / dt * v[idx] - I0[idx];
 
260
        }
 
261
        
 
262
        __global__ void finalizeFun(double *v, double *v_bp, int *ante,int *post, double *b, int m)
 
263
        { 
 
264
          int idx = threadIdx.x + blockIdx.x * blockDim.x;
 
265
          int idx_a = ante[idx];
 
266
          int idx_p = post[idx];
 
267
          
 
268
          v[idx] = b[idx + m] * v_bp[idx_a] + b[idx + 2*m] * v_bp[idx_p] + b[idx]; // vL, vR, d
 
269
        }
 
270
        
 
271
        __global__ void finalizeFunBis(double *v, double *v_bp, int *GPU_data_int)
 
272
        { 
 
273
          int idx = threadIdx.x + blockIdx.x * blockDim.x;
 
274
          int bp = GPU_data_int[4 * idx + 3]; 
 
275
          v[bp] = v_bp[idx];
 
276
        }
 
277
        
 
278
        __global__ void initPB(double *P, double *B, int BPcount)
 
279
        {
 
280
                int idx = threadIdx.x + blockIdx.x * blockDim.x;
 
281
                int idy = threadIdx.x + blockIdx.y * blockDim.y;
 
282
                
 
283
                P[idx + idy * BPcount] = 0.0;
 
284
                B[idx] = 0.0;
 
285
        }
 
286
        
 
287
        //GPU_data_int is : ante_list, i_list, j_list, bp_list
 
288
        //GPU_data_double is : test_list, Aplus, Aminus
 
289
        __global__ void fillPB(double *P, double *B, double *b, double *Cm_l, double *gtot_l, int *GPU_data_int,
 
290
                        double *GPU_data_double, double *I0_l, double *v_l, int BPcount, double dt, int m)
 
291
        { 
 
292
          int idx = threadIdx.x + blockIdx.x * blockDim.x;
 
293
          int idx_ante = GPU_data_int[4 * idx];
 
294
          int i = GPU_data_int[4 * idx + 1];
 
295
          int j = GPU_data_int[4 * idx + 2];
 
296
          int bp = GPU_data_int[4 * idx + 3];
 
297
          double test = GPU_data_double[3 * idx];
 
298
          double Aplus = GPU_data_double[3 * idx + 1];
 
299
          double Aminus = GPU_data_double[3 * idx + 2];
 
300
          double Cm = Cm_l[bp];
 
301
          double gtot = gtot_l[bp];
 
302
          double I0 = I0_l[bp];
 
303
          double v_bp = v_l[bp];
 
304
          double vLright = b[j + m] * test;
 
305
          double vRleft = b[i + 2*m] * test;
 
306
          double vRright = b[j + 2*m] * test;
 
307
          double dright = b[j] * test;
 
308
          
 
309
          B[idx] += -Cm/dt * v_bp - I0 -Aminus * dright;
 
310
          P[idx * BPcount + idx] += -Cm/dt - gtot + Aminus * (vRright - 1.0);
 
311
          P[idx * BPcount + idx_ante] += Aminus * vLright;
 
312
          P[idx_ante * BPcount + idx] += Aplus * vRleft;
 
313
        }
 
314
        
 
315
        __global__ void fillPB_bis(double *P, double *B, double *b, int *GPU_data_int, double *GPU_data_double,
 
316
                        int BPcount, int *indices, int m)
 
317
        { 
 
318
          int idx_temp = threadIdx.x + blockIdx.x * blockDim.x;
 
319
          int idx = indices[idx_temp];
 
320
          int idx_ante = GPU_data_int[4 * idx];
 
321
          int i = GPU_data_int[4 * idx + 1];
 
322
          int test = GPU_data_double[3 * idx];
 
323
          double Aplus = GPU_data_double[3 * idx + 1];
 
324
          double vLleft = b[i + m] * test;
 
325
          double dleft = b[i] * test;
 
326
          
 
327
          B[idx_ante] += - Aplus * dleft;
 
328
          P[idx_ante * (BPcount + 1)] += Aplus * (vLleft - 1.0);
 
329
        }
 
330
        
 
331
        __global__ void badFillPB_0(double *P, double *B, double *b, int *GPU_data_int, double *GPU_data_double,
 
332
                        int *indices, double *Cm_l, double *gtot_l, double *I0_l, double *v_l, int len_indices, int m, double dt)
 
333
        { 
 
334
          double Cm = Cm_l[0];
 
335
          double gtot = gtot_l[0];
 
336
          double I0 = I0_l[0];
 
337
          double v_0 = v_l[0];
 
338
          
 
339
          B[0] = - Cm/dt * v_0 - I0;
 
340
                  P[0] = - Cm/dt - gtot;
 
341
                  
 
342
                  int idx;
 
343
                  int i;
 
344
                  int test;
 
345
                  double Aplus;
 
346
                  double vLleft;
 
347
                  double dleft;
 
348
          for (int idx_temp=0;idx_temp<len_indices;idx_temp++)
 
349
          {
 
350
                idx = indices[idx_temp];
 
351
                i = GPU_data_int[4 * idx + 1];
 
352
                test = GPU_data_double[3 * idx];
 
353
                Aplus = GPU_data_double[3 * idx + 1];
 
354
                vLleft = b[i + m] * test;
 
355
                dleft = b[i] * test;
 
356
          
 
357
                        P[0] += Aplus * (vLleft - 1.0);
 
358
                        B[0] += - Aplus * dleft;
 
359
                  }
 
360
        }
 
361
        
 
362
        __global__ void resetPB_type0(double *P, double *B, double *v, int *indices,int BPcount)
 
363
        {
 
364
                int idx = indices[threadIdx.x] + blockIdx.x * blockDim.x;
 
365
                int idy = threadIdx.x + blockIdx.y * blockDim.y;
 
366
                
 
367
                P[idx + idy * BPcount] = 0.0;
 
368
                P[idx + idx * BPcount] = 1.0;
 
369
                B[idx] = v[idx];
 
370
        }
 
371
        """)
 
372
                
 
373
                
 
374
                self.updateAB_gtot = mod.get_function("updateAB_gtot")
 
375
                self.updateAB_gtot.prepare(["P","P",'P'],block=(1,1,1))
 
376
                
 
377
                self.updateBD = mod.get_function("updateBD")
 
378
                self.updateBD.prepare(["P","P",'d','P','P'],block=(1,1,1))
 
379
                
 
380
                self.finalizeFun = mod.get_function("finalizeFun")
 
381
                self.finalizeFun.prepare(['P','P','P','P','P','i'],block=(1,1,1))
 
382
                
 
383
                self.finalizeFunBis = mod.get_function("finalizeFunBis")
 
384
                self.finalizeFunBis.prepare(['P','P','P'],block=(1,1,1))
 
385
                
 
386
                self.initPB = mod.get_function("initPB")
 
387
                self.initPB.prepare(['P',"P",'i'],block=(1,1,1))
 
388
                
 
389
                self.fillPB = mod.get_function("fillPB")
 
390
                self.fillPB.prepare(["P","P",'P',"P",'P','P','P','P','P','i','d','i'],block=(1,1,1))
 
391
                
 
392
                self.fillPB_bis = mod.get_function("fillPB_bis")
 
393
                self.fillPB_bis.prepare(["P","P",'P',"P",'P','i','P','i'],block=(1,1,1))
 
394
                
 
395
                self.badFillPB_0 = mod.get_function("badFillPB_0")
 
396
                self.badFillPB_0.prepare(["P","P",'P',"P",'P','P','P','P','P',"P",'i','i','d'],block=(1,1,1))
 
397
                
 
398
                self.resetPB_type0 = mod.get_function("resetPB_type0")
 
399
                self.resetPB_type0.prepare(['P',"P",'P','P','i'],block=(1,1,1))
 
400
                
 
401
                dtype = numpy.dtype(numpy.float64)
 
402
                int_type = numpy.dtype(numpy.int32)
 
403
                
 
404
                self.P_gpu = cuda.mem_alloc(self.P.size * dtype.itemsize)
 
405
                self.B_gpu = cuda.mem_alloc(self.B.size * dtype.itemsize)
 
406
                
 
407
                GPU_data_int = zeros((self.neuron.BPcount,4))
 
408
                GPU_data_double = zeros((self.neuron.BPcount,3))
 
409
                GPU_data_int[:,0] = self.index_ante_list[:]
 
410
                GPU_data_int[:,1] = self.i_list[:]
 
411
                GPU_data_int[:,2] = self.j_list[:]
 
412
                GPU_data_int[:,3] = self.bp_list[:]
 
413
                GPU_data_double[:,0] = self.test_list[:]
 
414
                GPU_data_double[:,1] = self.Aplus_i[:]
 
415
                GPU_data_double[:,2] = self.Aminus_bp[:]
 
416
                self.GPU_data_int = cuda.mem_alloc(4 * self.neuron.BPcount * int_type.itemsize)
 
417
                self.GPU_data_double = cuda.mem_alloc(3 * self.neuron.BPcount * dtype.itemsize)
 
418
                cuda.memcpy_htod(self.GPU_data_int,GPU_data_int.astype(numpy.int32))
 
419
                cuda.memcpy_htod(self.GPU_data_double,GPU_data_double.astype(numpy.float64))
 
420
                
 
421
                self.ind0_gpu = cuda.mem_alloc(self.neuron.BPcount * int_type.itemsize)
 
422
                cuda.memcpy_htod(self.ind0_gpu,numpy.array(self.ind0,numpy.int32))
 
423
                
 
424
                self.ind1_gpu = cuda.mem_alloc(self.neuron.BPcount * int_type.itemsize)
 
425
                cuda.memcpy_htod(self.ind1_gpu,numpy.array(self.ind1,numpy.int32))
 
426
                
 
427
                self.ind2_gpu = cuda.mem_alloc(self.neuron.BPcount * int_type.itemsize)
 
428
                cuda.memcpy_htod(self.ind2_gpu,numpy.array(self.ind2,numpy.int32))
 
429
                
 
430
                self.ind_bctype_0_gpu = cuda.mem_alloc(self.neuron.BPcount * int_type.itemsize)
 
431
                cuda.memcpy_htod(self.ind_bctype_0_gpu,numpy.array(self.ind_bctype_0,numpy.int32))
 
432
                
 
433
                self.ab1_base_gpu =  cuda.mem_alloc(n * dtype.itemsize)
 
434
                
 
435
                self.ab1_gpu =  cuda.mem_alloc(3 * n * dtype.itemsize)
 
436
                self.ab1_gpu_ptr = int(self.ab1_gpu)
 
437
                
 
438
                self.Cm_gpu =  cuda.mem_alloc(n * dtype.itemsize)
 
439
                cuda.memcpy_htod(self.Cm_gpu,self.neuron.Cm.astype(numpy.float64))
 
440
                
 
441
                self.gtot_gpu =  cuda.mem_alloc(n * dtype.itemsize)
 
442
                
 
443
                self.I0_gpu = cuda.mem_alloc(n * dtype.itemsize)
 
444
                
 
445
                self.v_gpu = cuda.mem_alloc(n * dtype.itemsize)
 
446
                cuda.memcpy_htod(self.v_gpu,self.neuron.v)
 
447
                
 
448
                ab0 = zeros(3*n).astype(numpy.float64)
 
449
                ab0[:n] = self.ab0[:]
 
450
                ab0[n:2*n] = self.ab0[:]
 
451
                ab0[2*n:3*n] = self.ab0[:]
 
452
                
 
453
                ab2 = zeros(3*n).astype(numpy.float64)
 
454
                ab2[:n] = self.ab2[:]
 
455
                ab2[n:2*n] = self.ab2[:]
 
456
                ab2[2*n:3*n] = self.ab2[:]
 
457
                
 
458
                dtype = numpy.dtype(numpy.float64)
 
459
                
 
460
                self.ab0_gpu =  cuda.mem_alloc(ab0.size * dtype.itemsize)
 
461
                self.ab0_gpu_ptr = int(self.ab0_gpu)
 
462
                self.ab2_gpu =  cuda.mem_alloc(ab2.size * dtype.itemsize)
 
463
                self.ab2_gpu_ptr = int(self.ab2_gpu)
 
464
                
 
465
                self.bL_gpu =  cuda.mem_alloc(self.bL.size * dtype.itemsize)
 
466
                self.bL_gpu_ptr = int(self.bL_gpu)
 
467
                self.bR_gpu =  cuda.mem_alloc(self.bR.size * dtype.itemsize)
 
468
                self.bR_gpu_ptr = int(self.bR_gpu)
 
469
                
 
470
                self.b_gpu =  cuda.mem_alloc(3 * self.bR.size * dtype.itemsize)
 
471
                self.b_gpu_ptr = int(self.b_gpu) # bd + bL + bR -> vd + vL + vR
 
472
                
 
473
                cuda.memcpy_htod(self.ab0_gpu, ab0)
 
474
                cuda.memcpy_htod(self.ab2_gpu, ab2)
 
475
                cuda.memcpy_htod(self.bL_gpu, self.bL)
 
476
                cuda.memcpy_htod(self.bR_gpu, self.bR)
 
477
                
 
478
                self.ante_gpu = cuda.mem_alloc(self.ante_arr.size * self.ante_arr.dtype.itemsize)
 
479
                self.post_gpu = cuda.mem_alloc(self.ante_arr.size * self.ante_arr.dtype.itemsize)
 
480
                self.v_old_gpu = cuda.mem_alloc(self.neuron.v.size * dtype.itemsize)
 
481
                
 
482
                cuda.memcpy_htod(self.ante_gpu,self.ante_arr)
 
483
                cuda.memcpy_htod(self.post_gpu,self.post_arr)
 
484
                
 
485
                
 
486
                #----------------------------------------------------------------------------------
 
487
                
 
488
                self.v_branchpoints = zeros(self.neuron.BPcount)
 
489
                self.v_bp_gpu = cuda.mem_alloc(self.v_branchpoints.size * dtype.itemsize)
 
490
                
 
491
                self.timeDevice = [0]
 
492
                self.timeDeviceU = [0]
 
493
                self.timeDeviceT = [0]
 
494
                self.timeHost = [0]
 
495
                self.timeUpdater = [0]
 
496
                self.timeSolveHost = [0]
 
497
                self.timeFillHost = [0]
 
498
                self.timeFin = [0]
 
499
                
 
500
        def step(self, neuron):
 
501
                
 
502
                startDevice = time()
 
503
                start = time()
 
504
                
 
505
                if self.first_test_gtot:
 
506
                        self.first_test_gtot=False
 
507
                        neuron._gtot = ones(len(neuron)) * neuron._gtot
 
508
                if self.first_step :
 
509
                        self.gtot = neuron._gtot
 
510
                        cuda.memcpy_htod(self.gtot_gpu,self.gtot)
 
511
                        self.update_ab_base_gpu()
 
512
                        self.update_ab_gtot_gpu()
 
513
                        
 
514
                if self.neuron.changed :
 
515
                        self.I0 = neuron._I0
 
516
                        cuda.memcpy_htod(self.I0_gpu,self.I0)
 
517
                
 
518
                self.update_bd_gpu()
 
519
                
 
520
                end = time()
 
521
                self.timeDeviceU.append(self.timeDeviceU[-1] + end - start)
 
522
                
 
523
                start = time()
 
524
                
 
525
                if self.first_step :
 
526
                        self.calculate_vd_vL_vR_gpu()
 
527
                else:
 
528
                        self.calculate_vd_gpu()
 
529
                
 
530
                end = time()
 
531
                self.timeDeviceT.append(self.timeDeviceT[-1] + end - start)
 
532
                endDevice = time()
 
533
                self.timeDevice.append(self.timeDevice[-1] + endDevice-startDevice)
 
534
 
 
535
                startHost = time()
 
536
                
 
537
                #-----------------------------------------------------fill P and B----------------------------------
 
538
                # 1) initPB reset P and B. every value is 0.
 
539
                # 2) fillPB calculates the coefficients of P and B where there is no lock issue.
 
540
                # 3) fillPB_bis does the same on two exclusive lists of indexes in P and B, ind1 and ind2 to avoid lock issues
 
541
                # 3) badFillPB_0 ajusts the value in the particular case of 0. this one is not parallel but not big.
 
542
                start = time()
 
543
                
 
544
                self.initPB.prepared_call((neuron.BPcount, neuron.BPcount),self.P_gpu, self.B_gpu, numpy.int32(neuron.BPcount))
 
545
                
 
546
                self.fillPB.prepared_call((neuron.BPcount,1),self.P_gpu, self.B_gpu, self.b_gpu,
 
547
                                                                self.Cm_gpu, self.gtot_gpu, self.GPU_data_int, self.GPU_data_double, self.I0_gpu, self.v_gpu,
 
548
                                                                numpy.int32(neuron.BPcount), self.neuron.clock.dt, numpy.int32(len(neuron)))
 
549
                
 
550
                self.fillPB_bis.prepared_call((len(self.ind1),1),self.P_gpu, self.B_gpu, self.b_gpu, self.GPU_data_int, self.GPU_data_double,
 
551
                                                                        numpy.int32(neuron.BPcount), self.ind1_gpu, numpy.int32(len(neuron)))
 
552
                if len(self.ind2)>0:
 
553
                        self.fillPB_bis.prepared_call((len(self.ind2),1),self.P_gpu, self.B_gpu, self.b_gpu, self.GPU_data_int, self.GPU_data_double,
 
554
                                                                        numpy.int32(neuron.BPcount), self.ind2_gpu, numpy.int32(len(neuron)))
 
555
                        
 
556
                self.badFillPB_0.prepared_call((1,1),self.P_gpu, self.B_gpu, self.b_gpu, self.GPU_data_int, self.GPU_data_double,
 
557
                                                                        self.ind0_gpu, self.Cm_gpu, self.gtot_gpu, self.I0_gpu, self.v_gpu, numpy.int32(len(self.ind0)),
 
558
                                                                        numpy.int32(len(neuron)),neuron.clock.dt)#this is not parallel
 
559
                
 
560
                if len(self.ind_bctype_0)>0:
 
561
                        self.resetPB_type0.prepared_call((len(self.ind_bctype_0), neuron.BPcount), self.P_gpu, self.B_gpu, self.v_gpu, self.ind_bctype_0_gpu,
 
562
                                                                                numpy.int32(neuron.BPcount))
 
563
                        
 
564
                end = time()
 
565
                self.timeFillHost.append(self.timeFillHost[-1] + end - start)
 
566
                
 
567
                #------------------------------------------------------solve PV=B-----------------------------------
 
568
                
 
569
                cuda.memcpy_dtoh(self.P,self.P_gpu)
 
570
                cuda.memcpy_dtoh(self.B,self.B_gpu)
 
571
                
 
572
                start = time()
 
573
                self.solution_bp = solve(self.P,self.B) # better : do it on the GPU, use a sparse solver.
 
574
                end = time()
 
575
                self.timeSolveHost.append(self.timeSolveHost[-1] + end - start)
 
576
                
 
577
                cuda.memcpy_htod(self.v_bp_gpu, self.solution_bp)
 
578
                
 
579
                #-------------------------------------------------------update v-------------------------------------
 
580
                start = time()
 
581
                self.finalize_v_gpu()
 
582
                end = time()
 
583
                self.timeFin.append(self.timeFin[-1] + end - start)
 
584
                
 
585
                cuda.memcpy_dtoh(self.neuron.v,self.v_gpu)
 
586
                
 
587
                endHost = time()
 
588
                self.timeHost.append(self.timeHost[-1] + endHost - startHost)
 
589
                
 
590
                self.first_step = False
 
591
        
 
592
        def solve_branchpoints(self):
 
593
                BPcount = self.neuron.BPcount
 
594
                
 
595
                code ="""
 
596
            // where to perform the computation
 
597
                        typedef cusp::device_memory MemorySpace;
 
598
 
 
599
                        // which floating point type to use
 
600
                        typedef double ValueType;
 
601
                        
 
602
                        // create an empty sparse matrix structure (coo format)
 
603
                cusp::coo_matrix<int, ValueType, MemorySpace> P;
 
604
                        
 
605
                        // allocate storage for solution (x) and right hand side (b)
 
606
                cusp::array1d<ValueType, MemorySpace> x(P.num_rows, 0);
 
607
                cusp::array1d<ValueType, MemorySpace> B(P.num_rows, 1);
 
608
                        
 
609
                // fill P and B
 
610
                        
 
611
                // set stopping criteria:
 
612
                //  iteration_limit    = 100
 
613
                //  relative_tolerance = 1e-3
 
614
                cusp::default_monitor<ValueType> monitor(B, 100, 1e-3);
 
615
 
 
616
                // set preconditioner (identity)
 
617
                cusp::identity_operator<ValueType, MemorySpace> M(P.num_rows, P.num_rows);
 
618
 
 
619
                // solve the linear system P * x = B with the BiConjugate Gradient Stabilized method
 
620
                cusp::krylov::bicgstab(P, x, B, monitor, M);
 
621
                
 
622
        """
 
623
                support_code=''
 
624
                includepath = ["/usr/local/cuda/include/"]
 
625
                libpath = ['/usr/local/cuda/lib/','/usr/lib/']
 
626
                
 
627
                weave.inline(code,['BPcount'],support_code=support_code,headers = ["<cusp/coo_matrix.h>","<cusp/hyb_matrix.h>","<cusp/krylov/bicgstab.h>","<cuda_runtime_api.h>","<cuda.h>"],
 
628
                                        include_dirs=includepath, library_dirs=libpath, libraries=["cusp"], runtime_library_dirs=libpath, compiler="gcc")
 
629
                
 
630
        
 
631
        
 
632
        def calc(self,var,res_gpu): #compute the value of neuron.var and store it in res_gpu
 
633
                self._state_updater.calc(var,res_gpu)
 
634
                
 
635
        def update_ab_base_gpu(self):
 
636
                self.ab1_base[:-1] = (- self.neuron.Cm[:-1] / self.neuron.clock.dt * second - self.Aminus[:-1] - self.Aplus[1:])
 
637
                self.ab1_base[-1] = (- self.neuron.Cm[-1] / self.neuron.clock.dt * second - self.Aminus[-1])
 
638
                cuda.memcpy_htod(self.ab1_base_gpu,self.ab1_base)
 
639
                
 
640
        def update_ab_gtot_gpu(self):
 
641
                n = len(self.neuron)
 
642
                self.updateAB_gtot.prepared_call((n,1), self.ab1_gpu, self.ab1_base_gpu, self.gtot_gpu)
 
643
                
 
644
        def update_bd_gpu(self):
 
645
                n = len(self.neuron)
 
646
                dt = self.neuron.clock.dt
 
647
                
 
648
                self.updateBD.prepared_call((n,1), self.b_gpu, self.Cm_gpu, dt, self.v_gpu, self.I0_gpu)
 
649
                
 
650
        def calculate_vd_vL_vR_gpu(self):
 
651
                
 
652
                m = len(self.neuron)
 
653
                
 
654
                ab0_gpu_ptr = self.ab0_gpu_ptr
 
655
                ab1_gpu_ptr = self.ab1_gpu_ptr
 
656
                ab2_gpu_ptr = self.ab2_gpu_ptr
 
657
                bL_gpu_ptr = self.bL_gpu_ptr
 
658
                bR_gpu_ptr = self.bR_gpu_ptr
 
659
                b_gpu_ptr = self.b_gpu_ptr
 
660
                
 
661
                code ="""
 
662
            cusparseHandle_t handle = 0;
 
663
            cusparseCreate(&handle);
 
664
            
 
665
            double *bL_gpu = (double *)bL_gpu_ptr;
 
666
            double *bR_gpu = (double *)bR_gpu_ptr;
 
667
            double *b_gpu = (double *)b_gpu_ptr;
 
668
            double *ab0_gpu = (double *)ab0_gpu_ptr;
 
669
            double *ab1_gpu = (double *)ab1_gpu_ptr;
 
670
            double *ab2_gpu = (double *)ab2_gpu_ptr;
 
671
            
 
672
            //bd is up to date in b_gpu
 
673
            cudaMemcpy(b_gpu + m, bL_gpu, m*sizeof(double), cudaMemcpyDeviceToDevice);
 
674
            cudaMemcpy(b_gpu + 2*m, bR_gpu, m*sizeof(double), cudaMemcpyDeviceToDevice);
 
675
            
 
676
            cusparseDgtsvStridedBatch(handle, m, ab2_gpu, ab1_gpu, ab0_gpu, b_gpu, 3, m);
 
677
            //now b_gpu contains vd, vL, vR
 
678
            cudaDeviceSynchronize();
 
679
            
 
680
            cusparseDestroy(handle);
 
681
        """
 
682
                support_code='extern "C" cusparseStatus_t cusparseDgtsvStridedBatch(cusparseHandle_t handle, int m,const double *dl, const double *d,const double *du, double *x,int batchCount, int batchStride);'
 
683
                includepath = ["/usr/local/cuda/include/"]
 
684
                libpath = ['/usr/local/cuda/lib/','/usr/lib/']
 
685
                
 
686
                
 
687
                weave.inline(code,['ab0_gpu_ptr','ab1_gpu_ptr','ab2_gpu_ptr','bL_gpu_ptr','bR_gpu_ptr','m','b_gpu_ptr'],support_code=support_code,headers = ["<cusparse.h>","<cuda_runtime_api.h>","<cuda.h>"],
 
688
                                        include_dirs=includepath, library_dirs=libpath, libraries=["cusparse"], runtime_library_dirs=libpath, compiler="gcc")
 
689
        
 
690
        def calculate_vd_gpu(self):
 
691
                
 
692
                m = len(self.neuron)
 
693
                
 
694
                ab0_gpu_ptr = self.ab0_gpu_ptr
 
695
                ab1_gpu_ptr = self.ab1_gpu_ptr
 
696
                ab2_gpu_ptr = self.ab2_gpu_ptr
 
697
                b_gpu_ptr = self.b_gpu_ptr
 
698
                
 
699
                code ="""
 
700
            cusparseHandle_t handle = 0;
 
701
            cusparseCreate(&handle);
 
702
            
 
703
            double *b_gpu = (double *)b_gpu_ptr;
 
704
            double *ab0_gpu = (double *)ab0_gpu_ptr;
 
705
            double *ab1_gpu = (double *)ab1_gpu_ptr;
 
706
            double *ab2_gpu = (double *)ab2_gpu_ptr;
 
707
            
 
708
            //bd is up to date in b_gpu
 
709
            
 
710
            cusparseDgtsvStridedBatch(handle, m, ab2_gpu, ab1_gpu, ab0_gpu, b_gpu, 1, m);
 
711
            //now b_gpu contains vd, vL, vR
 
712
            cudaDeviceSynchronize();
 
713
            
 
714
            cusparseDestroy(handle);
 
715
        """
 
716
                support_code='extern "C" cusparseStatus_t cusparseDgtsvStridedBatch(cusparseHandle_t handle, int m,const double *dl, const double *d,const double *du, double *x,int batchCount, int batchStride);'
 
717
                includepath = ["/usr/local/cuda/include/"]
 
718
                libpath = ['/usr/local/cuda/lib/','/usr/lib/']
 
719
                
 
720
                
 
721
                weave.inline(code,['ab0_gpu_ptr','ab1_gpu_ptr','ab2_gpu_ptr','m','b_gpu_ptr'],support_code=support_code,headers = ["<cusparse.h>","<cuda_runtime_api.h>","<cuda.h>"],
 
722
                                        include_dirs=includepath, library_dirs=libpath, libraries=["cusparse"], runtime_library_dirs=libpath, compiler="gcc")
 
723
                        
 
724
        def calculate_vd_vL_vR_gpu_branches(self):
 
725
                
 
726
                m = len(self.neuron)
 
727
                systemsCount = len(self.i_list_bis)
 
728
                
 
729
                i_list = numpy.array(self.i_list_bis)
 
730
                j_list = numpy.array(self.j_list_bis)
 
731
                
 
732
                ab0_gpu_ptr = self.ab0_gpu_ptr
 
733
                ab1_gpu_ptr = self.ab1_gpu_ptr
 
734
                ab2_gpu_ptr = self.ab2_gpu_ptr
 
735
                bL_gpu_ptr = self.bL_gpu_ptr
 
736
                bR_gpu_ptr = self.bR_gpu_ptr
 
737
                b_gpu_ptr = self.b_gpu_ptr
 
738
                
 
739
                code ="""
 
740
            cusparseHandle_t handle = 0;
 
741
            cusparseCreate(&handle);
 
742
            
 
743
            double *bL_gpu = (double *)bL_gpu_ptr;
 
744
            double *bR_gpu = (double *)bR_gpu_ptr;
 
745
            
 
746
            double *b_gpu = (double *)b_gpu_ptr;
 
747
            
 
748
            double *ab0_gpu = (double *)ab0_gpu_ptr;
 
749
            double *ab1_gpu = (double *)ab1_gpu_ptr;
 
750
            double *ab2_gpu = (double *)ab2_gpu_ptr;
 
751
            
 
752
            //bd is up to date in b_gpu
 
753
            cudaMemcpy(b_gpu + m, bL_gpu, m*sizeof(double), cudaMemcpyDeviceToDevice);
 
754
            cudaMemcpy(b_gpu + 2*m, bR_gpu, m*sizeof(double), cudaMemcpyDeviceToDevice);
 
755
            
 
756
            int i;
 
757
            int j;
 
758
            int n;
 
759
            
 
760
            for(int idx=0;idx<systemsCount;idx++){
 
761
                i = i_list[idx];
 
762
                j = j_list[idx];
 
763
                n = j-i+1;
 
764
                cusparseDgtsvStridedBatch(handle, n , ab2_gpu+i, ab1_gpu+i, ab0_gpu+i, b_gpu+i, 3, m);
 
765
            }
 
766
            
 
767
            //cusparseDgtsvStridedBatch(handle, m, ab2_gpu, ab1_gpu, ab0_gpu, b_gpu, 3, m);
 
768
            cudaDeviceSynchronize();
 
769
            
 
770
            cusparseDestroy(handle);
 
771
        """
 
772
                support_code='extern "C" cusparseStatus_t cusparseDgtsvStridedBatch(cusparseHandle_t handle, int m,const double *dl, const double *d,const double *du, double *x,int batchCount, int batchStride);'
 
773
                includepath = ["/usr/local/cuda/include/"]
 
774
                libpath = ['/usr/local/cuda/lib/','/usr/lib/']
 
775
                
 
776
                
 
777
                weave.inline(code,['i_list','j_list','ab0_gpu_ptr','ab1_gpu_ptr','ab2_gpu_ptr','bL_gpu_ptr','bR_gpu_ptr','systemsCount','m','b_gpu_ptr'],support_code=support_code,headers = ["<cusparse.h>","<cuda_runtime_api.h>","<cuda.h>"],
 
778
                                        include_dirs=includepath, library_dirs=libpath, libraries=["cusparse"], runtime_library_dirs=libpath, compiler="gcc")
 
779
                
 
780
        
 
781
        def finalize_v_gpu(self):
 
782
                n = len(self.neuron)
 
783
                
 
784
                self.finalizeFun.prepared_call((n,1), self.v_gpu, self.v_bp_gpu, self.ante_gpu, self.post_gpu, self.b_gpu, numpy.int32(len(self.neuron)))
 
785
                self.finalizeFunBis.prepared_call((self.neuron.BPcount,1), self.v_gpu, self.v_bp_gpu, self.GPU_data_int)
 
786
                
 
787
                
 
788
        def had_variation(self):
 
789
                return self._state_updater.had_variation(self.neuron.clock.dt);
 
790
        
 
791
        
 
792
        def __call__(self, neuron):
 
793
                '''
 
794
                Updates the state variables.
 
795
                '''
 
796
                dt_max = 0.1 * ms
 
797
                dt_min = 0.01 * ms
 
798
                
 
799
                if not self._isprepared:
 
800
                        self.prepare()
 
801
                        self._isprepared=True
 
802
                        print "state updater prepared"
 
803
                self.callcount+=1
 
804
                print self.callcount
 
805
                #Update I,V
 
806
                startUpdater = time()
 
807
                if neuron.changed:
 
808
                        self._state_updater.changed = True
 
809
                self._state_updater(neuron)
 
810
                endUpdater = time()
 
811
                self.timeUpdater.append(self.timeUpdater[-1] + endUpdater-startUpdater)
 
812
                self.step(neuron)
 
813
                total_time = self.timeUpdater[-1]+self.timeDevice[-1]+self.timeHost[-1]
 
814
                print " tot:",total_time," U:",self.timeUpdater[-1]," D:",self.timeDevice[-1]," DU:",self.timeDeviceU[-1]," DT:",self.timeDeviceT[-1]-self.timeDeviceT[1]," H:",self.timeHost[-1]," SH:",self.timeSolveHost[-1]," FH:",self.timeFillHost[-1]," Fin:",self.timeFin[-1]
 
815
        
 
816
        def __len__(self):
 
817
                '''
 
818
                Number of state variables
 
819
                '''
 
820
                return len(self.eqs)