1
import pycuda.driver as cuda
3
from pycuda.compiler import SourceModule
5
from morphology import Morphology
6
from brian.stdunits import *
7
from brian.units import *
8
from brian.reset import NoReset
9
from brian.stateupdater import StateUpdater
10
from gpustateupdater import *
11
from brian.inspection import *
12
from brian.optimiser import *
13
from itertools import count
14
from brian.neurongroup import NeuronGroup
15
from scipy.linalg import solve_banded
16
from numpy import zeros, ones, isscalar, diag_indices
17
from numpy.linalg import solve
18
from brian.clock import guess_clock
19
from brian.equations import Equations
22
from math import ceil, log
23
from scipy import weave
31
# NOTE(review): `warnings` is used below but no `import warnings` is visible in
# this mangled view of the file; in the original this warn call presumably sat
# inside a `try: import sympy / except ImportError:` block (original line
# numbers 24-30 are missing here) — confirm against version control.
warnings.warn('sympy not installed: some features in SpatialNeuron will not be available')
34
# Public API of this module: only the state updater class is exported.
__all__ = ['LinearSpatialStateUpdater']
36
# NOTE(review): this whole chunk is corrupted — indentation is stripped, bare
# line-number artifacts are interleaved between code lines, and many original
# lines are missing (the triple-quote delimiters of the class docstring below,
# for example). Code is kept byte-identical; only comments are added. Restore
# the file from version control before attempting functional edits.
class LinearSpatialStateUpdater(StateUpdater):
38
State updater for compartmental models.
41
# Constructor: caches references from the SpatialNeuron and arms the lazy
# flags; the heavy preparation work is deferred until the first __call__
# (guarded by self._isprepared). NOTE(review): original lines 43 and 48-50
# are missing from this view (the `clock` argument is accepted but its
# handling is not visible) — confirm against version control.
def __init__(self, neuron, clock=None):
42
self.eqs = neuron._eqs
44
# Lazy-init flags: _isprepared gates the one-off prepare step,
# first_step / first_test_gtot gate first-iteration special cases in step().
self._isprepared = False
45
self.first_step = True
46
self._state_updater=neuron._state_updater # to update the currents
47
self.first_test_gtot=True
51
# Recursive walk of the neuron morphology tree. For each branch it:
#   1) fills neuron.index / neuron.branches with morphology bookkeeping,
#   2) corrects Aplus/Aminus coupling coefficients at branch points (the
#      vectorised formulas in the prepare step assume parent = n-1, which is
#      wrong at branch points),
#   3) fills neuron.bc (boundary conditions),
# then recurses into each child with this branch's endpoint as `ante`.
# NOTE(review): mangled fragment — docstring delimiters, the assignments of
# `i`, `endpoint`, `children_number`, `VL0`, `VR0` and several control-flow
# lines are missing from this view (original lines 57, 59, 61, 63-64, 75-79,
# 84, 97-103 absent). Comments only; code bytes untouched.
def prepare_branch(self, morphology, mid_diameter,ante=0):
53
1) fill neuron.branches and neuron.index with information about the morphology of the neuron
54
2) change some wrong values in Aplus and Aminus. Indeed these were correct only if the neuron is linear.
55
Knowledge of the morphology gives correct values
56
3) fill neuron.bc (boundary conditions)
58
branch = morphology.branch()
60
j= i + len(branch) - 2
62
self.neuron.index[i:endpoint+1] = self.neuron.BPcount
65
for x in (morphology.children):#parent of segment n isnt always n-1 at branch points. We need to change Aplus and Aminus
66
#gc = 2 * msiemens/cm**2
67
startpoint = x._origin
68
# Recompute the half-point diameter between this branch's endpoint and the
# child's first compartment, then the axial coupling coefficients from it.
mid_diameter[startpoint] = .5*(self.neuron.diameter[endpoint]+self.neuron.diameter[startpoint])
69
self.Aminus[startpoint]=mid_diameter[startpoint]**2/(4*self.neuron.diameter[startpoint]*self.neuron.length[startpoint]**2*self.neuron.Ri)
71
self.Aplus[startpoint]=mid_diameter[startpoint]**2/(4*self.neuron.diameter[endpoint]*self.neuron.length[endpoint]**2*self.neuron.Ri)
73
#self.Aplus[startpoint]=gc
74
#self.Aminus[startpoint]=gc
80
# Boundary-condition bookkeeping: a leaf endpoint (no children) with the
# default pointType 1 inherits the neuron-wide bc_type.
pointType = self.neuron.bc[endpoint]
81
hasChild = (children_number>0)
82
if (not hasChild) and (pointType == 1):
83
self.neuron.bc[endpoint] = self.neuron.bc_type
85
pointType = self.neuron.bc[endpoint]
86
index_ante = self.neuron.index[ante]
87
self.neuron.branches.append((i,j,endpoint,ante,index_ante,pointType))
88
self.neuron.BPcount += 1
90
if (j-i+2)>1: #j-i+2 = len(branch)
91
self.neuron.long_branches_count +=1
93
# Fill the banded (tridiagonal) system diagonals for this branch's interior.
self.ab[0,i:j]= self.Aplus[i+1:j+1]
94
self.ab0[i:j] = self.Aplus[i+1:j+1]
95
self.ab[2,i+1:j+1]= self.Aminus[i+1:j+1]
96
self.ab2[i+1:j+1] = self.Aminus[i+1:j+1]
100
# Right-hand sides for the vL/vR auxiliary solves.
# NOTE(review): VL0 / VR0 are not defined anywhere in this view — presumably
# set on the missing lines 97-99/101-103; verify before editing.
self.bL[i] = (- VL0 * self.Aminus[i])
104
self.bR[j] = (- VR0 * self.Aplus[j+1])
106
# Depth-first recursion into children; this endpoint becomes their `ante`.
for x in (morphology.children):
107
self.prepare_branch(x,mid_diameter,endpoint)
111
# NOTE(review): the `def` line of this method is missing from this mangled
# view — from Brian's layout this is evidently the one-off `prepare(self)`
# step, called lazily from __call__. It (a) builds the Hines-1984 tridiagonal
# coefficients, (b) walks the morphology via prepare_branch, (c) compiles the
# CUDA kernels and allocates all device buffers. Many original lines are
# missing (list initialisations i_list/j_list/bp_list/test_list/ind0/ind2,
# kernel braces, `n = len(self.neuron)`, timing lists timeHost/timeFin, ...).
# Comments only; code bytes untouched. Docstring delimiters below are lost.
From Hines 1984 paper, discrete formula is:
112
A_plus*V(i+1)-(A_plus+A_minus)*V(i)+A_minus*V(i-1)=Cm/dt*(V(i,t+dt)-V(i,t))+gtot(i)*V(i)-I0(i)
117
This gives the following tridiagonal system:
118
A_plus*V(i+1)-(Cm/dt+gtot(i)+A_plus+A_minus)*V(i)+A_minus*V(i-1)=-Cm/dt*V(i,t)-I0(i)
120
Boundaries, one simple possibility (sealed ends):
121
-(Cm/dt+gtot(n)+A_minus)*V(n)+A_minus*V(n-1)=-Cm/dt*V(n,t)-I0(n)
122
A_plus*V(1)-(Cm/dt+gtot(0)+A_plus)*V(0)=-Cm/dt*V(0,t)-I0(0)
124
# --- axial coupling coefficients (host side) -------------------------------
mid_diameter = zeros(len(self.neuron)) # mid(i) : (i-1) <-> i
125
mid_diameter[1:] = .5*(self.neuron.diameter[:-1]+self.neuron.diameter[1:])
127
self.Aplus = zeros(len(self.neuron)) # A+ i -> j = Aplus(j)
128
self.Aminus = zeros(len(self.neuron)) # A- i <- j = Aminus(j)
129
self.Aplus[1]= mid_diameter[1]**2/(4*self.neuron.diameter[1]*self.neuron.length[1]**2*self.neuron.Ri)
130
self.Aplus[2:]=mid_diameter[2:]**2/(4*self.neuron.diameter[1:-1]*self.neuron.length[1:-1]**2*self.neuron.Ri)
131
self.Aminus[1:]=mid_diameter[1:]**2/(4*self.neuron.diameter[1:]*self.neuron.length[1:]**2*self.neuron.Ri)
133
self.neuron.index = zeros(len(self.neuron),int) # gives the index of the branch containing the current compartment
134
self.neuron.branches = [] # (i,j,bp,ante,ante_index,pointType)
135
# i is the first compartment
136
# bp is the last, a branch point
137
# j is the end of the "inner branch". j = bp-1
138
# ante is the branch point to which i is connected
140
self.neuron.BPcount = 0 # number of branch points (or branches). = len(self.neuron.branches)
141
self.neuron.long_branches_count = 0 # number of branches with len(branch) > 1
143
#self.vL = cuda.pagelocked_zeros((len(self.neuron)),numpy.float64)
144
#self.vR = cuda.pagelocked_zeros((len(self.neuron)),numpy.float64)
145
#self.d = cuda.pagelocked_zeros((len(self.neuron)),numpy.float64)
147
# Page-locked host buffers for fast host<->device transfer of the RHS vectors.
self.bL = cuda.pagelocked_zeros((len(self.neuron)),numpy.float64)
148
self.bR = cuda.pagelocked_zeros((len(self.neuron)),numpy.float64)
149
#self.bd = cuda.pagelocked_zeros((len(self.neuron)),numpy.float64)
150
self.ab = zeros((3,len(self.neuron)))
151
self.ab0 = zeros(len(self.neuron))
152
self.ab1 = cuda.pagelocked_zeros((len(self.neuron)),numpy.float64)
153
self.ab2 = zeros(len(self.neuron))
154
self.ab1_base = zeros(len(self.neuron))
155
#self.res = cuda.pagelocked_zeros((3 * len(self.neuron)),numpy.float64)
157
self.mTrunc = 0 # used to truncate vL and vR
158
self.delta_list = zeros(len(self.neuron)) #used to find mTrunc
160
# prepare_branch : fill neuron.index, neuron.branches, changes Aplus & Aminus
161
self.prepare_branch(self.neuron.morphology, mid_diameter,0)
163
# linear system P V = B used to deal with the voltage at branch points and take boundary conditions into account.
164
self.P = zeros((self.neuron.BPcount,self.neuron.BPcount))
165
self.B = zeros(self.neuron.BPcount)
166
self.solution_bp = zeros(self.neuron.BPcount)
168
self.gtot = zeros(len(self.neuron))
169
self.I0 = zeros(len(self.neuron))
176
# --- per-branch index bookkeeping ------------------------------------------
# NOTE(review): i_list/j_list/i_list_bis/j_list_bis/bp_list/id/test_list/
# ante_list/post_list/ind0 initialisations are on lines missing from this view.
self.pointType_list = []
177
self.pointTypeAnte_list = []
178
self.index_ante_list0 = []
179
self.index_ante_list1 = []
180
self.index_ante_list2 = []
183
self.ante_list_idx = []
184
self.post_list_idx = []
187
temp = zeros(self.neuron.BPcount)
189
self.ind_bctype_0 = []
190
for index,(i,j,bp,ante,index_ante,pointType) in enumerate(self.neuron.branches) :
191
self.i_list.append(i)
192
self.j_list.append(j)
194
self.i_list_bis.append(i)
199
self.j_list_bis.append(j)
203
self.bp_list.append(bp)
204
self.pointType_list.append(max(1,pointType))
205
self.pointTypeAnte_list.append(max(1,self.neuron.bc[ante]))
206
temp[index] = index_ante
207
self.id.append(index)
209
self.test_list.append(1)
211
self.test_list.append(0)
212
for x in xrange(j-i+2):
213
self.ante_list.append(ante)
214
self.post_list.append(bp)
215
self.ante_list_idx.append(index_ante)
216
self.post_list_idx.append(index)
217
if index_ante == 0 and index != 0:
218
self.ind0.append(index)
220
self.ind_bctype_0.append(bp)
222
self.ante_arr = numpy.array(self.ante_list_idx)
223
self.post_arr = numpy.array(self.post_list_idx)
225
# ind1/ind2: two mutually exclusive index sets so fillPB_bis can accumulate
# into P and B without write conflicts between concurrent threads.
self.index_ante_list1, self.ind1 = numpy.unique(temp,return_index=True)
226
self.ind1 = numpy.sort(self.ind1)
227
self.index_ante_list1 = temp[self.ind1]
228
self.index_ante_list1 = list(self.index_ante_list1)
230
for x in xrange(self.neuron.BPcount):
232
self.ind2 = numpy.delete(self.ind2,self.ind1,None)
233
self.ind2 = numpy.setdiff1d(self.ind2, self.ind0, assume_unique=True)
234
self.index_ante_list2 = temp[self.ind2]
235
self.index_ante_list2 = list(self.index_ante_list2)
237
self.index_ante_list = list(temp)
238
self.Aminus_bp = self.Aminus[self.bp_list]
239
self.Aminus_bp [:] *= self.pointType_list[:]
240
self.Aplus_i = self.Aplus[self.i_list]
241
self.Aplus_i[:] *= self.pointTypeAnte_list[:]
242
#--------------------------------------------------------GPU------------------------
245
# CUDA kernels compiled once. NOTE(review): kernel bodies below are missing
# their braces and several lines in this view — the strings are incomplete.
mod = SourceModule("""
246
__global__ void updateAB_gtot(double *ab1, double *ab1_base, double *gtot)
248
int idx = threadIdx.x + blockIdx.x * blockDim.x;
250
ab1[idx] = ab1_base[idx] - gtot[idx];
251
ab1[idx+gridDim.x] = ab1_base[idx] - gtot[idx];
252
ab1[idx+2*gridDim.x] = ab1_base[idx] - gtot[idx];
255
__global__ void updateBD(double *bd, double *Cm, double dt,double *v, double *I0)
257
int idx = threadIdx.x + blockIdx.x * blockDim.x;
259
bd[idx] = - Cm[idx] / dt * v[idx] - I0[idx];
262
__global__ void finalizeFun(double *v, double *v_bp, int *ante,int *post, double *b, int m)
264
int idx = threadIdx.x + blockIdx.x * blockDim.x;
265
int idx_a = ante[idx];
266
int idx_p = post[idx];
268
v[idx] = b[idx + m] * v_bp[idx_a] + b[idx + 2*m] * v_bp[idx_p] + b[idx]; // vL, vR, d
271
__global__ void finalizeFunBis(double *v, double *v_bp, int *GPU_data_int)
273
int idx = threadIdx.x + blockIdx.x * blockDim.x;
274
int bp = GPU_data_int[4 * idx + 3];
278
__global__ void initPB(double *P, double *B, int BPcount)
280
int idx = threadIdx.x + blockIdx.x * blockDim.x;
281
int idy = threadIdx.x + blockIdx.y * blockDim.y;
283
P[idx + idy * BPcount] = 0.0;
287
//GPU_data_int is : ante_list, i_list, j_list, bp_list
288
//GPU_data_double is : test_list, Aplus, Aminus
289
__global__ void fillPB(double *P, double *B, double *b, double *Cm_l, double *gtot_l, int *GPU_data_int,
290
double *GPU_data_double, double *I0_l, double *v_l, int BPcount, double dt, int m)
292
int idx = threadIdx.x + blockIdx.x * blockDim.x;
293
int idx_ante = GPU_data_int[4 * idx];
294
int i = GPU_data_int[4 * idx + 1];
295
int j = GPU_data_int[4 * idx + 2];
296
int bp = GPU_data_int[4 * idx + 3];
297
double test = GPU_data_double[3 * idx];
298
double Aplus = GPU_data_double[3 * idx + 1];
299
double Aminus = GPU_data_double[3 * idx + 2];
300
double Cm = Cm_l[bp];
301
double gtot = gtot_l[bp];
302
double I0 = I0_l[bp];
303
double v_bp = v_l[bp];
304
double vLright = b[j + m] * test;
305
double vRleft = b[i + 2*m] * test;
306
double vRright = b[j + 2*m] * test;
307
double dright = b[j] * test;
309
B[idx] += -Cm/dt * v_bp - I0 -Aminus * dright;
310
P[idx * BPcount + idx] += -Cm/dt - gtot + Aminus * (vRright - 1.0);
311
P[idx * BPcount + idx_ante] += Aminus * vLright;
312
P[idx_ante * BPcount + idx] += Aplus * vRleft;
315
__global__ void fillPB_bis(double *P, double *B, double *b, int *GPU_data_int, double *GPU_data_double,
316
int BPcount, int *indices, int m)
318
int idx_temp = threadIdx.x + blockIdx.x * blockDim.x;
319
int idx = indices[idx_temp];
320
int idx_ante = GPU_data_int[4 * idx];
321
int i = GPU_data_int[4 * idx + 1];
322
int test = GPU_data_double[3 * idx];
323
double Aplus = GPU_data_double[3 * idx + 1];
324
double vLleft = b[i + m] * test;
325
double dleft = b[i] * test;
327
B[idx_ante] += - Aplus * dleft;
328
P[idx_ante * (BPcount + 1)] += Aplus * (vLleft - 1.0);
331
__global__ void badFillPB_0(double *P, double *B, double *b, int *GPU_data_int, double *GPU_data_double,
332
int *indices, double *Cm_l, double *gtot_l, double *I0_l, double *v_l, int len_indices, int m, double dt)
335
double gtot = gtot_l[0];
339
B[0] = - Cm/dt * v_0 - I0;
340
P[0] = - Cm/dt - gtot;
348
for (int idx_temp=0;idx_temp<len_indices;idx_temp++)
350
idx = indices[idx_temp];
351
i = GPU_data_int[4 * idx + 1];
352
test = GPU_data_double[3 * idx];
353
Aplus = GPU_data_double[3 * idx + 1];
354
vLleft = b[i + m] * test;
357
P[0] += Aplus * (vLleft - 1.0);
358
B[0] += - Aplus * dleft;
362
__global__ void resetPB_type0(double *P, double *B, double *v, int *indices,int BPcount)
364
int idx = indices[threadIdx.x] + blockIdx.x * blockDim.x;
365
int idy = threadIdx.x + blockIdx.y * blockDim.y;
367
P[idx + idy * BPcount] = 0.0;
368
P[idx + idx * BPcount] = 1.0;
374
# --- bind and pre-configure the compiled kernels ---------------------------
self.updateAB_gtot = mod.get_function("updateAB_gtot")
375
self.updateAB_gtot.prepare(["P","P",'P'],block=(1,1,1))
377
self.updateBD = mod.get_function("updateBD")
378
self.updateBD.prepare(["P","P",'d','P','P'],block=(1,1,1))
380
self.finalizeFun = mod.get_function("finalizeFun")
381
self.finalizeFun.prepare(['P','P','P','P','P','i'],block=(1,1,1))
383
self.finalizeFunBis = mod.get_function("finalizeFunBis")
384
self.finalizeFunBis.prepare(['P','P','P'],block=(1,1,1))
386
self.initPB = mod.get_function("initPB")
387
self.initPB.prepare(['P',"P",'i'],block=(1,1,1))
389
self.fillPB = mod.get_function("fillPB")
390
self.fillPB.prepare(["P","P",'P',"P",'P','P','P','P','P','i','d','i'],block=(1,1,1))
392
self.fillPB_bis = mod.get_function("fillPB_bis")
393
self.fillPB_bis.prepare(["P","P",'P',"P",'P','i','P','i'],block=(1,1,1))
395
self.badFillPB_0 = mod.get_function("badFillPB_0")
396
self.badFillPB_0.prepare(["P","P",'P',"P",'P','P','P','P','P',"P",'i','i','d'],block=(1,1,1))
398
self.resetPB_type0 = mod.get_function("resetPB_type0")
399
self.resetPB_type0.prepare(['P',"P",'P','P','i'],block=(1,1,1))
401
# --- device buffer allocation and upload -----------------------------------
# NOTE(review): `n` is used below but its assignment (presumably
# n = len(self.neuron)) is on a line missing from this view.
dtype = numpy.dtype(numpy.float64)
402
int_type = numpy.dtype(numpy.int32)
404
self.P_gpu = cuda.mem_alloc(self.P.size * dtype.itemsize)
405
self.B_gpu = cuda.mem_alloc(self.B.size * dtype.itemsize)
407
GPU_data_int = zeros((self.neuron.BPcount,4))
408
GPU_data_double = zeros((self.neuron.BPcount,3))
409
GPU_data_int[:,0] = self.index_ante_list[:]
410
GPU_data_int[:,1] = self.i_list[:]
411
GPU_data_int[:,2] = self.j_list[:]
412
GPU_data_int[:,3] = self.bp_list[:]
413
GPU_data_double[:,0] = self.test_list[:]
414
GPU_data_double[:,1] = self.Aplus_i[:]
415
GPU_data_double[:,2] = self.Aminus_bp[:]
416
self.GPU_data_int = cuda.mem_alloc(4 * self.neuron.BPcount * int_type.itemsize)
417
self.GPU_data_double = cuda.mem_alloc(3 * self.neuron.BPcount * dtype.itemsize)
418
cuda.memcpy_htod(self.GPU_data_int,GPU_data_int.astype(numpy.int32))
419
cuda.memcpy_htod(self.GPU_data_double,GPU_data_double.astype(numpy.float64))
421
self.ind0_gpu = cuda.mem_alloc(self.neuron.BPcount * int_type.itemsize)
422
cuda.memcpy_htod(self.ind0_gpu,numpy.array(self.ind0,numpy.int32))
424
self.ind1_gpu = cuda.mem_alloc(self.neuron.BPcount * int_type.itemsize)
425
cuda.memcpy_htod(self.ind1_gpu,numpy.array(self.ind1,numpy.int32))
427
self.ind2_gpu = cuda.mem_alloc(self.neuron.BPcount * int_type.itemsize)
428
cuda.memcpy_htod(self.ind2_gpu,numpy.array(self.ind2,numpy.int32))
430
self.ind_bctype_0_gpu = cuda.mem_alloc(self.neuron.BPcount * int_type.itemsize)
431
cuda.memcpy_htod(self.ind_bctype_0_gpu,numpy.array(self.ind_bctype_0,numpy.int32))
433
self.ab1_base_gpu = cuda.mem_alloc(n * dtype.itemsize)
435
self.ab1_gpu = cuda.mem_alloc(3 * n * dtype.itemsize)
436
self.ab1_gpu_ptr = int(self.ab1_gpu)
438
self.Cm_gpu = cuda.mem_alloc(n * dtype.itemsize)
439
cuda.memcpy_htod(self.Cm_gpu,self.neuron.Cm.astype(numpy.float64))
441
self.gtot_gpu = cuda.mem_alloc(n * dtype.itemsize)
443
self.I0_gpu = cuda.mem_alloc(n * dtype.itemsize)
445
self.v_gpu = cuda.mem_alloc(n * dtype.itemsize)
446
cuda.memcpy_htod(self.v_gpu,self.neuron.v)
448
# Tridiagonal bands replicated 3x for the batched solver (d, vL, vR systems).
ab0 = zeros(3*n).astype(numpy.float64)
449
ab0[:n] = self.ab0[:]
450
ab0[n:2*n] = self.ab0[:]
451
ab0[2*n:3*n] = self.ab0[:]
453
ab2 = zeros(3*n).astype(numpy.float64)
454
ab2[:n] = self.ab2[:]
455
ab2[n:2*n] = self.ab2[:]
456
ab2[2*n:3*n] = self.ab2[:]
458
dtype = numpy.dtype(numpy.float64)
460
self.ab0_gpu = cuda.mem_alloc(ab0.size * dtype.itemsize)
461
self.ab0_gpu_ptr = int(self.ab0_gpu)
462
self.ab2_gpu = cuda.mem_alloc(ab2.size * dtype.itemsize)
463
self.ab2_gpu_ptr = int(self.ab2_gpu)
465
self.bL_gpu = cuda.mem_alloc(self.bL.size * dtype.itemsize)
466
self.bL_gpu_ptr = int(self.bL_gpu)
467
self.bR_gpu = cuda.mem_alloc(self.bR.size * dtype.itemsize)
468
self.bR_gpu_ptr = int(self.bR_gpu)
470
self.b_gpu = cuda.mem_alloc(3 * self.bR.size * dtype.itemsize)
471
self.b_gpu_ptr = int(self.b_gpu) # bd + bL + bR -> vd + vL + vR
473
cuda.memcpy_htod(self.ab0_gpu, ab0)
474
cuda.memcpy_htod(self.ab2_gpu, ab2)
475
cuda.memcpy_htod(self.bL_gpu, self.bL)
476
cuda.memcpy_htod(self.bR_gpu, self.bR)
478
self.ante_gpu = cuda.mem_alloc(self.ante_arr.size * self.ante_arr.dtype.itemsize)
479
self.post_gpu = cuda.mem_alloc(self.ante_arr.size * self.ante_arr.dtype.itemsize)
480
self.v_old_gpu = cuda.mem_alloc(self.neuron.v.size * dtype.itemsize)
482
cuda.memcpy_htod(self.ante_gpu,self.ante_arr)
483
cuda.memcpy_htod(self.post_gpu,self.post_arr)
486
#----------------------------------------------------------------------------------
488
self.v_branchpoints = zeros(self.neuron.BPcount)
489
self.v_bp_gpu = cuda.mem_alloc(self.v_branchpoints.size * dtype.itemsize)
491
# Cumulative timing accumulators used by step()/__call__ for profiling.
# NOTE(review): timeHost and timeFin are also read elsewhere but their
# initialisations are on lines missing from this view.
self.timeDevice = [0]
492
self.timeDeviceU = [0]
493
self.timeDeviceT = [0]
495
self.timeUpdater = [0]
496
self.timeSolveHost = [0]
497
self.timeFillHost = [0]
500
# One integration step: refresh gtot/I0 on the device, batch-solve the three
# tridiagonal systems (d, vL, vR), assemble and solve the branch-point system
# P V = B on the host, then combine into the new membrane potential v.
# NOTE(review): mangled fragment — the `start`/`end`/`startDevice`/`endDevice`
# time() calls and several branch bodies (original lines 500-504, 508, 513,
# 515, 517-525, ...) are missing from this view. Comments only.
def step(self, neuron):
505
if self.first_test_gtot:
506
self.first_test_gtot=False
507
# Broadcast a scalar _gtot to a full-length array once, on the first step.
neuron._gtot = ones(len(neuron)) * neuron._gtot
509
self.gtot = neuron._gtot
510
cuda.memcpy_htod(self.gtot_gpu,self.gtot)
511
self.update_ab_base_gpu()
512
self.update_ab_gtot_gpu()
514
if self.neuron.changed :
516
cuda.memcpy_htod(self.I0_gpu,self.I0)
521
self.timeDeviceU.append(self.timeDeviceU[-1] + end - start)
526
self.calculate_vd_vL_vR_gpu()
528
self.calculate_vd_gpu()
531
self.timeDeviceT.append(self.timeDeviceT[-1] + end - start)
533
self.timeDevice.append(self.timeDevice[-1] + endDevice-startDevice)
537
#-----------------------------------------------------fill P and B----------------------------------
538
# 1) initPB reset P and B. every value is 0.
539
# 2) fillPB calculates the coefficients of P and B where there is no lock issue.
540
# 3) fillPB_bis does the same on two exclusive lists of indexes in P and B, ind1 and ind2 to avoid lock issues
541
# 3) badFillPB_0 ajusts the value in the particular case of 0. this one is not parallel but not big.
544
self.initPB.prepared_call((neuron.BPcount, neuron.BPcount),self.P_gpu, self.B_gpu, numpy.int32(neuron.BPcount))
546
self.fillPB.prepared_call((neuron.BPcount,1),self.P_gpu, self.B_gpu, self.b_gpu,
547
self.Cm_gpu, self.gtot_gpu, self.GPU_data_int, self.GPU_data_double, self.I0_gpu, self.v_gpu,
548
numpy.int32(neuron.BPcount), self.neuron.clock.dt, numpy.int32(len(neuron)))
550
self.fillPB_bis.prepared_call((len(self.ind1),1),self.P_gpu, self.B_gpu, self.b_gpu, self.GPU_data_int, self.GPU_data_double,
551
numpy.int32(neuron.BPcount), self.ind1_gpu, numpy.int32(len(neuron)))
553
self.fillPB_bis.prepared_call((len(self.ind2),1),self.P_gpu, self.B_gpu, self.b_gpu, self.GPU_data_int, self.GPU_data_double,
554
numpy.int32(neuron.BPcount), self.ind2_gpu, numpy.int32(len(neuron)))
556
self.badFillPB_0.prepared_call((1,1),self.P_gpu, self.B_gpu, self.b_gpu, self.GPU_data_int, self.GPU_data_double,
557
self.ind0_gpu, self.Cm_gpu, self.gtot_gpu, self.I0_gpu, self.v_gpu, numpy.int32(len(self.ind0)),
558
numpy.int32(len(neuron)),neuron.clock.dt)#this is not parallel
560
# Dirichlet-type boundary rows: overwrite with identity rows so V is pinned.
if len(self.ind_bctype_0)>0:
561
self.resetPB_type0.prepared_call((len(self.ind_bctype_0), neuron.BPcount), self.P_gpu, self.B_gpu, self.v_gpu, self.ind_bctype_0_gpu,
562
numpy.int32(neuron.BPcount))
565
self.timeFillHost.append(self.timeFillHost[-1] + end - start)
567
#------------------------------------------------------solve PV=B-----------------------------------
569
cuda.memcpy_dtoh(self.P,self.P_gpu)
570
cuda.memcpy_dtoh(self.B,self.B_gpu)
573
self.solution_bp = solve(self.P,self.B) # better : do it on the GPU, use a sparse solver.
575
self.timeSolveHost.append(self.timeSolveHost[-1] + end - start)
577
cuda.memcpy_htod(self.v_bp_gpu, self.solution_bp)
579
#-------------------------------------------------------update v-------------------------------------
581
self.finalize_v_gpu()
583
self.timeFin.append(self.timeFin[-1] + end - start)
585
cuda.memcpy_dtoh(self.neuron.v,self.v_gpu)
588
self.timeHost.append(self.timeHost[-1] + endHost - startHost)
590
self.first_step = False
592
# Experimental GPU branch-point solver: uses weave.inline + the CUSP library
# to solve P * x = B with BiCGstab instead of the dense host `solve` used in
# step(). NOTE(review): the assignments of the `code` and `support_code`
# strings passed to weave.inline are on lines missing from this view — the
# C++ fragment below is the orphaned body of that string.
def solve_branchpoints(self):
593
BPcount = self.neuron.BPcount
596
// where to perform the computation
597
typedef cusp::device_memory MemorySpace;
599
// which floating point type to use
600
typedef double ValueType;
602
// create an empty sparse matrix structure (coo format)
603
cusp::coo_matrix<int, ValueType, MemorySpace> P;
605
// allocate storage for solution (x) and right hand side (b)
606
cusp::array1d<ValueType, MemorySpace> x(P.num_rows, 0);
607
cusp::array1d<ValueType, MemorySpace> B(P.num_rows, 1);
611
// set stopping criteria:
612
// iteration_limit = 100
613
// relative_tolerance = 1e-3
614
cusp::default_monitor<ValueType> monitor(B, 100, 1e-3);
616
// set preconditioner (identity)
617
cusp::identity_operator<ValueType, MemorySpace> M(P.num_rows, P.num_rows);
619
// solve the linear system P * x = B with the BiConjugate Gradient Stabilized method
620
cusp::krylov::bicgstab(P, x, B, monitor, M);
624
includepath = ["/usr/local/cuda/include/"]
625
libpath = ['/usr/local/cuda/lib/','/usr/lib/']
627
weave.inline(code,['BPcount'],support_code=support_code,headers = ["<cusp/coo_matrix.h>","<cusp/hyb_matrix.h>","<cusp/krylov/bicgstab.h>","<cuda_runtime_api.h>","<cuda.h>"],
628
include_dirs=includepath, library_dirs=libpath, libraries=["cusp"], runtime_library_dirs=libpath, compiler="gcc")
632
# Thin delegation to the wrapped GPU state updater; computes neuron.<var>
# into the supplied device buffer.
def calc(self,var,res_gpu): #compute the value of neuron.var and store it in res_gpu
633
self._state_updater.calc(var,res_gpu)
635
# Recompute the gtot-independent part of the main diagonal
# (-Cm/dt - Aminus - Aplus; sealed end drops the Aplus term) and upload it.
# The `* second` factor strips Brian units so the array is a plain float64.
def update_ab_base_gpu(self):
636
self.ab1_base[:-1] = (- self.neuron.Cm[:-1] / self.neuron.clock.dt * second - self.Aminus[:-1] - self.Aplus[1:])
637
self.ab1_base[-1] = (- self.neuron.Cm[-1] / self.neuron.clock.dt * second - self.Aminus[-1])
638
cuda.memcpy_htod(self.ab1_base_gpu,self.ab1_base)
640
# Launch the updateAB_gtot kernel: ab1 = ab1_base - gtot, replicated for the
# three batched systems. NOTE(review): `n` (the grid size) is not assigned in
# this view — presumably set on a missing line (n = len(self.neuron)); verify.
def update_ab_gtot_gpu(self):
642
self.updateAB_gtot.prepared_call((n,1), self.ab1_gpu, self.ab1_base_gpu, self.gtot_gpu)
644
# Launch the updateBD kernel: bd = -Cm/dt * v - I0 (RHS of the main system).
# NOTE(review): `n` is not assigned in this view — see update_ab_gtot_gpu.
def update_bd_gpu(self):
646
dt = self.neuron.clock.dt
648
self.updateBD.prepared_call((n,1), self.b_gpu, self.Cm_gpu, dt, self.v_gpu, self.I0_gpu)
650
# Solve the three batched tridiagonal systems (vd, vL, vR) in place in b_gpu
# via cuSPARSE's cusparseDgtsvStridedBatch, driven from C through
# weave.inline. NOTE(review): the `code` string assignment and `m`
# (presumably len(self.neuron)) are on lines missing from this view — the C
# fragment below is the orphaned body of that string.
def calculate_vd_vL_vR_gpu(self):
654
ab0_gpu_ptr = self.ab0_gpu_ptr
655
ab1_gpu_ptr = self.ab1_gpu_ptr
656
ab2_gpu_ptr = self.ab2_gpu_ptr
657
bL_gpu_ptr = self.bL_gpu_ptr
658
bR_gpu_ptr = self.bR_gpu_ptr
659
b_gpu_ptr = self.b_gpu_ptr
662
cusparseHandle_t handle = 0;
663
cusparseCreate(&handle);
665
double *bL_gpu = (double *)bL_gpu_ptr;
666
double *bR_gpu = (double *)bR_gpu_ptr;
667
double *b_gpu = (double *)b_gpu_ptr;
668
double *ab0_gpu = (double *)ab0_gpu_ptr;
669
double *ab1_gpu = (double *)ab1_gpu_ptr;
670
double *ab2_gpu = (double *)ab2_gpu_ptr;
672
//bd is up to date in b_gpu
673
cudaMemcpy(b_gpu + m, bL_gpu, m*sizeof(double), cudaMemcpyDeviceToDevice);
674
cudaMemcpy(b_gpu + 2*m, bR_gpu, m*sizeof(double), cudaMemcpyDeviceToDevice);
676
cusparseDgtsvStridedBatch(handle, m, ab2_gpu, ab1_gpu, ab0_gpu, b_gpu, 3, m);
677
//now b_gpu contains vd, vL, vR
678
cudaDeviceSynchronize();
680
cusparseDestroy(handle);
682
support_code='extern "C" cusparseStatus_t cusparseDgtsvStridedBatch(cusparseHandle_t handle, int m,const double *dl, const double *d,const double *du, double *x,int batchCount, int batchStride);'
683
includepath = ["/usr/local/cuda/include/"]
684
libpath = ['/usr/local/cuda/lib/','/usr/lib/']
687
weave.inline(code,['ab0_gpu_ptr','ab1_gpu_ptr','ab2_gpu_ptr','bL_gpu_ptr','bR_gpu_ptr','m','b_gpu_ptr'],support_code=support_code,headers = ["<cusparse.h>","<cuda_runtime_api.h>","<cuda.h>"],
688
include_dirs=includepath, library_dirs=libpath, libraries=["cusparse"], runtime_library_dirs=libpath, compiler="gcc")
690
# Same as calculate_vd_vL_vR_gpu but solves only the first (vd) system
# (batchCount=1) — used on steps where vL/vR need not be recomputed.
# NOTE(review): `code` and `m` assignments are on lines missing from this view.
def calculate_vd_gpu(self):
694
ab0_gpu_ptr = self.ab0_gpu_ptr
695
ab1_gpu_ptr = self.ab1_gpu_ptr
696
ab2_gpu_ptr = self.ab2_gpu_ptr
697
b_gpu_ptr = self.b_gpu_ptr
700
cusparseHandle_t handle = 0;
701
cusparseCreate(&handle);
703
double *b_gpu = (double *)b_gpu_ptr;
704
double *ab0_gpu = (double *)ab0_gpu_ptr;
705
double *ab1_gpu = (double *)ab1_gpu_ptr;
706
double *ab2_gpu = (double *)ab2_gpu_ptr;
708
//bd is up to date in b_gpu
710
cusparseDgtsvStridedBatch(handle, m, ab2_gpu, ab1_gpu, ab0_gpu, b_gpu, 1, m);
711
//now b_gpu contains vd, vL, vR
712
cudaDeviceSynchronize();
714
cusparseDestroy(handle);
716
support_code='extern "C" cusparseStatus_t cusparseDgtsvStridedBatch(cusparseHandle_t handle, int m,const double *dl, const double *d,const double *du, double *x,int batchCount, int batchStride);'
717
includepath = ["/usr/local/cuda/include/"]
718
libpath = ['/usr/local/cuda/lib/','/usr/lib/']
721
weave.inline(code,['ab0_gpu_ptr','ab1_gpu_ptr','ab2_gpu_ptr','m','b_gpu_ptr'],support_code=support_code,headers = ["<cusparse.h>","<cuda_runtime_api.h>","<cuda.h>"],
722
include_dirs=includepath, library_dirs=libpath, libraries=["cusparse"], runtime_library_dirs=libpath, compiler="gcc")
724
# Variant of calculate_vd_vL_vR_gpu that solves each branch's sub-system
# separately (loop over systemsCount branches) rather than one batched solve
# over the whole cable. NOTE(review): `code`/`m` assignments and the loop
# lines extracting per-branch `i`/`n` from i_list/j_list are missing from
# this view — the C fragment below is incomplete.
def calculate_vd_vL_vR_gpu_branches(self):
727
systemsCount = len(self.i_list_bis)
729
i_list = numpy.array(self.i_list_bis)
730
j_list = numpy.array(self.j_list_bis)
732
ab0_gpu_ptr = self.ab0_gpu_ptr
733
ab1_gpu_ptr = self.ab1_gpu_ptr
734
ab2_gpu_ptr = self.ab2_gpu_ptr
735
bL_gpu_ptr = self.bL_gpu_ptr
736
bR_gpu_ptr = self.bR_gpu_ptr
737
b_gpu_ptr = self.b_gpu_ptr
740
cusparseHandle_t handle = 0;
741
cusparseCreate(&handle);
743
double *bL_gpu = (double *)bL_gpu_ptr;
744
double *bR_gpu = (double *)bR_gpu_ptr;
746
double *b_gpu = (double *)b_gpu_ptr;
748
double *ab0_gpu = (double *)ab0_gpu_ptr;
749
double *ab1_gpu = (double *)ab1_gpu_ptr;
750
double *ab2_gpu = (double *)ab2_gpu_ptr;
752
//bd is up to date in b_gpu
753
cudaMemcpy(b_gpu + m, bL_gpu, m*sizeof(double), cudaMemcpyDeviceToDevice);
754
cudaMemcpy(b_gpu + 2*m, bR_gpu, m*sizeof(double), cudaMemcpyDeviceToDevice);
760
for(int idx=0;idx<systemsCount;idx++){
764
cusparseDgtsvStridedBatch(handle, n , ab2_gpu+i, ab1_gpu+i, ab0_gpu+i, b_gpu+i, 3, m);
767
//cusparseDgtsvStridedBatch(handle, m, ab2_gpu, ab1_gpu, ab0_gpu, b_gpu, 3, m);
768
cudaDeviceSynchronize();
770
cusparseDestroy(handle);
772
support_code='extern "C" cusparseStatus_t cusparseDgtsvStridedBatch(cusparseHandle_t handle, int m,const double *dl, const double *d,const double *du, double *x,int batchCount, int batchStride);'
773
includepath = ["/usr/local/cuda/include/"]
774
libpath = ['/usr/local/cuda/lib/','/usr/lib/']
777
weave.inline(code,['i_list','j_list','ab0_gpu_ptr','ab1_gpu_ptr','ab2_gpu_ptr','bL_gpu_ptr','bR_gpu_ptr','systemsCount','m','b_gpu_ptr'],support_code=support_code,headers = ["<cusparse.h>","<cuda_runtime_api.h>","<cuda.h>"],
778
include_dirs=includepath, library_dirs=libpath, libraries=["cusparse"], runtime_library_dirs=libpath, compiler="gcc")
781
# Combine the partial solutions on the device:
#   v = vL * v(ante) + vR * v(post) + vd   (finalizeFun, per compartment),
# then finalizeFunBis writes the solved branch-point voltages back into v.
# NOTE(review): `n` is not assigned in this view — see update_ab_gtot_gpu.
def finalize_v_gpu(self):
784
self.finalizeFun.prepared_call((n,1), self.v_gpu, self.v_bp_gpu, self.ante_gpu, self.post_gpu, self.b_gpu, numpy.int32(len(self.neuron)))
785
self.finalizeFunBis.prepared_call((self.neuron.BPcount,1), self.v_gpu, self.v_bp_gpu, self.GPU_data_int)
788
# Delegates to the wrapped state updater's variation test for the current dt.
# (The trailing semicolon is redundant in Python but harmless; left as-is.)
def had_variation(self):
789
return self._state_updater.had_variation(self.neuron.clock.dt);
792
# Entry point called by Brian each tick: lazily runs the one-off preparation,
# delegates the current/channel update to the wrapped state updater, performs
# the spatial step, and accumulates/prints profiling totals. Python 2 code
# (`print` statement). NOTE(review): mangled fragment — docstring delimiters,
# the prepare() call, the step() call, `endUpdater = time()` and several
# guard lines (original 795-798, 800, 803-805, 807, 810, 812, 815-817) are
# missing from this view.
def __call__(self, neuron):
794
Updates the state variables.
799
if not self._isprepared:
801
self._isprepared=True
802
print "state updater prepared"
806
startUpdater = time()
808
self._state_updater.changed = True
809
self._state_updater(neuron)
811
self.timeUpdater.append(self.timeUpdater[-1] + endUpdater-startUpdater)
813
total_time = self.timeUpdater[-1]+self.timeDevice[-1]+self.timeHost[-1]
814
print " tot:",total_time," U:",self.timeUpdater[-1]," D:",self.timeDevice[-1]," DU:",self.timeDeviceU[-1]," DT:",self.timeDeviceT[-1]-self.timeDeviceT[1]," H:",self.timeHost[-1]," SH:",self.timeSolveHost[-1]," FH:",self.timeFillHost[-1]," Fin:",self.timeFin[-1]
818
Number of state variables