1
# -*- coding:latin-1 -*-
2
#-----------------------------------------------------------------------------
4
# Purpose: Bayesian melding
6
# Author: Fl�vio Code�o Coelho
9
# RCS-ID: $Id: Melding.py $
13
#-----------------------------------------------------------------------------
16
import scipy.stats.kde as kde
21
from numpy.random import *
29
def model(r, p0, n=1):
32
Simulates the Population dynamic Model (PDM) Pt = rP0
34
P0 is the initial population size.
35
Example model for testing purposes.
37
Pt = zeros(n, float) # initialize the output vector
48
Draw k samples of Theta from its prior distribution, run the model with it
49
and obtain phi = M(theta). For testing purposes only.
51
#---q1theta---------------------------------------------------------------------
52
#r = genprior('uniform',(2,4), k) # Uniform [2,4]
53
#r = lhs.lhs(['r'],['Uniform'],[[2,4]],k)[0]
54
#p0 = genprior('uniform',(0,5), k)# Uniform[0,5]
55
#p0 = lhs.lhs(['p0'],['Uniform'],[[0,5]],k)[0]
56
r,p0 = lhs.lhs(['r', 'p0'],['Uniform','Uniform'],[[2,4],[0,5]],k,noCorrRestr=False)
58
#-------------------------------------------------------------------------------
60
print r.shape, p0.shape
62
phi[i] = model(r[i], p0[i])[-1] # Sets phi[i] to the last point of the simulation
67
def KDE(x, (ll, ul)=('','')):
70
performs a kernel density estimate using the R function density
71
if (ll,ul) enforce limits for the distribution.
77
rn=arange(ll,ul,(ul-ll)/512.)
78
#print x.shape,rn.shape
79
est = kde.gaussian_kde(x.ravel()).evaluate(rn)
82
#est = r('density(x,from=ll, to=ul)') #trims the density borders
86
rn=arange(ll,ul,(ul-ll)/512.)
87
est = kde.gaussian_kde(x).evaluate(rn)
88
#est = r('density(x)')
93
return {'y':est,'x':rn}
96
def Likeli(data, dist, limits,**kwargs):
98
Generates the likelihood function of data given dist.
99
limits is a tuple setting the interval of the parameter space that will
100
be used as the support for the Likelihood function.
101
returns a vector (512 elements).
103
n = len(data) # Number of data points
105
(ll,ul) = limits #limits for the parameter space
108
if dist == 'normal': # In this case, L is a function of the mean.
109
sd = std(data) #standard deviation of data
110
prec = 1/sd #precision of the data
111
res = [exp(like.Normal(data,mu,prec)) for mu in arange(ll,ul,step)] #empty list of results
112
#for mu in arange(ll,ul,step):
114
#res.append(exp(-0.5*sum(((data-mu)/sd)**2))) # Makes a list of the likelihood of every datum. Removed this constant term from formula because of dependence on n :1./((sd*sqrt(2*pi))**n)
116
lik = res/max(res) # Likelihood function
117
print max(lik), min(lik)
118
elif dist == 'exponential':
119
res = [lamb**n*exp(-lamb*sum(data)) for lamb in arange(ll,ul,step)]
120
## for lamb in arange(ll,ul,step):
121
## res.append(lamb**n*exp(-lamb*sum(data)))
122
lik = array(res)/max(array(res))
124
elif dist == 'bernoulli':
126
print "Parameter p of the bernoulli is out of range[0,1]"
127
res = [exp(like.Bernoulli(data,p)) for p in arange(ll,ul,step)]
128
#for p in arange(ll,ul,step):
129
#res.append(p**sum(data)*(1-p)**(n-sum(data)))
130
#res.append(exp(flib.bernoulli(data,p)))
131
lik = array(res)/max(array(res))
133
elif dist == 'poisson':
134
res = [exp(like.Poisson(data,lb)) for lb in arange(ll,ul,step)]
135
## for lb in arange(ll,ul,step):
136
## res.append(exp(-n*lb)*(lb*sum(data))/product(factorial(data)))
137
lik = array(res)/max(array(res))
139
elif dist == 'lognormal':
140
sd = std(data) #standard deviation of data
141
prec = 1/sd #precision of the data
142
res = [exp(like.Lognormal(data,mu,prec)) for mu in arange(ll,ul,step)]
143
lik = array(res)/max(array(res))
145
print 'Invalid distribution type. Valid distributions: normal, exponential, bernoulli and poisson'
151
calculates the factorial of a number, or a sequence of numbers.
153
l = ["<type 'array'>","<type 'list'>","<type 'tuple'>"]
154
if not str(type(numb)) in l:
159
return n * factorial(n-1)
163
res.append(factorial(i))
167
def Filt(cond, x, (ll, ul)):
169
filtering out Out-of-boundary thetas and phis.
170
for single output models.
171
ul and ll are the pre-model boundaries of phi.
172
cond is a vector over which the conditional operations will be applied.
173
x is a vector or matrix of data. matrices are filtered line by line
175
#print cond.shape, x.shape, ll, ul
178
if isinstance(x,tuple):
181
x.shape = (l,x.size/float(l))
182
print 'shape of x is', x.shape
184
print 'shape of x is', x.shape
187
#When more filtering theta
188
f = compress(less(cond,ul) & greater(cond,ll),x, axis=1)
191
f = compress(less(cond,ul) & greater(cond,ll),x)
196
def FiltM(cond,x,limits):
198
Multiple condition filtering.
199
for multiple output models
200
cond is an array of condition vectors
201
x is an array of data
202
limits is a list of tuples (ll,ul) with the length of cond
205
cnd = ones(len(cond[0]),int)
206
for i,j in zip(cond,limits):
209
#print cond.shape,cnd.shape,i.shape,ll,ul
210
cnd = cnd & less(i,ul) & greater(i,ll)
211
f = compress(cnd,x, axis=1)
216
def SIR(alpha,q2phi,limits,q2type,q1theta, phi,L, lik=[]):
218
Sampling Importance Resampling.
219
alpha = pooling weight;
220
q2phi = premodel of phi(vector or a tuple of vectors);
221
limits = limits for q2phi (tuple or list/tuple of tuples);
222
q2type = dist. type of q2phi (string or list of strings);
223
q1theta = premodel dists of thetas (tuple);
224
phi = model output (vector or tuple of vectors);
225
L = size of the resample.
226
Lik = list of likelihoods available
230
##==On Uniform Priors we have to trim the density borders========================
231
## The Density estimation with a gaussian kernel, extends beyond the limits of
232
## an uniform distribution, due to this fact, we clip the ends of the kde
233
## output in order the avoid artifacts.
234
##===============================================================================
235
np = len(q1theta) # Number of parameters(theta) in the model
236
#---for multicompartimental models-----------------------------------------------
237
multi = ["<type 'list'>","<type 'tuple'>"] #possible types of data structures of q2phi and phi
239
if not str(type(phi)) in multi:
240
(ll,ul) = limits # limits of q2phi for single compartment models
242
if str(type(q2phi)) in multi:
243
no = len(phi) #Number of output variables
247
(ll,ul) = limits[i] # limits of q2phi[i]
248
if q2type[i] == 'uniform':
249
q2pd.append(KDE(q2phi[i],(ll,ul)))
251
q2pd.append(KDE(q2phi[i]))
253
#---for single compartiment models----------------------------------------------------------------------------
255
if q2type == 'uniform':
256
q2phi = KDE(q2phi, (ll,ul)) #calculating the kernel density
261
#-------------------------------------------------------------------------------
263
#---filtering out Out-of-boundary thetas and phis-------------------------------
264
if str(type(phi)) in multi: #Multicompartimental models
265
#phi = array(phi)# Convert list/tuple of vectors in array, where each vector becomes a line of the array.
267
q1theta2 = array(q1theta) #Temporary copy to allow multiple filtering
269
phi_filt = FiltM(phi,phi,limits) #filter Phis
270
#print type(phi_filt)
271
if not phi_filt.any():
272
print "Due to bad specification of the prior distributions or of the model\nthe inference can't continue. please verify that your priors include at least\npart of the range of the output variables."
274
#Remove thetas that generate out-of-bound phis for every phi
275
q1theta_filt = FiltM(phi,q1theta2,limits)
276
q1theta2 = q1theta_filt
278
## for i in xrange(no):
279
## (ll,ul) = limits[i] # limits of q2phi[i]
280
## print 'no = %s'%no
281
## phi_filt.append(Filt(phi[i],phi[i],(ll,ul))) #filter Phis
282
## if not phi_filt[i].any():
283
## print "Due to bad specification of the prior distributions or of the model\nthe inference can't continue. please verify that your priors include at least\npart of the range of the output variables."
286
## q1theta_filt = Filt(phi[i],q1theta2,(ll,ul)) #Remove thetas that generate out-of-bound phis for every phi
287
## q1theta2 = q1theta_filt
289
phi_filt = array(phi_filt)
290
# TODO: check to see if thetas or phis go to empty due to bad priors!!!!
291
else: #Single compartment
292
phi_filt = Filt(phi,phi,(ll,ul)) #remove out-of-bound phis
293
q1theta_filt = Filt(phi,q1theta,(ll,ul)) #remove thetas that generate out-of-bound phis
294
#-------------------------------------------------------------------------------
296
#---Calculate Kernel Density of the filtered phis----------------------------------------------------------------------------
297
if no: #multicompartimental
300
(ll,ul) = limits[i] # limits of q2phi[i]
301
if q2type[i] == 'uniform':
302
q1ed.append(KDE(phi_filt[i],(ll,ul)))
304
q1ed.append(KDE(phi_filt[i]))
306
else: #Single compartment
307
if q2type == 'uniform':
308
q1est = KDE(phi_filt,(ll,ul)) # Density of of model outputs restricted to the range determined by the Priors, so that we can pool q1est and q2phi.
311
q1est = KDE(phi_filt)
313
#-------------------------------------------------------------------------------
315
##==============================================================================
316
##Now, the two priors for Phi q2phi (derived from prior information and q1est
317
##(generated by the model from the q1theta(priors on the inputs)), are pooled.
318
##The pooling is done by logarithmic pooling using alpha as a weighting factor.
319
##The higher the value of alpha the more wight is given to q1est.
320
##==============================================================================
321
#---Calculating the pooled prior of Phi------------------------------------------
322
if no: #multicompartimental
325
qtilphi.append((array(q2phi[i]['y'])**(1-alpha))*(array(q1est[i]['y'])**alpha))
326
qtilphi = array(qtilphi)
327
else: #Single compartment
328
qtilphi = (array(q2phi['y'])**(1-alpha))*(array(q1est['y'])**alpha) # Pooled prior of Phi
329
#-------------------------------------------------------------------------------
332
#---Calculating first term of the weigth expression-----------------------------
333
# TODO: Consider having a different alpha for each phi
334
if no:#multicompartimental
337
#pairwise pooling of the phis and q2phis
338
denslist.append((array(q2phi[i]['y'])/array(q1est[i]['y']))**(1-alpha))
340
firstterm = product(denslist,axis=0)
341
else:#Single compartment
342
firstterm = (array(q2phi['y'])/array(q1est['y']))**(1-alpha)
345
#---Weights---------------------------------------------------------------------
348
w = firstterm #---- without likelihoods -----#
351
prodlik = product(array(lik),axis=0)
353
#only one likelihood function
355
w = firstterm*prodlik
357
#-------------------------------------------------------------------------------
362
##========Link weights with each phi[i]=========================================
363
## The weight vector (w) to be used in the resampling of the thetas is calculated
364
## from operations on densities. Consequently,its values are associated with
365
## values on the support of Phi, not with the actual Phi[i] as output by the
366
## model. Thus, its is necessary to recover the asso-
367
## ciation between the Phi[i] (the outputs of each model run), and the weights
368
## associated with them. For that, the support for phi is divided into 512 bins
369
## (the length of the weight vector), and the filtered Phi[i] are assigned to these bins
370
## according to their value. This mapping is represented by the variable phi_bins
371
## in which each element is the bin number of the correponding element in Phi.
372
## A new weight vector(wi) is then created in which the elements of w are posi-
373
## tioned according to the position of the Phi[i] to which it corresponds. That
374
## is: w[i] = w[phi_bin[i]] repeated for each element i.
375
##==============================================================================
377
if no:#multicompartimental
382
(ll,ul) = limits[i] #limits of phi
384
bin_bound.append(arange(ll,ul,step)) # Bin boundaries of the weight vector
385
phi_bins.append(searchsorted(bin_bound[i], phi_filt[i])) # Return a vector of the bins for each phi
386
g = lambda x:w[x-1] # searchsorted returns 1 as the index for the first bin, not 0
387
phi_bins = array(phi_bins)
389
wi.append(map(g,phi_bins[i]))
390
wi = mean(wi, axis=0) #ATTENTION: Should this be averaged?
391
else: #single compartment
394
bin_bound = arange(ll,ul,step) # Bin boundaries of the weight vector
395
phi_bins = searchsorted(bin_bound, phi_filt) # Return a vector of the bins for each phi
396
g = lambda x:w[x-1] # searchsorted returns 1 as the index for the first bin, not 0
397
wi = map(g,phi_bins.ravel())
398
#-------------------------------------------------------------------------------
401
#---creating a biased die based on probabilities of w---------------------------
402
#die = cumsum(wi)#-Cumulative sum of resampling probabilities
403
#roll = uniform(die[0],die[-1],L)
406
##========Resampling q1theta=====================================================
407
## Here, the filtered q1theta are resampled according to the weight vector.
408
## L values are generated as indices to the weight vector wi(resamples) and used to resample
410
##===============================================================================
411
#sampled_is = searchsorted(die, roll)
412
#qtiltheta = transpose(array(map(h,sampled_is)))
413
#qtiltheta=zeros((np,L), Float) # Initialize the qtiltheta matrix
414
#resamples = randint(0,len(wi),(L,))# Random order of resamples
416
# A given value is going to be resampled if random() < wi
417
# A column of q1theta_filt is extracted for each value in resamples
420
print wi.size, wi.shape
422
sys.exit('Resampling weights are all zero, please check your model or data.')
424
while j < L: # Extract L samples from q1theta_filt
425
i=randint(0,wi.size)# Random position of wi and q1theta_filt
427
#print i, q1theta_filt.shape
428
q[j]=q1theta_filt[:,i]# retain the sample according with resampling prob.
430
# q is a list of arrays which is converted to an array and then transposed.
432
qtiltheta = transpose(array(q))
434
##---teste-----------------------------------------------------------------------
440
## plotmat(q1theta_filt[0])
443
## plotmat(q1theta_filt[1])
447
return (w, qtiltheta, qtilphi, q1est)
449
def plotmat(x, tit='title', b=50):
451
This funtion implements a simple 50 bin, normalized histogram using the matplotlib module.
454
P.hist(x,bins=b,normed=1)
455
P.ylabel(tit, fontsize=18)
457
def genprior(type, params, shape=[]):
459
genprior(type, params, shape)
460
The function returns a vector or a matrix containinin a sample of the specified distribution with size given by shape.
463
distlist=['uniform', 'normal', 'exponential', 'beta', 'gamma', 'chi-square', 'F', 'binomial', 'neg-binomial', 'poisson', 'multinomial']
464
if type == 'uniform':
465
prior = uniform(params[0], params[1], shape)
466
elif type == 'normal':
467
prior = normal(params[0], params[1], shape)
468
elif type == 'exponential':
469
prior = exponential(params, shape)
471
prior = beta(params[0], params[1], shape)
472
elif type == 'gamma':
473
prior = gamma(params[0], params[1], shape)
474
elif type == 'chi-square':
475
prior = chi_square(params, shape)
477
prior = F(params[0], params[1], shape)
478
elif type == 'binomial':
479
prior = binomial(params[0], params[1], shape)
480
elif type == 'neg-binomial':
481
prior = negative_binomial(params[0], params[1], shape)
482
elif type == 'poisson':
483
prior = poisson(params, shape)
484
elif type == 'multinomial':
485
prior = multinomial(params)
487
print 'Invalid distribution type.'
492
# TODO: Implement calculation of Bayes factors!
493
#-------------------------------------------------------------------------------
494
##==MAIN========================================================================
495
#-------------------------------------------------------------------------------
502
k = 20000 # Number of model runs
506
#data = [7,8,7,8,7,8,7]
507
data = normal(7.5,1,400)
508
lik = [] #initialize list of likelihoods
509
lik.append(Likeli(data,'normal',(ll,ul)))
510
#q2phi = genprior('uniform', (ll,ul), k) # uniform[6,9] - pre-model distribution of Phi
511
q2phi = lhs.lhs(['p'],['Uniform'],[[ll,ul]],k)[0]
513
(phi, q1theta) = Run(k) # Runs the model
515
#---Restricting the range of phi------------------------------------------------
517
(w, post_theta, qtilphi, q1est) = SIR(0.5,q2phi,(ll,ul), 'uniform',q1theta, phi,L, lik)
519
print post_theta.shape
520
#--generating the posterior of phi-------------------------------------------------------
521
r = uniform(0,len(post_theta[0]),L) #random index for the marginal posterior of r
522
p = uniform(0,len(post_theta[1]),L) #random index for the marginal posterior of p0
523
post_phi = zeros(L,float) #initializing post_phi
524
for i in xrange(L): #Monte Carlo with values of the posterior of Theta
525
post_phi[i] = model(post_theta[0][int(r[i])],post_theta[1][int(p[i])])[-1]
527
#---Plotting with matplotlib----------------------------------------------------------------------------
530
plotmat(post_theta[0], tit=r'$\pi^{[r]}(\theta)$')
531
P.title('Posteriors and weight vector')
533
plotmat(post_theta[1], tit=r'$\pi^{[P_0]}(\theta)$')
535
plotmat(post_phi, tit=r'$\pi^{[P]}(\phi)$')
536
##plot(q1est['x'],qtilphi)
537
##ylabel(r'$P$', fontsize=12)
540
P.ylabel(r'$W_i$', fontsize=12)
545
plotmat(q1theta[0], tit=r'$\theta r$')
548
plotmat(phi, tit=r'$\phi$')
550
plotmat(q1theta[1], tit=r'$\theta p_0$')
552
plotmat(q2phi, tit=r'$q_2 \phi$')
555
if __name__ == '__main__':
556
from time import clock
560
print end-start, ' seconds'