Package BIP :: Package Bayes :: Module Melding
[hide private]
[frames] | [no frames]

Source Code for Module BIP.Bayes.Melding

  1  # -*- coding:utf-8 -*- 
  2  #----------------------------------------------------------------------------- 
  3  # Name:        Melding.py 
  4  # Purpose:     The Bayesian melding Class provides 
  5  #                   uncertainty analyses for deterministic models. 
  6  # 
  7  # Author:      Flávio Codeço Coelho 
  8  # 
  9  # Created:     2003/08/10 
 10  # Copyright:   (c) 2003-2008 by the Author 
 11  # Licence:     GPL 
 12  #----------------------------------------------------------------------------- 
 13  from numpy.core.records import recarray 
 14  import psyco 
 15  psyco.full() 
 16  import sys, os 
 17  import cPickle as CP 
 18  import like 
 19  import pylab as P 
 20  import scipy.stats.kde as kde 
 21  from scipy import stats 
 22  import numpy 
 23  from numpy import * 
 24  from time import time 
 25  from numpy.random import normal, randint,  random,  uniform  
 26  import lhs 
 27  if sys.version.startswith('2.5'): 
 28      from processing import Pool 
 29  else: 
 30      from multiprocessing import Pool 
 31   
 32  __docformat__ = "restructuredtext en" 
 33   
 34   
class Meld:
    """
    Bayesian Melding class.

    Holds prior samples for the inputs (theta) and outputs (phi) of a
    deterministic model, runs the model ``K`` times and resamples ``L``
    values of theta into a posterior sample (see `sir` and `abcRun`).
    """
    # Number of replicates dispatched to the pool between two progress
    # reports / checkpoints of the temporary save file.
    _CHUNK = 100

    def __init__(self, K, L, model, ntheta, nphi, alpha=0.5, verbose=False):
        """
        Initializes the Melding class.

        :Parameters:
            - `K`: Number of replicates of the model run. Also determines the prior sample size.
            - `L`: Number of samples from the Posterior distributions. Usually 10% of K.
            - `model`: Callable taking theta as argument and returning phi = M(theta).
            - `ntheta`: Number of inputs to the model (parameters).
            - `nphi`: Number of outputs of the model (State-variables)
            - `alpha`: pooling weight of the user-provided phi priors.
            - `verbose`: if True, `runModel` prints progress messages.
        """
        self.K = K
        self.L = L
        self.verbose = verbose
        self.model = model
        self.likelist = []  # list of likelihoods
        self.q1theta = recarray(K, formats=['f8']*ntheta)  # Theta Priors (record array)
        self.post_theta = recarray(L, formats=['f8']*ntheta)  # Theta Posteriors (record array)
        self.q2phi = recarray(K, formats=['f8']*nphi)  # Phi Priors (record array)
        self.phi = recarray(K, formats=['f8']*nphi)  # Phi model-induced Priors (record array)
        self.q2type = []  # list of distribution types
        self.post_phi = recarray(L, formats=['f8']*nphi)  # Phi Posteriors (record array)
        self.ntheta = ntheta
        self.nphi = nphi
        self.alpha = alpha  # pooling weight of user-provided phi priors
        self.done_running = False

    # ---- private helpers -------------------------------------------------

    @staticmethod
    def _submit(pool, func, args):
        """
        Dispatch ``func(*args)`` asynchronously on `pool`.

        ``multiprocessing.Pool`` names the method ``apply_async``; the
        camelCase ``applyAsync`` spelling this module was written against is
        kept as a fallback for the legacy ``processing`` package.
        """
        submit = getattr(pool, 'apply_async', None)
        if submit is None:
            submit = pool.applyAsync
        return submit(func, args)

    @staticmethod
    def _dump(obj, path):
        """Pickle `obj` to `path` (binary mode) and close the file."""
        fobj = open(path, 'wb')
        try:
            CP.dump(obj, fobj)
        finally:
            fobj.close()

    @staticmethod
    def _load(path):
        """
        Unpickle and return the object stored in `path`.

        NOTE(review): unpickling executes arbitrary code; the temp files are
        read from the current working directory, so it must be trusted.
        """
        fobj = open(path, 'rb')
        try:
            return CP.load(fobj)
        finally:
            fobj.close()

    @staticmethod
    def _clearTemp():
        """Remove the temporary save files, if they exist."""
        for fname in ('phi.temp', 'q1theta'):
            if os.path.exists(fname):
                os.unlink(fname)

    @staticmethod
    def _noObservations(obs):
        """
        Tell whether `obs` holds no observations: an empty sequence, or an
        array without any non-zero element.
        """
        if isinstance(obs, numpy.ndarray):
            return not obs.any()
        return len(obs) == 0

    def _runReplicates(self, phi, start, convert, savetemp, report):
        """
        Run the model for replicates ``start .. K-1`` and store the results
        in `phi` (modified in place).

        Replicates are dispatched in chunks; a checkpoint is written only
        after every result of the chunk has been stored, so the saved index
        always matches the saved contents of `phi`.

        :Parameters:
            - `phi`: array receiving one entry per replicate.
            - `start`: index of the first replicate to run.
            - `convert`: callable mapping the model output to the value stored in phi[i].
            - `savetemp`: if True, dump (phi, next index) to 'phi.temp' after each chunk.
            - `report`: if True, print a progress line for each chunk.
        """
        names = self.q1theta.dtype.names
        po = Pool()
        try:
            for lo in range(start, self.K, self._CHUNK):
                hi = lo + self._CHUNK
                if hi > self.K:
                    hi = self.K
                pending = []
                for i in range(lo, hi):
                    theta = [self.q1theta[n][i] for n in names]
                    pending.append((i, self._submit(po, self.model, theta)))
                for i, res in pending:
                    phi[i] = convert(res.get())
                if report:
                    print("==> K = %s" % lo)
                if savetemp:
                    self._dump((phi, hi), 'phi.temp')
        finally:
            po.close()
            po.join()

    def _resampleTheta(self, w):
        """
        Fill self.post_theta with self.L rows of self.q1theta, drawn by
        rejection sampling with acceptance probability proportional to `w`.
        """
        # Dividing by the largest weight keeps the relative acceptance
        # probabilities unchanged but avoids rejecting nearly every draw
        # when the weights are normalised to sum to one.
        scale = w.max()
        if not scale > 0:
            scale = 1.0
        j = 0
        while j < self.L:  # Extract L samples from q1theta
            i = randint(0, w.size)  # Random position of w and q1theta
            if random() <= w[i] / scale:
                self.post_theta[j] = self.q1theta[i]
                j += 1

    # ---- public API ------------------------------------------------------

    def setPhi(self, names, dists=None, pars=None, limits=None):
        """
        Setup the models Outputs, or Phi, and generate the samples from prior
        distributions needed for the melding replicates.

        :Parameters:
            - `names`: list of string with the names of the variables.
            - `dists`: is a list of RNG from scipy.stats (default: [stats.norm])
            - `pars`: is a list of tuples of variables for each prior distribution, respectively (default: [(0, 1)]).
            - `limits`: lower and upper limits on the support of variables (default: [(-5, 5)]).

        Raises ValueError when len(names) differs from the number of outputs.
        """
        # Fresh lists on every call: the defaults must not be shared between
        # instances (limits is stored on self).
        if dists is None:
            dists = [stats.norm]
        if pars is None:
            pars = [(0, 1)]
        if limits is None:
            limits = [(-5, 5)]
        if len(names) != self.nphi:
            raise ValueError("Number of names(%s) does not match the number of output variables(%s)."%(len(names),self.nphi))
        self.q2phi.dtype.names = names
        self.phi.dtype.names = names
        self.post_phi.dtype.names = names
        self.limits = limits
        for n, d, p in zip(names, dists, pars):
            self.q2phi[n] = lhs.lhs(d, p, self.K)
            self.q2type.append(d.name)

    def setTheta(self, names, dists=None, pars=None):
        """
        Setup the models inputs and generate the samples from prior
        distributions needed for the melding replicates.

        If a file named 'q1theta' exists in the working directory (left by an
        interrupted run with savetemp), the priors are loaded from it instead.

        :Parameters:
            - `names`: list of string with the names of the parameters.
            - `dists`: is a list of RNG from scipy.stats (default: [stats.norm])
            - `pars`: is a list of tuples of parameters for each prior distribution, respectively (default: [(0, 1)]).
        """
        if dists is None:
            dists = [stats.norm]
        if pars is None:
            pars = [(0, 1)]
        self.q1theta.dtype.names = names
        self.post_theta.dtype.names = names
        if os.path.exists('q1theta'):
            self.q1theta = self._load('q1theta')
        else:
            for n, d, p in zip(names, dists, pars):
                self.q1theta[n] = lhs.lhs(d, p, self.K)

    def setThetaFromData(self, names, data):
        """
        Setup the model inputs and set the prior distributions from the
        vectors in data.
        This method is to be used when the prior distributions are available
        in the form of a sample from an empirical distribution such as a
        bayesian posterior.
        In order to expand the samples provided, K samples are generated from
        a kernel density estimate of the original sample.

        :Parameters:
            - `names`: list of string with the names of the parameters.
            - `data`: list of vectors. Samples of a proposed distribution
        """
        self.q1theta.dtype.names = names
        self.post_theta.dtype.names = names
        if os.path.exists('q1theta'):
            self.q1theta = self._load('q1theta')
        else:
            for n, d in zip(names, data):
                self.q1theta[n] = kde.gaussian_kde(d).resample(self.K)

    def setPhiFromData(self, names, data, limits):
        """
        Setup the model outputs and set their prior distributions from the
        vectors in data.
        This method is to be used when the prior distributions are available
        in the form of a sample from an empirical distribution such as a
        bayesian posterior.
        In order to expand the samples provided, K samples are generated from
        a kernel density estimate of the original sample.

        :Parameters:
            - `names`: list of string with the names of the variables.
            - `data`: list of vectors. Samples of the proposed distribution.
            - `limits`: list of tuples (ll,ul),lower and upper limits on the support of variables.
        """
        self.q2phi.dtype.names = names
        self.phi.dtype.names = names
        self.post_phi.dtype.names = names
        self.limits = limits
        for n, d in zip(names, data):
            self.q2phi[n] = kde.gaussian_kde(d).resample(self.K)
            self.q2type.append('empirical')

    def addData(self, data, model, limits, l=1024, **kwargs):
        """
        Calculates the likelihood function of the dataset presented, appends
        it to self.likelist and returns it.
        The likelihood function is a vector evaluated on the grid
        arange(ll, ul, (ul-ll)/l) and scaled so that its maximum is 1.

        :Parameters:
            - `data`: vector containing observations on a given variable.
            - `model`: string with the name of the distribution of the variable
            - `limits`: (ll,ul) tuple with lower and upper limits for the variable
            - `l`: Length (resolution) of the likelihood vector
            - `pars` (keyword): parameters forwarded to like.Beta; required when model is 'beta'.

        Raises ValueError when `model` is not a supported distribution name.
        """
        n = len(data)  # Number of data points
        data = array(data)
        (ll, ul) = limits  # limits for the parameter space
        step = (ul-ll)/float(l)
        grid = arange(ll, ul, step)

        if model == 'normal':  # In this case, L is a function of the mean. SD is set to the SD(data)
            sd = std(data)  # standard deviation of data
            # NOTE(review): a precision is usually 1/sd**2 -- confirm what
            # like.Normal expects as its third argument.
            prec = 1/sd
            res = array([exp(like.Normal(data, mu, prec)) for mu in grid])
            lik = res/max(res)  # Likelihood function
            print("%s %s" % (max(lik), min(lik)))
        elif model == 'exponential':
            res = [lamb**n*exp(-lamb*sum(data)) for lamb in grid]
            lik = array(res)/max(array(res))
        elif model == 'beta':
            # TODO: Make sure pars is passed as an extra parameter
            # The grid value is not used here, so the vector is constant.
            res = [exp(like.Beta(data, *kwargs['pars'])) for _ in grid]
            lik = array(res)/max(array(res))
        elif model == 'bernoulli':
            if ll < 0 or ul > 1:
                print("Parameter p of the bernoulli is out of range[0,1]")
            res = [exp(like.Bernoulli(data, p)) for p in grid]
            lik = array(res)/max(array(res))
        elif model == 'poisson':
            res = [exp(like.Poisson(data, lb)) for lb in grid]
            lik = array(res)/max(array(res))
        elif model == 'lognormal':
            sd = std(data)  # standard deviation of data
            prec = 1/sd  # see the note in the 'normal' branch
            res = [exp(like.Lognormal(data, mu, prec)) for mu in grid]
            lik = array(res)/max(array(res))
        else:
            # Previously this printed a message and then failed on the
            # undefined `lik`; fail explicitly instead.
            raise ValueError('Invalid distribution type %r. Valid distributions: normal, lognormal, exponential, beta, bernoulli and poisson' % (model,))
        self.likelist.append(lik)
        return lik

    def run(self, *args):
        """
        Runs the model through the Melding inference.
        self.model is a callable which returns the output of the
        deterministic model, i.e. the model itself.
        The model is run self.K times to obtain phi = M(theta); the last
        point of each run is stored in self.phi.
        Positional arguments are accepted for compatibility and ignored.
        """
        names = self.q1theta.dtype.names
        po = Pool()
        try:
            pending = []
            for i in range(self.K):
                theta = [self.q1theta[n][i] for n in names]
                pending.append(self._submit(po, self.model, theta))
            for i, res in enumerate(pending):
                self.phi[i] = res.get()[-1]  # phi is the last point in the simulation
        finally:
            po.close()
            po.join()
        self.done_running = True

    def getPosteriors(self, t=1):
        """
        Updates the posteriors of the model for the last time step.
        Returns two record arrays:
        - The posteriors of the Theta
        - the posterior of Phi last t values of time-series. self.L by `t` arrays.
        Returns None if no inference (`run`, `sir` or `abcRun`) has completed.

        :Parameters:
            - `t`: length of the time-series to return as posterior.
        """
        if not self.done_running:
            return
        if t > 1:
            self.post_phi = recarray((self.L, t), formats=['f8']*self.nphi)
            self.post_phi.dtype.names = self.phi.dtype.names
        names = self.post_theta.dtype.names
        # random indices for the marginal posteriors of theta
        pti = lhs.lhs(stats.randint, (0, self.L), siz=(self.ntheta, self.L))
        po = Pool()
        try:
            pending = []
            for i in range(self.L):  # Monte Carlo with values of the posterior of Theta
                # int(): the sampled positions are used as array indices.
                theta = [self.post_theta[n][int(pti[j, i])] for j, n in enumerate(names)]
                pending.append(self._submit(po, self.model, theta))
            for i, res in enumerate(pending):
                out = res.get()
                if t == 1:
                    self.post_phi[i] = out[-1]
                else:
                    self.post_phi[i] = [tuple(l) for l in out[-t:]]
                if not i % 100:
                    print("==> L = %s" % i)
        finally:
            po.close()
            po.join()
        return self.post_theta, self.post_phi

    def filtM(self, cond, x, limits):
        '''
        Multiple condition filtering.
        Remove values in x[i], if corresponding values in
        cond[i] are less than limits[i][0] or greater than
        limits[i][1].

        :Parameters:
            - `cond`: is an array of conditions.
            - `limits`: is a list of tuples (ll,ul) with length equal to number of lines in `cond` and `x`.
            - `x`: array to be filtered.
        '''
        cond = array(cond)
        cnd = ones(cond.shape[1], int)
        for row, bounds in zip(cond, limits):
            ll = bounds[0]
            ul = bounds[1]
            cnd = cnd & less(row, ul) & greater(row, ll)
        return compress(cnd, x, axis=1)

    def basicfit(self, s1, s2):
        '''
        Calculates a basic fitness measure between a model-generated time
        series and an observed time series: the root-mean-square error of
        each observed variable, averaged over the variables.
        Returns nan when no variable has observations.

        :Parameters:
            - `s1`: model-generated time series. record array.
            - `s2`: observed time series. dictionary with keys matching names of s1
        '''
        fit = []
        for k in s2.keys():
            if self._noObservations(s2[k]):
                continue  # no observations for this variable
            fit.append(sqrt(mean((s1[k]-asarray(s2[k]))**2.)))
        if not fit:
            return nan
        return mean(fit)

    def logPooling(self, phi):
        """
        Returns the probability associated with each phi[i]
        on the pooled pdf of phi and q2phi.

        :Parameters:
            - `phi`: prior of Phi induced by the model and q1theta.
        """
        # Estimating the multivariate joint probability densities
        phidens = stats.gaussian_kde(array([phi[n][:, -1] for n in phi.dtype.names]))
        q2dens = stats.gaussian_kde(array([self.q2phi[n] for n in self.q2phi.dtype.names]))
        # Determining the pooled probabilities for each phi[i]
        lastp = array([list(phi[i, -1]) for i in range(self.K)])
        qtilphi = (phidens.evaluate(lastp.T)**(1-self.alpha))*q2dens.evaluate(lastp.T)**self.alpha
        return qtilphi/sum(qtilphi)

    def abcRun(self, fitfun=None, data=None, t=1, savetemp=False):
        """
        Runs the model for inference through Approximate Bayes Computation
        techniques. This method should be used as an alternative to the sir.

        :Parameters:
            - `fitfun`: Callable fitfun(phi_i, data) returning a number per replicate; defaults to self.basicfit. The values are normalised to sum to one and the weight used is one minus that value, so they are treated as an error measure.
            - `t`: number of time steps to retain at the end of the model run for fitting purposes.
            - `data`: dict containing observed time series (lists of length t) of the state variables. This dict must have as many items the number of state variables, with labels matching variables names. Unobserved variables must have an empty list as value.
            - `savetemp`: Should temp results be saved. Useful for long runs. Allows for resuming the simulation from the last save.
        """
        if data is None:
            data = {}
        if not fitfun:
            fitfun = self.basicfit
        if savetemp:
            self._dump(self.q1theta, 'q1theta')
        # Running the model ==========================
        if os.path.exists('phi.temp'):
            phi, j = self._load('phi.temp')
        else:
            j = 0
            phi = recarray((self.K, t), formats=['f8']*self.nphi, names=self.phi.dtype.names)

        def lastPoints(res):
            # phi is the last t points in the simulation
            return [tuple(l) for l in res[-t:]]

        self._runReplicates(phi, j, lastPoints, savetemp, True)
        if savetemp:  # If all replicates are done, clear temporary save files.
            self._clearTemp()

        print("==> Done Running the K replicates\n")
        qtilphi = self.logPooling(phi)  # vector with probability of each phi[i] belonging to qtilphi
        qtilphi = nan_to_num(qtilphi)
        print("max(qtilphi):  %s" % (max(qtilphi),))
        # calculate weights
        w = array([fitfun(phi[i], data) for i in range(phi.shape[0])], dtype=float)
        w = w / sum(w)
        w = 1 - w
        print("w= %s %s %s" % (w, mean(w), var(w)))
        print("")
        print("qtilphi= %s" % (qtilphi,))
        # Resampling Thetas
        w = nan_to_num(w)
        w = w * qtilphi
        w = w / sum(w)
        w = nan_to_num(w)
        print("max(w):  %s" % (max(w),))
        if sum(w) == 0.0:
            sys.exit('Resampling weights are all zero, please check your model or data.')
        self._resampleTheta(w)
        self.done_running = True

    def sir(self, data=None, t=1, savetemp=False):
        """
        Run the model output through the Sampling-Importance-Resampling
        algorithm.

        :Parameters:
            - `data`: dict of observed time series on the model's output, keyed by variable name.
            - `t`: length of the observed time series
            - `savetemp`: Boolean. create a temp file?
        """
        if data is None:
            data = {}
        qtilphi, phi = self.runModel(savetemp, t)

        # Calculating the likelihood of each phi[i] considering the observed data
        tau = 0.1
        lik = zeros(self.K)
        t0 = time()
        po = Pool()
        try:
            for i in range(self.K):
                l = 1
                for n in data.keys():
                    if self._noObservations(data[n]):
                        continue  # no observations for this variable
                    series = phi[n][i]
                    pending = [self._submit(po, like.Normal, (data[n][m], j, tau))
                               for m, j in enumerate(series)]
                    # Accumulate over variables; assigning here would keep
                    # only the last observed variable.
                    # NOTE(review): addData exponentiates like.Normal before
                    # use, here the raw values are multiplied -- confirm
                    # which scale like.Normal returns.
                    l *= prod([res.get() for res in pending])
                lik[i] = l
        finally:
            po.close()
            po.join()
        print("==> Done Calculating Likelihoods (took %s seconds)" % (time()-t0))
        # Calculating the weights
        w = nan_to_num(qtilphi*lik)
        w = nan_to_num(w/sum(w))

        if sum(w) == 0.0:
            sys.exit('Resampling weights are all zero, please check your model or data.')
        self._resampleTheta(w)
        self.done_running = True

    def runModel(self, savetemp, t=1):
        '''
        Handles running the model self.K times keeping a temporary savefile
        for resuming calculation in case of interruption.
        Returns the tuple (qtilphi, phi): the pooled prior probability of
        each replicate and the model outputs.

        :Parameters:
            - `savetemp`: Boolean. create a temp file?
            - `t`: number of final time steps of each run kept in phi.
        '''
        if savetemp:
            self._dump(self.q1theta, 'q1theta')
        # Running the model ==========================
        if os.path.exists('phi.temp'):
            phi, j = self._load('phi.temp')
        else:
            j = 0
            phi = recarray((self.K, t), formats=['f8']*self.nphi, names=self.phi.dtype.names)

        if t == 1:
            def convert(res):
                return (res[-1],)
        else:
            def convert(res):
                # phi is the last t points in the simulation
                return [tuple(l) for l in res[-t:]]

        t0 = time()
        self._runReplicates(phi, j, convert, savetemp, self.verbose)
        if savetemp:  # If all replicates are done, clear temporary save files.
            self._clearTemp()
        print("==> Done Running the K replicates (took %s seconds)\n" % (time()-t0))
        t0 = time()
        qtilphi = self.logPooling(phi)  # vector with probability of each phi[i] belonging to qtilphi
        print("==> Done Running the Log Pooling (took %s seconds)\n" % (time()-t0))
        print("%s max(qtilphi):  %s" % (qtilphi, max(qtilphi)))
        qtilphi = nan_to_num(qtilphi)
        return qtilphi, phi
def enumRun(model, theta, k):
    """
    Call `model` with the items of `theta` as positional arguments and
    return a tuple pairing its result with the replicate index `k`.
    """
    return model(*theta), k
490
def model(r, p0, n=1):
    """
    Model (r,p0, n=1)
    Simulates the Population dynamic Model (PDM) Pt = rP0
    for n time steps.
    P0 is the initial population size.
    Example model for testing purposes.
    Returns a float array of length n with the population at each step.
    """
    trajectory = zeros(n, float)  # output vector, one entry per time step
    for step in range(n):
        # Each step multiplies the value stored for the previous step
        # (the initial population for the first one) by the rate r.
        previous = trajectory[step - 1] if step else p0
        trajectory[step] = r * previous
    return trajectory
507
def Run(k):
    """
    Run (k)
    Draw k samples of Theta from its prior distribution, run the model with it
    and obtain phi = M(theta). For testing purposes only.

    Returns the tuple (phi, q1theta): phi is a float array with the last
    point of each of the k simulations, q1theta is the tuple (r, p0) of prior
    samples fed to the model.
    """
    #---q1theta---------------------------------------------------------------------
    #---Priors for the theta (model parameters)--------------------
    r = lhs.lhs(stats.uniform, [2, 4], k)
    p0 = lhs.lhs(stats.uniform, [0, 5], k)
    q1theta = (r, p0)
    #-------------------------------------------------------------------------------
    phi = zeros(k, float)
    po = Pool()
    # multiprocessing.Pool spells the method apply_async; the camelCase
    # applyAsync used originally is kept as a fallback for the legacy
    # `processing` package.
    submit = getattr(po, 'apply_async', None) or po.applyAsync
    try:
        # Submit everything first so the workers actually run in parallel,
        # then collect the results in order.
        pending = [submit(model, (r[i], p0[i])) for i in range(k)]
        for i, res in enumerate(pending):
            phi[i] = res.get()[-1]  # Sets phi[i] to the last point of the simulation
    finally:
        # Always release the worker processes.
        po.close()
        po.join()
    return phi, q1theta
529
def KDE(x, limits=('', ''), res=1024.):
    """
    KDE(x)
    performs a kernel density estimate using the scipy gaussian density
    if (ll,ul), enforce limits for the distribution's support.
    Returns a dictionary.

    :Parameters:
        - `x`: array with the sample.
        - `limits`: tuple (ll, ul) bounding the evaluation grid; the default ('', '') (or a None lower limit) means "use the range of x".
        - `res`: number of grid steps between the lower and upper limit.

    The returned dictionary has the keys 'x' (evaluation grid) and 'y'
    (estimated density on that grid).
    """
    # `limits` replaces the former tuple parameter (ll, ul); positional calls
    # such as KDE(x, (ll, ul)) are unaffected.
    ll, ul = limits
    # Test for the sentinel explicitly: a plain truth test would treat a
    # legitimate lower limit of 0 as "no limits given".
    bounded = not (ll is None or isinstance(ll, str))
    if bounded:
        rn = arange(ll, ul, (ul-ll)/res)
        est = kde.gaussian_kde(x.ravel()).evaluate(rn)
    else:
        ll = min(x)
        ul = max(x)
        rn = arange(ll, ul, (ul-ll)/res)
        est = kde.gaussian_kde(x).evaluate(rn)
        print('No - KDE')
    return {'y': est, 'x': rn}
554 555
def Likeli(data, dist, limits, **kwargs):
    """
    Generates the likelihood function of data given dist.
    limits is a tuple setting the interval of the parameter space that will
    be used as the support for the Likelihood function.
    returns a vector (1024 elements), scaled so that its maximum is 1.

    :Parameters:
        - `data`: vector of observations.
        - `dist`: one of 'normal', 'exponential', 'bernoulli', 'poisson' or 'lognormal'.
        - `limits`: (ll, ul) tuple with the limits of the parameter space.

    Extra keyword arguments are accepted and ignored.
    Raises ValueError when `dist` is not a supported distribution name.
    """
    n = len(data)  # Number of data points
    data = array(data)
    (ll, ul) = limits  # limits for the parameter space
    step = (ul-ll)/1024.
    grid = arange(ll, ul, step)

    if dist == 'normal':  # In this case, L is a function of the mean. SD is set to the SD(data)
        sd = std(data)  # standard deviation of data
        # NOTE(review): a precision is usually 1/sd**2 -- confirm what
        # like.Normal expects as its third argument.
        prec = 1/sd
        res = array([exp(like.Normal(data, mu, prec)) for mu in grid])
        lik = res/max(res)  # Likelihood function
        print("%s %s" % (max(lik), min(lik)))
    elif dist == 'exponential':
        res = [lamb**n*exp(-lamb*sum(data)) for lamb in grid]
        lik = array(res)/max(array(res))
    elif dist == 'bernoulli':
        if ll < 0 or ul > 1:
            print("Parameter p of the bernoulli is out of range[0,1]")
        res = [exp(like.Bernoulli(data, p)) for p in grid]
        lik = array(res)/max(array(res))
    elif dist == 'poisson':
        res = [exp(like.Poisson(data, lb)) for lb in grid]
        lik = array(res)/max(array(res))
    elif dist == 'lognormal':
        sd = std(data)  # standard deviation of data
        prec = 1/sd  # see the note in the 'normal' branch
        res = [exp(like.Lognormal(data, mu, prec)) for mu in grid]
        lik = array(res)/max(array(res))
    else:
        # Previously this printed a message and then failed on the unbound
        # `lik`; fail explicitly instead.
        raise ValueError('Invalid distribution type %r. Valid distributions: normal, lognormal, exponential, bernoulli and poisson' % (dist,))
    return lik
596 597
def Filt(cond, x, limits):
    """
    filtering out Out-of-boundary thetas and phis.
    for single output models.
    limits is the tuple (ll, ul) with the pre-model boundaries of phi.
    cond is a vector over which the conditional operations will be applied.
    x is a vector or matrix of data. matrices are filtered line by line
    (a tuple of vectors is stacked into a matrix first).

    Returns the entries (columns, for a matrix) of x whose corresponding
    value in cond lies strictly between ll and ul.
    """
    # `limits` replaces the former tuple parameter (ll, ul); positional calls
    # such as Filt(cond, x, (ll, ul)) are unaffected.
    ll, ul = limits
    cond = array(cond).ravel()
    if isinstance(x, tuple):
        # One row per tuple item. reshape(-1) lets numpy derive the integer
        # row length; a float dimension is not a valid shape.
        x = array(x).reshape(len(x), -1)
    keep = less(cond, ul) & greater(cond, ll)
    try:
        return compress(keep, x, axis=1)
    except (ValueError, IndexError):
        # x has no second axis (plain vector): filter it directly.
        return compress(keep, x)
624
def FiltM(cond,x,limits):
    """
    Multiple condition filtering.
    for multiple output models
    cond is an array of condition vectors
    limits is a list of tuples (ll,ul) with the length of cond

    Returns the columns of x for which every condition vector lies strictly
    inside its (ll,ul) interval.
    """
    conditions = array(cond)
    # Start by keeping every column, then knock out the ones that violate
    # any of the intervals.
    keep = ones(conditions.shape[1], int)
    for row, bounds in zip(conditions, limits):
        lower, upper = bounds[0], bounds[1]
        inside = less(row, upper) & greater(row, lower)
        keep = keep & inside
    return compress(keep, x, axis=1)
641 642
def SIR(alpha,q2phi,limits,q2type,q1theta, phi,L, lik=[]):
    """
    Sampling Importance Resampling.

    Pools the user prior on phi with the model-induced one, optionally
    multiplies in the likelihoods, and resamples L values of theta with the
    resulting weights.

    :Parameters:
        - `alpha`: pooling weight;
        - `q2phi`: premodel of phi(tuple of vectors);
        - `limits`: limits for q2phi (list/tuple of tuples);
        - `q2type`: dist. type of q2phi (list of strings);
        - `q1theta`: premodel dists of thetas (tuple);
        - `phi`: model output (tuple of vectors);
        - `L`: size of the resample.
        - `lik`: list of likelihoods available (only read, never modified here)

    Returns the tuple (w, qtiltheta, qtilphi, q1est), or None (after printing
    a message) when no phi falls inside `limits`. Calls sys.exit when all the
    resampling weights are zero.
    """
    ##==On Uniform Priors we have to trim the density borders========================
    ## The Density estimation with a gaussian kernel, extends beyond the limits of
    ## an uniform distribution, due to this fact, we clip the ends of the kde
    ## output in order to avoid artifacts.
    ##===============================================================================
    np = len(q1theta) # Number of parameters(theta) in the model (not used below)
    no = len(phi) #Number of output variables

    # Replace each q2phi sample by its kernel density estimate (a dict with
    # keys 'x' and 'y', as returned by KDE).
    q2pd =[]
    for i in xrange(no):
        (ll,ul) = limits[i] # limits of q2phi[i]
        if q2type[i] == 'uniform':
            q2pd.append(KDE(q2phi[i],(ll,ul)))
        else:
            q2pd.append(KDE(q2phi[i]))
    q2phi = q2pd
    #---filtering out Out-of-boundary thetas and phis-------------------------------

    phi_filt=[]
    print "shape de q1theta[0]: ",q1theta[0].shape
    q1theta2 = array(q1theta) #Temporary copy to allow multiple filtering

    phi_filt = FiltM(phi,phi,limits) #filter Phis
    #print type(phi_filt)
    if not phi_filt.any():
        print "Due to bad specification of the prior distributions or of the model\nthe inference can't continue. please verify that your priors include at least\npart of the range of the output variables."
        return None
    #Remove thetas that generate out-of-bound phis for every phi
    q1theta_filt = FiltM(phi,q1theta2,limits)
    # print "shape de q1theta_filt (ln272): ",q1theta_filt.shape
    q1theta2 = q1theta_filt

    phi_filt = array(phi_filt)
    # TODO: check to see if thetas or phis get empty due to bad priors!!!!
    #-------------------------------------------------------------------------------

    #---Calculate Kernel Density of the filtered phis-----------------------------------------------------------------------
    q1ed = []
    for i in xrange(no):
        (ll,ul) = limits[i] # limits of q2phi[i]
        if q2type[i] == 'uniform':
            # print sum(isinf(phi_filt))
            q1ed.append(KDE(phi_filt[i],(ll,ul)))
        else:
            q1ed.append(KDE(phi_filt[i]))
    q1est = q1ed
    #-------------------------------------------------------------------------------

    ##==============================================================================
    ##Now, the two priors for Phi q2phi (derived from prior information and q1est
    ##(generated by the model from the q1theta(priors on the inputs)), are pooled.
    ##The pooling is done by logarithmic pooling using alpha as a weighting factor.
    ##The higher the value of alpha the more weight is given to q1est.
    ##==============================================================================
    #---Calculating the pooled prior of Phi-----------------------------------------
    qtilphi = []
    for i in xrange(no):
        qtilphi.append((array(q2phi[i]['y'])**(1-alpha))*(array(q1est[i]['y'])**alpha))
    qtilphi = array(qtilphi)
    #-------------------------------------------------------------------------------
    #---Calculating first term of the weight expression-----------------------------
    # TODO: Consider having a different alpha for each phi
    denslist=[]
    for i in xrange(no):
        #pairwise pooling of the phis and q2phis
        denslist.append((array(q2phi[i]['y'])/array(q1est[i]['y']))**(1-alpha))

    # Kept as a list with one density-ratio vector per output variable.
    firstterm = denslist#product(denslist, axis=0)
    #---Weights---------------------------------------------------------------------

    if not lik:
        w = firstterm #---- without likelihoods -----#
    else:
        if len(lik)>1:
            prodlik = product(array(lik),axis=0)
        else:
            #only one likelihood function
            prodlik = lik[0]
        # w = firstterm*prodlik
        w = [i*prodlik for i in firstterm]
    #-------------------------------------------------------------------------------
    ##========Link weights with each phi[i]=========================================
    ## The weight vector (w) to be used in the resampling of the thetas is calculated
    ## from operations on densities. Consequently,its values are associated with
    ## values on the support of Phi, not with the actual Phi[i] as output by the
    ## model. Thus, its is necessary to recover the association between
    ## the Phi[i] (the outputs of each model run), and the weights
    ## associated with them. For that, the support for phi is divided into 1024 bins
    ## (the length of the weight vector), and the filtered Phi[i] are assigned to these bins
    ## according to their value. This mapping is represented by the variable phi_bins
    ## in which each element is the bin number of the corresponding element in Phi.
    ## A new weight vector(wi) is then created in which the elements of w are posi-
    ## tioned according to the position of the Phi[i] to which it corresponds. That
    ## is: w[i] = w[phi_bin[i]] repeated for each element i.
    ##==============================================================================

    bin_bound = []
    phi_bins = []
    wi = []
    for i in xrange(no):
        (ll,ul) = limits[i] #limits of phi
        step = (ul-ll)/1024.
        bin_bound.append(arange(ll,ul,step)) # Bin boundaries of the weight vector
        phi_bins.append(searchsorted(bin_bound[i], phi_filt[i])) # Return a vector of the bins for each phi
    # NOTE(review): `i` is looked up when g is called, not when it is defined,
    # so inside the loop below g reads w[i] for that loop's current i.
    g = lambda x:w[i][x-1] # searchsorted returns 1 as the index for the first bin, not 0
    phi_bins = array(phi_bins)
    for i in xrange(no):
        wi.append(map(g,phi_bins[i]))
    wi = mean(array(wi),axis=0) #ATTENTION: Should this be averaged?

    ##========Resampling q1theta=====================================================
    ## Here, the filtered q1theta are resampled according to the weight vector.
    ## L values are generated as indices to the weight vector wi(resamples) and used to resample
    ## the parameters.
    ##===============================================================================

    # A given value is going to be resampled if random() < wi
    # A column of q1theta_filt is extracted for each value in resamples
    q = [0]*L
    wi = nan_to_num(array(wi))
    print sum(wi)
    if sum(wi) == 0:
        sys.exit('Resampling weights are all zero, please check your model or data.')
    j = 0
    while j < L: # Extract L samples from q1theta_filt
        i=randint(0,wi.size)# Random position of wi and q1theta_filt
        if random()<= wi[i]:
            #print i, q1theta_filt.shape
            q[j]=q1theta_filt[:,i]# retain the sample according with resampling prob.
            j+=1
    # q is a list of arrays which is converted to an array and then transposed.
    #print "shape de q",len(q),q[0].shape
    qtiltheta = transpose(array(q))
    #print qtiltheta.shape
    return (w, qtiltheta, qtilphi, q1est)
792 793 794 795 # TODO: Implement calculation of Bayes factors! 796 #------------------------------------------------------------------------------- 797 ##==MAIN======================================================================== 798 #------------------------------------------------------------------------------- 799
def plotRaHist(arr):
    '''
    Plots a record array
    as a panel of histograms

    One histogram (50 bins, normalised) is drawn per field of `arr`, on a
    two-column grid in a new figure.
    '''
    names = arr.dtype.names
    # Floor division keeps the row count an integer regardless of how the
    # interpreter defines `/` for ints.
    rows = len(names)//2 + 1
    P.figure()
    for i, n in enumerate(names):
        P.subplot(rows, 2, i+1)
        P.hist(arr[n], bins=50, normed=1, label=n)
        P.legend()
812
813 -def main():
814 """ 815 testing function 816 """ 817 start = time() 818 k = 20000 # Number of model runs 819 L = 2000 820 ll = 6 821 ul = 9 822 #data = [7,8,7,8,7,8,7] 823 data = normal(7.5,1,400) 824 lik = [] #initialize list of likelihoods 825 lik.append(Likeli(data,'normal',(ll,ul))) 826 827 q2phi = lhs.lhs(stats.uniform, (ll, ul), k) 828 829 (phi, q1theta) = Run(k) # Runs the model 830 print len(q1theta) 831 #---Restricting the range of phi------------------------------------------------ 832 833 (w, post_theta, qtilphi, q1est) = SIR(0.5,[q2phi],[(ll,ul)], ['uniform'],q1theta, [phi],L, lik) 834 print "out of SIR" 835 print post_theta.shape 836 #--generating the posterior of phi------------------------------------------------------- 837 r = randint(0,len(post_theta[0]),L) #random index for the marginal posterior of r 838 p = randint(0,len(post_theta[1]),L) #random index for the marginal posterior of p0 839 post_phi = zeros(L,float) #initializing post_phi 840 for i in xrange(L): #Monte Carlo with values of the posterior of Theta 841 post_phi[i] = model(post_theta[0][r[i]],post_theta[1][p[i]])[-1] 842 843 end = time() 844 print end-start, ' seconds' 845 #---Plotting with matplotlib---------------------------------------------------------------------------- 846 P.figure(1) 847 P.subplot(411) 848 P.hist(post_theta[0],bins=50) 849 P.ylabel(r'$\pi^{[r]}(\theta)$',fontsize=18) 850 P.title('Posteriors and weight vector') 851 P.subplot(412) 852 P.hist(post_theta[1],bins=50) 853 P.ylabel(r'$\pi^{[P_0]}(\theta)$',fontsize=18) 854 P.subplot(413) 855 P.hist(post_phi,bins=50) 856 P.ylabel(r'$\pi^{[P]}(\phi)$',fontsize=18) 857 ##plot(q1est['x'],qtilphi) 858 ##ylabel(r'$P$', fontsize=12) 859 P.subplot(414) 860 P.plot(w) 861 P.ylabel(r'$W_i$', fontsize=12) 862 863 864 P.figure(2) 865 P.subplot(411) 866 P.hist(q1theta[0],bins=50) 867 P.ylabel(r'$\theta r$',fontsize=18) 868 P.title('Priors') 869 P.subplot(412) 870 P.hist(phi,bins=50) 871 P.ylabel(r'$\phi$',fontsize=18) 872 P.subplot(413) 873 
P.hist(q1theta[1],bins=50) 874 P.ylabel(r'$\theta p_0$',fontsize=18) 875 P.subplot(414) 876 P.hist(q2phi,bins=50) 877 P.ylabel(r'$q_2 \phi$',fontsize=18) 878 P.show()
879
880 -def main2():
881 start = time() 882 Me = Meld(K=10000,L=2000,model=model, ntheta=2,nphi=1,verbose=True) 883 Me.setTheta(['r','p0'],[stats.uniform,stats.uniform],[(2,4),(0,5)]) 884 Me.setPhi(['p'],[stats.uniform],[(6,9)],[(6,9)]) 885 #Me.addData(normal(7.5,1,400),'normal',(6,9)) 886 #Me.run() 887 Me.sir(data ={'p':[7.5]} ) 888 pt,pp = Me.getPosteriors() 889 end = time() 890 plotRaHist(pt) 891 plotRaHist(pp) 892 P.show() 893 print end-start, ' seconds'
if __name__ == '__main__':
    # Script entry point: runs the class-based demo (main2); the call to the
    # function-based demo (main) is left disabled.
    # main()
    main2()