import numpy as np
from scipy.stats import gaussian_kde
[docs]def kde_max(kde, tol=1e-3):
"""Determine the maximum value of a KDE
INPUT: scipy.stats.kde object"""
# 2010-07-23 11:14 IJC: Created
cenguess = np.median(kde.dataset,1)
init_steps = np.std(kde.dataset,1)
max_vals = np.zeros(kde.d)
for dim in range(kde.d):
this_step = init_steps[dim].copy()
tempkde = gaussian_kde(kde.dataset[dim,:])
this_cen = cenguess[dim].copy()
dguess = 1.
while dguess>tol:
pos_guess = this_cen + this_step
neg_guess = this_cen - this_step
if tempkde(pos_guess)>tempkde(this_cen):
dguess = abs(this_cen-pos_guess)
this_cen = pos_guess
elif tempkde(neg_guess)>tempkde(this_cen):
dguess = abs(this_cen-neg_guess)
this_cen = neg_guess
else:
this_step *= 0.5
dguess *=0.5
max_vals[dim] = this_cen
return max_vals
[docs]def findkdeval(kde, val, guess=None,tol=1e-6, maxiter=1000,verbose=False):
"""Find the "x"-value such that kde(x)=val +/- tol
Without a specified guess for a monomodal distribution, tends to
find the lower of the two possible values.
Uses scipy.stats.gaussian_kde objects"""
# 2010-07-23 11:49 IJC: Created
if maxiter is None:
maxiter = 1000
if kde.d>1:
retvals = np.zeros(kde.d,float)
if hasattr(val,'__iter__'):
pass
else:
val = np.tile(val,kde.d)
for dim in range(kde.d):
print dim
tempkde = gaussian_kde(kde.dataset[dim,:])
retvals[dim] = findkdeval(tempkde, val[dim],tol=tol, guess=guess,maxiter=maxiter)
else:
max_location = kde_max(kde)
maxval = kde(max_location)
if val>maxval:
print "Value entered (%01.3g) is greater than KDE's maximum value (%01.3g)" % (val,maxval)
return max_location
iter = 0
initial_step = np.std(kde.dataset)
step = initial_step
medval = np.median(kde.dataset)
if guess is None:
guess = medval - step
else:
pass
initial_guess = guess # Save this, just in case
kdeval = kde(guess)
dval = abs(kdeval-val)
while dval>tol and iter<=maxiter:
# pos = min(guess + step, max_location)
pos = guess + step
neg = guess - step
kdepos = kde(pos)
kdeneg = kde(neg)
if verbose and (iter/10)==(iter/10.):
print "iter, val, pos, neg, kdepos,kdeneg,guess, step"
if verbose:
print "%i %01.6g %01.6g %01.6g %01.6g %01.6g %01.6g" %(iter, val, pos, neg,kdepos,kdeneg, step)
dval_pos = abs(kdepos-val)
dval_neg = abs(kdeneg-val)
if dval_pos<dval: # we're closer than we were!
dval = dval_pos
guess = pos
elif dval_neg<dval: # we're closer than we were!
dval = dval_neg
guess = neg
elif dval_pos==dval_neg: # We must be really far away
if initial_guess>medval:
guess = medval + initial_step
else:
guess = medval - initial_step
else: # neither guess is closer, but we're still not within tol
step *= 0.5
iter += 1
retvals = guess
if iter>maxiter:
print "Exceeded maximum number of iterations (%i) without convergance" % maxiter
return retvals
[docs]def conflevel(kde, frac, ftol=1e-6, tol=1e-6, usespline=False, verbose=False,maxiter=None):
"""Determine the lower and upper confidence levels required to
enclose a given fraction 'frac' of a KDE object's dataset(s) to
within a tolerance ftol."""
# 2010-07-23 14:00 IJC: Created
from scipy import interpolate
if kde.d>1:
ret = []
for ii in range(kde.d):
if verbose: print ii,'/',kde.d
ret.append(conflevel(gaussian_kde(kde.dataset[ii]),frac,ftol=ftol,tol=tol,usespline=usespline,verbose=verbose,maxiter=maxiter))
return ret
else:
median_value = np.median(kde.dataset)
step = np.std(kde.dataset)
low_limit = median_value - 2*step
enclosed_fraction = np.Inf
thismovewasup=True
if usespline:
spx = np.linspace(-5*step,5*step,1e3)+median_value
sp = interpolate.UnivariateSpline(spx,kde(spx),k=3.0,s=0.0)
sp.d = kde.d
sp.dataset = kde.dataset
kde0 = kde
kde = sp
while abs(enclosed_fraction-frac)>ftol:
kdelow = kde(low_limit)
guess = 2*median_value-low_limit
if verbose:
print "kdelow,guess",kdelow,guess
high_limit = findkdeval(kde, kdelow, tol=tol, guess=guess,verbose=verbose,maxiter=maxiter)
if usespline:
enclosed_fraction = kde0.integrate_box_1d(low_limit, high_limit)
else:
enclosed_fraction = kde.integrate_box_1d(low_limit, high_limit)
lastmovewasup = thismovewasup
if enclosed_fraction > frac: # need to close in
low_limit += step
thismovewasup = True
else: # need to open up limits
low_limit += -step
thismovewasup = False
movingInSameDirection = lastmovewasup==thismovewasup
if movingInSameDirection: # keep going
pass
else: # We passed it; turn around and home in
step *= 0.5
return low_limit, high_limit
[docs]def confmap(map, frac, **kw):
"""Return the confidence level of a 2D histogram or array that
encloses the specified fraction of the total sum.
:INPUTS:
map : 1D or 2D numpy array
Probability map (from hist2d or kde)
frac : float, 0 <= frac <= 1
desired fraction of enclosed energy of map
:OPTIONS:
ordinate : None or 1D array
If 1D map, interpolates onto the desired value. This could
cause problems when you aren't just setting upper/lower
limits....
"""
# 2010-07-26 12:54 IJC: Created
# 2011-11-05 14:29 IJMC: Fixed so it actually does what it's supposed to!
from scipy.optimize import bisect
def diffsum(level, map, ndesired):
return ((1.0*map[map >= level].sum()/map.sum() - ndesired))
if hasattr(frac,'__iter__'):
return [confmap(map,thisfrac, **kw) for thisfrac in frac]
#nx, ny = map.shape
#ntot = map.size
#n = int(ntot*frac)
#guess = map.max()
#dx = 10.*float((guess-map.min())/ntot)
#thisn = map[map<=guess].sum()
ret = bisect(diffsum, map.min(), map.max(), args=(map, frac))
if kw.has_key('ordinate') and kw['ordinate'] is not None:
sortind = np.argsort(map)
ret = np.interp(ret, map[sortind], kw['ordinate'][sortind])
return ret
[docs]def kdehist2(x, y, npts, xrange=None, yrange=None):
"""Generate a 2D histogram map from data, using Gaussian KDEs
:INPUTS:
x : seq
X data
y : seq
Y data
npts : int or 2-seq
number of points across final histogram, or [nx, ny]
:OPTIONAL_INPUTS:
xrange : 2-seq
[x_min, x_max] values for final histogram
yrange : 2-seq
[y_min, y_max] values for final histogram
:RETURNS:
[kdehist, xbins, ybins]
:EXAMPLE:
::
import kdestats as kde
import numpy as np
import pylab as py
covmat = [[1., 1.5], [1.5, 4.]]
xy = np.random.multivariate_normal([0, 0], covmat, [1e4])
kdehist = kde.kdehist2(xy[:,0], xy[:,1], [30, 30])
clevels = kde.confmap(kdehist[0], [.6827,.9545,.9973])
py.figure() # Plot 1-, 2-, and 3-sigma contours
c = py.contour(kdehist[1], kdehist[2], kdehist[0], clevels)
"""
# 2012-02-11 19:45 IJMC: Created
if hasattr(npts, '__iter__'):
if len(npts)==1:
npts = [npts[0], npts[1]]
else:
npts = [npts, npts]
# Generate KDE:
thiskde = gaussian_kde([x, y])
# Generate coordinates for KDE evaluation:
if xrange is None:
thisx0 = np.median(thiskde.dataset[0])
thisdx0 = np.std(thiskde.dataset[0])
thisx = np.linspace(-5*thisdx0,5*thisdx0,npts[0])+thisx0
else:
thisx = np.linspace(xrange[0], xrange[1], npts[0])
if yrange is None:
thisy0 = np.median(thiskde.dataset[1])
thisdy0 = np.std(thiskde.dataset[1])
thisy = np.linspace(-5*thisdy0,5*thisdy0,npts[1])+thisy0
else:
thisy = np.linspace(yrange[0], yrange[1], npts[1])
thisxx,thisyy = np.meshgrid(thisx,thisy)
thishist = thiskde([thisxx.ravel(),thisyy.ravel()]).reshape(npts[0],npts[0])
return thishist, thisx, thisy