Source code for kdestats

import numpy as np
from scipy.stats import gaussian_kde


def kde_max(kde, tol=1e-3):
    """Locate the peak of each one-dimensional marginal of a KDE.

    For every row of ``kde.dataset`` a separate 1-D ``gaussian_kde``
    is built from that row alone, and a step-halving search starting
    at the row's median climbs toward the location where that 1-D KDE
    is largest.  The joint density ``kde`` itself is never evaluated,
    so for ``kde.d > 1`` the result is the vector of marginal peaks,
    not necessarily the location of the joint maximum.

    :INPUTS:
      kde : scipy.stats.gaussian_kde object
        Only its ``dataset`` and ``d`` attributes are used.

    :OPTIONS:
      tol : float
        Stopping threshold.  An internal counter starts at 1, is set
        to the distance travelled whenever the centre moves, and is
        halved (together with the step) whenever neither neighbour is
        higher; the search for a dimension ends once the counter is
        no longer greater than ``tol``.

    :RETURNS:
      1D numpy array of length ``kde.d`` holding the peak location
      found for each dimension.
    """
    # 2010-07-23 11:14 IJC: Created
    cenguess = np.median(kde.dataset, 1)
    init_steps = np.std(kde.dataset, 1)
    max_vals = np.zeros(kde.d)
    for dim in range(kde.d):
        this_step = init_steps[dim].copy()
        tempkde = gaussian_kde(kde.dataset[dim, :])
        this_cen = cenguess[dim].copy()
        dguess = 1.
        while dguess > tol:
            pos_guess = this_cen + this_step
            neg_guess = this_cen - this_step
            # Evaluate the current centre once; both comparisons reuse it.
            cen_density = tempkde(this_cen)
            if tempkde(pos_guess) > cen_density:
                dguess = abs(this_cen - pos_guess)
                this_cen = pos_guess
            elif tempkde(neg_guess) > cen_density:
                dguess = abs(this_cen - neg_guess)
                this_cen = neg_guess
            else:
                # Neither neighbour is higher: tighten the bracket.
                this_step *= 0.5
                dguess *= 0.5
        max_vals[dim] = this_cen
    return max_vals

def findkdeval(kde, val, guess=None,tol=1e-6, maxiter=1000,verbose=False):
    """Find the "x"-value such that kde(x)=val +/- tol

    Without a specified guess for a monomodal distribution, tends to
    find the lower of the two possible values.

    Uses scipy.stats.gaussian_kde objects

    :INPUTS:
      kde : callable with ``d`` and ``dataset`` attributes
        Typically a scipy.stats.gaussian_kde.  For ``kde.d > 1`` each
        row of ``kde.dataset`` is turned into its own 1-D
        ``gaussian_kde`` and searched independently.

      val : float, or sequence of length ``kde.d``
        Target density value(s).  A scalar is reused for every
        dimension.

    :OPTIONS:
      guess : float or None
        Starting location.  Defaults to (median - std) of the dataset.

      tol : float
        Search stops once ``abs(kde(x) - val) <= tol``.

      maxiter : int or None
        Iteration cap for the 1-D search; None means 1000.

      verbose : bool
        Print the search state at every iteration.

    :RETURNS:
      For ``kde.d > 1``: 1D numpy array with one location per
      dimension.  For a 1-D KDE: the scalar location found, or, when
      ``val`` exceeds the KDE's peak value, the 1-element array
      returned by ``kde_max``.
    """
    # 2010-07-23 11:49 IJC: Created
    def scalar(x):
        # Collapse a float, 0-d array or 1-element array to a plain
        # number so comparisons and '%g' formatting never see arrays.
        return np.asarray(x).item()

    if maxiter is None:
        maxiter = 1000

    if kde.d > 1:
        retvals = np.zeros(kde.d, float)
        if not hasattr(val, '__iter__'):
            val = np.tile(val, kde.d)

        for dim in range(kde.d):
            if verbose:
                print("%i" % dim)
            tempkde = gaussian_kde(kde.dataset[dim, :])
            found = findkdeval(tempkde, val[dim], tol=tol, guess=guess,
                               maxiter=maxiter, verbose=verbose)
            # The 1-D search may hand back a 1-element array (see
            # RETURNS); store its single value.
            retvals[dim] = scalar(found)
        return retvals

    max_location = kde_max(kde)
    maxval = scalar(kde(max_location))
    target = scalar(val)
    if target > maxval:
        print("Value entered (%01.3g) is greater than KDE's maximum value (%01.3g)" % (target, maxval))
        return max_location

    n_iter = 0
    initial_step = np.std(kde.dataset)
    step = initial_step
    medval = np.median(kde.dataset)
    if guess is None:
        guess = medval - step

    initial_guess = guess  # Remembered to pick a side if we must restart
    # dval is the smallest residual accepted so far.  It is intentionally
    # NOT refreshed when the guess is reset below; otherwise the search
    # could bounce between the reset point and its neighbour forever.
    dval = abs(scalar(kde(guess)) - target)
    while dval > tol and n_iter <= maxiter:
        pos = guess + step
        neg = guess - step
        kdepos = scalar(kde(pos))
        kdeneg = scalar(kde(neg))
        if verbose:
            if n_iter % 10 == 0:
                print("iter, val, pos, neg, kdepos,kdeneg,guess, step")
            print("%i %01.6g %01.6g %01.6g %01.6g %01.6g %01.6g %01.6g" %
                  (n_iter, target, pos, neg, kdepos, kdeneg, guess, step))
        dval_pos = abs(kdepos - target)
        dval_neg = abs(kdeneg - target)
        if dval_pos < dval:  # stepping up gets closer
            dval = dval_pos
            guess = pos
        elif dval_neg < dval:  # stepping down gets closer
            dval = dval_neg
            guess = neg
        elif dval_pos == dval_neg:  # We must be really far away
            # Restart one initial step from the median, on the same
            # side as the caller's starting guess.
            if initial_guess > medval:
                guess = medval + initial_step
            else:
                guess = medval - initial_step
        else:  # neither neighbour is closer, but we're not within tol
            step *= 0.5

        n_iter += 1

    if n_iter > maxiter:
        print("Exceeded maximum number of iterations (%i) without convergance" % maxiter)

    return guess
                
            
 
def conflevel(kde, frac, ftol=1e-6, tol=1e-6, usespline=False, verbose=False,maxiter=None):
    """Determine the lower and upper confidence levels required to
    enclose a given fraction 'frac' of a KDE object's dataset(s) to
    within a tolerance ftol.

    The lower limit starts at (median - 2*std).  For each trial lower
    limit the matching upper limit is the point, found by
    ``findkdeval``, where the density equals the density at the lower
    limit; the enclosed fraction is ``kde.integrate_box_1d`` between
    the two.  The lower limit is stepped in or out, halving the step
    each time the direction reverses.

    :INPUTS:
      kde : scipy.stats.gaussian_kde object
        For ``kde.d > 1`` every row of ``kde.dataset`` is processed
        independently through its own 1-D ``gaussian_kde``.

      frac : float
        Desired enclosed fraction.  Values that cannot be reached
        (e.g. outside 0..1) are not detected and may never converge.

    :OPTIONS:
      ftol : float
        Tolerance on the enclosed fraction.

      tol, maxiter, verbose
        Passed through to ``findkdeval``.

      usespline : bool
        Evaluate the density through a cubic spline sampled at 1000
        points spanning median +/- 5*std instead of calling the KDE
        directly.  The enclosed fraction is still integrated with the
        real KDE.

    :RETURNS:
      (low_limit, high_limit) for a 1-D KDE; a list of such pairs,
      one per dimension, when ``kde.d > 1``.
    """
    # 2010-07-23 14:00 IJC: Created
    from scipy import interpolate

    if kde.d > 1:
        ret = []
        for ii in range(kde.d):
            if verbose:
                print("%s / %s" % (ii, kde.d))
            ret.append(conflevel(gaussian_kde(kde.dataset[ii]), frac,
                                 ftol=ftol, tol=tol, usespline=usespline,
                                 verbose=verbose, maxiter=maxiter))
        return ret

    median_value = np.median(kde.dataset)
    step = np.std(kde.dataset)
    low_limit = median_value - 2*step

    density = kde
    if usespline:
        # linspace needs an integer sample count and the spline an
        # integer order.
        spx = np.linspace(-5*step, 5*step, 1000) + median_value
        sp = interpolate.UnivariateSpline(spx, kde(spx), k=3, s=0.0)
        # findkdeval/kde_max read these attributes from whatever
        # density object they are handed.
        sp.d = kde.d
        sp.dataset = kde.dataset
        density = sp

    lastmovewasup = True
    while True:
        kdelow = density(low_limit)
        # Mirror the lower limit about the median as a starting point
        # for the upper-limit search.
        guess = 2*median_value - low_limit
        if verbose:
            print("kdelow,guess %s %s" % (kdelow, guess))
        high_limit = findkdeval(density, kdelow, tol=tol, guess=guess,
                                verbose=verbose, maxiter=maxiter)
        enclosed_fraction = kde.integrate_box_1d(low_limit, high_limit)

        # Test for convergence BEFORE moving, so the limits returned
        # are the ones that produced the accepted fraction.
        if abs(enclosed_fraction - frac) <= ftol:
            break

        thismovewasup = enclosed_fraction > frac
        if thismovewasup:  # need to close in
            new_low = low_limit + step
        else:  # need to open up limits
            new_low = low_limit - step

        if new_low == low_limit:
            # The step has shrunk below floating-point resolution;
            # no further progress is possible.
            print("Step size underflowed before enclosing the requested fraction (%g) to within ftol" % frac)
            break
        low_limit = new_low

        if thismovewasup != lastmovewasup:
            # We passed it; turn around and home in
            step *= 0.5
        lastmovewasup = thismovewasup

    return low_limit, high_limit


def confmap(map, frac, **kw):
    """Return the confidence level of a 2D histogram or array that
    encloses the specified fraction of the total sum.

    The level is found by bisection between ``map.min()`` and
    ``map.max()`` on the function
    ``map[map >= level].sum() / map.sum() - frac``.

    :INPUTS:
      map : 1D or 2D numpy array
        Probability map (from hist2d or kde)

      frac : float, 0 <= frac <= 1
        desired fraction of enclosed energy of map.  If a sequence is
        given, a list with one level per entry is returned.

    :OPTIONS:
      ordinate : None or 1D array
        If 1D map, interpolates onto the desired value.  This could
        cause problems when you aren't just setting upper/lower
        limits....

    :RETURNS:
      The level (float), or the interpolated ordinate value when
      ``ordinate`` is supplied, or a list of these when ``frac`` is a
      sequence.

    :NOTES:
      ``scipy.optimize.bisect`` raises ValueError if the function
      above has the same sign at ``map.min()`` and ``map.max()``.
    """
    # 2010-07-26 12:54 IJC: Created
    # 2011-11-05 14:29 IJMC: Fixed so it actually does what it's supposed to!
    from scipy.optimize import bisect

    if hasattr(frac, '__iter__'):
        return [confmap(map, thisfrac, **kw) for thisfrac in frac]

    map = np.asarray(map)
    total = map.sum()  # invariant across bisection steps

    def diffsum(level):
        # Enclosed fraction above `level`, minus the requested fraction.
        return 1.0*map[map >= level].sum()/total - frac

    ret = bisect(diffsum, map.min(), map.max())

    ordinate = kw.get('ordinate')
    if ordinate is not None:
        # np.interp needs increasing x-values, so sort map and carry
        # the ordinate along in the same order.
        sortind = np.argsort(map)
        ret = np.interp(ret, map[sortind], np.asarray(ordinate)[sortind])

    return ret


def kdehist2(x, y, npts, xrange=None, yrange=None):
    """Generate a 2D histogram map from data, using Gaussian KDEs

    :INPUTS:
      x : seq
        X data

      y : seq
        Y data

      npts : int or 2-seq
        number of points across final histogram, or [nx, ny].  A
        length-1 sequence is treated like a single int.

    :OPTIONAL_INPUTS:
      xrange : 2-seq
        [x_min, x_max] values for final histogram.  If None, the grid
        spans median(x) +/- 5*std(x).

      yrange : 2-seq
        [y_min, y_max] values for final histogram.  If None, the grid
        spans median(y) +/- 5*std(y).

    :RETURNS:
      (kdehist, xbins, ybins)

      kdehist has shape (ny, nx): row j, column i holds the KDE
      evaluated at (xbins[i], ybins[j]).

    :EXAMPLE:
      ::

        import kdestats as kde
        import numpy as np
        import pylab as py

        covmat = [[1., 1.5], [1.5, 4.]]
        xy = np.random.multivariate_normal([0, 0], covmat, 10000)
        kdehist = kde.kdehist2(xy[:,0], xy[:,1], [30, 30])
        clevels = kde.confmap(kdehist[0], [.6827,.9545,.9973])

        py.figure()  # Plot 1-, 2-, and 3-sigma contours
        c = py.contour(kdehist[1], kdehist[2], kdehist[0], clevels)
    """
    # 2012-02-11 19:45 IJMC: Created

    if hasattr(npts, '__iter__'):
        if len(npts) == 1:
            npts = [npts[0], npts[0]]
    else:
        npts = [npts, npts]
    nx, ny = int(npts[0]), int(npts[1])

    # Generate KDE:
    thiskde = gaussian_kde([x, y])

    def grid_axis(row, limits, n):
        # Evenly spaced coordinates: explicit limits when given,
        # otherwise median +/- 5 standard deviations of the data row.
        if limits is None:
            center = np.median(row)
            spread = np.std(row)
            return np.linspace(-5*spread, 5*spread, n) + center
        return np.linspace(limits[0], limits[1], n)

    # Generate coordinates for KDE evaluation:
    thisx = grid_axis(thiskde.dataset[0], xrange, nx)
    thisy = grid_axis(thiskde.dataset[1], yrange, ny)

    thisxx, thisyy = np.meshgrid(thisx, thisy)
    # meshgrid yields (ny, nx) arrays; reshape to that same shape so
    # unequal nx and ny work.
    thishist = thiskde([thisxx.ravel(), thisyy.ravel()]).reshape(thisxx.shape)

    return thishist, thisx, thisy
Navigation

Source code for kdestats

Quick search

Navigation