Source code for pcsa

import pdb
import numpy as np

def prepdata(date, trim=None, telcor=True, getcal=False, divop='median', interp=True):
    """Collect data and prepare for LSD analysis.

    INPUTS:
       date -- string, observation date:
           '2008mar21' -- 55 Cnc e
           '2008jun15' -- tau Boo b
           '2008jul12' -- HD 189733, L-band

    OPTIONAL INPUTS:
       trim -- list or 1D array, indices of loaded data to keep,
               e.g., [0, 1, 2, 3, ..., 4000]
       telcor=True -- load telluric-corrected data
       getcal=False -- load calibration spectra instead of science spectra
       divop='median' -- by default, divide each spectrum by its median to
               normalize it.  Can also be set to 'none'.
       interp=True -- whether to load wavelength-interpolated (upsampled)
               spectra (if True) or the raw 1024-pixel spectra (if False)

    OUTPUT:
       (assume L observations of M echelle orders with N wavelength bins)
       w -- N-length wavelength grid for data
       proc -- processed data cubes, L x M x N
       eproc -- computed _instrumental_ uncertainties, L x M x N
       sky -- sky background intensity, L x M x N
       hjd -- L-length time array of heliocentric Julian Date
       airmass -- L-length airmass array
       p -- "planet" object from analysis.getobj()
       rv -- planet's maximal radial velocity at the specified HJDs
       keyvals -- various specified header keywords at the specified HJDs
       spec_location -- location of star along slit at a given HJD, L x M
    """
    # 2008-07-28 15:28 IJC: Created
    # 2009-06-03 15:21 IJC: Function-ified
    # 2009-07-29 02:04 IJC: Added HD209458's trimsize
    # 2009-08-02 01:01 IJC: Added telcor option
    # 2009-08-05 12:08 IJC: Added multiple header keyword extraction
    # 2009-10-22 11:53 IJC: Added sky, airmass, spec_location outputs
    # 2009-10-23 10:40 IJC: Fixed a small bug with telcor outputs
    # 2009-10-29 15:13 IJC: Concatenate inputs from multi-section nights
    # 2010-08-24 15:41 IJC: Added "interp" option to load non-interpolated data

    # A prefix "e" on a variable means it is the 1-sigma error quantity.
    import analysis as an
    try:
        from astropy.io import fits as pyfits
    except ImportError:
        import pyfits
    import nsdata, os
    import pylab as py

    def concatresults(all):
        # Doesn't get anything from the 'keys' field yet.
        from numpy import concatenate, zeros
        w = all[0][0]
        planet = all[0][6]
        ii = 0
        nord = all[0][1].shape[1]
        nlam = all[0][1].shape[2]
        dat = zeros((0, nord, nlam), float)
        edat = zeros((0, nord, nlam), float)
        sky = zeros((0, nord, nlam), float)
        loc = zeros((0, nord), float)
        hjd = zeros(0, float)
        airmass = zeros(0, float)
        prv = zeros(0, float)
        while ii < len(all) and len(all[ii]) > 0:
            dat = concatenate((dat, all[ii][1]), 0)
            edat = concatenate((edat, all[ii][2]), 0)
            sky = concatenate((sky, all[ii][3]), 0)  # was (edat, ...): a bug
            hjd = concatenate((hjd, all[ii][4]), 0)
            airmass = concatenate((airmass, all[ii][5]), 0)
            prv = concatenate((prv, all[ii][7]), 0)
            loc = concatenate((loc, all[ii][9]), 0)
            ii += 1
        return w, dat, edat, sky, hjd, airmass, planet, prv, dict(), loc

    # ------------------ Initialize things ------------------
    # Define some startup variables that could go in a GUI someday.
    # (Dates not listed below leave 'trimsize' undefined.)
    if trim is None and interp:
        if date[0:9] == '2008jul12':
            trimsize = py.arange(7770)
        elif date[0:9] == '2008jun15':
            trimsize = py.arange(4000)
        elif date == '2008mar21':
            trimsize = py.arange(4000)
        elif date[0:9] == '2009jul29':
            trimsize = py.arange(4000)
        elif date[0:9] == '2010aug16':
            trimsize = py.arange(4300)
        elif date[0:9] == '2010sep04':
            trimsize = py.arange(3460)
    elif not interp:
        trimsize = py.arange(1024)
    else:
        trimsize = py.array(trim).copy()

    badval = 0.0
    inits = nsdata.initobs(date)
    if len(inits) < 10:
        print "Loaded only dates, not full init parameters..."
        # (Note: 'interp' is not forwarded in this recursive call.)
        ret = [prepdata(init, telcor=telcor, trim=trim, getcal=getcal, divop=divop) \
                   for init in inits]
        return concatresults(ret)

    planet = inits[0]
    _proc = inits[1]
    if getcal:
        datalist = [el.replace('fn', 'sint.fits').replace(inits[1], '') \
                        for el in inits[11][1]]
    else:
        if telcor:
            datalist = inits[2]
        else:
            datalist = [os.path.basename(el).replace('fn', 'sint') + '.fits' \
                            for el in inits[12][1]]
    if not interp:
        datalist = [el.replace('sint', 's') for el in datalist]

    if len(datalist) == 0:
        print "Datalist is zero-length!"
        return []

    if interp:
        wavefilename = inits[3]
    else:
        wavefilename = inits[3].replace('winterp', 'wmc')

    if getcal:
        aplist = inits[21]
    else:
        aplist = inits[6]

    telluric_fn = inits[7]
    pre_telllist = [el + 'int.fits' for el in inits[13][1]]

    os.chdir(_proc)
    raw = an.loaddata(datalist, path=_proc, band=1)
    eraw = an.loaddata(datalist, path=_proc, band=2)
    #no_tel = an.loaddata(pre_telllist, path=_proc, band=1)
    if telcor:
        telluric = pyfits.getdata(telluric_fn)
        telluric = telluric[:,:,trimsize]
    else:
        sky = an.loaddata(datalist, path=_proc, band=3)
        sky = sky[:,:,trimsize]

    w = pyfits.getdata(_proc + wavefilename)
    lamlab = 'Wavelength [' + ur'\u00c5' + ']'

    raw = raw[:,:,trimsize]
    eraw = eraw[:,:,trimsize]
    #no_tel = no_tel[:,:,trimsize]
    w = w[:,trimsize]

    rsh = raw.shape
    n_obs = rsh[0]
    n_ap = rsh[1]
    n_lam = rsh[2]

    #keys = ['airmass', 'slitpa', 'scampa', 'cryotemp', 'az', 'el', 'parang', 'parantel', 'tubetemp']
    keyvals = dict()
    #for key in keys:
    #    val = nsdata.getval(datalist, key)
    #    exec('keyvals.update(dict(' + key + '=val))')

    apertures = nsdata.getirafap(aplist)
    spec_location = apertures
    apertures = py.array(nsdata.collapse_objlist(apertures, 'center'))
    spec_location = apertures[:,:,1]

    # Get the time and phase information from the files:
    try:
        hjd = py.array(nsdata.getval(datalist, 'hjd'))
    except:
        print "Was unable to load HJD data from file: ", datalist
        hjd = []
    try:
        airmass = py.array(nsdata.getval(datalist, 'airmass'))
    except:
        print "Was unable to load airmass data from file: ", datalist
        airmass = []  # was 'hjd = []': a bug

    p = an.getobj(planet)
    prv = an.rv(p, hjd)

    # Normalize intensity over the time axis (slit losses, clouds, etc.):
    proc1 = py.zeros(rsh, float)
    eproc1 = py.zeros(rsh, float)
    for ii in range(n_ap):
        temp = nsdata.divdata(raw[:,ii,:], op=divop, axis=1, badval=badval, \
                                  returndata=True)
        proc1[:,ii,:] = temp[0] + 0.0
        eproc1[:,ii,:] = nsdata.repval(eraw[:,ii,:] / temp[1], py.nan, 1e6)

    if telcor:
        ret = (w, proc1, eproc1, hjd, airmass, p, prv, keyvals, spec_location)
    else:
        ret = (w, proc1, eproc1, sky, hjd, airmass, p, prv, keyvals, spec_location)
    return ret

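# --- Hedged usage sketch for prepdata.  This assumes the full observing-run
# --- environment (the 'nsdata' and 'analysis' modules plus the reduced data
# --- files that nsdata.initobs() points to), so it is illustrative rather
# --- than stand-alone.  The unpacking follows the telcor=True return
# --- signature documented above:
#
#   w, proc, eproc, hjd, airmass, p, rv, keyvals, loc = \
#       prepdata('2008jul12', telcor=True)
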
def detdata(block, t, mode='poly', ord=1, meansub=False, eblock=None, n=200, \
            verbose=False, polyind=None):
    """Remove temporal trends from blocks of data.

    INPUTS: (L observations of M echelle orders with N wavelength bins)
       block: array of data from pcsa.prepdata, either (N x L) or (L x M x N)
       t: 1D array of times (e.g., HJD), of length L

    OPTIONAL INPUTS:
       mode='poly' -- detrending method: either 'poly' or 'pca' (for
              principal component analysis).
       ord=1 -- if mode=='poly': polynomial order to subtract
             -- if mode=='pca': number of principal components to subtract
       polyind -- 1D numpy array of length L.  Use only 'True' indices in
              computing polynomial fits, then remove the fit from the entire
              dataset (e.g., to avoid fitting out a transit).
       meansub=False -- whether to zero out the mean, or keep the means fixed
       eblock=None -- 1-sigma errors on the values in 'block', for PCA
       n=200 -- number of wavelength bins to use for each PCA calculation
       verbose=False -- bool flag to print verbose messages

    OUTPUT:
       newblock -- detrended dataset

    EXAMPLE:
       TBW -- but see the commented usage sketch just below this function.
    """
    # 2009-06-03 IJC: Created
    # 2009-06-04 IJC: Adding PCA
    # 2010-10-19 14:13 IJC: Added polyind option
    from numpy import array, zeros, polyfit, polyval, ones, ceil, linalg, delete, dot
    from pylab import find
    from analysis import wmean

    t = array(t).copy()
    block = array(block).copy()
    if eblock is None:
        eblock = ones(block.shape, float)
    else:
        eblock = array(eblock).copy()
    if polyind is None:
        polyind = ones(t.size, bool)
    else:
        polyind = array(polyind).copy()

    if mode=='poly':
        if len(block.shape) >= 3:
            print "Currently only working for 2D, NxL data blocks."
            return -1
        if meansub:
            m = 1
        else:
            m = 0
        (nlam, nobs) = block.shape
        t = (t - t.min()) / (t.max() - t.min())
        newblock = zeros(block.shape, float)
        for ii in range(nlam):
            vec = block[ii,:]
            p = polyfit(t[polyind], vec[polyind], ord)
            newblock[ii,:] = vec - polyval(p, t) + m*vec.mean()
    elif mode=='pca':
        # (pca() accepts no 'n' option; the original call passed n=n,
        #  which would raise a TypeError.)
        pcamatrix, eival, eivec = pca(block, eblock, ord=ord, verbose=verbose)
        newblock = block - pcamatrix
        if verbose:
            from pylab import figure, subplot, colorbar, clim
            import nsdata as ns
            figure()
            cl = [min([block.min(), pcamatrix.min()])/5, \
                  max([block.max(), pcamatrix.max()])/5]
            subplot(224); ns.imshow(newblock); clim(cl); colorbar()
            subplot(222); ns.imshow(block); clim(cl); colorbar()
            subplot(223); ns.imshow(pcamatrix); clim(cl); colorbar()
    return newblock

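# --- Minimal sketch for detdata in 'poly' mode (the docstring's EXAMPLE is
# --- still TBW).  Synthetic data only; a first-order fit removes a shared
# --- linear temporal trend almost exactly.  Requires this module's own
# --- dependencies (pylab, analysis) to be importable:
#
#   import numpy as np
#   t = np.linspace(0., 1., 50)                # L = 50 times
#   block = np.ones((200, 50)) + 0.05 * t      # N = 200 channels, shared trend
#   flat = detdata(block, t, mode='poly', ord=1)
#   np.allclose(flat, 0.)                      # -> True (trend removed)
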
def pca(block, eblock, ord=1, verbose=False, trans=False):
    """Calculate a principal component analysis for an N x M input
    array of N observations of M variables.

    (Helper function for detdata, but could eventually be stand-alone.)

    ord -- number of principal components to remove, sorted by eigenvalue size

    trans -- take the transpose of block and eblock (i.e., they are
             M x N instead of the desired N x M)

    WARNING: do not trust any PCs with coefficient magnitudes near zero!
    """
    # IJC 2009/06/04: Created
    # 2010-10-13 15:01 IJC: Adapted to use the 'MDP' module -- much faster!
    from numpy import array, zeros, polyfit, polyval, ones, ceil, linalg, \
        delete, dot, transpose
    from pylab import find
    from analysis import wmean
    import mdp

    if trans:
        block = transpose(block)
        eblock = transpose(eblock)

    (nlam, nobs) = block.shape

    ##########
    # Use the MDP module for the eigen-computation -- much faster!
    block_mdp = block.transpose()
    cov_eigv, node = pca_eigv(block_mdp, svd=True, retnode=True)
    proj_matrix = node.v[:,::-1]
    eivalcp = cov_eigv.copy() * (nobs - 1)  # scale for backwards compatibility
    eiveccp = proj_matrix.copy()
    eival = cov_eigv.copy() * (nobs - 1)    # scale for backwards compatibility
    eivec = proj_matrix.copy()
    ##########

    pcamatrix = zeros(block.shape, float)

    # Compute all desired PC projections:
    for hh in range(ord):
        eigind = find(eival==eival.max())
        vec = eivec[:,eigind].copy().ravel()
        eival = delete(eival, eigind)
        eivec = delete(eivec, eigind, 1)
        pcamatrix += pca_project(vec, block)

    return (pcamatrix, eivalcp, eiveccp)

def pca_project(pc, block):
    """Project a given principal component eigenvector onto a block of data.

    pc -- length-N 1D numpy array

    block -- N x M input array of N observations of M variables.
    """
    # 2010-10-15 10:11 IJC: Created
    from numpy import dot

    nlam, nobs = block.shape
    pca = dot(pc, block).reshape(1, nobs)
    a = dot(block, pca.transpose()) / dot(pca, pca.transpose())
    return dot(a, pca)

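# --- Minimal sketch for pca_project (pure numpy; runnable as-is).  For a
# --- rank-one block built from a unit-norm 'pc', the projection reproduces
# --- the block exactly:
#
#   import numpy as np
#   pc = np.ones(5) / np.sqrt(5.)               # unit-norm eigenvector, N = 5
#   block = np.outer(pc, np.arange(1., 9.))     # rank-one N x M block, M = 8
#   np.allclose(pca_project(pc, block), block)  # -> True
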
def salign2(s1, s2=None, retshift=False, offset=None):
    """Shift and align spectra.

    INPUT:
       s1 -- array.  If 1D, a second spectrum (to be aligned) must also
             be input.  If 2D (of shape [nobs x npix]), the first
             spectrum is used to align each of the succeeding
             (nobs - 1) spectra.

    OPTIONAL INPUT:
       retshift -- return pixel shifts as the second tuple element
             (e.g., for use in aligning related vector sets).
       offset -- if None, calculate the aligning offsets.  Otherwise,
             use these offsets (via SOFFSET) to align the vectors.

    OUTPUT:
       salign -- array.  A 1D or 2D array, corresponding to the shape of s1.

    NOTES:
       - If 1D, s1 and s2 must be the same length.
       - Uses cross-correlation and the nearest pixel.  The
         disadvantage can be poor alignment; the advantage is speed.
    """
    # 2009-08-05 13:43 IJC: Created
    from pylab import arange, find, zeros, array

    s1 = array(s1, copy=True)
    if offset is None:
        offset = soffset(s1, s2=s2, oversamp=4)

    if s2 is None:
        nobs, npix = s1.shape
        s3 = zeros(s1.shape, float)
        s3[0,:] = s1[0,:].copy()
        for ii in range(1, nobs):
            s3[ii,:] = salign(s1[0,:], s2=s1[ii,:], offset=offset[ii-1])
    else:
        s2 = array(s2, copy=True)
        if s1.shape != s2.shape:
            print "S1 and S2 must have the same shape!"
            return []
        n = len(s1)
        s3 = zeros(s2.shape, float)
        if offset < 0:
            ind0 = -offset
            ind1 = n
            s3[0:n+offset] = s2[ind0:ind1].copy()
        elif offset > 0:
            ind0 = 0
            ind1 = n - offset
            s3[offset:n] = s2[ind0:ind1].copy()
        else:
            s3 = s2.copy()

    if retshift:
        ret = (s3, offset)
    else:
        ret = s3
    return ret

def salign(s1, s2=None, retshift=False, offset=None):
    """Shift and align spectra.

    INPUT:
       s1 -- array.  If 1D, a second spectrum (to be aligned) must also
             be input.  If 2D (of shape [nobs x npix]), the first
             spectrum is used to align each of the succeeding
             (nobs - 1) spectra.

    OPTIONAL INPUT:
       retshift -- return pixel shifts as the second tuple element
             (e.g., for use in aligning related vector sets).
       offset -- if None, calculate the aligning offsets.  Otherwise,
             use these offsets (via SOFFSET) to align the vectors.

    OUTPUT:
       salign -- array.  A 1D or 2D array, corresponding to the shape of s1.

    NOTES:
       - If 1D, s1 and s2 must be the same length.
       - Uses cross-correlation and the nearest pixel.  The
         disadvantage can be poor alignment; the advantage is speed.
    """
    # 2009-08-05 13:43 IJC: Created
    from pylab import arange, find, zeros, array

    s1 = array(s1, copy=True)
    if offset is None:
        offset = soffset(s1, s2=s2, oversamp=4)

    if s2 is None:
        nobs, npix = s1.shape
        s3 = zeros(s1.shape, float)
        s3[0,:] = s1[0,:].copy()
        for ii in range(1, nobs):
            s3[ii,:] = salign(s1[0,:], s2=s1[ii,:], offset=offset[ii-1])
    else:
        s2 = array(s2, copy=True)
        if s1.shape != s2.shape:
            print "S1 and S2 must have the same shape!"
            return []
        n = len(s1)
        s3 = zeros(s2.shape, float)
        if offset < 0:
            ind0 = -offset
            ind1 = n
            s3[0:n+offset] = s2[ind0:ind1].copy()
        elif offset > 0:
            ind0 = 0
            ind1 = n - offset
            s3[offset:n] = s2[ind0:ind1].copy()
        else:
            s3 = s2.copy()

    if retshift:
        ret = (s3, offset)
    else:
        ret = s3
    return ret

def shiftvec(vec, offset):
    """Shift a 1D vector by an offset.  Helper function for salign.

    Non-overlapping elements are replaced by zeros.
    """
    # 2009-08-05 16:31 IJC: Created
    from numpy import array, zeros  # (these imports were missing in the original)

    vec = array(vec, copy=True)
    if len(vec.shape) > 1:
        print "vec must be 1D!  Exiting."
        return []
    n = len(vec)
    vec2 = zeros(n)
    if offset < 0:
        ind0 = -offset
        ind1 = n
        vec2[0:n+offset] = vec[ind0:ind1].copy()  # was 's2': a bug
    elif offset > 0:
        ind0 = 0
        ind1 = n - offset
        vec2[offset:n] = vec[ind0:ind1].copy()    # was 's2': a bug
    else:
        vec2 = vec.copy()
    return vec2

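# --- Minimal sketch for shiftvec: positive offsets move samples toward
# --- higher indices; vacated elements are zero-filled:
#
#   shiftvec([1., 2., 3., 4.], 1)     # -> array([ 0.,  1.,  2.,  3.])
#   shiftvec([1., 2., 3., 4.], -2)    # -> array([ 3.,  4.,  0.,  0.])
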
def soffset(s1, s2=None, plotalot=False, oversamp=16, searchrange=[-2,+2], \
            retnew=False, maxshift=10, method='interp'):
    """Get pixelwise offsets between two similar 1D vectors.

    INPUT:
       s1 -- array.  If 1D, a second vector (to be aligned) must also
             be input.  If 2D (of shape [nvec x npix]), the first
             vector is used to align each of the succeeding (nvec - 1)
             spectra.

       oversamp -- scalar.  Factor by which to oversample the inputs,
             to permit computation of more precise offsets.

       searchrange -- list.  Minimum and maximum number of pixels on
             each side of the maximum cross-correlation value to use
             for parabolic fitting.

       maxshift -- int.  Min/max of the pixel correlation lags to compute.

       method -- str: 'interp' to use spline interpolation, 'lsq' for
             least-squares shift-and-fit, or 'lsqlin' for least-squares
             shift-and-fit with an additional linear-trend term.

    OUTPUT:
       offset -- array.  A 1D array with elements corresponding to the
             vector offsets necessary to align s1 and s2 via salign.

    NOTES:
       - If 1D, s1 and s2 must be the same length.
       - Uses cross-correlation and the nearest pixel.  The
         disadvantage can be poorer alignment; the advantage is speed.
       - Helper function for salign.
    """
    # 2009-08-05 13:43 IJC: Created
    # 2010-08-19 19:15 IJC: Adding oversamp option
    # 2010-12-09 13:27 IJC: Added "old-behavior" flag to correlate fcn. calls
    # 2010-12-27 16:50 IJC: Eliminated numpy-correlate entirely;
    #                       replaced with my DCF function.
    # 2012-09-23 19:51 IJMC: Fixed a major bug in the 'lsq' mode --
    #                        previously shifted vectors were being
    #                        compared against themselves, so the shift
    #                        returned was well-nigh meaningless.
    # 2013-02-25 22:16 IJMC: Added 'lsqlin' option
    from scipy.interpolate import UnivariateSpline
    from tools import dcf
    from pylab import arange, find, zeros, array, polyfit, polyval, \
        concatenate, ones, tile, correlate
    import analysis as an

    s1 = array(s1, copy=True)
    if s2 is None:
        offset = []
        newSpec = []
        nobs, npix = s1.shape
        for ii in range(1, nobs):
            off = soffset(s1[0,:], s2=s1[ii,:], plotalot=plotalot, \
                          retnew=retnew, oversamp=oversamp, \
                          searchrange=searchrange, maxshift=maxshift, \
                          method=method)
            if retnew:
                offset.append(off[0])
                newSpec.append(off[1])
            else:
                offset.append(off)
        offset = array([offset]).ravel()
        ret = offset
        if retnew:
            newSpec = array(newSpec)
            ret = [ret, newSpec]
    else:
        s2 = array(s2, copy=True)
        ndim1 = len(s1.shape)
        if ndim1==0 or s1.size==1:
            s1scalar = True
            n = len(s2)
        else:
            s1scalar = False
            if s1.shape != s2.shape:
                print "S1 and S2 must have the same shape!"
                return []
            n = len(s1)
        if oversamp < 1:
            print "Oversamp must be >= 1.  Exiting."
            return -1

        x0 = arange(n, dtype=float)
        x1 = arange(n*oversamp, dtype=float) / oversamp
        xspan = x0.max() - x0.min()

        # Pad the vectors so out-of-domain spline values aren't crazy:
        x0_extra = concatenate((arange(-maxshift, 0) + x0.min(), x0, \
                                arange(1, maxshift+1) + x0.max()))
        if not s1scalar:
            bins = arange(-maxshift, maxshift, 1./oversamp) + 0.5
            s1_extra = concatenate((tile(s1[x0==x0.min()], maxshift), s1, \
                                    tile(s1[x0==x0.max()], maxshift)))
            spline1 = UnivariateSpline(x0_extra, s1_extra, k=3, s=0.0)
            ss1 = spline1(x1)

        s2_extra = concatenate((tile(s2[x0==x0.min()], maxshift), s2, \
                                tile(s2[x0==x0.max()], maxshift)))
        spline2 = UnivariateSpline(x0_extra, s2_extra, k=3, s=0.0)
        ss2 = spline2(x1)

        if s1scalar:
            # We were given the offset to apply!
            final_offset = s1
        else:
            # s1 was a vector -- compute the offset:
            if method=='interp':
                #lags, xc, exc = dcf(arange(n*oversamp, dtype=float)/oversamp, ss1, ss2, bins=bins)
                xc = correlate(ss1 - ss1.mean(), ss2 - ss2.mean(), mode=2) / oversamp
                maxlags = maxshift * oversamp
                lags = arange(-maxlags, maxlags+1, dtype=float) / oversamp
                xc = xc[len(ss1)-1-maxlags:len(ss1)+maxlags]
                offset0 = lags[find(xc==xc.max())][0]
                #pdb.set_trace()
            elif method=='lsq':
                lags = bins
                xc = zeros(len(lags))
                for ii, shift in enumerate(lags):
                    shifted_vec = spline2(x1 + shift)
                    lsqvecs = (ones(n*oversamp), shifted_vec)
                    #fit, efit = an.lsq(lsqvecs, ss1)
                    fit = np.dot(np.linalg.pinv(np.array(lsqvecs).transpose()), ss1)
                    offset, scaling = fit
                    xc[ii] = ((ss1 - (offset + shifted_vec * scaling))**2).sum()
                offset0 = lags[find(xc==xc.min())][0]
            elif method=='lsqlin':
                lags = bins
                xc = zeros(len(lags))
                onevec = ones(n*oversamp)
                linvec = np.arange(n*oversamp, dtype=float) / (n*oversamp)
                for ii, shift in enumerate(lags):
                    shifted_vec = spline2(x1 + shift)
                    lsqvecs = (onevec, linvec, shifted_vec)
                    #fit, efit = an.lsq(lsqvecs, ss1)
                    fit = np.dot(np.linalg.pinv(np.array(lsqvecs).transpose()), ss1)
                    offset, lincoef, scaling = fit
                    xc[ii] = ((ss1 - (offset + lincoef*linvec + \
                                      shifted_vec * scaling))**2).sum()
                offset0 = lags[find(xc==xc.min())][0]

            # Refine with a parabolic fit around the extremum (the original
            # computed fitInd twice; the first, offset-free version was dead code):
            fitInd = ((lags - offset0) >= (1.0*searchrange[0]/oversamp)) * \
                     ((lags - offset0) <= (1.0*searchrange[1]/oversamp))
            x4Fit = lags[fitInd]
            y4Fit = xc[fitInd]
            quadFit = polyfit(x4Fit, y4Fit, 2)
            final_offset = -0.5 * quadFit[1] / quadFit[0]

            if plotalot:
                from pylab import plot, figure, subplot
                fig = figure()
                plot(lags, xc)
                plot(x4Fit, y4Fit, '.')
                plot(x4Fit, polyval(quadFit, x4Fit), '--')

        # (The original branched on 'method' here, but every branch built
        #  the same return value.)
        if retnew:
            ret = [array([final_offset]).ravel(), spline2(x0 + final_offset)]
        else:
            ret = array([final_offset]).ravel()

    return ret

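# --- Minimal sketch for the soffset/salign pair: recover a known shift
# --- between two smooth vectors, then align them.  Note that soffset
# --- imports the local 'tools' module, so this assumes the full pcsa
# --- environment.  The sign convention is that of salign: a negative
# --- offset shifts s2 toward lower indices.
#
#   import numpy as np
#   x = np.arange(200, dtype=float)
#   s1 = np.exp(-0.5 * ((x - 100.) / 10.)**2)   # Gaussian "line"
#   s2 = np.exp(-0.5 * ((x - 103.) / 10.)**2)   # same line, shifted +3 pixels
#   off = soffset(s1, s2=s2, oversamp=4)        # -> approximately [-3.]
#   aligned = salign(s1, s2=s2, offset=int(round(off[0])))
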
def fscs(data):
    """Perform Swain et al.'s "Fourier Self-Complementary Spectrum" analysis.

    INPUT: data, numpy array -- M x N: M observations of N variables.
    """
    # 2010-09-29 09:19 IJC: Created
    import numpy as np

    nobs, nlam = data.shape
    datasub = data

    # Compute FFTs:
    fdata = np.zeros((nobs, nlam), complex)
    magdata = np.zeros((nobs, nlam), float)
    angdata = np.zeros((nobs, nlam), float)
    for ii in range(nlam):
        fdata[:,ii] = np.fft.fft(datasub[:,ii])**(1./nlam)
        #magdata[:,ii] = np.abs(fdata[:,ii])**(1./nlam)
        #angdata[:,ii] = np.angle(fdata[:,ii])

    # Compute spectral self-similarity (?):
    fsss0 = fdata.prod(1)
    #fsss1 = magdata.prod(1) * np.exp(1j*angdata.sum(1)/nlam)
    sss0 = np.fft.ifft(fsss0)
    #sss1 = np.fft.ifft(fsss1)
    return sss0

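# --- Minimal sketch for fscs (pure numpy; runnable as-is):
#
#   import numpy as np
#   data = 1.0 + 0.01 * np.random.randn(32, 64)  # M = 32 obs of N = 64 variables
#   sss = fscs(data)                             # complex array of length M = 32
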
def pca_eigv(block, ord=None, svd=True, retnode=False):
    """Compute the eigenvalues of the covariance matrix (i.e., the PC
    coefficients) for an input data block.

    ord -- number of eigenvalues to return.  If None, returns all.

    svd -- bool flag; whether to use SVD to compute the PCA.

    retnode -- also return the mdp.PCANode object.
    """
    # 2010-10-12 20:58 IJC: Created
    import mdp
    from numpy import random

    try:
        node = mdp.nodes.PCANode(svd=svd, output_dim=ord)
        node.train(block)
        node.stop_training()
    except:
        # Occasionally this crashes even when svd=True; a bit of
        # white noise prevents this without changing the results.
        node = mdp.nodes.PCANode(svd=svd, output_dim=ord)
        white_noise = random.randn(block.shape[0], block.shape[1]) * \
            block.ravel().std() * 1e-15
        node.train(block + white_noise)
        node.stop_training()

    if retnode:
        ret = (node.d[::-1], node)
    else:
        ret = node.d[::-1]
    return ret

def pca2(block, eblock=None, ord=1, svd=True):
    """Compute a Principal Component Analysis of a 2D array.

    NOTE: I don't trust the MDP module (or, I don't understand what
    it's doing), since it does not agree with my hand-coded (somewhat
    slower) PCA routine.

    block -- N x M numpy array -- N observations of M variables.

    eblock -- dummy option kept for backwards compatibility; does nothing.

    ord -- number of principal components to extract.

    svd -- bool flag; whether to use SVD to compute the PCA.
    """
    # 2010-10-12 21:03 IJC: Created
    print "NOTE: I don't trust the MDP module (or, I don't understand what"
    print "it's doing), since it does not agree with my hand-coded (somewhat"
    print "slower) PCA routine."
    import mdp

    # For historical reasons (pre-MDP), transpose the data:
    block = block.transpose()
    cov_eigv, node = pca_eigv(block, ord=ord, svd=svd, retnode=True)
    proj_matrix = node.v[:,::-1]

    # Reconstruct the input data using n PCs:
    node2 = mdp.nodes.PCANode(svd=svd, output_dim=ord)
    node2.train(block)
    node2.stop_training()
    nodepca = node.execute(block, n=ord)
    nodepca2 = node2.execute(block, n=ord)
    reconstruction = node.inverse(nodepca, n=ord).transpose()
    reconstruction2 = node2.inverse(nodepca2, n=ord).transpose()

    return (reconstruction, cov_eigv, proj_matrix, reconstruction2, node, node2)

def princomp(A, numpc=0):
    """Compute the eigenvalues and eigenvectors of the covariance matrix.

    Taken from
    http://glowingpython.blogspot.it/2011/07/pca-and-image-compression-with-numpy.html
    """
    # 2014-08-18 14:29 IJMC: Inserted into my code.
    M = (A - np.mean(A.T, axis=1)).T  # subtract the mean (along columns)
    [latent, coeff] = np.linalg.eig(np.cov(M))
    p = np.size(coeff, axis=1)
    idx = np.argsort(latent)  # sort the eigenvalues...
    idx = idx[::-1]           # ...in descending order
    # Sort the eigenvectors according to the sorted eigenvalues:
    coeff = coeff[:,idx]
    latent = latent[idx]
    if numpc < p and numpc >= 0:
        coeff = coeff[:,range(numpc)]  # cut some PCs if needed
    score = np.dot(coeff.T, M)  # projection of the data into the new space
    return coeff, score, latent
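
# --- Minimal sketch for princomp (pure numpy; runnable as-is): keep the
# --- first principal component of a noisy, nearly rank-one dataset.
#
#   import numpy as np
#   A = np.outer(np.random.randn(100), np.ones(5)) \
#       + 0.01 * np.random.randn(100, 5)   # 100 samples of 5 variables
#   coeff, score, latent = princomp(A, numpc=1)
#   coeff.shape, score.shape               # -> ((5, 1), (1, 100))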