import pdb
import numpy as np
def prepdata(date, trim=None, telcor=True, getcal=False, divop='median', interp=True):
"""Collect data and prepare for LSD analysis.
INPUTS:
date -- string, observation date: '2008mar21' -- 55cnce
'2008jun15' -- tauboob
'2008jul12' -- 189733, L-band
OPTIONAL INPUTS:
    trim -- list or 1D array, indices of loaded data to keep, e.g.,
[0,1,2,3,...,4000]
telcor=True -- load telluric-corrected data
getcal=False -- load calibration spectra instead of sci. spectra
divop='median' -- by default, divide each spectrum by its
median to normalize it. Can also set to 'none'
interp=True -- whether to load wavelength-interpolated
(upsampled) spectra (if True) or the raw
1024-pixel spectra (if False)
OUTPUT:
(assume L observations of M echelle orders with N wavelength bins)
w -- N-length wavelength grid for data
proc -- processed data cubes, L x M x N
eproc -- computed _instrumental_ uncertainties, L x M x N
    sky -- sky background intensity, L x M x N (only returned if telcor==False)
hjd -- L-length time array of heliocentric Julian Date
airmass -- L-length airmass array
p -- "planet" object from analysis.getobj()
rv -- planet's maximal Radial Velocity at specified HJD
keyvals -- various specified header keywords at specified HJD
spec_location -- location of star along slit at given HJD, L x M
"""
# 2008-07-28 15:28 IJC: Created
# 2009-06-03 15:21 IJC: Function-ified
# 2009-07-29 02:04 IJC: Added HD209458's trimsize
# 2009-08-02 01:01 IJC: Added telcor option
# 2009-08-05 12:08 IJC: Added multiple header keyword extraction
# 2009-10-22 11:53 IJC: Added sky, airmass, spec_location outputs
# 2009-10-23 10:40 IJC: Fixed a small bug with telcor outputs
# 2009-10-29 15:13 IJC: Concatenate inputs from multi-section nights
# 2010-08-24 15:41 IJC: Added "interp" option to load non-interpolated data
# A prefix "e" on a variable means it is the 1-sigma error quantity
import analysis as an
try:
from astropy.io import fits as pyfits
    except ImportError:
import pyfits
import nsdata, os
import pylab as py
def concatresults(all):
# Doesn't get anything from 'keys' field yet.
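        # NOTE: the indices below assume the 10-element (telcor=False)
        # tuple ordering from prepdata; telcor=True returns a 9-tuple
        # without 'sky', which this helper does not handle.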
from numpy import concatenate, zeros
w = all[0][0]
planet = all[0][6]
ii=0
nord = all[0][1].shape[1]
nlam = all[0][1].shape[2]
dat = zeros((0,nord,nlam),float)
edat = zeros((0,nord,nlam),float)
sky = zeros((0,nord,nlam),float)
loc=zeros((0,nord),float)
hjd = zeros(0,float)
airmass = zeros(0,float)
prv = zeros(0,float)
while ii<len(all) and len(all[ii])>0:
dat = concatenate((dat, all[ii][1]),0)
edat = concatenate((edat, all[ii][2]),0)
            sky = concatenate((sky, all[ii][3]),0)
hjd = concatenate((hjd, all[ii][4]),0)
airmass = concatenate((airmass, all[ii][5]),0)
prv = concatenate((prv, all[ii][7]),0)
loc = concatenate((loc, all[ii][9]),0)
ii +=1
return w, dat, edat, sky, hjd, airmass, planet, prv, dict(), loc
#------------------Initialize things------------------
### Define some startup variables that could go in a GUI someday
    if trim is None and interp:
if date[0:9] == '2008jul12':
trimsize = py.arange(7770)
elif date[0:9] == '2008jun15':
trimsize = py.arange(4000)
elif date == '2008mar21':
trimsize = py.arange(4000)
elif date[0:9] == '2009jul29':
trimsize = py.arange(4000)
elif date[0:9] == '2010aug16':
trimsize = py.arange(4300)
        elif date[0:9] == '2010sep04':
            trimsize = py.arange(3460)
        else:
            print "No default trim range is defined for date: " + date
            return []
    elif not interp:
trimsize = py.arange(1024)
else:
trimsize = py.array(trim).copy()
badval = 0.0
inits = nsdata.initobs(date)
if len(inits)<10:
print "Loaded only dates, not full init parameters..."
ret = [prepdata(init, telcor=telcor, trim=trim, getcal=getcal,divop=divop) \
for init in inits]
return concatresults(ret)
planet = inits[0]
_proc = inits[1]
if getcal:
datalist = [el.replace('fn', 'sint.fits').replace(inits[1],'') \
for el in inits[11][1]]
else:
if telcor:
datalist = inits[2]
else:
datalist = [os.path.basename(el).replace('fn', 'sint')+'.fits' \
for el in inits[12][1]]
if not interp:
datalist = [el.replace('sint', 's') for el in datalist]
if len(datalist)==0:
print "Datalist is zero-length!"
return []
if interp:
wavefilename = inits[3]
else:
wavefilename = inits[3].replace('winterp','wmc')
if getcal:
aplist = inits[21]
else:
aplist = inits[6]
telluric_fn = inits[7]
pre_telllist = [el + 'int.fits' for el in inits[13][1]]
os.chdir(_proc)
raw = an.loaddata(datalist, path=_proc, band=1)
eraw = an.loaddata(datalist, path=_proc, band=2)
#no_tel = an.loaddata(pre_telllist, path=_proc, band=1)
if telcor:
telluric = pyfits.getdata(telluric_fn)
telluric = telluric[:,:,trimsize]
else:
sky = an.loaddata(datalist, path=_proc, band=3)
sky = sky[:,:,trimsize]
w = pyfits.getdata(_proc + wavefilename)
    lamlab = u'Wavelength [\u00c5]'
raw = raw[:,:,trimsize]
eraw = eraw[:,:,trimsize]
#no_tel = no_tel[:,:,trimsize]
w = w[:,trimsize]
rsh = raw.shape
n_obs = rsh[0]
n_ap = rsh[1]
n_lam = rsh[2]
#keys = ['airmass', 'slitpa', 'scampa', 'cryotemp', 'az', 'el', 'parang', 'parantel', 'tubetemp']
keyvals = dict()
#for key in keys:
# val = nsdata.getval(datalist, key)
# exec('keyvals.update(dict('+key+'=val))')
    apertures = nsdata.getirafap(aplist)
    apertures = py.array(nsdata.collapse_objlist(apertures, 'center'))
    spec_location = apertures[:,:,1]
# Get the time and phase information from the files
try:
hjd = py.array(nsdata.getval(datalist, 'hjd'))
except:
print "Was unable to load HJD data from file: ", datalist
hjd = []
try:
airmass = py.array(nsdata.getval(datalist, 'airmass'))
except:
print "Was unable to load airmass data from file: ", datalist
        airmass = []
p = an.getobj(planet)
prv = an.rv(p, hjd)
# Normalize intensity over time axis (slit losses, clouds, etc):
proc1 = py.zeros(rsh, float)
eproc1 = py.zeros(rsh, float)
for ii in range(n_ap):
temp = nsdata.divdata(raw[:,ii,:], op=divop, axis=1, badval=badval, returndata=True)
proc1[ :,ii,:] = temp[0] + 0.0
eproc1[:,ii,:] = nsdata.repval( eraw[:,ii,:]/temp[1] , py.nan, 1e6)
if telcor:
ret = (w, proc1, eproc1, hjd, airmass, p, prv, keyvals, spec_location)
else:
ret = (w, proc1, eproc1, sky, hjd, airmass, p, prv, keyvals, spec_location)
return ret
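# A hedged usage sketch for prepdata. Not runnable as-is: it assumes the
# '2008jun15' observation files and the companion nsdata/analysis modules
# are present, as set up by nsdata.initobs.
def _demo_prepdata():
    """Illustrative call only; see the prepdata docstring for outputs."""
    w, proc, eproc, sky, hjd, airmass, p, rv, keyvals, loc = \
        prepdata('2008jun15', telcor=False)
    # proc is L x M x N: L observations of M echelle orders, each with
    # N wavelength bins; w is the matching wavelength grid.
    return w, proc, eproc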
def detdata(block, t, mode='poly', ord=1, meansub=False, eblock=None, n=200, \
verbose=False, polyind=None):
"""Remove temporal trends from blocks of data.
INPUTS: (L observations of M echelle orders with N wavelength bins)
block: array of data from pcsa.prepdata, either (N x L) or (L x M x N)
t: 1D array of times (e.g., HJD), of length (L)
OPTIONAL INPUTS:
    mode='poly' -- method to use to detrend: either 'poly'
                   or 'pca' (for principal component analysis).
ord=1 -- if mode=='poly': polynomial order to subtract
-- if mode=='pca': number of Principal Components to subtract
polyind -- 1D numpy array of length L. Use only 'True' indices
in computing polynomial fits, then remove the fit
from the entire data (e.g., to avoid fitting out a transit).
    meansub=False -- if True, add each row's mean back after the fit is
                     removed; if False, the fit's constant term is
                     subtracted as well, leaving rows with near-zero mean.
eblock=None -- 1-sigma errors on the values in 'block' for PCA
    n=200 -- number of wavelength bins to use for each PCA
             calculation (currently unused; kept for compatibility)
verbose=False -- bool flag to spit out lots of verbose messages
OUTPUT:
newblock -- detrended dataset
    EXAMPLE:
        See _demo_detdata() below for a worked example on synthetic data.
"""
# 2009-06-03 IJC: Created
# 2009-06-04 IJC: Adding PCA
# 2010-10-19 14:13 IJC: Added polyind option
from numpy import array, zeros, polyfit, polyval, ones, ceil, linalg,delete,dot
from pylab import find
from analysis import wmean
t = array(t).copy()
block = array(block).copy()
if eblock is None:
eblock = ones(block.shape, float)
else:
eblock = array(eblock).copy()
if polyind is None:
polyind = ones(t.size, bool)
else:
polyind = array(polyind).copy()
if mode=='poly':
if len(block.shape)>=3:
print "Currently only working for 2D, NxL data blocks."
return -1
if meansub:
m = 1
else:
m = 0
(nlam, nobs) = block.shape
t = (t-t.min())/(t.max()-t.min())
newblock = zeros(block.shape, float)
for ii in range(nlam):
vec = block[ii,:]
p = polyfit(t[polyind], vec[polyind], ord)
newblock[ii,:] = vec - polyval(p,t) + m*vec.mean()
elif mode=='pca':
        pcamatrix, eival, eivec = pca(block, eblock, ord=ord, verbose=verbose)
newblock = block - pcamatrix
if verbose:
from pylab import figure, subplot, colorbar, clim
import nsdata as ns
figure(); cl = [min([block.min(), pcamatrix.min()])/5, max([block.max(), pcamatrix.max()])/5]
subplot(224); ns.imshow(newblock); clim(cl); colorbar()
subplot(222); ns.imshow(block); clim(cl); colorbar()
subplot(223); ns.imshow(pcamatrix); clim(cl); colorbar()
    else:
        print "Unknown detrending mode: " + str(mode)
        return -1
    return newblock
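# A hedged, self-contained sketch of detdata's 'poly' mode on synthetic
# data (the array values below are invented for illustration): a common
# linear drift in time is removed from every wavelength channel.
def _demo_detdata():
    from numpy import linspace, ones, zeros, allclose
    nlam, nobs = 5, 40
    t = linspace(0., 1., nobs)
    block = ones((nlam, 1)) * (2.0 + 0.1 * t)  # identical drifting rows
    clean = detdata(block, t, mode='poly', ord=1)
    # With meansub=False the fit's constant is subtracted too, so the
    # detrended rows should be zero to machine precision:
    print(allclose(clean, zeros((nlam, nobs)), atol=1e-8))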
def pca(block, eblock, ord=1, verbose=False, trans=False):
"""Calculate principal component analysis for an N x M input array
of N observations of M variables.
(Helped function for detdata, but could eventually be stand-alone)
ord -- number of principal components to remove, sorted by eigenvalue size
trans -- take the transpose of block and eblock (i.e., they are
M x N instead of the desired N x M)
WARNING: do not trust any PC's with coefficient magnitudes near-zero!
"""
# IJC 2009/06/04: Created
# 2010-10-13 15:01 IJC: Adapted to use 'MDP' module -- much faster!
from numpy import array, zeros, polyfit, polyval, ones, ceil, linalg,delete,dot,transpose
from pylab import find
from analysis import wmean
import mdp
if trans:
block = transpose(block)
eblock = transpose(eblock)
(nlam, nobs) = block.shape
##########
# Now use MDP module for eigen-computation -- much faster!
block_mdp = block.transpose()
cov_eigv, node = pca_eigv(block_mdp, svd=True, retnode=True)
proj_matrix = node.v[:,::-1]
    eival = cov_eigv.copy() * (nobs - 1)  # Scale for backwards-compatibility
    eivec = proj_matrix.copy()
    # Pristine copies to return; eival/eivec are consumed in the loop below:
    eivalcp = eival.copy()
    eiveccp = eivec.copy()
##########
pcamatrix = zeros(block.shape, float)
# compute all desired PC projections:
for hh in range(ord):
eigind = find(eival==eival.max())
vec = eivec[:,eigind].copy().ravel()
eival = delete(eival,eigind)
eivec = delete(eivec,eigind,1)
pcamatrix += pca_project(vec, block)
return (pcamatrix, eivalcp, eiveccp)
[docs]def pca_project(pc, block):
"""Project a given principal component eigenvector onto a block of
data
    pc -- length-M 1D numpy array (one element per variable)
    block -- M x N input array of M variables by N observations.
"""
from numpy import dot
# 2010-10-15 10:11 IJC: Created
nlam, nobs = block.shape
pca = dot(pc, block).reshape(1,nobs)
a = dot(block, pca.transpose()) / dot(pca,pca.transpose())
return dot(a,pca)
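# A hedged sketch: for a rank-1 block, projecting onto its (unit-norm)
# principal direction should reproduce the block exactly.
def _demo_pca_project():
    from numpy import array, outer, dot, sqrt, allclose
    u = array([1., 2., 3.])          # direction, one element per variable
    v = array([4., 5., 6., 7.])      # amplitude at each observation
    block = outer(u, v)              # rank-1 data block
    pc = u / sqrt(dot(u, u))         # unit-norm principal component
    print(allclose(pca_project(pc, block), block))   # True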
def salign2(s1, s2=None, retshift=False, offset=None):
"""Shift and align spectra.
INPUT:
s1 -- array. If 1D, a second spectrum (to be aligned) must also
be input. If 2D (of shape [nobs x npix]), the
first spectrum is used to align each of the
succeeding (nobs-1) spectra.
OPTIONAL INPUT:
retshift -- return pixel shifts as second tuple element (e.g.,
for use in aligning related vector sets.)
offset -- if None, calculate the aligning offsets. Otherwise,
use these offsets (via SOFFSET) to align the vectors.
OUTPUT:
salign -- array. A 1D or 2D array, corresponding to the shape of s1.
NOTES:
- If 1D, s1 and s2 must be the same length.
- Uses cross-correlation and the nearest pixel. The
disadvantage can be poor alignment; the advantage is speed.
"""
# 2009-08-05 13:43 IJC: Created
from pylab import arange, find, zeros, array
s1 = array(s1, copy=True)
    if offset is None:
offset = soffset(s1, s2=s2, oversamp=4)
    if s2 is None:
nobs, npix = s1.shape
s3 = zeros(s1.shape, float)
s3[0,:] = s1[0,:].copy()
for ii in range(1,nobs):
s3[ii,:] = salign(s1[0,:], s2=s1[ii,:], offset=offset[ii-1])
else:
s2 = array(s2, copy=True)
        if s1.shape != s2.shape:
print "S1 and S2 must have the same shape!"
return []
n = len(s1)
s3 = zeros(s2.shape, float)
if offset<0:
ind0 = -offset
ind1 = n
s3[0:n+offset] = s2[ind0:ind1].copy()
elif offset>0:
ind0 = 0
ind1 = n-offset
s3[offset:n] = s2[ind0:ind1].copy()
else:
s3 = s2.copy()
if retshift:
ret = (s3, offset)
else:
ret = s3
return ret
def salign(s1, s2=None, retshift=False, offset=None):
"""Shift and align spectra.
INPUT:
s1 -- array. If 1D, a second spectrum (to be aligned) must also
be input. If 2D (of shape [nobs x npix]), the
first spectrum is used to align each of the
succeeding (nobs-1) spectra.
OPTIONAL INPUT:
retshift -- return pixel shifts as second tuple element (e.g.,
for use in aligning related vector sets.)
offset -- if None, calculate the aligning offsets. Otherwise,
use these offsets (via SOFFSET) to align the vectors.
OUTPUT:
salign -- array. A 1D or 2D array, corresponding to the shape of s1.
NOTES:
- If 1D, s1 and s2 must be the same length.
- Uses cross-correlation and the nearest pixel. The
disadvantage can be poor alignment; the advantage is speed.
"""
# 2009-08-05 13:43 IJC: Created
from pylab import arange, find, zeros, array
s1 = array(s1, copy=True)
    if offset is None:
offset = soffset(s1, s2=s2, oversamp=4)
    if s2 is None:
nobs, npix = s1.shape
s3 = zeros(s1.shape, float)
s3[0,:] = s1[0,:].copy()
for ii in range(1,nobs):
s3[ii,:] = salign(s1[0,:], s2=s1[ii,:], offset=offset[ii-1])
else:
s2 = array(s2, copy=True)
        if s1.shape != s2.shape:
print "S1 and S2 must have the same shape!"
return []
n = len(s1)
s3 = zeros(s2.shape, float)
if offset<0:
ind0 = -offset
ind1 = n
s3[0:n+offset] = s2[ind0:ind1].copy()
elif offset>0:
ind0 = 0
ind1 = n-offset
s3[offset:n] = s2[ind0:ind1].copy()
else:
s3 = s2.copy()
if retshift:
ret = (s3, offset)
else:
ret = s3
return ret
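# A hedged sketch of salign with an explicitly supplied integer offset
# (soffset can estimate it instead; -3 is given here so the check is
# deterministic): realign a Gaussian 'line' shifted by +3 pixels.
def _demo_salign():
    from numpy import exp, arange, absolute
    x = arange(200, dtype=float)
    s1 = exp(-0.5 * ((x - 100.) / 5.)**2)   # template spectrum
    s2 = exp(-0.5 * ((x - 103.) / 5.)**2)   # same line, 3 pixels redward
    aligned = salign(s1, s2=s2, offset=-3)  # shift s2 three pixels left
    print(absolute(aligned - s1).max())     # ~0 (edge pixels are zeroed)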
def shiftvec(vec, offset):
"""Shift a 1D vector by an offset. Helper function for salign.
non-overlapping elements are replaced by zeros.
"""
    # 2009-08-05 16:31 IJC: Created
    from numpy import array, zeros
    vec = array(vec, copy=True)
if len(vec.shape)>1:
print "vec must be 1D! Exiting."
return []
n = len(vec)
vec2 = zeros(n)
if offset<0:
ind0 = -offset
ind1 = n
        vec2[0:n+offset] = vec[ind0:ind1].copy()
elif offset>0:
ind0 = 0
ind1 = n-offset
        vec2[offset:n] = vec[ind0:ind1].copy()
else:
vec2 = vec.copy()
return vec2
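# A hedged sketch: shiftvec moves a vector rightward by a positive
# offset, zero-filling the vacated elements.
def _demo_shiftvec():
    from numpy import array
    print(shiftvec(array([1., 2., 3., 4., 5.]), 2))   # [ 0.  0.  1.  2.  3.]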
def soffset(s1, s2=None, plotalot=False, oversamp=16, searchrange=[-2,+2], retnew=False, maxshift=10, method='interp'):
"""Get pixelwise offsets between two similar 1D vectors.
INPUT:
s1 -- array. If 1D, a second vector (to be aligned) must also
be input. If 2D (of shape [nvec x npix]), the
first vector is used to align each of the
succeeding (nvec-1) spectra.
oversamp -- scalar. Factor by which to oversample the inputs, to
permit computation of more precise offsets.
searchrange -- list. Minimum and maximum number of pixels on
each side of the maximum cross-correlation
value to use for parabolic fitting.
maxshift -- int. Min/max of pixel correlation lags to compute.
    method -- str: 'interp' to use spline interpolation, 'lsq' for
              least-squares shift-and-fit, or 'lsqlin' for least
              squares with an additional linear baseline term.
OUTPUT:
offset -- array. A 1D array with elements corresponding to the
vector offsets necessary to align s1 and s2 via
salign.
NOTES:
- If 1D, s1 and s2 must be the same length.
- Uses cross-correlation and the nearest pixel. The
disadvantage can be poorer alignment; the advantage is speed.
- Helper function for salign.
"""
# 2009-08-05 13:43 IJC: Created
# 2010-08-19 19:15 IJC: Adding oversamp option
# 2010-12-09 13:27 IJC: Added "old-behavior" flag to correlate fcn. calls
# 2010-12-27 16:50 IJC: Eliminated numpy-correlate entirely;
# replaced with my DCF function.
# 2012-09-23 19:51 IJMC: Fixed a major bug in the 'lsq' mode --
# previously shifted vectors were being
# compared against themselves, so the shift
# returned was well-nigh meaningless.
# 2013-02-25 22:16 IJMC: Added 'lsqlin' option
from scipy.interpolate import UnivariateSpline
from tools import dcf
from pylab import arange, find, zeros, array, polyfit, polyval, concatenate, ones, tile, correlate
import analysis as an
s1 = array(s1, copy=True)
    if s2 is None:
offset = []
newSpec = []
nobs, npix = s1.shape
for ii in range(1,nobs):
off = soffset(s1[0,:], s2=s1[ii,:], plotalot=plotalot, \
retnew=retnew, oversamp=oversamp, \
searchrange=searchrange, maxshift=maxshift, \
method=method)
if retnew:
offset.append(off[0])
newSpec.append(off[1])
else:
offset.append(off)
offset = array([offset]).ravel()
ret = offset
if retnew:
newSpec = array(newSpec)
ret = [ret, newSpec]
else:
s2 = array(s2, copy=True)
ndim1 = len(s1.shape)
if ndim1==0 or s1.size==1:
s1scalar = True
n = len(s2)
else:
s1scalar = False
            if s1.shape != s2.shape:
print "S1 and S2 must have the same shape!"
return []
n = len(s1)
if oversamp<1:
print "Oversamp must be >= 1. Exiting."
return -1
# Do something
x0 = arange(n,dtype=float)
x1 = arange(n*oversamp,dtype=float)/oversamp
xspan = x0.max() - x0.min()
# Pad the vectors so out-of-domain spline values aren't crazy
x0_extra = concatenate((arange(-maxshift,0) + x0.min(), x0, \
arange(1,maxshift+1) + x0.max()))
if not s1scalar:
bins = arange(-maxshift, maxshift, 1./oversamp) + 0.5
s1_extra = concatenate((tile(s1[x0==x0.min()], maxshift), s1, \
tile(s1[x0==x0.max()], maxshift)))
            spline1 = UnivariateSpline(x0_extra, s1_extra, k=3, s=0.)
ss1 = spline1(x1)
s2_extra = concatenate((tile(s2[x0==x0.min()], maxshift), s2, \
tile(s2[x0==x0.max()], maxshift)))
        spline2 = UnivariateSpline(x0_extra, s2_extra, k=3, s=0.)
ss2 = spline2(x1)
if s1scalar: # we were given the offset to apply!
final_offset = s1
else: # s1 was a vector -- compute the offset
if method=='interp':
#lags, xc, exc = dcf(arange(n*oversamp, dtype=float)/oversamp, ss1, ss2, bins=bins)
xc = correlate(ss1-ss1.mean(), ss2-ss2.mean(), mode=2) / oversamp
maxlags = maxshift*oversamp
lags = arange(-maxlags,maxlags+1, dtype=float) / oversamp
xc = xc[len(ss1)-1-maxlags:len(ss1)+maxlags]
offset0 = lags[find(xc==xc.max())][0]
#pdb.set_trace()
elif method=='lsq':
lags = bins
xc = zeros(len(lags))
for ii, shift in enumerate(lags):
shifted_vec = spline2(x1 + shift)
lsqvecs = (ones(n*oversamp), shifted_vec)
#fit, efit = an.lsq(lsqvecs, ss1)
fit = np.dot(np.linalg.pinv(np.array(lsqvecs).transpose()), ss1)
offset, scaling = fit
xc[ii] = ((ss1 - (offset + shifted_vec * scaling))**2).sum()
offset0 = lags[find(xc==xc.min())][0]
elif method=='lsqlin':
lags = bins
xc = zeros(len(lags))
onevec = ones(n*oversamp)
linvec = np.arange(n*oversamp, dtype=float)/(n*oversamp)
for ii, shift in enumerate(lags):
shifted_vec = spline2(x1 + shift)
lsqvecs = (onevec, linvec, shifted_vec)
#fit, efit = an.lsq(lsqvecs, ss1)
fit = np.dot(np.linalg.pinv(np.array(lsqvecs).transpose()), ss1)
offset, lincoef, scaling = fit
xc[ii] = ((ss1 - (offset + lincoef*linvec + shifted_vec * scaling))**2).sum()
offset0 = lags[find(xc==xc.min())][0]
            fitInd = ((lags - offset0) >= (1.0*searchrange[0]/oversamp)) * \
                     ((lags - offset0) <= (1.0*searchrange[1]/oversamp))
x4Fit = lags[fitInd]
y4Fit = xc[fitInd]
quadFit = polyfit(x4Fit, y4Fit, 2)
final_offset = -0.5*quadFit[1]/quadFit[0]
if plotalot:
from pylab import plot, figure, subplot
fig = figure()
plot(lags, xc)
plot(x4Fit, y4Fit, '.')
plot(x4Fit, polyval(quadFit, x4Fit), '--')
        if retnew:
            ret = [array([final_offset]).ravel(), spline2(x0 + final_offset)]
        else:
            ret = array([final_offset]).ravel()
    return ret
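# A hedged sketch (assumes soffset's companion 'tools' and 'analysis'
# modules are importable): recover a known 3-pixel shift between two
# Gaussian profiles via oversampled cross-correlation.
def _demo_soffset():
    from numpy import exp, arange
    x = arange(200, dtype=float)
    s1 = exp(-0.5 * ((x - 100.) / 5.)**2)
    s2 = exp(-0.5 * ((x - 103.) / 5.)**2)
    print(soffset(s1, s2=s2, oversamp=4))   # expect approximately [-3.]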
def fscs(data):
"""Perform Swain et al.'s "Fourier Self-Complementary Spectrum" analysis.
Input: Data, numpy array -- M x N: M observations of N variables"""
# 2010-09-29 09:19 IJC: Created
import numpy as np
nobs, nlam = data.shape
datasub = data
# Compute FFTs
fdata = np.zeros((nobs, nlam), complex)
magdata = np.zeros((nobs, nlam), float)
angdata = np.zeros((nobs, nlam), float)
for ii in range(nlam):
fdata[:,ii] = np.fft.fft(datasub[:,ii])**(1./nlam)
#magdata[:,ii] = np.abs(fdata[:,ii])**(1./nlam)
#angdata[:,ii] = np.angle(fdata[:,ii])
# Compute spectral self-similarity (?)
fsss0 = fdata.prod(1)
#fsss1 = magdata.prod(1) * np.exp(1j*angdata.sum(1)/nlam)
sss0 = np.fft.ifft(fsss0)
#sss1 = np.fft.ifft(fsss1)
return sss0
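# A hedged sketch: fscs applied to a small random block (values invented
# for illustration); the result is one complex value per observation.
def _demo_fscs():
    np.random.seed(1)
    data = 1.0 + 0.01 * np.random.randn(8, 16)   # 8 obs x 16 channels
    print(fscs(data).shape)   # (8,)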
def pca_eigv(block, ord=None, svd=True, retnode=False):
"""Compute the eigenvalues of covariance matrix (i.e., the PC
coefficients) for input data block.
ord -- number of eigenvalues to return. If None, returns all.
svd -- bool flag; whether to use SVD to compute PCA
retnode -- also return the mdp.PCANode object"""
# 2010-10-12 20:58 IJC: Created
import mdp
from numpy import random
try:
node = mdp.nodes.PCANode(svd=svd, output_dim=ord)
node.train(block)
node.stop_training()
except: # occasionally this crashes even when svd=True; a bit of
# white noise prevents this without changing the results.
node = mdp.nodes.PCANode(svd=svd, output_dim=ord)
white_noise = random.randn(block.shape[0], block.shape[1]) * \
block.ravel().std()*1e-15
node.train(block + white_noise )
node.stop_training()
if retnode:
ret = (node.d[::-1], node)
else:
ret = node.d[::-1]
return ret
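# A hedged sketch (requires the legacy 'mdp' module, as pca_eigv itself
# does): the eigenvalues of a nearly rank-1 block should be strongly
# dominated by a single component.
def _demo_pca_eigv():
    np.random.seed(2)
    amp = np.random.randn(100, 1)
    block = np.dot(amp, np.array([[1., 2., 3.]])) \
            + 1e-3 * np.random.randn(100, 3)
    print(pca_eigv(block, svd=True))   # one eigenvalue dwarfs the others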
def pca2(block, eblock=None, ord=1, svd=True):
"""Compute Principal Component Analysis on a 2D array.
NOTE: I don't trust the MDP module (or, I don't understand what
it's doing), since it does not agree with my hand-coded (somewhat
slower) PCA routine.
block -- N x M numpy array -- N observations of M variables.
eblock -- dummy option kept for backwards compatibility; does nothing.
ord -- number of Principal Components to extract
svd -- bool flag; whether to use SVD to compute PCA
"""
# 2010-10-12 21:03 IJC: Created
print "NOTE: I don't trust the MDP module (or, I don't understand what\n"
print "it's doing), since it does not agree with my hand-coded (somewhat\n"
print "slower) PCA routine.\n"
import mdp
# For historical reasons (pre-MDP), transpose the data:
block = block.transpose()
cov_eigv, node = pca_eigv(block, ord=ord, svd=svd, retnode=True)
proj_matrix = node.v[:,::-1]
    # Reconstruct the input data using n PCs:
node2 = mdp.nodes.PCANode(svd=svd, output_dim=ord)
node2.train(block)
node2.stop_training()
nodepca = node.execute(block, n=ord)
nodepca2 = node2.execute(block, n=ord)
reconstruction = node.inverse(nodepca, n=ord).transpose()
reconstruction2 = node2.inverse(nodepca2, n=ord).transpose()
return (reconstruction, cov_eigv, proj_matrix, reconstruction2, node, node2)
def princomp(A,numpc=0):
"""computing eigenvalues and eigenvectors of covariance matrix
Taken from http://glowingpython.blogspot.it/2011/07/pca-and-image-compression-with-numpy.html
"""
# 2014-08-18 14:29 IJMC: Inserted into my code.
M = (A-np.mean(A.T,axis=1)).T # subtract the mean (along columns)
[latent,coeff] = np.linalg.eig(np.cov(M))
p = np.size(coeff,axis=1)
    idx = np.argsort(latent)  # sorting the eigenvalues
    idx = idx[::-1]           # reversed, i.e. in descending order
# sorting eigenvectors according to the sorted eigenvalues
coeff = coeff[:,idx]
latent = latent[idx] # sorting eigenvalues
if numpc < p and numpc >= 0:
coeff = coeff[:,range(numpc)] # cutting some PCs if needed
score = np.dot(coeff.T,M) # projection of the data in the new space
return coeff,score,latent
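# A hedged, self-contained sketch: princomp on strongly correlated 2D
# data; nearly all the variance falls on the first principal component.
def _demo_princomp():
    np.random.seed(0)
    x = np.random.randn(500)
    A = np.vstack((x, 2. * x + 0.01 * np.random.randn(500))).T  # 500 x 2
    coeff, score, latent = princomp(A, numpc=2)
    print(latent[0] / latent.sum())   # expect a value close to 1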