[Picante-commits] r130 - branches/gsoc/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Jul 9 04:37:51 CEST 2008
Author: mrhelmus
Date: 2008-07-09 04:37:51 +0200 (Wed, 09 Jul 2008)
New Revision: 130
Modified:
branches/gsoc/R/pblm.r
Log:
Added working bootstrap routine to calculate CIs for bs and ds. It is slow due to the optim function.
Modified: branches/gsoc/R/pblm.r
===================================================================
--- branches/gsoc/R/pblm.r 2008-07-09 02:03:25 UTC (rev 129)
+++ branches/gsoc/R/pblm.r 2008-07-09 02:37:51 UTC (rev 130)
@@ -1,4 +1,4 @@
-pblm<-function(assocs,tree1=NULL,tree2=NULL,covars1=NULL,covars2=NULL,bootstrap=FALSE,maxit=10000){
+pblm<-function(assocs,tree1=NULL,tree2=NULL,covars1=NULL,covars2=NULL,bootstrap=FALSE,nreps=10,maxit=10000,pstart=c(.5,.5)){
# Make a vector of associations
@@ -165,11 +165,11 @@
if(is.null(tree1) | is.null(tree2))
{
- coefficents<-approxCFstar
- rownames(coefficents)<-c("upper CI 95%","estimate","lower CI 95%")
- colnames(coefficents)<-paste("star",covnames,sep="-")
+ coefs<-approxCFstar
+ rownames(coefs)<-c("upper CI 95%","estimate","lower CI 95%")
+ colnames(coefs)<-paste("star",c("intercept",colnames(U)[-1]),sep="-")
MSEs<-cbind(data.frame(MSETotal),data.frame(MSEStar))
- return(list(MSE=MSEs,signal.strength=NULL,coefficents=data.frame(coefficents),variates=data.frame(data.vecs),residuals=data.frame(Estar)))
+ return(list(MSE=MSEs,signal.strength=NULL,coefficients=data.frame(t(coefs)),CI.boot=NULL,variates=data.frame(data.vecs),residuals=data.frame(Estar),bootvalues=NULL))
} else {
#tree1 is the phylogeny for the rows
@@ -194,7 +194,6 @@
V2<-tree2[colnames(assocs),colnames(assocs)]
}
-
#Calculate Regression Coefficents for the base (assuming strict brownian motion evolution, ds=1)
V1<-as.matrix(V1)
V2<-as.matrix(V2)
@@ -221,8 +220,8 @@
# tau = tau_i + tau_j where tau_i equals the node to tip distance
tau1<-matrix(diag(initV1),nspp1,nspp1) + matrix(diag(initV1),nspp1,nspp1)-2*initV1
tau2<-matrix(diag(initV2),nspp2,nspp2) + matrix(diag(initV2),nspp2,nspp2)-2*initV2
- pstart<-c(.1,.1)
+
# The work horse function to estimate ds
pegls<-function(parameters)
{
@@ -247,24 +246,27 @@
MSEFull<-est$value
d1<-abs(est$par[1])
d2<-abs(est$par[2])
- # Calculate
+
+ # Calculate EGLS coef w estimated ds
V1<-(d1^tau1)*(1-d1^(2*initV1))/(1-d1^2)
V2<-(d2^tau2)*(1-d2^(2*initV2))/(1-d2^2)
V1<-V1/det(V1)^(1/nspp1) # model of coevolution
V2<-V2/det(V2)^(1/nspp2)
V<-kronecker(V2,V1)
invV<-qr.solve(V)
- a<-solve((t(U)%*%invV%*%U),((t(U)%*%invV%*%A))) #NOTE: Ives in his Matlab code uses a Left matrix division symbol (\)
- s2a<-as.vector(MSE)*qr.solve(t(U)%*%invV%*%U)
- sda<-t(diag(s2a)^(.5))
- approxCFfull<-rbind(t(a)-1.96%*%sda, t(a), t(a)+1.96%*%sda)
- Efull<-A-U%*%a
+ aFull<-solve((t(U)%*%invV%*%U),((t(U)%*%invV%*%A))) #NOTE: Ives in his Matlab code uses a Left matrix division symbol (\)
+ s2aFull<-as.vector(MSEFull)*qr.solve(t(U)%*%invV%*%U)
+ sdaFull<-t(diag(s2aFull)^(.5))
+ approxCFfull<-rbind(t(aFull)-1.96%*%sdaFull, t(aFull), t(aFull)+1.96%*%sdaFull)
+ Efull<-A-U%*%aFull
########################################
#organize output
- coefficents<-cbind(approxCFfull,approxCFstar,approxCFbase)
- rownames(coefficents)<-c("upper CI 95%","estimate","lower CI 95%")
- colnames(coefficents)<-c(paste("full",c("intercept",colnames(U)[-1]),sep="-"),paste("star",c("intercept",colnames(U)[-1]),sep="-"),paste("base",c("intercept",colnames(U)[-1]),sep="-"))
+ coefs<-cbind(approxCFfull,approxCFstar,approxCFbase)
+ rownames(coefs)<-c("approx upper CI 95%","estimate","approx lower CI 95%")
+ colnames(coefs)<-c(paste("full",c("intercept",colnames(U)[-1]),sep="-"),paste("star",c("intercept",colnames(U)[-1]),sep="-"),paste("base",c("intercept",colnames(U)[-1]),sep="-"))
+ coefs<-t(coefs)
+ CI.boot<-NULL
MSEs<-cbind(data.frame(MSETotal),data.frame(MSEFull), data.frame(MSEStar), data.frame(MSEbase))
residuals<-cbind(data.frame(Efull),data.frame(Estar),data.frame(Ebase))
rownames(residuals)<-pairnames
@@ -273,10 +275,77 @@
if(bootstrap)
{
- return(NULL)
+ Vtrue<-V
+ Atrue<-A
+ atrue<-aFull
+ dtrue<-c(d1,d2)
+ ehold<-eigen(Vtrue,symmetric=TRUE)
+ L<-ehold$vectors[,nassocs:1] #A or L
+ G<-sort(ehold$values) #D
+ iG<-diag(G^-.5) #iD
+
+ # Construct Y = TT*A so that
+ # E{(Y-b)*(Y-b)'} = E{(TT*A-b)*(TT*A-b)'}
+ # = T*V*T'
+ # = I
+
+ TT<-iG%*%t(L)
+ Y<-TT%*%Atrue
+ Z<-TT%*%U
+
+ res<-(Y-Z%*%atrue) # residuals in orthogonalized space
+ invT<-qr.solve(TT)
+
+ bootlist=NULL
+ for (i in 1:nreps)
+ {
+ randindex<-sample(1:nassocs,replace=TRUE) # vector of random indices
+ #randindex=1:nassocs # retain order
+ YY<-Z%*%atrue+res[randindex] # create new values of Y with random residuals
+ A<-invT%*%YY # back-transformed data
+ pstart<-dtrue+c(0,.1)
+ estRand<-optim(pstart,pegls,control=list(maxit=maxit))
+ MSEFullrand<-estRand$value
+ d1rand<-abs(estRand$par[1])
+ d2rand<-abs(estRand$par[2])
+
+ # Calculate EGLS coef w estimated ds
+ V1<-(d1rand^tau1)*(1-d1rand^(2*initV1))/(1-d1rand^2)
+ V2<-(d2rand^tau2)*(1-d2rand^(2*initV2))/(1-d2rand^2)
+ V1<-V1/det(V1)^(1/nspp1) # model of coevolution
+ V2<-V2/det(V2)^(1/nspp2)
+ V<-kronecker(V2,V1)
+ invV<-qr.solve(V)
+ arand<-solve((t(U)%*%invV%*%U),((t(U)%*%invV%*%A))) #NOTE: Ives in his Matlab code uses a Left matrix division symbol (\)
+
+ bootlist<-rbind(bootlist,c(d1rand, d2rand, t(arand)))
+ }
+ nr<-dim(bootlist)[1]
+ nc<-dim(bootlist)[2]
+ alpha<-0.05
+ conf<-NULL
+ for(j in 1:nc)
+ {
+ bootconf<-quantile(bootlist[,j],probs = c(alpha/2, 1-alpha/2))
+ conf<-rbind(conf,c(bootconf[1],bootconf[2]))
+ }
+ signal.strength<-data.frame(cbind(conf[1:2,1],dtrue,conf[1:2,2]))
+ rownames(signal.strength)<-c("d1","d2")
+ colnames(signal.strength)<-c("booted lower CI 95%","estimate","booted upper CI 95%")
+
+ CI.boot<-conf
+ rownames(CI.boot)<-c("d1","d2","intercept",colnames(U)[-1])
+ colnames(CI.boot)<-c("booted lower CI 95%","booted upper CI 95%")
+ colnames(bootlist)<-c("d1","d2","intercept",colnames(U)[-1])
+ return(list(MSE=MSEs,signal.strength=signal.strength,coefficients=data.frame(coefs),CI.boot=CI.boot,variates=data.frame(data.vecs),residuals=residuals,bootvalues=bootlist))
+
} else {
- return(list(MSE=MSEs,signal.strength=data.frame(cbind(d1,d2)),coefficents=data.frame(coefficents),variates=data.frame(data.vecs),residuals=residuals))
+ conf<-matrix(NA,2,2)
+ signal.strength<-data.frame(cbind(conf[1,],dtrue,conf[2,]))
+ rownames(signal.strength)<-c("d1","d2")
+ colnames(signal.strength)<-c("booted lower CI 95%","estimate","booted upper CI 95%")
+ return(list(MSE=MSEs,signal.strength=signal.strength,coefficients=data.frame(coefs),CI.boot=NULL,variates=data.frame(data.vecs),residuals=residuals,bootvalues=NULL))
}
- }
+ }
-}
\ No newline at end of file
+}
\ No newline at end of file
More information about the Picante-commits
mailing list