[Picante-commits] r133 - branches/gsoc/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Jul 9 08:20:36 CEST 2008
Author: mrhelmus
Date: 2008-07-09 08:20:36 +0200 (Wed, 09 Jul 2008)
New Revision: 133
Added:
branches/gsoc/R/pblm.R
Removed:
branches/gsoc/R/pblm.r
Log:
.r to .R
Added: branches/gsoc/R/pblm.R
===================================================================
--- branches/gsoc/R/pblm.R (rev 0)
+++ branches/gsoc/R/pblm.R 2008-07-09 06:20:36 UTC (rev 133)
@@ -0,0 +1,350 @@
+pblm<-function(assocs,tree1=NULL,tree2=NULL,covars1=NULL,covars2=NULL,bootstrap=FALSE,nreps=10,maxit=10000,pstart=c(.5,.5)){
+
+ # Make a vector of associations
+ A<-as.matrix(as.vector(as.matrix(assocs)))
+ data.vecs<-A
+
+ #numbers of species and interactions
+ nassocs<-length(A)
+ nspp1<-dim(assocs)[1]
+ nspp2<-dim(assocs)[2]
+ sppnames1<-rownames(assocs)
+ sppnames2<-colnames(assocs)
+
+ #make names of species pairs
+ pairnames=NULL # make a vector of pairwise comparison names
+ for (o in 1:(nspp2))
+ {
+ for (u in 1:nspp1)
+ {
+ pairnames<-c(pairnames,paste(sppnames2[o],sppnames1[u],sep="-"))
+ }
+ }
+
+ #Clean Covariates
+ #If the covariate applies to both sets, then it should be in the matrix of the longer set
+ covnames<-NULL
+ C1covs<-NULL
+ if(is.null(covars1))
+ {
+ C1<-NULL
+ } else {
+ if(is.null(dim(covars1)))
+ {
+ C1<-matrix(covars1,nspp1,nspp2,byrow=FALSE)
+ if(is.factor(covars1))
+ {
+ C1<-as.matrix(as.vector(C1))
+ C1covs<-cbind(C1covs,C1)
+ C1<-as.matrix(model.matrix(~as.factor(C1)-1)[,-1])
+ colnames(C1)<-paste(rep("covar1",length(levels(covars1))-1),levels(covars1)[-1],sep="-")
+ } else {
+ C1<-as.matrix(as.vector(C1))
+ C1covs<-cbind(C1covs,C1)
+ }
+ covnames<-c(covnames,"covar1")
+ } else {
+ C1<-NULL
+ for(i in 1:dim(covars1)[2])
+ {
+ C1hold<-matrix(covars1[,i],nspp1,nspp2,byrow=FALSE)
+ if(is.factor(covars1[,i]))
+ {
+ C1hold<-as.matrix(as.vector(C1hold))
+ C1covs<-cbind(C1covs,C1hold)
+ C1hold<-as.matrix(model.matrix(~as.factor(C1hold)-1)[,-1])
+ colnames(C1hold)<-paste(rep(colnames(covars1)[i],length(levels(covars1[,i]))-1),levels(covars1[,i])[-1],sep="-")
+ C1<-cbind(C1,C1hold)
+ } else {
+ C1hold<-as.matrix(as.vector(C1hold))
+ C1covs<-cbind(C1covs,C1hold)
+ colnames(C1hold)<-colnames(covars1)[i]
+ C1<-cbind(C1,C1hold)
+ }
+ covnames<-c(covnames,colnames(covars1)[i])
+ }
+ }
+
+ data.vecs<-cbind(data.vecs,C1covs)
+ }
+
+
+ C2covs<-NULL
+ if(is.null(covars2))
+ {
+ C2<-NULL
+ } else {
+ if(is.null(dim(covars2)))
+ {
+ C2<-matrix(covars2,nspp1,nspp2,byrow=TRUE)
+ if(is.factor(covars2))
+ {
+ C2<-as.matrix(as.vector(C2))
+ C2covs<-cbind(C2covs,C2)
+ C2<-as.matrix(model.matrix(~as.factor(C2)-1)[,-1])
+ colnames(C2)<-paste(rep("covar2",length(levels(covars2))-1),levels(covars2)[-1],sep="-")
+ } else {
+ C2<-as.matrix(as.vector(C2))
+ C2covs<-cbind(C2covs,C2)
+ }
+ covnames<-c(covnames,"covar2")
+ } else {
+ C2<-NULL
+ for(i in 1:dim(covars2)[2])
+ {
+ C2hold<-matrix(covars2[,i],nspp1,nspp2,byrow=TRUE)
+ if(is.factor(covars2[,i]))
+ {
+ C2hold<-as.matrix(as.vector(C2hold))
+ C2covs<-cbind(C2covs,C2hold)
+ C2hold<-as.matrix(model.matrix(~as.factor(C2hold)-1)[,-1])
+ colnames(C2hold)<-paste(rep(colnames(covars2)[i],length(levels(covars2[,i]))-1),levels(covars2[,i])[-1],sep="-")
+ C2<-cbind(C2,C2hold)
+ } else {
+ C2hold<-as.matrix(as.vector(C2hold))
+ C2covs<-cbind(C2covs,C2hold)
+ colnames(C2hold)<-colnames(covars2)[i]
+ C2<-cbind(C2,C2hold)
+ }
+ covnames<-c(covnames,colnames(covars2)[i])
+ }
+ }
+
+ data.vecs<-cbind(data.vecs,C2covs)
+ }
+
+
+# Make U, the combined matrix of covariates
+ U<-NULL
+ if(is.null(C1) & is.null(C2))
+ {
+ U<-rep(1,length(A))
+ } else {
+
+ if(is.null(C1))
+ {
+ U<-rep(1,length(A))
+ } else {
+ U<-cbind(rep(1,length(A)),C1)
+ }
+
+ if(is.null(C2))
+ {
+ U<-U
+ } else {
+ U<-cbind(U,C2)
+ }
+ }
+
+ # Begin to organize output
+ if(is.null(dim(U)))
+ {
+ data.vecs<-data.frame(A)
+ colnames(data.vecs)<-"associations"
+ } else {
+ colnames(data.vecs)<-c("associations", covnames)
+ }
+ rownames(data.vecs)<-pairnames
+
+ ######
+ # Calculate Star Regression Coefficients
+ #calculate for the star (assuming no phylogenetic correlation)
+ astar<-solve((t(U)%*%U),(t(U)%*%A))
+ MSETotal<-cov(A)
+ s2aStar<-as.vector(MSETotal)*qr.solve((t(U)%*%U))
+ sdaStar<-t(diag(s2aStar)^(.5))
+ approxCFstar<-rbind(t(astar)-1.96%*%sdaStar, t(astar), t(astar)+1.96%*%sdaStar)
+ Estar<-A-U%*%astar
+ MSEStar<-cov(matrix(Estar))
+ #######
+
+ if(is.null(tree1) | is.null(tree2))
+ {
+ coefs<-approxCFstar
+ rownames(coefs)<-c("lower CI 95%","estimate","upper CI 95%")
+ colnames(coefs)<-paste("star",c("intercept",colnames(U)[-1]),sep="-")
+ MSEs<-cbind(data.frame(MSETotal),data.frame(MSEStar))
+ return(list(MSE=MSEs,signal.strength=NULL,coefficients=data.frame(t(coefs)),CI.boot=NULL,variates=data.frame(data.vecs),residuals=data.frame(Estar),bootvalues=NULL))
+ } else {
+
+ #tree1 is the phylogeny for the rows
+ #tree2 is the phylogeny for the columns
+
+ #Clean Trees
+ if(is(tree1)[1]=="phylo")
+ {
+ if(is.null(tree1$edge.length)){tree1<-compute.brlen(tree1, 1)} #If phylo has no given branch lengths
+ V1<-vcv.phylo(tree1,cor=TRUE)
+ V1<-V1[rownames(assocs),rownames(assocs)]
+ } else {
+ V1<-tree1[rownames(assocs),rownames(assocs)]
+ }
+
+ if(is(tree2)[1]=="phylo")
+ {
+ if(is.null(tree2$edge.length)){tree2<-compute.brlen(tree2, 1)} #If phylo has no given branch lengths
+ V2<-vcv.phylo(tree2,cor=TRUE)
+ V2<-V2[colnames(assocs),colnames(assocs)]
+ } else {
+ V2<-tree2[colnames(assocs),colnames(assocs)]
+ }
+
+ #Calculate Regression Coefficents for the base (assuming strict brownian motion evolution, ds=1)
+ V1<-as.matrix(V1)
+ V2<-as.matrix(V2)
+
+
+ V1<-V1/det(V1)^(1/nspp1) # scale covariance matrices (this reduces numerical problems caused by
+ V2<-V2/det(V2)^(1/nspp2) # determinants going to infinity or zero)
+ V<-kronecker(V2,V1)
+ invV<-qr.solve(V)
+
+ abase<-solve((t(U)%*%invV%*%U),((t(U)%*%invV%*%A))) #NOTE: Ives in his Matlab code uses a Left matrix division symbol (\)
+ MSEBase<-(t(A-U%*%abase)%*%invV%*%(A-U%*%abase))/(nassocs-1)
+ s2abase<-as.vector(MSEBase)*qr.solve(t(U)%*%invV%*%U)
+ sdabase<-t(diag(s2abase)^(.5))
+ approxCFbase<-rbind(t(abase)-1.96%*%sdabase, t(abase), t(abase)+1.96%*%sdabase)
+ Ebase<-A-U%*%abase
+
+ ###################
+ # Full EGLS estimates of phylogenetic signal
+ ##################
+ initV1<-V1
+ initV2<-V2
+
+ # tau = tau_i + tau_j where tau_i equals the node to tip distance
+ tau1<-matrix(diag(initV1),nspp1,nspp1) + matrix(diag(initV1),nspp1,nspp1)-2*initV1
+ tau2<-matrix(diag(initV2),nspp2,nspp2) + matrix(diag(initV2),nspp2,nspp2)-2*initV2
+
+ # The workhorse function to estimate ds
+ pegls<-function(parameters)
+ {
+ d1<-abs(parameters[1])
+ d2<-abs(parameters[2])
+
+ V1<-(d1^tau1)*(1-d1^(2*initV1))/(1-d1^2)
+ V2<-(d2^tau2)*(1-d2^(2*initV2))/(1-d2^2)
+
+ V1<-V1/det(V1)^(1/nspp1) # model of coevolution
+ V2<-V2/det(V2)^(1/nspp2)
+ V<-kronecker(V2,V1)
+ invV<-qr.solve(V)
+
+ a<-solve((t(U)%*%invV%*%U),((t(U)%*%invV%*%A))) #NOTE: Ives in his Matlab code uses a Left matrix division symbol (\)
+ E<-(A-U%*%a)
+ #MSE
+ t(E)%*%invV%*%E/(nassocs-1)
+ }
+ # estimate d1 and d2 via Nelder-Mead method same as fminsearch in Matlab, by minimizing MSE
+ est<-optim(pstart,pegls,control=list(maxit=maxit))
+ MSEFull<-est$value
+ d1<-abs(est$par[1])
+ d2<-abs(est$par[2])
+
+ # Calculate EGLS coef w estimated ds
+ V1<-(d1^tau1)*(1-d1^(2*initV1))/(1-d1^2)
+ V2<-(d2^tau2)*(1-d2^(2*initV2))/(1-d2^2)
+ V1<-V1/det(V1)^(1/nspp1) # model of coevolution
+ V2<-V2/det(V2)^(1/nspp2)
+ V<-kronecker(V2,V1)
+ invV<-qr.solve(V)
+ aFull<-solve((t(U)%*%invV%*%U),((t(U)%*%invV%*%A))) #NOTE: Ives in his Matlab code uses a Left matrix division symbol (\)
+ s2aFull<-as.vector(MSEFull)*qr.solve(t(U)%*%invV%*%U)
+ sdaFull<-t(diag(s2aFull)^(.5))
+ approxCFfull<-rbind(t(aFull)-1.96%*%sdaFull, t(aFull), t(aFull)+1.96%*%sdaFull)
+ Efull<-A-U%*%aFull
+ ########################################
+
+ #organize output
+ coefs<-cbind(approxCFfull,approxCFstar,approxCFbase)
+ rownames(coefs)<-c("approx lower CI 95%","estimate","approx upper CI 95%")
+ colnames(coefs)<-c(paste("full",c("intercept",colnames(U)[-1]),sep="-"),paste("star",c("intercept",colnames(U)[-1]),sep="-"),paste("base",c("intercept",colnames(U)[-1]),sep="-"))
+ coefs<-t(coefs)
+ CI.boot<-NULL
+ MSEs<-cbind(data.frame(MSETotal),data.frame(MSEFull), data.frame(MSEStar), data.frame(MSEBase))
+ residuals<-cbind(data.frame(Efull),data.frame(Estar),data.frame(Ebase))
+ rownames(residuals)<-pairnames
+
+ #bootstrap CIs
+
+ if(bootstrap)
+ {
+ Vtrue<-V
+ Atrue<-A
+ atrue<-aFull
+ dtrue<-c(d1,d2)
+ ehold<-eigen(Vtrue,symmetric=TRUE)
+ L<-ehold$vectors[,nassocs:1] #A or L
+ G<-sort(ehold$values) #D
+ iG<-diag(G^-.5) #iD
+
+ # Construct Y = TT*A so that
+ # E{(Y-b)*(Y-b)'} = E{(TT*A-b)*(TT*A-b)'}
+ # = T*V*T'
+ # = I
+
+ TT<-iG%*%t(L)
+ Y<-TT%*%Atrue
+ Z<-TT%*%U
+
+ res<-(Y-Z%*%atrue) # residuals in orthogonalized space
+ invT<-qr.solve(TT)
+
+ bootlist=NULL
+ for (i in 1:nreps)
+ {
+ randindex<-sample(1:nassocs,replace=TRUE) # vector of random indices
+ #randindex=1:nassocs # retain order
+ YY<-Z%*%atrue+res[randindex] # create new values of Y with random residuals
+ A<-invT%*%YY # back-transformed data
+ pstart<-dtrue+c(0,.1)
+ estRand<-optim(pstart,pegls,control=list(maxit=maxit))
+ MSEFullrand<-estRand$value
+ d1rand<-abs(estRand$par[1])
+ d2rand<-abs(estRand$par[2])
+
+ # Calculate EGLS coef w estimated ds
+ V1<-(d1rand^tau1)*(1-d1rand^(2*initV1))/(1-d1rand^2)
+ V2<-(d2rand^tau2)*(1-d2rand^(2*initV2))/(1-d2rand^2)
+ V1<-V1/det(V1)^(1/nspp1) # model of coevolution
+ V2<-V2/det(V2)^(1/nspp2)
+ V<-kronecker(V2,V1)
+ invV<-qr.solve(V)
+ arand<-solve((t(U)%*%invV%*%U),((t(U)%*%invV%*%A))) #NOTE: Ives in his Matlab code uses a Left matrix division symbol (\)
+
+ bootlist<-rbind(bootlist,c(d1rand, d2rand, t(arand)))
+ }
+ nr<-dim(bootlist)[1]
+ nc<-dim(bootlist)[2]
+
+ #Calculate bootstrapped CIs
+ alpha<-0.05 # alpha is always 0.05, but could change here
+ conf<-NULL
+ for(j in 1:nc)
+ {
+ bootconf<-quantile(bootlist[,j],probs = c(alpha/2, 1-alpha/2))
+ conf<-rbind(conf,c(bootconf[1],bootconf[2]))
+ }
+ signal.strength<-data.frame(cbind(conf[1:2,1],dtrue,conf[1:2,2]))
+ rownames(signal.strength)<-c("d1","d2")
+ colnames(signal.strength)<-c("booted lower CI 95%","estimate","booted upper CI 95%")
+
+ #organize output
+ CI.boot<-conf
+ rownames(CI.boot)<-c("d1","d2","intercept",colnames(U)[-1])
+ colnames(CI.boot)<-c("booted lower CI 95%","booted upper CI 95%")
+ colnames(bootlist)<-c("d1","d2","intercept",colnames(U)[-1])
+ return(list(MSE=MSEs,signal.strength=signal.strength,coefficients=data.frame(coefs),CI.boot=CI.boot,variates=data.frame(data.vecs),residuals=residuals,bootvalues=bootlist))
+
+ } else {
+ ########
+ # If bootstrapping not performed
+
+ conf<-matrix(NA,2,2)
+ signal.strength<-data.frame(cbind(conf[1,],dtrue,conf[2,]))
+ rownames(signal.strength)<-c("d1","d2")
+ colnames(signal.strength)<-c("booted lower CI 95%","estimate","booted upper CI 95%")
+ return(list(MSE=MSEs,signal.strength=signal.strength,coefficients=data.frame(coefs),CI.boot=NULL,variates=data.frame(data.vecs),residuals=residuals,bootvalues=NULL))
+ }
+ }
+}
\ No newline at end of file
Deleted: branches/gsoc/R/pblm.r
===================================================================
--- branches/gsoc/R/pblm.r 2008-07-09 06:18:39 UTC (rev 132)
+++ branches/gsoc/R/pblm.r 2008-07-09 06:20:36 UTC (rev 133)
@@ -1,350 +0,0 @@
-pblm<-function(assocs,tree1=NULL,tree2=NULL,covars1=NULL,covars2=NULL,bootstrap=FALSE,nreps=10,maxit=10000,pstart=c(.5,.5)){
-
- # Make a vector of associations
- A<-as.matrix(as.vector(as.matrix(assocs)))
- data.vecs<-A
-
- #numbers of species and interactions
- nassocs<-length(A)
- nspp1<-dim(assocs)[1]
- nspp2<-dim(assocs)[2]
- sppnames1<-rownames(assocs)
- sppnames2<-colnames(assocs)
-
- #make names of species pairs
- pairnames=NULL # make a vector of pairwise comparison names
- for (o in 1:(nspp2))
- {
- for (u in 1:nspp1)
- {
- pairnames<-c(pairnames,paste(sppnames2[o],sppnames1[u],sep="-"))
- }
- }
-
- #Clean Covariates
- #If the covariate applies to both sets, then it should be in the matrix of the longer set
- covnames<-NULL
- C1covs<-NULL
- if(is.null(covars1))
- {
- C1<-NULL
- } else {
- if(is.null(dim(covars1)))
- {
- C1<-matrix(covars1,nspp1,nspp2,byrow=FALSE)
- if(is.factor(covars1))
- {
- C1<-as.matrix(as.vector(C1))
- C1covs<-cbind(C1covs,C1)
- C1<-as.matrix(model.matrix(~as.factor(C1)-1)[,-1])
- colnames(C1)<-paste(rep("covar1",length(levels(covars1))-1),levels(covars1)[-1],sep="-")
- } else {
- C1<-as.matrix(as.vector(C1))
- C1covs<-cbind(C1covs,C1)
- }
- covnames<-c(covnames,"covar1")
- } else {
- C1<-NULL
- for(i in 1:dim(covars1)[2])
- {
- C1hold<-matrix(covars1[,i],nspp1,nspp2,byrow=FALSE)
- if(is.factor(covars1[,i]))
- {
- C1hold<-as.matrix(as.vector(C1hold))
- C1covs<-cbind(C1covs,C1hold)
- C1hold<-as.matrix(model.matrix(~as.factor(C1hold)-1)[,-1])
- colnames(C1hold)<-paste(rep(colnames(covars1)[i],length(levels(covars1[,i]))-1),levels(covars1[,i])[-1],sep="-")
- C1<-cbind(C1,C1hold)
- } else {
- C1hold<-as.matrix(as.vector(C1hold))
- C1covs<-cbind(C1covs,C1hold)
- colnames(C1hold)<-colnames(covars1)[i]
- C1<-cbind(C1,C1hold)
- }
- covnames<-c(covnames,colnames(covars1)[i])
- }
- }
-
- data.vecs<-cbind(data.vecs,C1covs)
- }
-
-
- C2covs<-NULL
- if(is.null(covars2))
- {
- C2<-NULL
- } else {
- if(is.null(dim(covars2)))
- {
- C2<-matrix(covars2,nspp1,nspp2,byrow=TRUE)
- if(is.factor(covars2))
- {
- C2<-as.matrix(as.vector(C2))
- C2covs<-cbind(C2covs,C2)
- C2<-as.matrix(model.matrix(~as.factor(C2)-1)[,-1])
- colnames(C2)<-paste(rep("covar2",length(levels(covars2))-1),levels(covars2)[-1],sep="-")
- } else {
- C2<-as.matrix(as.vector(C2))
- C2covs<-cbind(C2covs,C2)
- }
- covnames<-c(covnames,"covar2")
- } else {
- C2<-NULL
- for(i in 1:dim(covars2)[2])
- {
- C2hold<-matrix(covars2[,i],nspp1,nspp2,byrow=TRUE)
- if(is.factor(covars2[,i]))
- {
- C2hold<-as.matrix(as.vector(C2hold))
- C2covs<-cbind(C2covs,C2hold)
- C2hold<-as.matrix(model.matrix(~as.factor(C2hold)-1)[,-1])
- colnames(C2hold)<-paste(rep(colnames(covars2)[i],length(levels(covars2[,i]))-1),levels(covars2[,i])[-1],sep="-")
- C2<-cbind(C2,C2hold)
- } else {
- C2hold<-as.matrix(as.vector(C2hold))
- C2covs<-cbind(C2covs,C2hold)
- colnames(C2hold)<-colnames(covars2)[i]
- C2<-cbind(C2,C2hold)
- }
- covnames<-c(covnames,colnames(covars2)[i])
- }
- }
-
- data.vecs<-cbind(data.vecs,C2covs)
- }
-
-
-# Make U, the combined matrix of covariates
- U<-NULL
- if(is.null(C1) & is.null(C2))
- {
- U<-rep(1,length(A))
- } else {
-
- if(is.null(C1))
- {
- U<-rep(1,length(A))
- } else {
- U<-cbind(rep(1,length(A)),C1)
- }
-
- if(is.null(C2))
- {
- U<-U
- } else {
- U<-cbind(U,C2)
- }
- }
-
- # Begin to organize output
- if(is.null(dim(U)))
- {
- data.vecs<-data.frame(A)
- colnames(data.vecs)<-"associations"
- } else {
- colnames(data.vecs)<-c("associations", covnames)
- }
- rownames(data.vecs)<-pairnames
-
- ######
- # Calculate Star Regression Coefficients
- #calculate for the star (assuming no phylogenetic correlation)
- astar<-solve((t(U)%*%U),(t(U)%*%A))
- MSETotal<-cov(A)
- s2aStar<-as.vector(MSETotal)*qr.solve((t(U)%*%U))
- sdaStar<-t(diag(s2aStar)^(.5))
- approxCFstar<-rbind(t(astar)-1.96%*%sdaStar, t(astar), t(astar)+1.96%*%sdaStar)
- Estar<-A-U%*%astar
- MSEStar<-cov(matrix(Estar))
- #######
-
- if(is.null(tree1) | is.null(tree2))
- {
- coefs<-approxCFstar
- rownames(coefs)<-c("lower CI 95%","estimate","upper CI 95%")
- colnames(coefs)<-paste("star",c("intercept",colnames(U)[-1]),sep="-")
- MSEs<-cbind(data.frame(MSETotal),data.frame(MSEStar))
- return(list(MSE=MSEs,signal.strength=NULL,coefficients=data.frame(t(coefs)),CI.boot=NULL,variates=data.frame(data.vecs),residuals=data.frame(Estar),bootvalues=NULL))
- } else {
-
- #tree1 is the phylogeny for the rows
- #tree2 is the phylogeny for the columns
-
- #Clean Trees
- if(is(tree1)[1]=="phylo")
- {
- if(is.null(tree1$edge.length)){tree1<-compute.brlen(tree1, 1)} #If phylo has no given branch lengths
- V1<-vcv.phylo(tree1,cor=TRUE)
- V1<-V1[rownames(assocs),rownames(assocs)]
- } else {
- V1<-tree1[rownames(assocs),rownames(assocs)]
- }
-
- if(is(tree2)[1]=="phylo")
- {
- if(is.null(tree2$edge.length)){tree2<-compute.brlen(tree2, 1)} #If phylo has no given branch lengths
- V2<-vcv.phylo(tree2,cor=TRUE)
- V2<-V2[colnames(assocs),colnames(assocs)]
- } else {
- V2<-tree2[colnames(assocs),colnames(assocs)]
- }
-
- #Calculate Regression Coefficents for the base (assuming strict brownian motion evolution, ds=1)
- V1<-as.matrix(V1)
- V2<-as.matrix(V2)
-
-
- V1<-V1/det(V1)^(1/nspp1) # scale covariance matrices (this reduces numerical problems caused by
- V2<-V2/det(V2)^(1/nspp2) # determinants going to infinity or zero)
- V<-kronecker(V2,V1)
- invV<-qr.solve(V)
-
- abase<-solve((t(U)%*%invV%*%U),((t(U)%*%invV%*%A))) #NOTE: Ives in his Matlab code uses a Left matrix division symbol (\)
- MSEBase<-(t(A-U%*%abase)%*%invV%*%(A-U%*%abase))/(nassocs-1)
- s2abase<-as.vector(MSEBase)*qr.solve(t(U)%*%invV%*%U)
- sdabase<-t(diag(s2abase)^(.5))
- approxCFbase<-rbind(t(abase)-1.96%*%sdabase, t(abase), t(abase)+1.96%*%sdabase)
- Ebase<-A-U%*%abase
-
- ###################
- # Full EGLS estimates of phylogenetic signal
- ##################
- initV1<-V1
- initV2<-V2
-
- # tau = tau_i + tau_j where tau_i equals the node to tip distance
- tau1<-matrix(diag(initV1),nspp1,nspp1) + matrix(diag(initV1),nspp1,nspp1)-2*initV1
- tau2<-matrix(diag(initV2),nspp2,nspp2) + matrix(diag(initV2),nspp2,nspp2)-2*initV2
-
- # The workhorse function to estimate ds
- pegls<-function(parameters)
- {
- d1<-abs(parameters[1])
- d2<-abs(parameters[2])
-
- V1<-(d1^tau1)*(1-d1^(2*initV1))/(1-d1^2)
- V2<-(d2^tau2)*(1-d2^(2*initV2))/(1-d2^2)
-
- V1<-V1/det(V1)^(1/nspp1) # model of coevolution
- V2<-V2/det(V2)^(1/nspp2)
- V<-kronecker(V2,V1)
- invV<-qr.solve(V)
-
- a<-solve((t(U)%*%invV%*%U),((t(U)%*%invV%*%A))) #NOTE: Ives in his Matlab code uses a Left matrix division symbol (\)
- E<-(A-U%*%a)
- #MSE
- t(E)%*%invV%*%E/(nassocs-1)
- }
- # estimate d1 and d2 via Nelder-Mead method same as fminsearch in Matlab, by minimizing MSE
- est<-optim(pstart,pegls,control=list(maxit=maxit))
- MSEFull<-est$value
- d1<-abs(est$par[1])
- d2<-abs(est$par[2])
-
- # Calculate EGLS coef w estimated ds
- V1<-(d1^tau1)*(1-d1^(2*initV1))/(1-d1^2)
- V2<-(d2^tau2)*(1-d2^(2*initV2))/(1-d2^2)
- V1<-V1/det(V1)^(1/nspp1) # model of coevolution
- V2<-V2/det(V2)^(1/nspp2)
- V<-kronecker(V2,V1)
- invV<-qr.solve(V)
- aFull<-solve((t(U)%*%invV%*%U),((t(U)%*%invV%*%A))) #NOTE: Ives in his Matlab code uses a Left matrix division symbol (\)
- s2aFull<-as.vector(MSEFull)*qr.solve(t(U)%*%invV%*%U)
- sdaFull<-t(diag(s2aFull)^(.5))
- approxCFfull<-rbind(t(aFull)-1.96%*%sdaFull, t(aFull), t(aFull)+1.96%*%sdaFull)
- Efull<-A-U%*%aFull
- ########################################
-
- #organize output
- coefs<-cbind(approxCFfull,approxCFstar,approxCFbase)
- rownames(coefs)<-c("approx lower CI 95%","estimate","approx upper CI 95%")
- colnames(coefs)<-c(paste("full",c("intercept",colnames(U)[-1]),sep="-"),paste("star",c("intercept",colnames(U)[-1]),sep="-"),paste("base",c("intercept",colnames(U)[-1]),sep="-"))
- coefs<-t(coefs)
- CI.boot<-NULL
- MSEs<-cbind(data.frame(MSETotal),data.frame(MSEFull), data.frame(MSEStar), data.frame(MSEBase))
- residuals<-cbind(data.frame(Efull),data.frame(Estar),data.frame(Ebase))
- rownames(residuals)<-pairnames
-
- #bootstrap CIs
-
- if(bootstrap)
- {
- Vtrue<-V
- Atrue<-A
- atrue<-aFull
- dtrue<-c(d1,d2)
- ehold<-eigen(Vtrue,symmetric=TRUE)
- L<-ehold$vectors[,nassocs:1] #A or L
- G<-sort(ehold$values) #D
- iG<-diag(G^-.5) #iD
-
- # Construct Y = TT*A so that
- # E{(Y-b)*(Y-b)'} = E{(TT*A-b)*(TT*A-b)'}
- # = T*V*T'
- # = I
-
- TT<-iG%*%t(L)
- Y<-TT%*%Atrue
- Z<-TT%*%U
-
- res<-(Y-Z%*%atrue) # residuals in orthogonalized space
- invT<-qr.solve(TT)
-
- bootlist=NULL
- for (i in 1:nreps)
- {
- randindex<-sample(1:nassocs,replace=TRUE) # vector of random indices
- #randindex=1:nassocs # retain order
- YY<-Z%*%atrue+res[randindex] # create new values of Y with random residuals
- A<-invT%*%YY # back-transformed data
- pstart<-dtrue+c(0,.1)
- estRand<-optim(pstart,pegls,control=list(maxit=maxit))
- MSEFullrand<-estRand$value
- d1rand<-abs(estRand$par[1])
- d2rand<-abs(estRand$par[2])
-
- # Calculate EGLS coef w estimated ds
- V1<-(d1rand^tau1)*(1-d1rand^(2*initV1))/(1-d1rand^2)
- V2<-(d2rand^tau2)*(1-d2rand^(2*initV2))/(1-d2rand^2)
- V1<-V1/det(V1)^(1/nspp1) # model of coevolution
- V2<-V2/det(V2)^(1/nspp2)
- V<-kronecker(V2,V1)
- invV<-qr.solve(V)
- arand<-solve((t(U)%*%invV%*%U),((t(U)%*%invV%*%A))) #NOTE: Ives in his Matlab code uses a Left matrix division symbol (\)
-
- bootlist<-rbind(bootlist,c(d1rand, d2rand, t(arand)))
- }
- nr<-dim(bootlist)[1]
- nc<-dim(bootlist)[2]
-
- #Calculate bootstrapped CIs
- alpha<-0.05 # alpha is always 0.05, but could change here
- conf<-NULL
- for(j in 1:nc)
- {
- bootconf<-quantile(bootlist[,j],probs = c(alpha/2, 1-alpha/2))
- conf<-rbind(conf,c(bootconf[1],bootconf[2]))
- }
- signal.strength<-data.frame(cbind(conf[1:2,1],dtrue,conf[1:2,2]))
- rownames(signal.strength)<-c("d1","d2")
- colnames(signal.strength)<-c("booted lower CI 95%","estimate","booted upper CI 95%")
-
- #organize output
- CI.boot<-conf
- rownames(CI.boot)<-c("d1","d2","intercept",colnames(U)[-1])
- colnames(CI.boot)<-c("booted lower CI 95%","booted upper CI 95%")
- colnames(bootlist)<-c("d1","d2","intercept",colnames(U)[-1])
- return(list(MSE=MSEs,signal.strength=signal.strength,coefficients=data.frame(coefs),CI.boot=CI.boot,variates=data.frame(data.vecs),residuals=residuals,bootvalues=bootlist))
-
- } else {
- ########
- # If bootstrapping not performed
-
- conf<-matrix(NA,2,2)
- signal.strength<-data.frame(cbind(conf[1,],dtrue,conf[2,]))
- rownames(signal.strength)<-c("d1","d2")
- colnames(signal.strength)<-c("booted lower CI 95%","estimate","booted upper CI 95%")
- return(list(MSE=MSEs,signal.strength=signal.strength,coefficients=data.frame(coefs),CI.boot=NULL,variates=data.frame(data.vecs),residuals=residuals,bootvalues=NULL))
- }
- }
-}
\ No newline at end of file
More information about the Picante-commits
mailing list