[Picante-commits] r141 - in pkg: . R man

Sat Jul 19 02:55:55 CEST 2008

Author: skembel
Date: 2008-07-19 02:55:55 +0200 (Sat, 19 Jul 2008)
New Revision: 141

Added:
   pkg/R/pblm.R
   pkg/R/phylostruct.R
   pkg/R/sppregs.R
   pkg/man/pblm.Rd
   pkg/man/phylostruct.Rd
   pkg/man/sppregs.Rd
Modified:
   pkg/DESCRIPTION
   pkg/R/phylodiversity.R
   pkg/R/specaccum.psr.R
Log:
Merge gsoc branch into trunk in prep for 0.3 release

Modified: pkg/DESCRIPTION
===================================================================

--- pkg/DESCRIPTION	2008-07-19 00:51:58 UTC (rev 140)
+++ pkg/DESCRIPTION	2008-07-19 00:55:55 UTC (rev 141)
@@ -6,6 +6,6 @@
 Author: Steve Kembel <skembel at berkeley.edu>, David Ackerly <dackerly at berkeley.edu>, Simon Blomberg <s.blomberg1 at uq.edu.au>, Peter Cowan <pdc at berkeley.edu>, Matthew Helmus <mrhelmus at wisc.edu>, Cam Webb <cwebb at oeb.harvard.edu>
 Maintainer: Steve Kembel <skembel at berkeley.edu>
 Depends: ape, vegan
-Suggests: circular
+Suggests: brglm, circular
 Description: Phylocom integration, community analyses, null-models, traits and evolution in R
 License: GPL-2

Copied: pkg/R/pblm.R (from rev 140, branches/gsoc/R/pblm.R)
===================================================================
--- pkg/R/pblm.R	                        (rev 0)
+++ pkg/R/pblm.R	2008-07-19 00:55:55 UTC (rev 141)
@@ -0,0 +1,350 @@
+pblm<-function(assocs,tree1=NULL,tree2=NULL,covars1=NULL,covars2=NULL,bootstrap=FALSE,nreps=10,maxit=10000,pstart=c(.5,.5)){
+
+  # Make a vector of associations
+  A<-as.matrix(as.vector(as.matrix(assocs)))
+  data.vecs<-A
+  
+  #numbers of species and interactions
+  nassocs<-length(A)
+  nspp1<-dim(assocs)[1]
+	nspp2<-dim(assocs)[2]
+  sppnames1<-rownames(assocs)
+  sppnames2<-colnames(assocs)
+  
+  #make names of species pairs
+  pairnames=NULL  # make a vector of pairwise comparison names
+  for (o in 1:(nspp2))
+  {
+    for (u in 1:nspp1)
+    {
+      pairnames<-c(pairnames,paste(sppnames2[o],sppnames1[u],sep="-"))
+    }
+  }
+    
+  #Clean Covariates
+  #If the covariate applies to both sets, then it should be in the matrix of the longer set 
+  covnames<-NULL
+  C1covs<-NULL
+  if(is.null(covars1))
+  {
+    C1<-NULL
+  } else {
+    if(is.null(dim(covars1)))
+    {
+      C1<-matrix(covars1,nspp1,nspp2,byrow=FALSE)
+      if(is.factor(covars1))
+      {
+        C1<-as.matrix(as.vector(C1))
+        C1covs<-cbind(C1covs,C1)
+        C1<-as.matrix(model.matrix(~as.factor(C1)-1)[,-1])
+        colnames(C1)<-paste(rep("covar1",length(levels(covars1))-1),levels(covars1)[-1],sep="-")
+      } else {
+        C1<-as.matrix(as.vector(C1))
+        C1covs<-cbind(C1covs,C1)
+      }
+      covnames<-c(covnames,"covar1")
+    } else {
+      C1<-NULL
+      for(i in 1:dim(covars1)[2])
+      {
+        C1hold<-matrix(covars1[,i],nspp1,nspp2,byrow=FALSE)
+        if(is.factor(covars1[,i]))
+        {
+          C1hold<-as.matrix(as.vector(C1hold))
+          C1covs<-cbind(C1covs,C1hold)
+          C1hold<-as.matrix(model.matrix(~as.factor(C1hold)-1)[,-1])
+          colnames(C1hold)<-paste(rep(colnames(covars1)[i],length(levels(covars1[,i]))-1),levels(covars1[,i])[-1],sep="-")
+          C1<-cbind(C1,C1hold)
+        } else { 
+          C1hold<-as.matrix(as.vector(C1hold))
+          C1covs<-cbind(C1covs,C1hold)
+          colnames(C1hold)<-colnames(covars1)[i]
+          C1<-cbind(C1,C1hold)
+        }
+        covnames<-c(covnames,colnames(covars1)[i])
+      }
+    }
+  
+  data.vecs<-cbind(data.vecs,C1covs)
+  }
+
+
+  C2covs<-NULL
+  if(is.null(covars2))
+  {
+    C2<-NULL
+  } else {
+    if(is.null(dim(covars2)))
+    {
+      C2<-matrix(covars2,nspp1,nspp2,byrow=TRUE)
+      if(is.factor(covars2))
+      {
+        C2<-as.matrix(as.vector(C2))
+        C2covs<-cbind(C2covs,C2)
+        C2<-as.matrix(model.matrix(~as.factor(C2)-1)[,-1])
+        colnames(C2)<-paste(rep("covar2",length(levels(covars2))-1),levels(covars2)[-1],sep="-")
+      } else {
+        C2<-as.matrix(as.vector(C2))
+        C2covs<-cbind(C2covs,C2)
+      }
+      covnames<-c(covnames,"covar2")
+    } else {
+      C2<-NULL
+      for(i in 1:dim(covars2)[2])
+      {
+        C2hold<-matrix(covars2[,i],nspp1,nspp2,byrow=TRUE)
+        if(is.factor(covars2[,i]))
+        {
+          C2hold<-as.matrix(as.vector(C2hold))
+          C2covs<-cbind(C2covs,C2hold)
+          C2hold<-as.matrix(model.matrix(~as.factor(C2hold)-1)[,-1])
+          colnames(C2hold)<-paste(rep(colnames(covars2)[i],length(levels(covars2[,i]))-1),levels(covars2[,i])[-1],sep="-")
+          C2<-cbind(C2,C2hold)
+        } else { 
+          C2hold<-as.matrix(as.vector(C2hold))
+          C2covs<-cbind(C2covs,C2hold)
+          colnames(C2hold)<-colnames(covars2)[i]
+          C2<-cbind(C2,C2hold)
+        }
+        covnames<-c(covnames,colnames(covars2)[i])
+      }
+    }
+  
+  data.vecs<-cbind(data.vecs,C2covs)
+  }
+  
+  
+# Make U, the combined matrix of covariates 
+  U<-NULL
+  if(is.null(C1) & is.null(C2))
+  {
+    U<-rep(1,length(A))
+  } else {
+    
+    if(is.null(C1))
+    {
+      U<-rep(1,length(A))
+    } else {
+      U<-cbind(rep(1,length(A)),C1)
+    }
+    
+    if(is.null(C2))
+    {
+      U<-U
+    } else {
+      U<-cbind(U,C2)
+    }
+  }
+
+  # Begin to organize output
+  if(is.null(dim(U)))
+  {
+    data.vecs<-data.frame(A)
+    colnames(data.vecs)<-"associations"
+  } else {    
+    colnames(data.vecs)<-c("associations", covnames)
+  }
+  rownames(data.vecs)<-pairnames
+  
+  ######
+  # Calculate Star Regression Coefficients
+  #calculate for the star (assuming no phylogenetic correlation)
+	astar<-solve((t(U)%*%U),(t(U)%*%A))
+	MSETotal<-cov(A)
+	s2aStar<-as.vector(MSETotal)*qr.solve((t(U)%*%U))
+	sdaStar<-t(diag(s2aStar)^(.5))
+	approxCFstar<-rbind(t(astar)-1.96%*%sdaStar, t(astar), t(astar)+1.96%*%sdaStar)
+  Estar<-A-U%*%astar
+  MSEStar<-cov(matrix(Estar))
+  #######
+  
+  if(is.null(tree1) | is.null(tree2))
+  {
+    coefs<-approxCFstar
+    rownames(coefs)<-c("lower CI 95%","estimate","upper CI 95%")
+    colnames(coefs)<-paste("star",c("intercept",colnames(U)[-1]),sep="-")
+    MSEs<-cbind(data.frame(MSETotal),data.frame(MSEStar))
+    return(list(MSE=MSEs,signal.strength=NULL,coefficients=data.frame(t(coefs)),CI.boot=NULL,variates=data.frame(data.vecs),residuals=data.frame(Estar),bootvalues=NULL))
+  } else {
+  
+    #tree1 is the phylogeny for the rows
+    #tree2 is the phylogeny for the columns
+    
+    #Clean Trees
+    if(is(tree1)[1]=="phylo")
+    {
+      if(is.null(tree1$edge.length)){tree1<-compute.brlen(tree1, 1)}  #If phylo has no given branch lengths
+      V1<-vcv.phylo(tree1,cor=TRUE)
+      V1<-V1[rownames(assocs),rownames(assocs)]
+    } else {
+      V1<-tree1[rownames(assocs),rownames(assocs)]
+    }    
+    
+    if(is(tree2)[1]=="phylo")
+    {
+    if(is.null(tree2$edge.length)){tree2<-compute.brlen(tree2, 1)}  #If phylo has no given branch lengths
+      V2<-vcv.phylo(tree2,cor=TRUE)
+      V2<-V2[colnames(assocs),colnames(assocs)]
+    } else {
+      V2<-tree2[colnames(assocs),colnames(assocs)]
+    }
+  
+    #Calculate Regression Coefficents for the base (assuming strict brownian motion evolution, ds=1)
+    V1<-as.matrix(V1)
+    V2<-as.matrix(V2)
+    
+    
+    V1<-V1/det(V1)^(1/nspp1)   # scale covariance matrices (this reduces numerical problems caused by
+  	V2<-V2/det(V2)^(1/nspp2)   # determinants going to infinity or zero)
+  	V<-kronecker(V2,V1)  
+    invV<-qr.solve(V)
+    
+    abase<-solve((t(U)%*%invV%*%U),((t(U)%*%invV%*%A)))   #NOTE: Ives in his Matlab code uses a Left matrix division symbol (\)
+    MSEBase<-(t(A-U%*%abase)%*%invV%*%(A-U%*%abase))/(nassocs-1)  
+    s2abase<-as.vector(MSEBase)*qr.solve(t(U)%*%invV%*%U)
+  	sdabase<-t(diag(s2abase)^(.5))
+    approxCFbase<-rbind(t(abase)-1.96%*%sdabase, t(abase), t(abase)+1.96%*%sdabase)
+    Ebase<-A-U%*%abase
+    
+    ###################
+    # Full EGLS estimates of phylogenetic signal
+    ##################
+  	initV1<-V1
+  	initV2<-V2
+  
+  	# tau = tau_i + tau_j where tau_i equals the node to tip distance
+  	tau1<-matrix(diag(initV1),nspp1,nspp1) + matrix(diag(initV1),nspp1,nspp1)-2*initV1
+  	tau2<-matrix(diag(initV2),nspp2,nspp2) + matrix(diag(initV2),nspp2,nspp2)-2*initV2
+    
+    # The workhorse function to estimate ds
+    pegls<-function(parameters)
+    {
+      d1<-abs(parameters[1])
+      d2<-abs(parameters[2])
+  	
+      V1<-(d1^tau1)*(1-d1^(2*initV1))/(1-d1^2)
+      V2<-(d2^tau2)*(1-d2^(2*initV2))/(1-d2^2)
+  
+      V1<-V1/det(V1)^(1/nspp1)   # model of coevolution
+      V2<-V2/det(V2)^(1/nspp2)
+      V<-kronecker(V2,V1)  
+      invV<-qr.solve(V)
+    
+      a<-solve((t(U)%*%invV%*%U),((t(U)%*%invV%*%A)))   #NOTE: Ives in his Matlab code uses a Left matrix division symbol (\)
+      E<-(A-U%*%a)
+      #MSE
+      t(E)%*%invV%*%E/(nassocs-1)
+    }
+    # estimate d1 and d2 via Nelder-Mead method same as fminsearch in Matlab, by minimizing MSE
+    est<-optim(pstart,pegls,control=list(maxit=maxit))        
+    MSEFull<-est$value
+  	d1<-abs(est$par[1])
+  	d2<-abs(est$par[2])
+  	
+    # Calculate EGLS coef w estimated ds 
+  	V1<-(d1^tau1)*(1-d1^(2*initV1))/(1-d1^2)
+    V2<-(d2^tau2)*(1-d2^(2*initV2))/(1-d2^2)
+    V1<-V1/det(V1)^(1/nspp1)   # model of coevolution
+    V2<-V2/det(V2)^(1/nspp2)
+    V<-kronecker(V2,V1)  
+    invV<-qr.solve(V)
+    aFull<-solve((t(U)%*%invV%*%U),((t(U)%*%invV%*%A)))   #NOTE: Ives in his Matlab code uses a Left matrix division symbol (\)
+    s2aFull<-as.vector(MSEFull)*qr.solve(t(U)%*%invV%*%U)
+  	sdaFull<-t(diag(s2aFull)^(.5))
+  	approxCFfull<-rbind(t(aFull)-1.96%*%sdaFull, t(aFull), t(aFull)+1.96%*%sdaFull)
+    Efull<-A-U%*%aFull
+    ########################################
+    
+    #organize output
+    coefs<-cbind(approxCFfull,approxCFstar,approxCFbase)
+    rownames(coefs)<-c("approx lower CI 95%","estimate","approx upper CI 95%")
+    colnames(coefs)<-c(paste("full",c("intercept",colnames(U)[-1]),sep="-"),paste("star",c("intercept",colnames(U)[-1]),sep="-"),paste("base",c("intercept",colnames(U)[-1]),sep="-"))
+    coefs<-t(coefs)
+    CI.boot<-NULL
+    MSEs<-cbind(data.frame(MSETotal),data.frame(MSEFull), data.frame(MSEStar), data.frame(MSEBase))
+    residuals<-cbind(data.frame(Efull),data.frame(Estar),data.frame(Ebase))
+    rownames(residuals)<-pairnames
+  
+    #bootstrap CIs
+    
+    if(bootstrap)
+    {
+      Vtrue<-V
+		  Atrue<-A
+		  atrue<-aFull
+		  dtrue<-c(d1,d2)
+		  ehold<-eigen(Vtrue,symmetric=TRUE)
+      L<-ehold$vectors[,nassocs:1]    #A or L
+      G<-sort(ehold$values)      #D
+      iG<-diag(G^-.5)    #iD
+
+      # Construct Y = TT*A so that 
+			# E{(Y-b)*(Y-b)'} = E{(TT*A-b)*(TT*A-b)'}
+			#				  = T*V*T'
+			#				  = I
+
+      TT<-iG%*%t(L)
+		  Y<-TT%*%Atrue
+		  Z<-TT%*%U
+
+		  res<-(Y-Z%*%atrue)	# residuals in orthogonalized space
+		  invT<-qr.solve(TT)
+			
+      bootlist=NULL
+      for (i in 1:nreps)
+      {
+        randindex<-sample(1:nassocs,replace=TRUE)	# vector of random indices
+        #randindex=1:nassocs					# retain order
+        YY<-Z%*%atrue+res[randindex]	# create new values of Y with random residuals
+        A<-invT%*%YY	# back-transformed data
+        pstart<-dtrue+c(0,.1)
+        estRand<-optim(pstart,pegls,control=list(maxit=maxit))
+        MSEFullrand<-estRand$value
+        d1rand<-abs(estRand$par[1])
+  	    d2rand<-abs(estRand$par[2])
+  	
+        # Calculate EGLS coef w estimated ds 
+        V1<-(d1rand^tau1)*(1-d1rand^(2*initV1))/(1-d1rand^2)
+        V2<-(d2rand^tau2)*(1-d2rand^(2*initV2))/(1-d2rand^2)
+        V1<-V1/det(V1)^(1/nspp1)   # model of coevolution
+        V2<-V2/det(V2)^(1/nspp2)
+        V<-kronecker(V2,V1)  
+        invV<-qr.solve(V)
+        arand<-solve((t(U)%*%invV%*%U),((t(U)%*%invV%*%A)))   #NOTE: Ives in his Matlab code uses a Left matrix division symbol (\)
+        
+        bootlist<-rbind(bootlist,c(d1rand, d2rand, t(arand)))
+      }
+      nr<-dim(bootlist)[1]
+      nc<-dim(bootlist)[2]
+      
+      #Calculate bootstrapped CIs
+      alpha<-0.05  # alpha is always 0.05, but could change here
+      conf<-NULL
+      for(j in 1:nc)
+      {
+        bootconf<-quantile(bootlist[,j],probs = c(alpha/2, 1-alpha/2))
+        conf<-rbind(conf,c(bootconf[1],bootconf[2]))
+      }
+      signal.strength<-data.frame(cbind(conf[1:2,1],dtrue,conf[1:2,2]))
+      rownames(signal.strength)<-c("d1","d2")
+      colnames(signal.strength)<-c("booted lower CI 95%","estimate","booted upper CI 95%")
+
+      #organize output
+      CI.boot<-conf
+      rownames(CI.boot)<-c("d1","d2","intercept",colnames(U)[-1])
+      colnames(CI.boot)<-c("booted lower CI 95%","booted upper CI 95%")
+      colnames(bootlist)<-c("d1","d2","intercept",colnames(U)[-1])
+      return(list(MSE=MSEs,signal.strength=signal.strength,coefficients=data.frame(coefs),CI.boot=CI.boot,variates=data.frame(data.vecs),residuals=residuals,bootvalues=bootlist))
+    
+    } else {
+    ########
+    # If bootstrapping not performed
+    
+    conf<-matrix(NA,2,2)
+    signal.strength<-data.frame(cbind(conf[1,],dtrue,conf[2,]))
+    rownames(signal.strength)<-c("d1","d2")
+    colnames(signal.strength)<-c("booted lower CI 95%","estimate","booted upper CI 95%")
+    return(list(MSE=MSEs,signal.strength=signal.strength,coefficients=data.frame(coefs),CI.boot=NULL,variates=data.frame(data.vecs),residuals=residuals,bootvalues=NULL))
+    }
+  }                                                                                                       
+}
\ No newline at end of file

Modified: pkg/R/phylodiversity.R
===================================================================
--- pkg/R/phylodiversity.R	2008-07-19 00:51:58 UTC (rev 140)
+++ pkg/R/phylodiversity.R	2008-07-19 00:55:55 UTC (rev 141)
@@ -137,6 +137,7 @@
 
   if(is(tree)[1]=="phylo")
   {
+    if(is.null(tree$edge.length)){tree<-compute.brlen(tree, 1)}  #If phylo has no given branch lengths
     tree<-prune.sample(samp,tree)
     # Make sure that the species line up
     samp<-samp[,tree$tip.label]
@@ -262,8 +263,11 @@
     flag=2
   }
 
+  samp<-as.matrix(samp)
+  
   if(is(tree)[1]=="phylo")
   {
+    if(is.null(tree$edge.length)){tree<-compute.brlen(tree, 1)}  #If phylo has no given branch lengths
     tree<-prune.sample(samp,tree) 
     # Make sure that the species line up
     samp<-samp[,tree$tip.label]
@@ -323,6 +327,7 @@
 
   if(is(tree)[1]=="phylo")
   {
+    if(is.null(tree$edge.length)){tree<-compute.brlen(tree, 1)}  #If phylo has no given branch lengths
     tree<-prune.sample(samp,tree)
     # Make sure that the species line up
     samp<-samp[,tree$tip.label]
@@ -378,6 +383,7 @@
   }  
   if(is(tree)[1]=="phylo")
   {
+    if(is.null(tree$edge.length)){tree<-compute.brlen(tree, 1)}  #If phylo has no given branch lengths
     tree<-prune.sample(samp,tree)
     # Make sure that the species line up
     samp<-samp[,tree$tip.label]
@@ -400,7 +406,7 @@
   Cmatrix<-Cmatrix[indexcov,indexcov]
   samp<-samp[,indexcov]
 
-  obs.PSV<-mean(psv(samp,Cmatrix,compute.var=FALSE)[,1])
+  obs.PSV<-mean(psv(samp,Cmatrix,compute.var=FALSE)[,1],na.rm=TRUE)
 
   # numbers of locations and species
   nlocations<-dim(samp)[1]
@@ -411,7 +417,7 @@
   {
     spp.samp<-samp[,-j]
     spp.Cmatrix<-Cmatrix[-j,-j]
-    spp.PSV<-mean(psv(spp.samp,spp.Cmatrix,compute.var=FALSE)[,1])
+    spp.PSV<-mean(psv(spp.samp,spp.Cmatrix,compute.var=FALSE)[,1],na.rm=TRUE)
     spp.PSVs<-c(spp.PSVs,spp.PSV)
   }
   spp.PSVout<-(spp.PSVs-obs.PSV)/sum(abs(spp.PSVs-obs.PSV))
@@ -440,6 +446,7 @@
 
 pd<-function(samp,tree){
   
+  if(is.null(tree$edge.length)){tree<-compute.brlen(tree, 1)}  #If phylo has no given branch lengths
   # Make sure that the species line up
   tree<-prune.sample(samp,tree)
   samp<-samp[,tree$tip.label]

Copied: pkg/R/phylostruct.R (from rev 140, branches/gsoc/R/phylostruct.R)
===================================================================
--- pkg/R/phylostruct.R	                        (rev 0)
+++ pkg/R/phylostruct.R	2008-07-19 00:55:55 UTC (rev 141)
@@ -0,0 +1,40 @@
+phylostruct<-function(samp,tree,env=NULL,metric=c("psv","psr","pse","psc","sppregs"),null.model=c("frequency","richness","both"),runs=10,alpha=0.05,fam="binomial"){
+
+  metric<-match.arg(metric)
+  null.model<-match.arg(null.model)
+  if(metric=="sppregs")
+  {
+  nulls<-t(replicate(runs,sppregs(randomizeSample(samp,null.model=null.model),env,tree,fam=fam)$correlations))
+  obs<-sppregs(samp,env,tree,fam=fam)$correlations
+  mean.null<-apply(nulls,2,mean)
+  quantiles.null<-t(apply(nulls,2,quantile,probs=c(alpha/2,1-(alpha/2))))
+  
+  return(list(metric=metric,null.model=null.model,runs=runs,obs=obs,mean.null=mean.null
+                ,quantiles.null=quantiles.null,phylo.structure=NULL,nulls=nulls))
+
+
+  } else {
+
+    nulls<-switch(metric,
+                       psv = replicate(runs,mean(psv(randomizeSample(samp,null.model=null.model),tree,compute.var=FALSE)[,1])),
+                       psr = replicate(runs,mean(psr(randomizeSample(samp,null.model=null.model),tree,compute.var=FALSE)[,1])),
+                       pse = replicate(runs,mean(pse(randomizeSample(samp,null.model=null.model),tree)[,1])),
+                       psc = replicate(runs,mean(psc(randomizeSample(samp,null.model=null.model),tree)[,1])))
+    quantiles.null<-quantile(nulls,probs=c(alpha/2,1-(alpha/2)))
+    mean.null<-mean(nulls)
+    mean.obs<-switch(metric,
+                       psv = mean(psv(samp,tree,compute.var=FALSE)[,1]),
+                       psr = mean(psr(samp,tree,compute.var=FALSE)[,1]),
+                       pse = mean(pse(samp,tree)[,1]),
+                       psc = mean(psc(samp,tree)[,1]))
+
+    if(mean.obs<=quantiles.null[1])
+    {phylo.structure="underdispersed"
+    } else {if(mean.obs>=quantiles.null[2]){
+    phylo.structure="overdispersed"} else {phylo.structure="random"}
+    }
+    
+    return(list(metric=metric,null.model=null.model,runs=runs,mean.obs=mean.obs,mean.null=mean.null
+                ,quantiles.null=quantiles.null,phylo.structure=phylo.structure,null.means=nulls))
+  }
+}

Modified: pkg/R/specaccum.psr.R
===================================================================
--- pkg/R/specaccum.psr.R	2008-07-19 00:51:58 UTC (rev 140)
+++ pkg/R/specaccum.psr.R	2008-07-19 00:55:55 UTC (rev 141)
@@ -35,8 +35,8 @@
     perm[, i]<-r.x
   }
   sites <- 1:n
-  specaccum <- apply(perm, 1, mean)
-  sdaccum <- apply(perm, 1, sd)
+  specaccum <- apply(perm, 1, mean, na.rm=TRUE)
+  sdaccum <- apply(perm, 1, sd, na.rm=TRUE)
   out <- list(call = match.call(), method = method, sites = sites, richness = specaccum, sd = sdaccum, perm = perm)
   class(out) <- "specaccum"
   out

Copied: pkg/R/sppregs.R (from rev 140, branches/gsoc/R/sppregs.R)
===================================================================
--- pkg/R/sppregs.R	                        (rev 0)
+++ pkg/R/sppregs.R	2008-07-19 00:55:55 UTC (rev 141)
@@ -0,0 +1,137 @@
+sppregs<-function(samp,env,tree=NULL,fam="gaussian"){
+  
+  if(is.null(tree))
+  {
+    cors.phylo=NULL
+  } else {                # If a tree is provided
+  
+    if(is(tree)[1]=="phylo")
+    {
+      tree<-prune.sample(samp,tree)
+      # Make sure that the species line up
+      samp<-samp[,tree$tip.label]
+      # Make a correlation matrix of the species pool phylogeny
+      Cmatrix<-vcv.phylo(tree,cor=TRUE)
+    } else {
+      Cmatrix<-tree
+      species<-colnames(samp)
+      preval<-colSums(samp)/sum(samp)
+      species<-species[preval>0]
+      Cmatrix<-Cmatrix[species,species]
+      samp<-samp[,colnames(Cmatrix)]
+    }
+    cors.phylo<-Cmatrix[lower.tri(Cmatrix)] #vector of pairwise phylogenetic correlations among species
+    samp<-samp[,colnames(Cmatrix)]          #only those species in the phylogeny are regressed
+  }
+  
+  nplots<-dim(samp)[1]     # number of units in the occurence data
+  nspp<-dim(samp)[2]       # number of species  in the occurence data
+  sppnames<-colnames(samp) # vector of species names
+
+  #make formula for model fit to each species
+
+  if(is.null(dim(env)))
+  {
+    nenvs<-1
+    envnames<-names(env)
+    if (is.null(envnames)){envnames<-"env"}
+    formu<-paste("y~",envnames) #Make formula
+  } else {
+    nenvs<-dim(env)[2]       # number of environmental variables
+    envnames<-colnames(env)  # vecor of env names
+    formu<-paste("y~",envnames[1]) #Make formula
+    for (t in 2:nenvs)
+    {
+      formu<-paste(formu,envnames[t],sep="+")
+    }
+  }
+
+  #Fit either the logistic or the standard regressions
+  spp.resids<-NULL                           #holds each species residuals y-yhat
+  spp.coef<-NULL                             #holds the coefficients of each species 
+  spp.se<-NULL                               #holds the se of the coefs.
+  spp.fits<-NULL                             #holds the yhats
+               
+  if(fam=="gaussian")
+  {
+      for(i in 1:nspp)
+      {
+        y<-samp[,i]
+        mod<-glm(formu,data=cbind(y,env))
+        spp.resids<-cbind(spp.resids,mod$y-mod$fitted.values)
+        spp.fits<-cbind(spp.fits,mod$fitted.values)
+        spp.coef<-cbind(spp.coef,summary(mod)$coef[,1])
+        spp.se<-cbind(spp.se,summary(mod)$coef[,2])
+      }
+      cors.resid<-cor(spp.resids)[lower.tri(cor(spp.resids))]  #a vector of residual correlations among species
+
+  } else {
+  
+    if (!require(brglm)) {
+        stop("The 'brglm' package is required to use this function with argument fam=binomial.")
+    }
+
+    samp[samp>0]<-1             #make samp a pa matrix
+    for(i in 1:nspp)
+    {
+      y<-samp[,i]
+      mod<-brglm(formu,data=data.frame(cbind(y,env)))
+      spp.resids<-cbind(spp.resids,mod$y-mod$fitted.values)
+      spp.fits<-cbind(spp.fits,mod$fitted.values)
+      spp.coef<-cbind(spp.coef,summary(mod)$coef[,1])
+      spp.se<-cbind(spp.se,summary(mod)$coef[,2])
+    }
+    
+    # calcualte the correlations among species residuals given that they come from a binomial process
+    cor.r<-matrix(0,nrow=nspp,ncol=nspp)
+    for (j in 1:dim(spp.resids)[1])
+    {
+      invv<-matrix((spp.fits[j,]*(1-spp.fits[j,]))^(-.5),nrow=1)
+      invv<-t(invv)%*%invv
+      invv[invv>(10^10)]<-(10^10)
+		  r<-matrix(spp.resids[j,],nrow=1)
+      cor.r=cor.r+((t(r)%*%r)*invv)
+    }
+    RC=cor.r/dim(spp.resids)[1]
+    cors.resid<-RC[lower.tri(RC)] # Observed correlations of residuals among species
+  }
+  
+  #add names to output
+  colnames(spp.coef)<-colnames(samp)
+  colnames(spp.se)<-colnames(samp)
+  colnames(spp.resids)<-colnames(samp)
+  colnames(spp.fits)<-colnames(samp)
+  pairnames=NULL  # make a vector of pairwise comparison names
+  for (o in 1:(nspp-1))
+  {
+    for (u in (o+1):nspp)
+    {
+      pairnames<-c(pairnames,paste(sppnames[o],sppnames[u],sep="-"))
+    }
+  }
+  cors.pa<-cor(samp)[lower.tri(cor(samp))] #obs pa correlations
+  names(cors.pa)<-pairnames
+  names(cors.resid)<-pairnames
+  if(is.null(cors.phylo)==FALSE)
+  {
+    names(cors.phylo)<-pairnames
+  }
+
+  correlations<-c(cor(cors.phylo,cors.pa,use="pairwise.complete.obs"),
+                  cor(cors.phylo,cors.resid,use="pairwise.complete.obs"),
+                  cor(cors.phylo,cors.resid-cors.pa,use="pairwise.complete.obs"))
+  names(correlations)<-c("occurence-phylo","residual-phylo","change-phylo")
+  return(list(family=fam,residuals=spp.resids,coefficients=spp.coef,std.errors=spp.se,correlations=correlations,
+              cors.pa=cors.pa,cors.resid=cors.resid,cors.phylo=cors.phylo))
+
+}
+
+sppregs.plot<-function(sppreg,rows=c(1,3),cex.mag=1,x.label="phylogenetic correlations",y.label=c("occurrence correlations w/ env","occurrence correlations wo/ env","change in correlations")){
+  par(mfrow=rows,las=1,cex=cex.mag)
+  plot(sppreg$cors.phylo,sppreg$cors.pa,xlab=x.label,ylab=y.label[1],main=paste("cor =",round(cor(sppreg$cors.phylo,sppreg$cors.pa,use="pairwise.complete.obs"),4)))
+  abline(0,0,lty=2)
+  plot(sppreg$cors.phylo,sppreg$cors.resid,xlab=x.label,ylab=y.label[2],main=paste("cor =",round(cor(sppreg$cors.phylo,sppreg$cors.resid,use="pairwise.complete.obs"),4)))
+  abline(0,0,lty=2)
+  plot(sppreg$cors.phylo,sppreg$cors.resid-sppreg$cors.pa,xlab=x.label,ylab=y.label[3],main=paste("cor =",round(cor(sppreg$cors.phylo,sppreg$cors.resid-sppreg$cors.pa,use="pairwise.complete.obs"),4)))
+  abline(0,0,lty=2)
+}
\ No newline at end of file

Copied: pkg/man/pblm.Rd (from rev 140, branches/gsoc/man/pblm.Rd)
===================================================================
--- pkg/man/pblm.Rd	                        (rev 0)
+++ pkg/man/pblm.Rd	2008-07-19 00:55:55 UTC (rev 141)
@@ -0,0 +1,62 @@
+\name{pblm}
+\alias{pblm}
+\title{ Phylogenetic Bipartite Linear Model }
+\description{
+  Fits a linear model to the association strengths of a bipartite data set with or without phylogenetic correlation among the interacting species
+}
+\usage{
+pblm(assocs,tree1=NULL,tree2=NULL,covars1=NULL,covars2=NULL,bootstrap=FALSE,nreps=10,maxit=10000,pstart=c(.5,.5))
+}
+
+\arguments{
+  \item{assocs}{ A matrix of association strengths among two sets of interacting species }
+  \item{tree1}{ A phylo tree object or a phylogenetic covariance matrix for the rows of \code{assocs} }
+  \item{tree2}{ A phylo tree object or a phylogenetic covariance matrix for the columns of \code{assocs}}
+  \item{covars1}{ A matrix of covariates (e.g., traits) for the row species of \code{assocs} }
+  \item{covars2}{ A matrix of covariates (e.g., traits) for the column species of \code{assocs} }
+  \item{bootstrap}{ logical, bootstrap confidence intervals of the parameter estimates }
+  \item{nreps}{ Number of bootstrap replicated data sets to estimate parameter CIs }
+  \item{maxit}{ as in \code{\link{optim}} }
+  \item{pstart}{ starting values of the two phylogenetic signal strength parameters passed to \code{\link{optim}} }
+}
+
+\details{
+ Fit a linear model with covariates using estimated generalized least squares to the association strengths between two sets of interacting species. 
+ Associations can be either binary or continuous. If phylogenies of the two sets of interacting species are supplied, 
+ two \emph{phyogenetic signal strength} parameters (\emph{d1} and \emph{d2}), one for each species set, based on an Ornstein-Uhlenbeck model of 
+ evolution with stabilizing selection are estimated. Values of \emph{d=1} indicate no stabilizing selection and correspond to the Brownian motion model of 
+ evolution; \emph{0<d<1} represents stabilizing selection; \emph{d=0} depicts the absence of phylogenetic correlation (i.e., a star phylogeny); and \emph{d>1} corresponds 
+ to disruptive selection where phylogenetic signal is amplified. Confidence intervals for these and the other parameters can be estimated with 
+ bootstrapping.
+ 
+ }
+
+\value{
+	The function returns a list with:
+  \item{MSE}{ total, full (each \emph{d} estimated), star (\emph{d=0}), and base (\emph{d=1}) mean squared errors }
+  \item{signal.strength}{ two estimates of phylogenetic signal strength }
+  \item{coefficients}{ estimated intercept and covariate coefficients with approximate 95 percent CIs for the three model types (full, star, base) }
+  \item{CI.boot}{ 95 percent CIs for all parameters }
+  \item{variates}{ matrix of model variates (can be used for plotting) }
+  \item{residuals}{ matrix of residuals from the three models (full, star and base) }
+  \item{bootvalues}{ matrix of parameters estimated from the \code{nreps} bootstrap replicated data sets used to calculate CIs }
+  
+}
+
+\note{Covariates that apply to both species sets (e.g., sampling site) should be supplied in the covariate matrix of the set with the most species.
+\cr
+\cr
+Bootstrapping CIs is slow due to the function \code{\link{optim}} used to estimate the model parameters. See appendix A in Ives and Godfray (2006) 
+for a discussion about this boostrapping procedure}
+
+
+
+\references{Ives A.R. & Godfray H.C. (2006) Phylogenetic analysis of trophic associations. The American Naturalist, 168, E1-E14 \cr
+\cr
+Blomberg S.P., Garland T.J. & Ives A.R. (2003) Testing for phylogenetic signal in comparative data: Behavioral traits are more labile. Evolution, 57, 717-745
+}
+
+\author{ Matthew Helmus \email{mrhelmus at gmail.com} }
+\seealso{ the K metric, \code{\link{Kcalc}}, uses the same Ornstein-Uhlenbeck model of evolution }
+
+\keyword{univar}
\ No newline at end of file

Copied: pkg/man/phylostruct.Rd (from rev 140, branches/gsoc/man/phylostruct.Rd)
===================================================================
--- pkg/man/phylostruct.Rd	                        (rev 0)
+++ pkg/man/phylostruct.Rd	2008-07-19 00:55:55 UTC (rev 141)
@@ -0,0 +1,46 @@
+\name{phylostruct}
+\alias{phylostruct}
+\title{ Permutations to Test for Phylogenetic Signal in Community Composition }
+\description{ Randomize sample/community data matrices to create null distributions of given metrics
+}
+\usage{
+phylostruct(samp, tree, env=NULL, metric=c("psv","psr","pse","psc","sppregs"), null.model=c("frequency","richness","both"),
+            runs=10, alpha=0.05, fam="binomial")
+}
+\arguments{
+  \item{samp}{ community data matrix, species as columns, communities as rows }
+  \item{tree}{ phylo tree object or a phylogenetic covariance matrix }
+  \item{env}{ environmental data matrix }
+  \item{metric}{ if \code{metric="psv"}, \code{"psr"}, \code{"pse"}, or \code{"psc"} compares the observed mean of the respective metric to a null distribution at a given alpha; if \code{metric="sppregs"} compares the three correlations produced by \code{\link{sppregs}} to null distributions }
+  \item{null.model}{ permutation procedure used to create the null distribution, see \code{\link{randomizeSample}}}
+  \item{runs}{ the number of permutations to create the distribution, a rule of thumb is (number of communities)/alpha  }
+  \item{alpha}{ probability value to compare the observed mean/correlations to a null distribution }
+  \item{fam}{ as in \code{\link{sppregs}} }
+}
+
+\details{The function creates null distributions for the \code{\link{psd}} set of metrics and for the correlations of \code{\link{sppregs}} from observed community data sets.}
+
+\value{
+\item{metric}{ metric used }
+\item{null.model}{ permutation used }
+\item{runs}{ number of permutations }
+\item{obs}{ observed mean value of a particular metric or the three observed correlations from \code{\link{sppregs}} }
+\item{mean.null}{ mean(s) of the null distribution(s) }
+\item{quantiles.null}{  quantiles of the null distribution(s) to compare to \code{obs}; determined by \code{alpha}}
+\item{phylo.structure}{ if \code{obs} less than (alpha/2), \code{phylo.structure="underdispersed"}; if \code{obs} greater than (1-alpha/2), \code{phylo.structure="overdispersed"}; otherwise \code{phylo.structure="random"} and NULL if \code{metric="sppregs"}}
+\item{nulls}{ null values of the distribution(s)}
+}
+
+\references{ Helmus M.R., Bland T.J., Williams C.K. & Ives A.R. (2007a) Phylogenetic measures of biodiversity. American Naturalist, 169, E68-E83
+\cr
+\cr
+Helmus M.R., Savage K., Diebel M.W., Maxted J.T. & Ives A.R. (2007b) Separating the determinants of phylogenetic community structure. Ecology Letters, 10, 917-925
+\cr
+\cr
+Gotelli N.J. (2000) Null model analysis of species co-occurrence patterns. Ecology, 81, 2606-2621}
+
+\author{ Matthew Helmus \email{mrhelmus at gmail.com} }
+\seealso{ \code{\link{psd}} ,\code{\link{sppregs}}, \code{\link{randomizeSample}} }
+
+\keyword{univar}
+

Copied: pkg/man/sppregs.Rd (from rev 140, branches/gsoc/man/sppregs.Rd)
===================================================================
--- pkg/man/sppregs.Rd	                        (rev 0)
+++ pkg/man/sppregs.Rd	2008-07-19 00:55:55 UTC (rev 141)
@@ -0,0 +1,57 @@
+\name{sppregs}
+\alias{sppregs}
+\alias{sppregs.plot}
+\title{ Regressions to Separate Phylogenetic Attraction and Repulsion }
+\description{
+  Fit regressions on species abundance or presence/absence across communities and calculate phylogenetic correlations
+}
+\usage{
+sppregs(samp, env, tree=NULL, fam="gaussian")
+sppregs.plot(sppreg, rows=c(1,3), cex.mag=1, x.label="phylogenetic correlations", 
+y.label=c("occurrence correlations w/ env","occurrence correlations wo/ env","change in correlations"))
+}
+
+\arguments{
+  \item{samp}{ community data matrix, species as columns, communities as rows }
+  \item{env}{ environmental data matrix }
+  \item{tree}{  phylo tree object or a phylogenetic covariance matrix }
+  \item{fam}{ with \code{fam = "gaussian"} fits with \code{\link[stats]{glm}}; with \code{fam = "binomial"} fit logistic regressions with Firth's bias-reduction using \code{\link[brglm]{brglm}}  }
+  \item{sppreg}{ object from function \code{\link[picante]{sppregs}} }
+  \item{rows}{ \code{rows = c(1,3)} plots in a row; \code{rows = c(3,1)} in a column }
+  \item{cex.mag}{ value for \code{cex} in \code{par} }
+  \item{x.label}{ x axis labels }
+  \item{y.label}{ y axis labels }
+}
+
+\details{For each species in \code{samp}, the function fits regressions of species presence/absence or abundances
+          on the environmental variables supplied in \code{env}; and calculates the \code{(n^2-n)/2} pairwise species correlations
+          between the residuals of these fits and pairwise species phylogenetic correlations. The residuals can be
+          thought of as the presence/absence of species across sites/communities after accounting for how species respond
+          to environmental variation across sites. Each set of coefficients can be tested for phylogenetic signal with, for example, 
+          the function \code{\link{phylosignal}}. 
+          \cr
+          \cr
+          The function \code{sppregs.plot} produces a set of three plots of the correlations of pairwise species phylogenetic correlations versus: 
+          the observed pairwise correlations of species across communities, the residual correlations, and the pairwise differences between (i.e., the 
+          change in species co-occurrence once the environmental variables are taken into account). The significance of these correlations can be tested
+          via permutation with the function \code{\link{phylostruct}}.
+}
+\note{The function requires the library \code{\link[brglm]{brglm}} to perform logistic regressions}
+          
+\value{
+\item{family}{ the regression error distribution }
+\item{residuals}{ the residuals from each species regression }
+\item{coefficients}{ the estimated coefficients from each species regression }
+\item{std.errors}{ the standard errors of the coefficients }
+\item{correlations}{ correlations of pairwise species phylogenetic correlations between: the observed pairwise correlations of species across communities, the residual correlations, and the pairwise differences between the two }
+\item{cors.pa}{ the observed pairwise correlations of species across communities }
+\item{cors.resid}{ the residual pairwise correlations of species across communities }
+\item{cors.phylo}{ the phylogenetic pairwise correlations among species }
+}
+
+\references{ Helmus M.R., Savage K., Diebel M.W., Maxted J.T. & Ives A.R. (2007) Separating the determinants of phylogenetic community structure. Ecology Letters, 10, 917-925 }
+\author{ Matthew Helmus \email{mrhelmus at gmail.com} }
+\seealso{\code{\link{phylostruct}}, \code{\link{phylosignal}}}
+\keyword{univar}
+
+