[Picante-commits] r163 - in pkg: . R man

Thu Aug 14 00:30:49 CEST 2008

Author: skembel
Date: 2008-08-14 00:30:49 +0200 (Thu, 14 Aug 2008)
New Revision: 163

Added:
   pkg/R/pblmpredict.R
   pkg/src/
Modified:
   pkg/R/pblm.R
   pkg/R/phylostruct.R
   pkg/R/randomizeSample.R
   pkg/man/pblm.Rd
   pkg/man/phylostruct.Rd
   pkg/man/randomizeSample.Rd
Log:
Merge final GSoC updates back into trunk

Modified: pkg/R/pblm.R
===================================================================

--- pkg/R/pblm.R	2008-08-13 06:01:45 UTC (rev 162)
+++ pkg/R/pblm.R	2008-08-13 22:30:49 UTC (rev 163)
@@ -10,7 +10,6 @@
 	nspp2<-dim(assocs)[2]
   sppnames1<-rownames(assocs)
   sppnames2<-colnames(assocs)
-  
   #make names of species pairs
   pairnames=NULL  # make a vector of pairwise comparison names
   for (o in 1:(nspp2))
@@ -154,17 +153,25 @@
 	s2aStar<-as.vector(MSETotal)*qr.solve((t(U)%*%U))
 	sdaStar<-t(diag(s2aStar)^(.5))
 	approxCFstar<-rbind(t(astar)-1.96%*%sdaStar, t(astar), t(astar)+1.96%*%sdaStar)
-  Estar<-A-U%*%astar
+  Pstar<-U%*%astar
+  Estar<-A-Pstar
   MSEStar<-cov(matrix(Estar))
+  
   #######
-  
   if(is.null(tree1) | is.null(tree2))
   {
     coefs<-approxCFstar
     rownames(coefs)<-c("lower CI 95%","estimate","upper CI 95%")
     colnames(coefs)<-paste("star",c("intercept",colnames(U)[-1]),sep="-")
     MSEs<-cbind(data.frame(MSETotal),data.frame(MSEStar))
-    return(list(MSE=MSEs,signal.strength=NULL,coefficients=data.frame(t(coefs)),CI.boot=NULL,variates=data.frame(data.vecs),residuals=data.frame(Estar),bootvalues=NULL))
+    Pstar<-data.frame(Pstar)
+    colnames(Pstar)<-"star"
+    Estar<-data.frame(Estar)
+    colnames(Estar)<-"star"
+    output<-list(MSE=MSEs,signal.strength=NULL,coefficients=data.frame(t(coefs)),CI.boot=NULL,variates=data.frame(data.vecs),residuals=Estar,predicted=Pstar,bootvalues=NULL,Vfull=NULL)
+    class(output)<-"pblm"
+    return(output)
+    
   } else {
   
     #tree1 is the phylogeny for the rows
@@ -193,7 +200,6 @@
     V1<-as.matrix(V1)
     V2<-as.matrix(V2)
     
-    
     V1<-V1/det(V1)^(1/nspp1)   # scale covariance matrices (this reduces numerical problems caused by
   	V2<-V2/det(V2)^(1/nspp2)   # determinants going to infinity or zero)
   	V<-kronecker(V2,V1)  
@@ -204,8 +210,9 @@
     s2abase<-as.vector(MSEBase)*qr.solve(t(U)%*%invV%*%U)
   	sdabase<-t(diag(s2abase)^(.5))
     approxCFbase<-rbind(t(abase)-1.96%*%sdabase, t(abase), t(abase)+1.96%*%sdabase)
-    Ebase<-A-U%*%abase
-    
+    Pbase<-t(t(U%*%abase)%*%invV)
+    Ebase<-A-Pbase
+  
     ###################
     # Full EGLS estimates of phylogenetic signal
     ##################
@@ -241,6 +248,7 @@
   	d1<-abs(est$par[1])
   	d2<-abs(est$par[2])
   	
+  	
     # Calculate EGLS coef w estimated ds 
   	V1<-(d1^tau1)*(1-d1^(2*initV1))/(1-d1^2)
     V2<-(d2^tau2)*(1-d2^(2*initV2))/(1-d2^2)
@@ -252,7 +260,9 @@
     s2aFull<-as.vector(MSEFull)*qr.solve(t(U)%*%invV%*%U)
   	sdaFull<-t(diag(s2aFull)^(.5))
   	approxCFfull<-rbind(t(aFull)-1.96%*%sdaFull, t(aFull), t(aFull)+1.96%*%sdaFull)
-    Efull<-A-U%*%aFull
+    Pfull<-t(t(U%*%aFull)%*%invV)
+    Efull<-A-Pfull
+
     ########################################
     
     #organize output
@@ -263,10 +273,15 @@
     CI.boot<-NULL
     MSEs<-cbind(data.frame(MSETotal),data.frame(MSEFull), data.frame(MSEStar), data.frame(MSEBase))
     residuals<-cbind(data.frame(Efull),data.frame(Estar),data.frame(Ebase))
+    predicted<-cbind(data.frame(Pfull),data.frame(Pstar),data.frame(Pbase))
     rownames(residuals)<-pairnames
-  
+    rownames(predicted)<-pairnames
+    colnames(predicted)<-c("full","star","base")
+    colnames(residuals)<-c("full","star","base")
+    phylocovs=list(V1=V1,V2=V2)
+    
+    ################
     #bootstrap CIs
-    
     if(bootstrap)
     {
       Vtrue<-V
@@ -334,17 +349,21 @@
       rownames(CI.boot)<-c("d1","d2","intercept",colnames(U)[-1])
       colnames(CI.boot)<-c("booted lower CI 95%","booted upper CI 95%")
       colnames(bootlist)<-c("d1","d2","intercept",colnames(U)[-1])
-      return(list(MSE=MSEs,signal.strength=signal.strength,coefficients=data.frame(coefs),CI.boot=CI.boot,variates=data.frame(data.vecs),residuals=residuals,bootvalues=bootlist))
+      output<-list(MSE=MSEs,signal.strength=signal.strength,coefficients=data.frame(coefs),CI.boot=CI.boot,variates=data.frame(data.vecs),predicted=predicted,residuals=residuals,bootvalues=bootlist,phylocovs=phylocovs)
+      class(output)<-"pblm"
+      return(output)
     
     } else {
     ########
     # If bootstrapping not performed
     
     conf<-matrix(NA,2,2)
-    signal.strength<-data.frame(cbind(conf[1,],dtrue,conf[2,]))
+    signal.strength<-data.frame(cbind(conf[1,],c(d1,d2),conf[2,]))
     rownames(signal.strength)<-c("d1","d2")
     colnames(signal.strength)<-c("booted lower CI 95%","estimate","booted upper CI 95%")
-    return(list(MSE=MSEs,signal.strength=signal.strength,coefficients=data.frame(coefs),CI.boot=NULL,variates=data.frame(data.vecs),residuals=residuals,bootvalues=NULL))
+    output<-list(MSE=MSEs,signal.strength=signal.strength,coefficients=data.frame(coefs),CI.boot=NULL,variates=data.frame(data.vecs),predicted=predicted,residuals=residuals,bootvalues=NULL,phylocovs=phylocovs)
+    class(output)<-"pblm"
+    return(output)
     }
   }                                                                                                       
 }
\ No newline at end of file

Copied: pkg/R/pblmpredict.R (from rev 162, branches/gsoc/R/pblmpredict.R)
===================================================================
--- pkg/R/pblmpredict.R	                        (rev 0)
+++ pkg/R/pblmpredict.R	2008-08-13 22:30:49 UTC (rev 163)
@@ -0,0 +1,171 @@
+pblmpredict<-function(x,tree1.w.novel=NULL,tree2.w.novel=NULL,predict.originals=FALSE)
+{
+  if (!identical(class(x),"pblm")) stop("x must be of class pblm")
+  if(is.null(x$phylocovs$V1) | is.null(x$phylocovs$V2)) stop("a pblm fit with phylogenies must be supplied")
+  
+  sppnames1.orig<-rownames(x$phylocovs$V1)
+  sppnames2.orig<-rownames(x$phylocovs$V2)
+  
+  if(predict.originals)
+  {
+    novel.spp1<-sppnames1.orig
+    novel.spp2<-sppnames2.orig
+    n.novels1<-length(novel.spp1)
+    n.novels2<-length(novel.spp2)
+    cors.1=NULL
+    obs.novels1<-NULL
+    pairnames<-rownames(x$variates)
+    for(i in 1:n.novels1)
+    {
+      novel.assocs<-grep(novel.spp1[i],pairnames)
+      A<-x$variates[-1*novel.assocs,]
+      other.assocs<-1:length(pairnames)
+      other.assocs<-other.assocs[-1*novel.assocs]
+      Vyy<-V[other.assocs,other.assocs]
+		  Vxy<-V[novel.assocs,other.assocs]
+		  invVyy<-qr.solve(Vyy)
+		  U<-rep(1,length(other.assocs))
+      mu<-solve((t(U)%*%invVyy%*%U),(t(U)%*%invVyy%*%A))
+		  dxgy<-Vxy%*%invVyy%*%(A-mu)
+		  estmu<-rep(mu,length(dxgy))+dxgy
+		  Vxgy<-V[novel.assocs,novel.assocs]-Vxy%*%invVyy%*%t(Vxy)
+  
+		  cors.1<-c(cors.1,cor(estmu,x$variates[novel.assocs,]))
+      obs.novels1<-rbind(obs.novels1,cbind(estmu,data.frame(diag(Vxgy)),x$variates[novel.assocs,]))
+    }
+    names(cors.1)<-sppnames1.orig
+    
+  cors.2=NULL
+  obs.novels2<-NULL
+    for(i in 1:n.novels2)
+    {
+      novel.assocs<-grep(novel.spp2[i],pairnames)
+      A<-x$variates[-1*novel.assocs,]
+      other.assocs<-1:length(pairnames)
+      other.assocs<-other.assocs[-1*novel.assocs]
+      Vyy<-V[other.assocs,other.assocs]
+		  Vxy<-V[novel.assocs,other.assocs]
+		  invVyy<-qr.solve(Vyy)
+  
+  
+		  U<-rep(1,length(other.assocs))
+		  
+      mu<-solve((t(U)%*%invVyy%*%U),(t(U)%*%invVyy%*%A))
+
+		  dxgy<-Vxy%*%invVyy%*%(A-mu)
+		  estmu<-rep(mu,length(dxgy))+dxgy
+
+		  Vxgy<-V[novel.assocs,novel.assocs]-Vxy%*%invVyy%*%t(Vxy)
+      cors.2<-c(cors.2,cor(estmu,x$variates[novel.assocs,]))
+		  obs.novels2<-rbind(obs.novels2,cbind(estmu,data.frame(diag(Vxgy)),x$variates[novel.assocs,]))
+    }
+    names(cors.2)<-sppnames2.orig
+    return(list(cors.1=cors.1[sort(names(cors.1))],cors.2=cors.2[sort(names(cors.2))],obs.novels1=obs.novels1,obs.novels2=obs.novels2))
+  
+  } else {
+  
+  A<-x$variates[,1]
+  #n.cov<-dim(x$variates)[2]-1
+  #full.coef<-x$coefficients[grep("full",rownames(x$coefficients)),2]
+  #predicted.orig<-x$predicted
+  
+  if(is(tree1.w.novel)[1]=="phylo")
+  {
+    if(is.null(tree1.w.novel$edge.length)){tree1.w.novel<-compute.brlen(tree1.w.novel, 1)}  #If phylo has no given branch lengths
+    V1<-vcv.phylo(tree1.w.novel,cor=TRUE)
+  } else {
+    V1<-tree1.w.novel
+  }
+  
+  if(is(tree2.w.novel)[1]=="phylo")
+  {
+    if(is.null(tree2.w.novel$edge.length)){tree2.w.novel<-compute.brlen(tree2.w.novel, 1)}  #If phylo has no given branch lengths
+    V2<-vcv.phylo(tree2.w.novel,cor=TRUE)
+  } else {
+    V2<-tree2.w.novel
+  }
+  
+  sppnames1<-rownames(V1)
+  sppnames2<-rownames(V2)
+  
+  V1<-as.matrix(V1)
+  V2<-as.matrix(V2)
+  d1<-x$signal.strength[1,2]
+  d2<-x$signal.strength[2,2]
+  
+ 	initV1<-V1
+ 	initV2<-V2
+  nspp1<-dim(V1)[1]
+  nspp2<-dim(V2)[1]
+  
+ 	# tau = tau_i + tau_j where tau_i equals the node to tip distance
+ 	tau1<-matrix(diag(initV1),nspp1,nspp1) + matrix(diag(initV1),nspp1,nspp1)-2*initV1
+ 	tau2<-matrix(diag(initV2),nspp2,nspp2) + matrix(diag(initV2),nspp2,nspp2)-2*initV2
+
+  V1<-(d1^tau1)*(1-d1^(2*initV1))/(1-d1^2)
+  V2<-(d2^tau2)*(1-d2^(2*initV2))/(1-d2^2)
+  V1<-V1/det(V1)^(1/nspp1)   # model of coevolution
+  V2<-V2/det(V2)^(1/nspp2)
+  V<-kronecker(V2,V1)  
+  invV<-qr.solve(V)
+  
+  #make names of species pairs
+  pairnames=NULL  # make a vector of pairwise comparison names
+  for (o in 1:(nspp2))
+  {
+    for (u in 1:nspp1)
+    {
+      pairnames<-c(pairnames,paste(sppnames2[o],sppnames1[u],sep="-"))
+    }
+  }
+  
+  rownames(V)<-pairnames
+  colnames(V)<-pairnames
+  
+  #####
+  
+    novel.spp1<-setdiff(sppnames1,sppnames1.orig)
+    novel.spp2<-setdiff(sppnames2,sppnames2.orig)
+    n.novels1<-length(novel.spp1)
+    n.novels2<-length(novel.spp2)
+  }
+  
+  if(n.novels1+n.novels2==0) stop("no novel species")
+  
+  other.assocs<-rownames(x$variates)
+  Vyy<-V[other.assocs,other.assocs]
+  invVyy<-qr.solve(Vyy)
+  #U<-rep(1,length(other.assocs))
+  #mu<-solve((t(U)%*%invVyy%*%U),(t(U)%*%invVyy%*%A))
+	mu<-x$coefficients[1,2]
+  	  
+  if(n.novels1>0)
+  {
+    obs.novels1<-NULL
+    for(i in 1:n.novels1)
+    {
+      novel.assocs<-grep(novel.spp1[i],pairnames)
+		  Vxy<-V[novel.assocs,other.assocs]
+  		dxgy<-Vxy%*%invVyy%*%(A-mu)
+		  estmu<-rep(mu,length(dxgy))+dxgy
+		  Vxgy<-V[novel.assocs,novel.assocs]-Vxy%*%invVyy%*%t(Vxy)
+      obs.novels1<-rbind(obs.novels1,cbind(estmu,data.frame(diag(Vxgy))))
+    }
+  } else {obs.novels1=NULL}
+  
+  if(n.novels2>0)
+  {
+    obs.novels2<-NULL
+    for(i in 1:n.novels2)
+    {
+      novel.assocs<-grep(novel.spp2[i],pairnames)
+  	  Vxy<-V[novel.assocs,other.assocs]
+  		dxgy<-Vxy%*%invVyy%*%(A-mu)
+		  estmu<-rep(mu,length(dxgy))+dxgy
+		  Vxgy<-V[novel.assocs,novel.assocs]-Vxy%*%invVyy%*%t(Vxy)
+ 	    obs.novels2<-rbind(obs.novels2,cbind(estmu,data.frame(diag(Vxgy))))
+    }
+  } else {obs.novels2=NULL}
+  
+  return(list(cors.1=NA,cors.2=NA,obs.novels1=obs.novels1,obs.novels2=obs.novels2)) 
+}
\ No newline at end of file

Modified: pkg/R/phylostruct.R
===================================================================
--- pkg/R/phylostruct.R	2008-08-13 06:01:45 UTC (rev 162)
+++ pkg/R/phylostruct.R	2008-08-13 22:30:49 UTC (rev 163)
@@ -1,40 +1,39 @@
-phylostruct<-function(samp,tree,env=NULL,metric=c("psv","psr","pse","psc","sppregs"),null.model=c("frequency","richness","both"),runs=10,alpha=0.05,fam="binomial"){
+phylostruct<-function(samp,tree,env=NULL,metric=c("psv","psr","pse","psc","sppregs"),null.model=c("frequency","richness","independentswap","trialswap"),runs=100,it=1000,alpha=0.05,fam="binomial"){
 
   metric<-match.arg(metric)
   null.model<-match.arg(null.model)
   if(metric=="sppregs")
   {
-  nulls<-t(replicate(runs,sppregs(randomizeSample(samp,null.model=null.model),env,tree,fam=fam)$correlations))
+  nulls<-t(replicate(runs,sppregs(randomizeSample(samp,null.model=null.model,it=it),env,tree,fam=fam)$correlations))
   obs<-sppregs(samp,env,tree,fam=fam)$correlations
   mean.null<-apply(nulls,2,mean)
   quantiles.null<-t(apply(nulls,2,quantile,probs=c(alpha/2,1-(alpha/2))))
-  
-  return(list(metric=metric,null.model=null.model,runs=runs,obs=obs,mean.null=mean.null
+  if((null.model!="independentswap")&&(null.model!="trialswap")){it=NA}
+  return(list(metric=metric,null.model=null.model,runs=runs,it=it,obs=obs,mean.null=mean.null
                 ,quantiles.null=quantiles.null,phylo.structure=NULL,nulls=nulls))
 
-
   } else {
 
     nulls<-switch(metric,
-                       psv = replicate(runs,mean(psv(randomizeSample(samp,null.model=null.model),tree,compute.var=FALSE)[,1])),
-                       psr = replicate(runs,mean(psr(randomizeSample(samp,null.model=null.model),tree,compute.var=FALSE)[,1])),
-                       pse = replicate(runs,mean(pse(randomizeSample(samp,null.model=null.model),tree)[,1])),
-                       psc = replicate(runs,mean(psc(randomizeSample(samp,null.model=null.model),tree)[,1])))
+                       psv = replicate(runs,mean(psv(as.matrix(randomizeSample(samp,null.model=null.model,it=it)),tree,compute.var=FALSE)[,1],na.rm=TRUE)),
+                       psr = replicate(runs,mean(psr(as.matrix(randomizeSample(samp,null.model=null.model,it=it)),tree,compute.var=FALSE)[,1],na.rm=TRUE)),
+                       pse = replicate(runs,mean(pse(as.matrix(randomizeSample(samp,null.model=null.model,it=it)),tree)[,1],na.rm=TRUE)),
+                       psc = replicate(runs,mean(psc(as.matrix(randomizeSample(samp,null.model=null.model,it=it)),tree)[,1],na.rm=TRUE)))
     quantiles.null<-quantile(nulls,probs=c(alpha/2,1-(alpha/2)))
     mean.null<-mean(nulls)
     mean.obs<-switch(metric,
-                       psv = mean(psv(samp,tree,compute.var=FALSE)[,1]),
-                       psr = mean(psr(samp,tree,compute.var=FALSE)[,1]),
-                       pse = mean(pse(samp,tree)[,1]),
-                       psc = mean(psc(samp,tree)[,1]))
+                       psv = mean(psv(samp,tree,compute.var=FALSE)[,1],na.rm=TRUE),
+                       psr = mean(psr(samp,tree,compute.var=FALSE)[,1],na.rm=TRUE),
+                       pse = mean(pse(samp,tree)[,1],na.rm=TRUE),
+                       psc = mean(psc(samp,tree)[,1],na.rm=TRUE))
 
     if(mean.obs<=quantiles.null[1])
     {phylo.structure="underdispersed"
     } else {if(mean.obs>=quantiles.null[2]){
     phylo.structure="overdispersed"} else {phylo.structure="random"}
     }
-    
-    return(list(metric=metric,null.model=null.model,runs=runs,mean.obs=mean.obs,mean.null=mean.null
+    if((null.model!="independentswap")&&(null.model!="trialswap")){it=NA}    
+    return(list(metric=metric,null.model=null.model,runs=runs,it=it,mean.obs=mean.obs,mean.null=mean.null
                 ,quantiles.null=quantiles.null,phylo.structure=phylo.structure,null.means=nulls))
   }
 }

Modified: pkg/R/randomizeSample.R
===================================================================
--- pkg/R/randomizeSample.R	2008-08-13 06:01:45 UTC (rev 162)
+++ pkg/R/randomizeSample.R	2008-08-13 22:30:49 UTC (rev 163)
@@ -1,40 +1,74 @@
+.First.lib <- function(lib,pkg) {
+  library.dynam("picante",pkg,lib)
+} 
+
 `randomizeSample` <-
-function(samp, null.model=c("frequency","richness","both")) {
-	null.model <- match.arg(null.model)
+function(samp, null.model=c("frequency","richness","independentswap","trialswap"), lang=c("C","R"), it=1000) {
+
+    samp <- as.matrix(samp)
+    lang <- match.arg(lang)
+    null.model <- match.arg(null.model)
+    
 	if (identical(null.model,"frequency")) {
-	    return(data.frame(apply(samp,2,sample),row.names=row.names(samp)))
+        if (identical(lang,"R")) {
+    	    return(data.frame(apply(samp,2,sample),row.names=row.names(samp)))
+    	} else {
+    	    ret1 <- .C("frequency", m=as.numeric(samp), as.integer(nrow(samp)), as.integer(ncol(samp)), PACKAGE="picante")
+    	    return(matrix(ret1$m,nrow=nrow(samp),dimnames=list(rownames(samp),colnames(samp))))
+    	}
 	}
+
 	if (identical(null.model,"richness")) {
-	    return(t(data.frame(apply(samp,1,sample),row.names=colnames(samp))))
+	    if (identical(lang,"R")) {
+            return(t(data.frame(apply(samp,1,sample),row.names=colnames(samp))))
+       	} else {
+    	    ret1 <- .C("richness", m=as.numeric(samp), as.integer(nrow(samp)), as.integer(ncol(samp)), PACKAGE="picante")
+    	    return(matrix(ret1$m,nrow=nrow(samp),dimnames=list(rownames(samp),colnames(samp))))
+    	}
 	}
-	if (identical(null.model,"both")) {
-        #check for presence-absence and warn until abundance implemented
-        x <- decostand(samp, "pa")
-        if (!identical(x,samp)) stop("Null model currently requires a presence-absence matrix.")
-        sppFreq <- apply(x, 2, sum)/ncol(x)
-        siteRichness <- apply(x, 1, sum)
-        sampleList <- vector()
-        sppList <- vector()
-        for (siteNum in 1:length(siteRichness)) {
-            sampleList <- c(sampleList, rep(names(siteRichness)[siteNum], 
-                siteRichness[siteNum]))
-            sppList <- c(sppList, sample(names(sppFreq), siteRichness[siteNum], 
-                replace = FALSE, prob = sppFreq))
+
+	if (identical(null.model,"independentswap")) 
+    {
+        if (identical(lang,"R")) {
+            #check for presence-absence and warn until abundance implemented
+            x <- decostand(samp, "pa")
+            if (!identical(x,samp)) stop("Null model currently requires a presence-absence matrix.")
+            sppFreq <- apply(x, 2, sum)/ncol(x)
+            siteRichness <- apply(x, 1, sum)
+            sampleList <- vector()
+            sppList <- vector()
+            for (siteNum in 1:length(siteRichness)) {
+                sampleList <- c(sampleList, rep(names(siteRichness)[siteNum], 
+                    siteRichness[siteNum]))
+                sppList <- c(sppList, sample(names(sppFreq), siteRichness[siteNum], 
+                    replace = FALSE, prob = sppFreq))
+            }
+            shuffledList <- data.frame(sample = sampleList, species = sppList, 
+                p = rep(1, length(sppList)))
+            shuffledMatrix <- tapply(shuffledList$p, list(shuffledList$sample, 
+                shuffledList$species), sum)
+            shuffledMatrix[is.na(shuffledMatrix)] <- 0
+            if (identical(dim(shuffledMatrix),dim(x))) {
+                return(shuffledMatrix[rownames(x), ])        
+            } else {
+                mergedFrame <- merge(shuffledMatrix, x, all.y = TRUE)
+                mergedFrame[is.na(mergedFrame)] <- 0
+                return(mergedFrame[rownames(x),colnames(x)])
+            }        
+        } else {
+            ret1 <- .C("independentswap", m=as.numeric(samp), as.integer(it), as.integer(nrow(samp)), as.integer(ncol(samp)), PACKAGE="picante")
+            return(matrix(ret1$m,nrow=nrow(samp),dimnames=list(rownames(samp),colnames(samp))))        
         }
-        shuffledList <- data.frame(sample = sampleList, species = sppList, 
-            p = rep(1, length(sppList)))
-        shuffledMatrix <- tapply(shuffledList$p, list(shuffledList$sample, 
-            shuffledList$species), sum)
-        shuffledMatrix[is.na(shuffledMatrix)] <- 0
-        if (identical(dim(shuffledMatrix),dim(x))) {
-            return(shuffledMatrix[rownames(x), ])        
+	}
+	
+    if (identical(null.model,"trialswap")) 
+    {
+        if (identical(lang,"R")) {
+            stop("Trial swap implemented in C only")
+        } else {
+            ret1 <- .C("trialswap", m=as.numeric(samp), as.integer(it), as.integer(nrow(samp)), as.integer(ncol(samp)), PACKAGE="picante")
+            return(matrix(ret1$m,nrow=nrow(samp),dimnames=list(rownames(samp),colnames(samp))))        
         }
-        else
-        {
-            mergedFrame <- merge(shuffledMatrix, x, all.y = TRUE)
-            mergedFrame[is.na(mergedFrame)] <- 0
-            return(mergedFrame[rownames(x),colnames(x)])
-        }
 	}
+
 }
-

Modified: pkg/man/pblm.Rd
===================================================================
--- pkg/man/pblm.Rd	2008-08-13 06:01:45 UTC (rev 162)
+++ pkg/man/pblm.Rd	2008-08-13 22:30:49 UTC (rev 163)
@@ -1,11 +1,14 @@
 \name{pblm}
 \alias{pblm}
+\alias{pblmpredict}
 \title{ Phylogenetic Bipartite Linear Model }
 \description{
   Fits a linear model to the association strengths of a bipartite data set with or without phylogenetic correlation among the interacting species
 }
 \usage{
 pblm(assocs,tree1=NULL,tree2=NULL,covars1=NULL,covars2=NULL,bootstrap=FALSE,nreps=10,maxit=10000,pstart=c(.5,.5))
+pblmpredict(x,tree1.w.novel=NULL,tree2.w.novel=NULL,predict.originals=FALSE)
+
 }
 
 \arguments{
@@ -18,6 +21,10 @@
   \item{nreps}{ Number of bootstrap replicated data sets to estimate parameter CIs }
   \item{maxit}{ as in \code{\link{optim}} }
   \item{pstart}{ starting values of the two phylogenetic signal strength parameters passed to \code{\link{optim}} }
+  \item{x}{ object of class \code{pblm} }
+  \item{tree1.w.novel}{ A phylo tree object or a phylogenetic covariance matrix which corresponds to \code{tree1} of \code{x} with species to predict associations }
+  \item{tree2.w.novel}{ A phylo tree object or a phylogenetic covariance matrix which corresponds to \code{tree2} of \code{x} with species to predict associations } 
+  \item{predict.originals}{ if \code{TRUE} then the associations of each original species in the two phylogenies is predicted } 
 }
 
 \details{
@@ -28,35 +35,44 @@
  evolution; \emph{0<d<1} represents stabilizing selection; \emph{d=0} depicts the absence of phylogenetic correlation (i.e., a star phylogeny); and \emph{d>1} corresponds 
  to disruptive selection where phylogenetic signal is amplified. Confidence intervals for these and the other parameters can be estimated with 
  bootstrapping.
- 
+ \cr
+ \cr
+ The function \code{pblmpredict} predicts the associations of novel species following the methods given in appendix B of Ives and Godfray (2006). 
  }
 
 \value{
-	The function returns a list with:
   \item{MSE}{ total, full (each \emph{d} estimated), star (\emph{d=0}), and base (\emph{d=1}) mean squared errors }
   \item{signal.strength}{ two estimates of phylogenetic signal strength }
   \item{coefficients}{ estimated intercept and covariate coefficients with approximate 95 percent CIs for the three model types (full, star, base) }
   \item{CI.boot}{ 95 percent CIs for all parameters }
   \item{variates}{ matrix of model variates (can be used for plotting) }
   \item{residuals}{ matrix of residuals from the three models (full, star and base) }
+  \item{predicted}{ predicted associations }
   \item{bootvalues}{ matrix of parameters estimated from the \code{nreps} bootstrap replicated data sets used to calculate CIs }
-  
-}
+  \item{phylocovs}{ phylogenetic covariance matricies scaled by the estimated \code{d1} and \code{d2} }
+  \item{cors.1}{ correlations among predicted and observed associations for species of \code{tree1}, \code{NA} if \code{predict.originals=FALSE} }
+  \item{cors.2}{ correlations among predicted and observed associations for species of \code{tree2},  \code{NA} if \code{predict.originals=FALSE} }
+  \item{pred.novels1}{ predicted associations for the novel speices of \code{tree1} }
+  \item{pred.novels2}{ predicted associations for the novel speices of \code{tree2} }
+}      
 
+
+
 \note{Covariates that apply to both species sets (e.g., sampling site) should be supplied in the covariate matrix of the set with the most species.
 \cr
 \cr
 Bootstrapping CIs is slow due to the function \code{\link{optim}} used to estimate the model parameters. See appendix A in Ives and Godfray (2006) 
-for a discussion about this boostrapping procedure}
-
-
-
+for a discussion about this boostrapping procedure
+\cr
+\cr
+If \code{pblmpredict=TRUE} the function does not first remove each species in turn when predicting the associations of the original species as 
+is done in Ives and Godfray (2006).}
+ 
 \references{Ives A.R. & Godfray H.C. (2006) Phylogenetic analysis of trophic associations. The American Naturalist, 168, E1-E14 \cr
 \cr
 Blomberg S.P., Garland T.J. & Ives A.R. (2003) Testing for phylogenetic signal in comparative data: Behavioral traits are more labile. Evolution, 57, 717-745
 }
 
 \author{ Matthew Helmus \email{mrhelmus at gmail.com} }
-\seealso{ the K metric, \code{\link{Kcalc}}, uses the same Ornstein-Uhlenbeck model of evolution }
 
 \keyword{univar}
\ No newline at end of file

Modified: pkg/man/phylostruct.Rd
===================================================================
--- pkg/man/phylostruct.Rd	2008-08-13 06:01:45 UTC (rev 162)
+++ pkg/man/phylostruct.Rd	2008-08-13 22:30:49 UTC (rev 163)
@@ -4,8 +4,8 @@
 \description{ Randomize sample/community data matrices to create null distributions of given metrics
 }
 \usage{
-phylostruct(samp, tree, env=NULL, metric=c("psv","psr","pse","psc","sppregs"), null.model=c("frequency","richness","both"),
-            runs=10, alpha=0.05, fam="binomial")
+phylostruct(samp, tree, env=NULL, metric=c("psv","psr","pse","psc","sppregs"), null.model=c("frequency",
+            "richness","independentswap","trialswap"), runs=100, it=1000, alpha=0.05, fam="binomial")
 }
 \arguments{
   \item{samp}{ community data matrix, species as columns, communities as rows }
@@ -14,6 +14,7 @@
   \item{metric}{ if \code{metric="psv"}, \code{"psr"}, \code{"pse"}, or \code{"psc"} compares the observed mean of the respective metric to a null distribution at a given alpha; if \code{metric="sppregs"} compares the three correlations produced by \code{\link{sppregs}} to null distributions }
   \item{null.model}{ permutation procedure used to create the null distribution, see \code{\link{randomizeSample}}}
   \item{runs}{ the number of permutations to create the distribution, a rule of thumb is (number of communities)/alpha  }
+  \item{it}{ the number of swaps for the independent and trial-swap null models, see \code{\link{randomizeSample}} }
   \item{alpha}{ probability value to compare the observed mean/correlations to a null distribution }
   \item{fam}{ as in \code{\link{sppregs}} }
 }
@@ -24,6 +25,7 @@
 \item{metric}{ metric used }
 \item{null.model}{ permutation used }
 \item{runs}{ number of permutations }
+\item{it}{ number of swaps if applicable }
 \item{obs}{ observed mean value of a particular metric or the three observed correlations from \code{\link{sppregs}} }
 \item{mean.null}{ mean(s) of the null distribution(s) }
 \item{quantiles.null}{  quantiles of the null distribution(s) to compare to \code{obs}; determined by \code{alpha}}
@@ -42,5 +44,4 @@
 \author{ Matthew Helmus \email{mrhelmus at gmail.com} }
 \seealso{ \code{\link{psd}} ,\code{\link{sppregs}}, \code{\link{randomizeSample}} }
 
-\keyword{univar}
-
+\keyword{univar}
\ No newline at end of file

Modified: pkg/man/randomizeSample.Rd
===================================================================
--- pkg/man/randomizeSample.Rd	2008-08-13 06:01:45 UTC (rev 162)
+++ pkg/man/randomizeSample.Rd	2008-08-13 22:30:49 UTC (rev 163)
@@ -6,7 +6,7 @@
   Various null models for randomizing community data matrices
 }
 \usage{
-randomizeSample(samp, null.model = c("frequency", "richness", "both"))
+randomizeSample(samp, null.model = c("frequency", "richness", "independentswap","trialswap"), lang=c("C","R"), it=1000)
 }
 
 \arguments{
@@ -14,15 +14,21 @@
   \item{null.model}{ Null model
     \item{frequency}{Randomize community data matrix abundances within species (maintains species occurence frequency)}
     \item{richness}{Randomize community data matrix abundances within samples (maintains sample species richness)}
-    \item{both}{Randomize community data matrix by drawing species from sample pool with probability weighted by occurrence frequency (maintains both species occurrence frequency and sample species richness)}
+    \item{independentswap}{Randomize community data matrix with the independent-swap algorithm (Gotelli 2000) by drawing species from sample pool with probability weighted by occurrence frequency (maintains both species occurrence frequency and sample species richness)}
+    \item{trialswap}{Randomize community data matrix with the trial-swap algorithm (Miklos & Podani 2004) by drawing species from sample pool with probability weighted by occurrence frequency (maintains both species occurrence frequency and sample species richness)}
   }
+  \item{lang}{run code in C or R? C is faster.} 
+  \item{it}{ number of independent or trial-swaps to perform }
 }
 \value{
   Randomized community data matrix
 }
-\references{Gotelli, N.J. 2000. Null model analysis of species co-occurrence patterns. Ecology 81: 2606-2621.}
+\references{Gotelli, N.J. 2000. Null model analysis of species co-occurrence patterns. Ecology 81: 2606-2621.
+\cr
+\cr
+Miklos I. & Podani J. 2004. Randomization of presence-absence matrices: Comments and new algorithms. Ecology 85: 86-92.
+}
 \author{ Steve Kembel <skembel at berkeley.edu> }
-\section{Warning }{ Null model \code{both} currently only works with presence-absence data. Convert your data to presence-absence before using this null model (e.g. \code{decostand(x,method="pa")})}
 \examples{
 data(phylocom)
 randomizeSample(phylocom$sample, null.model="richness")}

Copied: pkg/src (from rev 162, branches/gsoc/src)