[Picante-commits] r101 - in pkg: R man

Wed Jun 11 23:27:54 CEST 2008

Author: skembel
Date: 2008-06-11 23:27:54 +0200 (Wed, 11 Jun 2008)
New Revision: 101

Added:
   pkg/R/specaccum.psr.R
   pkg/man/pd.Rd
   pkg/man/psd.Rd
   pkg/man/specaccum.psr.Rd
Modified:
   pkg/R/phylodiversity.R
Log:
Merged gsoc branch into trunk. Huzzah.

Modified: pkg/R/phylodiversity.R
===================================================================

--- pkg/R/phylodiversity.R	2008-06-11 21:20:24 UTC (rev 100)
+++ pkg/R/phylodiversity.R	2008-06-11 21:27:54 UTC (rev 101)
@@ -123,3 +123,363 @@
     data.frame(ntaxa=specnumber(samp),mnnd.obs, mnnd.rand.mean, mnnd.rand.sd, mnnd.obs.rank, 
         mnnd.obs.z, mnnd.obs.p=mnnd.obs.rank/(runs+1),runs=runs, row.names = row.names(samp))
 }
+
+psv<-function(samp,tree,compute.var=TRUE){
+  # Make samp matrix a pa matrix
+  samp[samp>0]<-1
+  
+  flag=0
+  if (is.null(dim(samp))) #if the samp matrix only has one site
+  {
+    samp<-rbind(samp,samp)
+    flag=2
+  }
+
+  if(is(tree)[1]=="phylo")
+  {
+    tree<-prune.sample(samp,tree)
+    # Make sure that the species line up
+    samp<-samp[,tree$tip.label]
+    # Make a correlation matrix of the species pool phylogeny
+    Cmatrix<-vcv.phylo(tree,cor=TRUE)
+  } else {
+    Cmatrix<-tree
+    species<-colnames(samp)
+    preval<-colSums(samp)/sum(samp)
+    species<-species[preval>0]
+    Cmatrix<-Cmatrix[species,species]
+    samp<-samp[,colnames(Cmatrix)]
+  }
+  
+    # reduce given Cmatrix to the species observed in samp
+    SR<-rowSums(samp)
+    samp<-samp[SR>1,] # prune out locations with <2 species
+    SR<-SR[SR>1]
+  
+    #cull the species that are not found in the samp set after all communities with 1 species are removed
+    preval<-colSums(samp)/sum(samp)
+    indexcov<-preval>0
+    Cmatrix<-Cmatrix[indexcov,indexcov]
+    samp<-samp[,indexcov]
+  
+    # numbers of locations and species
+    nlocations<-dim(samp)[1]
+    nspecies<-dim(samp)[2]
+  
+    ##################################
+    # calculate observed PSVs
+    #
+    PSVs<-NULL
+  
+    for(i in 1:nlocations)
+    {
+      index<-seq(1:nrow(Cmatrix))[samp[i,]==1]	#species present
+      n<-length(index)			#number of species present
+      C<-Cmatrix[index,index]	#C for individual locations
+      PSV<-(n*sum(diag(as.matrix(C)))-sum(C))/(n*(n-1))
+      PSVs<-c(PSVs,PSV)
+    }
+      PSVout<-cbind(PSVs,SR)
+  
+  if(flag==2)
+  {
+    PSVout<-PSVout[-2,]
+    return(PSVout)
+  } else {
+    if(compute.var==FALSE) 
+    {
+      return(data.frame(PSVout))
+    } else {
+    
+      PSVvar<-NULL
+      X<-Cmatrix-(sum(sum(Cmatrix-diag(nspecies))))/(nspecies*(nspecies-1));
+      X<-X-diag(diag(X))
+  
+      SS1<-sum(X*X)/2
+  
+      SS2<-0;
+      for(i in 1:(nspecies-1)){
+        for(j in (i+1):nspecies){
+          SS2<-SS2+X[i,j]*(sum(X[i,])-X[i,j])
+        }
+      }
+      SS3<--SS1-SS2
+  
+      S1<-SS1*2/(nspecies*(nspecies-1))
+      S2<-SS2*2/(nspecies*(nspecies-1)*(nspecies-2))
+  
+      if(nspecies==3){
+        S3<-0
+      }else{
+      S3<-SS3*2/(nspecies*(nspecies-1)*(nspecies-2)*(nspecies-3))
+      }
+  
+      for(n in 2:nspecies){
+        approxi<-2/(n*(n-1))*(S1 + (n-2)*S2 + (n-2)*(n-3)*S3)
+        PSVvar<-rbind(PSVvar, c(n, approxi))
+      }
+  
+      vars<-rep(0,nlocations)
+      PSVout<-cbind(PSVout,vars)
+  
+      for (g in 1:nlocations)
+      {
+        PSVout[g,3]<-PSVvar[PSVout[g,2]-1,2]
+      }
+      return(data.frame(PSVout))
+    }
+  }
+}
+
+
+psr <- function(samp,tree,compute.var=TRUE){
+  PSVout<-psv(samp,tree,compute.var)
+  if(is.null(dim(PSVout))==TRUE) 
+  {
+    PSRout<-data.frame(cbind(PSVout[1]*PSVout[2],PSVout[2]))
+    names(PSRout)<-c("PSR","SR")
+    rownames(PSRout)<-""
+    return(PSRout)
+  } else {
+    PSRout<-cbind(PSVout[,1]*PSVout[,2],PSVout[,2])
+    if(compute.var!=TRUE) {
+      colnames(PSRout)<-c("PSR","SR")
+      return(data.frame(PSRout))
+    } else {
+      PSRout<-cbind(PSRout,PSVout[,3]*(PSVout[,2])^2)       
+      colnames(PSRout)<-c("PSR","SR","vars")
+      return(data.frame(PSRout))
+    }
+  }
+}
+
+pse<-function(samp,tree){
+  flag=0
+  if (is.null(dim(samp))) #if the samp matrix only has one site
+  {
+    samp<-rbind(samp,samp)
+    flag=2
+  }
+
+  if(is(tree)[1]=="phylo")
+  {
+    tree<-prune.sample(samp,tree) 
+    # Make sure that the species line up
+    samp<-samp[,tree$tip.label]
+    # Make a correlation matrix of the species pool phylogeny
+    Cmatrix<-vcv.phylo(tree,cor=TRUE)
+  } else {
+    Cmatrix<-tree
+    species<-colnames(samp)
+    preval<-colSums(samp)/sum(samp)
+    species<-species[preval>0]
+    Cmatrix<-Cmatrix[species,species]
+    samp<-samp[,colnames(Cmatrix)]
+  }
+  
+  # reduce given Cmatrix to the species observed in samp
+  SR<-apply(samp>0,1,sum)
+  # prune out locations with <2 species
+  samp<-samp[SR>1,]
+  SR<-SR[SR>1]
+
+  #cull the species that are not found in the samp set after all communities with 1 species are removed
+  preval<-colSums(samp)/sum(samp)
+  indexcov<-preval>0
+  Cmatrix<-Cmatrix[indexcov,indexcov]
+  samp<-samp[,indexcov]
+
+  # numbers of locations and species
+  nlocations<-dim(samp)[1]
+  nspecies<-dim(samp)[2]
+
+  #################################
+  # calculate observed phylogenetic species evenness
+  PSEs<-NULL
+  for(i in 1:nlocations){
+    index<-seq(1,ncol(Cmatrix))[samp[i,]>0]	  # species present
+    C<-Cmatrix[index,index]		                #C for individual locations
+    n<-length(index)                          #location species richness
+    N<-sum(samp[i,])                          #location total abundance
+    M<-samp[i,samp[i,]>0]                  #species abundance column
+    mbar<-mean(M)                             #mean species abundance
+    PSE<-(N*t(diag(as.matrix(C)))%*%M-t(M)%*%as.matrix(C)%*%M)/(N^2-N*mbar)   #phylogenetic species evenness
+    PSEs<-c(PSEs,PSE)
+  }
+  PSEout=cbind(PSEs,SR)
+  if (flag==2)
+  {
+    PSEout<-PSEout[-2,]
+    return(PSEout)  
+  } else {
+    return(data.frame(PSEout))
+  }
+}
+
+psc<-function(samp,tree){
+  # Make samp matrix a pa matrix
+  samp[samp>0]<-1
+  flag=0
+  if (is.null(dim(samp))) #if the samp matrix only has one site
+  {
+    samp<-rbind(samp,samp)
+    flag=2
+  }
+
+  if(is(tree)[1]=="phylo")
+  {
+    tree<-prune.sample(samp,tree)
+    # Make sure that the species line up
+    samp<-samp[,tree$tip.label]
+    # Make a correlation matrix of the species pool phylogeny
+    Cmatrix<-vcv.phylo(tree,cor=TRUE)
+  } else {
+    Cmatrix<-tree
+    species<-colnames(samp)
+    preval<-colSums(samp)/sum(samp)
+    species<-species[preval>0]
+    Cmatrix<-Cmatrix[species,species]
+    samp<-samp[,colnames(Cmatrix)]
+  }
+
+  # reduce given Cmatrix to the species observed in samp
+  SR<-rowSums(samp)
+  samp<-samp[SR>1,] # prune out locations with <2 species
+  SR<-SR[SR>1]
+
+  #cull the species that are not found in the samp set after all communities with 1 species are removed
+  preval<-colSums(samp)/sum(samp)
+  indexcov<-preval>0
+  Cmatrix<-Cmatrix[indexcov,indexcov]
+  samp<-samp[,indexcov]
+
+  # numbers of locations and species
+  nlocations<-dim(samp)[1]
+  nspecies<-dim(samp)[2]
+
+  ##################################
+  # calculate observed PSCs
+  #
+  PSCs<-NULL
+
+  for(i in 1:nlocations)
+  {
+    index<-seq(1:nrow(Cmatrix))[samp[i,]==1]	#species present
+    n<-length(index)			#number of species present
+    C<-Cmatrix[index,index]	#C for individual locations
+    diag(C)<--1
+    PSC<-sum(apply(C,1,max))/n
+    PSCs<-c(PSCs,PSC)
+  }
+    PSCout<-cbind(PSCs,SR)
+ if (flag==2)
+  {
+    PSCout<-PSCout[-2,]
+    return(PSCout)  
+  } else {
+    return(data.frame(PSCout))
+  }
+}
+
+psv.spp<-function(samp,tree){
+  # Make samp matrix a pa matrix
+  samp[samp>0]<-1
+  if (is.null(dim(samp))) #if the samp matrix only has one site
+  {
+    samp<-rbind(samp,samp)
+  }  
+  if(is(tree)[1]=="phylo")
+  {
+    tree<-prune.sample(samp,tree)
+    # Make sure that the species line up
+    samp<-samp[,tree$tip.label]
+    # Make a correlation matrix of the species pool phylogeny
+    Cmatrix<-vcv.phylo(tree,cor=TRUE)
+  } else {
+    Cmatrix<-tree
+    species<-colnames(samp)
+    preval<-colSums(samp)/sum(samp)
+    species<-species[preval>0]
+    Cmatrix<-Cmatrix[species,species]
+    samp<-samp[,colnames(Cmatrix)]
+  }
+  # reduce given Cmatrix to the species observed in samp
+  samp<-samp[rowSums(samp)>1,] # prune out locations with <2 species
+
+  #cull the species that are not found in the samp set after all communities with 1 species are removed
+  preval<-colSums(samp)/sum(samp)
+  indexcov<-preval>0
+  Cmatrix<-Cmatrix[indexcov,indexcov]
+  samp<-samp[,indexcov]
+
+  obs.PSV<-mean(psv(samp,Cmatrix,compute.var=FALSE)[,1])
+
+  # numbers of locations and species
+  nlocations<-dim(samp)[1]
+  nspecies<-dim(samp)[2]
+
+  spp.PSVs<-NULL
+  for (j in 1:nspecies)
+  {
+    spp.samp<-samp[,-j]
+    spp.Cmatrix<-Cmatrix[-j,-j]
+    spp.PSV<-mean(psv(spp.samp,spp.Cmatrix,compute.var=FALSE)[,1])
+    spp.PSVs<-c(spp.PSVs,spp.PSV)
+  }
+  spp.PSVout<-(spp.PSVs-obs.PSV)/sum(abs(spp.PSVs-obs.PSV))
+  names(spp.PSVout)<-colnames(samp)
+  return(spp.PSVout)
+}
+
+psd<-function(samp,tree,compute.var=TRUE){
+  if (is.null(dim(samp))) #if the samp matrix only has one site
+  {
+    PSDout<-data.frame(c(psv(samp,tree,compute.var)[1],psc(samp,tree)[1],psr(samp,tree,compute.var)[1],pse(samp,tree)))
+    names(PSDout)<-c("PSV","PSC","PSR","PSE","SR")
+    return(PSDout)
+  } else {
+    if (compute.var==TRUE)
+    {
+      PSDout<-cbind(psv(samp,tree,compute.var)[,c(1,3)],psc(samp,tree)[,1],psr(samp,tree,compute.var)[,c(1,3)],pse(samp,tree))
+      colnames(PSDout)<-c("PSV","var.PSV","PSC","PSR","var.PSR","PSE","SR")
+    } else {
+      PSDout<-cbind(psv(samp,tree,compute.var)[,1],psc(samp,tree)[,1],psr(samp,tree,compute.var)[,1],pse(samp,tree))
+      colnames(PSDout)<-c("PSV","PSC","PSR","PSE","SR")
+    }
+    return(data.frame(PSDout))
+  }
+}
+
+pd<-function(samp,tree){
+  
+  # Make sure that the species line up
+  tree<-prune.sample(samp,tree)
+  samp<-samp[,tree$tip.label]
+  species<-colnames(samp)
+  
+  # reduce given Cmatrix to the species observed in samp
+  SR<-rowSums(samp)
+  samp<-samp[SR>1,] # prune out locations with <2 species
+  SR<-SR[SR>1]
+
+  # numbers of locations and species
+  nlocations=dim(samp)[1]
+  nspecies=dim(samp)[2]
+
+  ##################################
+  # calculate observed PDs
+  #
+  PDs=NULL
+
+  for(i in 1:nlocations)
+  {  
+    index<-species[samp[i,]==0]	#species not in sample
+    sub.tree<-drop.tip(tree,index)
+    PDs<-c(PDs,sum(sub.tree$edge.length))
+  }
+  
+  PDout<-data.frame(cbind(PDs,SR))
+  rownames(PDout)<-rownames(samp) 
+  return(PDout)
+}
+

Copied: pkg/R/specaccum.psr.R (from rev 100, branches/gsoc/R/specaccum.psr.R)
===================================================================
--- pkg/R/specaccum.psr.R	                        (rev 0)
+++ pkg/R/specaccum.psr.R	2008-06-11 21:27:54 UTC (rev 101)
@@ -0,0 +1,43 @@
+specaccum.psr<-function (samp, tree, permutations = 100, method = "random", ...)
+{
+
+#function adapted from the vegan package specaccum
+
+  x <- as.matrix(samp)
+  n <- nrow(x)
+  p <- ncol(x)
+  if (p == 1)
+  {
+    x <- t(x)
+    n <- nrow(x)
+    p <- ncol(x)
+  }
+  accumulator <- function(x,ind,tree)
+  {
+    n <- nrow(x)
+    p <- ncol(x)
+    xx<-x
+    xx[1:n,1:p]<-0
+    xx[apply(x[ind, ], 2, cumsum)>0]<-1
+    PSV<-psv(xx,tree,compute.var=FALSE)
+    PSV[,1]*PSV[,2]
+  }
+  METHODS <- c("collector", "random", "exact", "rarefaction",
+        "coleman")
+    method <- match.arg(method, METHODS)
+
+  specaccum <- sdaccum <- sites <- perm <- NULL
+  perm <- array(dim = c(n, permutations))
+  for (i in 1:permutations)
+  {
+    r.x=0
+    while(length(r.x)<n){r.x <- accumulator(x, sample(n),tree)}
+    perm[, i]<-r.x
+  }
+  sites <- 1:n
+  specaccum <- apply(perm, 1, mean)
+  sdaccum <- apply(perm, 1, sd)
+  out <- list(call = match.call(), method = method, sites = sites, richness = specaccum, sd = sdaccum, perm = perm)
+  class(out) <- "specaccum"
+  out
+}
\ No newline at end of file

Copied: pkg/man/pd.Rd (from rev 100, branches/gsoc/man/pd.Rd)
===================================================================
--- pkg/man/pd.Rd	                        (rev 0)
+++ pkg/man/pd.Rd	2008-06-11 21:27:54 UTC (rev 101)
@@ -0,0 +1,32 @@
+\name{pd}
+\alias{pd}
+
+\title{ Calculate Faith's Phylogenetic Diversity }
+\description{
+  Calculate the sum of the total branch lengths for one or multiple samples.
+}
+\usage{
+pd(samp, tree)
+}
+
+\arguments{
+  \item{samp}{ Community data matrix }
+  \item{tree}{ A phylo tree object }
+}
+
+\value{
+	Returns a dataframe of the PD and species richness values for all samples
+}
+
+\note{This function does not calculate PD for samples with one or zero species. The data sets need not be species-community data sets but may be any sample
+  data set with an associated phylogeny. PD is not statistically independent of species richness, it positively correlates with species richness across samples.
+}
+
+
+\references{ Faith D.P. (1992) Conservation evaluation and phylogenetic diversity. Biological Conservation, 61, 1-10 }
+\author{ Matthew Helmus \email{mrhelmus at gmail.com}\cr
+         Jonathan Davies \email{davies at nceas.ucsb.edu}
+ }
+\seealso{ \code{\link{psr}} }
+
+\keyword{univar}

Copied: pkg/man/psd.Rd (from rev 100, branches/gsoc/man/psd.Rd)
===================================================================
--- pkg/man/psd.Rd	                        (rev 0)
+++ pkg/man/psd.Rd	2008-06-11 21:27:54 UTC (rev 101)
@@ -0,0 +1,68 @@
+\name{psd}
+\alias{psd}
+\alias{psv.spp}
+\alias{psv}
+\alias{psr}
+\alias{pse}
+\alias{psc}
+\title{ Phylogenetic Species Diversity Metrics }
+\description{
+  Calculate the bounded phylogenetic biodiversity metrics: phylogenetic species variability, richness, evenness and clustering for one or multiple samples.
+}
+\usage{
+psv(samp,tree,compute.var=TRUE)
+psr(samp,tree,compute.var=TRUE)
+pse(samp,tree)
+psc(samp,tree)
+psd(samp,tree,compute.var=TRUE)
+psv.spp(samp,tree)
+}
+
+\arguments{
+  \item{samp}{ Community data matrix }
+  \item{tree}{ A phylo tree object or a phylogenetic covariance matrix }
+  \item{compute.var}{ Computes the expected variances for PSV and PSR for each community }
+}
+\details{
+ \emph{Phylogenetic species variability (PSV)} quantifies how phylogenetic relatedness decreases the variance of a hypothetical unselected/neutral trait
+ shared by all species in a community. The expected value of PSV is statistically independent of species richness, is one when all species in a sample
+ are unrelated (i.e., a star phylogeny) and approaches zero as species become more related. PSV is directly related to mean phylogenetic distance. The 
+ expected variance around PSV for any sample of a particular species richness can be approximated. 
+ To address how individual species contribute to the mean PSV of a data set, the function \code{psv.spp} gives signed proportions of the total deviation 
+ from the mean PSV that occurs when all species are removed from the data set one at a time. The absolute values of these \dQuote{species effects} 
+ tend to positively correlate with species prevalence. 
+ \cr
+ \cr
+ \emph{Phylogenetic species richness (PSR)} is the number of species in a sample multiplied by PSV. It can be considered the species richness of a sample
+ after discounting by species relatedness. The value is maximum at the species richness of the sample, and decreases towards zero as relatedness 
+ increases. The expected variance around PSR for any sample of a particular species richness can be approximated.
+ \cr
+ \cr
+ \emph{Phylogenetic species evenness (PSE)} is the metric PSV modified to incorporate relative species abundances. The maximum attainable value of PSE (i.e., 1)
+  occurs only if species abundances are equal and species phylogeny is a star. PSE essentially grafts each individual of a species onto the tip of the 
+  phylogeny of its species with branch lengths of zero.
+  \cr
+  \cr
+  \emph{Phylogenetic species clustering (PSC)} is a metric of the branch tip clustering of species across a sample's phylogeny. As PSC increases to 1, species
+   are less related to one another the tips of the phylogeny. PSC is directly related to mean nearest neighbor distance.
+}
+
+\value{
+	Returns a dataframe of the respective phylogenetic species diversity metric values  
+}
+
+\note{These metrics are bounded either between zero and one (PSV, PSE, PSC) or zero and species richness (PSR); but the metrics asymptotically 
+  approach zero as relatedness increases. Zero can be assigned to communities with less than two species, but conclusions drawn from assigning 
+  communities zero values need be carefully explored for any data set. The data sets need not be species-community data sets but may be any sample 
+  data set with an associated phylogeny. 
+}
+
+
+\references{ Helmus M.R., Bland T.J., Williams C.K. & Ives A.R. (2007) Phylogenetic measures of biodiversity. American Naturalist, 169, E68-E83 }
+\author{ Matthew Helmus \email{mrhelmus at gmail.com} }
+\seealso{ \code{\link{mpd}} ,\code{\link{mnnd}}, \code{\link{specaccum.psr}} }
+
+\keyword{univar}
+
+
+

Copied: pkg/man/specaccum.psr.Rd (from rev 100, branches/gsoc/man/specaccum.psr.Rd)
===================================================================
--- pkg/man/specaccum.psr.Rd	                        (rev 0)
+++ pkg/man/specaccum.psr.Rd	2008-06-11 21:27:54 UTC (rev 101)
@@ -0,0 +1,37 @@
+\name{specaccum.psr}
+\alias{specaccum.psr}
+
+\title{ Phylogenetic Species Richness Sample-Based Rarefaction Curve }
+\description{
+  Finds a sample-based rarefaction curve for phylogentic species richness for a set of samples.
+}
+\usage{
+specaccum.psr(samp, tree, permutations = 100, method = "random", ...)
+}
+
+\arguments{
+  \item{samp}{ Community data matrix }
+  \item{tree}{ A phylo tree object or a phylogenetic covariance matrix}
+  \item{permutations}{ Number of permutations with method \code{method= "random"} }
+  \item{method}{ Species accumulation method, currently only \code{"random"} is supported which adds samples in random order. }
+  \item{\dots}{ Other parameters to functions }
+}
+
+\value{
+The function returns an object of class \code{"specaccum"} with items:
+
+\item{call}{ Function call. }
+\item{method}{ Accumulator method. }
+\item{sites}{ Number of sites/samples. }
+\item{richness}{ The mean phylogenetic species richness corresponding to number of sites/samples. }
+\item{sd}{ The standard deviation of phylogenetic apecies accumulation curve (or its standard error) estimated from permutations in \code{method = "random"}. }
+\item{perm}{ Permutation results with \code{method = "random"} and NULL in other cases. Each column in perm holds one permutation. }
+}
+
+\references{ Gotelli N.J. & Colwell R.K. (2001) Quantifying biodiversity: procedures and pitfalls in the measurement and comparison of species richness. Ecology Letters, 4, 379-391\cr
+              \cr Helmus M.R., Bland T.J., Williams C.K. & Ives A.R. (2007) Phylogenetic measures of biodiversity. American Naturalist, 169, E68-E83 }
+\author{ Matthew Helmus \email{mrhelmus at gmail.com}
+ }
+\seealso{ \code{\link{psr}}, \code{\link[vegan]{specaccum}}}
+
+\keyword{univar}