[Picante-commits] r101 - in pkg: R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Jun 11 23:27:54 CEST 2008
Author: skembel
Date: 2008-06-11 23:27:54 +0200 (Wed, 11 Jun 2008)
New Revision: 101
Added:
pkg/R/specaccum.psr.R
pkg/man/pd.Rd
pkg/man/psd.Rd
pkg/man/specaccum.psr.Rd
Modified:
pkg/R/phylodiversity.R
Log:
Merged gsoc branch into trunk. Huzzah.
Modified: pkg/R/phylodiversity.R
===================================================================
--- pkg/R/phylodiversity.R 2008-06-11 21:20:24 UTC (rev 100)
+++ pkg/R/phylodiversity.R 2008-06-11 21:27:54 UTC (rev 101)
@@ -123,3 +123,363 @@
data.frame(ntaxa=specnumber(samp),mnnd.obs, mnnd.rand.mean, mnnd.rand.sd, mnnd.obs.rank,
mnnd.obs.z, mnnd.obs.p=mnnd.obs.rank/(runs+1),runs=runs, row.names = row.names(samp))
}
+
+psv<-function(samp,tree,compute.var=TRUE){
+ # Make samp matrix a pa matrix
+ samp[samp>0]<-1
+
+ flag=0
+ if (is.null(dim(samp))) #if the samp matrix only has one site
+ {
+ samp<-rbind(samp,samp)
+ flag=2
+ }
+
+ if(is(tree)[1]=="phylo")
+ {
+ tree<-prune.sample(samp,tree)
+ # Make sure that the species line up
+ samp<-samp[,tree$tip.label]
+ # Make a correlation matrix of the species pool phylogeny
+ Cmatrix<-vcv.phylo(tree,cor=TRUE)
+ } else {
+ Cmatrix<-tree
+ species<-colnames(samp)
+ preval<-colSums(samp)/sum(samp)
+ species<-species[preval>0]
+ Cmatrix<-Cmatrix[species,species]
+ samp<-samp[,colnames(Cmatrix)]
+ }
+
+ # reduce given Cmatrix to the species observed in samp
+ SR<-rowSums(samp)
+ samp<-samp[SR>1,] # prune out locations with <2 species
+ SR<-SR[SR>1]
+
+ #cull the species that are not found in the samp set after all communities with 1 species are removed
+ preval<-colSums(samp)/sum(samp)
+ indexcov<-preval>0
+ Cmatrix<-Cmatrix[indexcov,indexcov]
+ samp<-samp[,indexcov]
+
+ # numbers of locations and species
+ nlocations<-dim(samp)[1]
+ nspecies<-dim(samp)[2]
+
+ ##################################
+ # calculate observed PSVs
+ #
+ PSVs<-NULL
+
+ for(i in 1:nlocations)
+ {
+ index<-seq(1:nrow(Cmatrix))[samp[i,]==1] #species present
+ n<-length(index) #number of species present
+ C<-Cmatrix[index,index] #C for individual locations
+ PSV<-(n*sum(diag(as.matrix(C)))-sum(C))/(n*(n-1))
+ PSVs<-c(PSVs,PSV)
+ }
+ PSVout<-cbind(PSVs,SR)
+
+ if(flag==2)
+ {
+ PSVout<-PSVout[-2,]
+ return(PSVout)
+ } else {
+ if(compute.var==FALSE)
+ {
+ return(data.frame(PSVout))
+ } else {
+
+ PSVvar<-NULL
+ X<-Cmatrix-(sum(sum(Cmatrix-diag(nspecies))))/(nspecies*(nspecies-1));
+ X<-X-diag(diag(X))
+
+ SS1<-sum(X*X)/2
+
+ SS2<-0;
+ for(i in 1:(nspecies-1)){
+ for(j in (i+1):nspecies){
+ SS2<-SS2+X[i,j]*(sum(X[i,])-X[i,j])
+ }
+ }
+ SS3<--SS1-SS2
+
+ S1<-SS1*2/(nspecies*(nspecies-1))
+ S2<-SS2*2/(nspecies*(nspecies-1)*(nspecies-2))
+
+ if(nspecies==3){
+ S3<-0
+ }else{
+ S3<-SS3*2/(nspecies*(nspecies-1)*(nspecies-2)*(nspecies-3))
+ }
+
+ for(n in 2:nspecies){
+ approxi<-2/(n*(n-1))*(S1 + (n-2)*S2 + (n-2)*(n-3)*S3)
+ PSVvar<-rbind(PSVvar, c(n, approxi))
+ }
+
+ vars<-rep(0,nlocations)
+ PSVout<-cbind(PSVout,vars)
+
+ for (g in 1:nlocations)
+ {
+ PSVout[g,3]<-PSVvar[PSVout[g,2]-1,2]
+ }
+ return(data.frame(PSVout))
+ }
+ }
+}
+
+
+psr <- function(samp,tree,compute.var=TRUE){
+ PSVout<-psv(samp,tree,compute.var)
+ if(is.null(dim(PSVout))==TRUE)
+ {
+ PSRout<-data.frame(cbind(PSVout[1]*PSVout[2],PSVout[2]))
+ names(PSRout)<-c("PSR","SR")
+ rownames(PSRout)<-""
+ return(PSRout)
+ } else {
+ PSRout<-cbind(PSVout[,1]*PSVout[,2],PSVout[,2])
+ if(compute.var!=TRUE) {
+ colnames(PSRout)<-c("PSR","SR")
+ return(data.frame(PSRout))
+ } else {
+ PSRout<-cbind(PSRout,PSVout[,3]*(PSVout[,2])^2)
+ colnames(PSRout)<-c("PSR","SR","vars")
+ return(data.frame(PSRout))
+ }
+ }
+}
+
+pse<-function(samp,tree){
+ flag=0
+ if (is.null(dim(samp))) #if the samp matrix only has one site
+ {
+ samp<-rbind(samp,samp)
+ flag=2
+ }
+
+ if(is(tree)[1]=="phylo")
+ {
+ tree<-prune.sample(samp,tree)
+ # Make sure that the species line up
+ samp<-samp[,tree$tip.label]
+ # Make a correlation matrix of the species pool phylogeny
+ Cmatrix<-vcv.phylo(tree,cor=TRUE)
+ } else {
+ Cmatrix<-tree
+ species<-colnames(samp)
+ preval<-colSums(samp)/sum(samp)
+ species<-species[preval>0]
+ Cmatrix<-Cmatrix[species,species]
+ samp<-samp[,colnames(Cmatrix)]
+ }
+
+ # reduce given Cmatrix to the species observed in samp
+ SR<-apply(samp>0,1,sum)
+ # prune out locations with <2 species
+ samp<-samp[SR>1,]
+ SR<-SR[SR>1]
+
+ #cull the species that are not found in the samp set after all communities with 1 species are removed
+ preval<-colSums(samp)/sum(samp)
+ indexcov<-preval>0
+ Cmatrix<-Cmatrix[indexcov,indexcov]
+ samp<-samp[,indexcov]
+
+ # numbers of locations and species
+ nlocations<-dim(samp)[1]
+ nspecies<-dim(samp)[2]
+
+ #################################
+ # calculate observed phylogenetic species evenness
+ PSEs<-NULL
+ for(i in 1:nlocations){
+ index<-seq(1,ncol(Cmatrix))[samp[i,]>0] # species present
+ C<-Cmatrix[index,index] #C for individual locations
+ n<-length(index) #location species richness
+ N<-sum(samp[i,]) #location total abundance
+ M<-samp[i,samp[i,]>0] #species abundance column
+ mbar<-mean(M) #mean species abundance
+ PSE<-(N*t(diag(as.matrix(C)))%*%M-t(M)%*%as.matrix(C)%*%M)/(N^2-N*mbar) #phylogenetic species evenness
+ PSEs<-c(PSEs,PSE)
+ }
+ PSEout=cbind(PSEs,SR)
+ if (flag==2)
+ {
+ PSEout<-PSEout[-2,]
+ return(PSEout)
+ } else {
+ return(data.frame(PSEout))
+ }
+}
+
+psc<-function(samp,tree){
+ # Make samp matrix a pa matrix
+ samp[samp>0]<-1
+ flag=0
+ if (is.null(dim(samp))) #if the samp matrix only has one site
+ {
+ samp<-rbind(samp,samp)
+ flag=2
+ }
+
+ if(is(tree)[1]=="phylo")
+ {
+ tree<-prune.sample(samp,tree)
+ # Make sure that the species line up
+ samp<-samp[,tree$tip.label]
+ # Make a correlation matrix of the species pool phylogeny
+ Cmatrix<-vcv.phylo(tree,cor=TRUE)
+ } else {
+ Cmatrix<-tree
+ species<-colnames(samp)
+ preval<-colSums(samp)/sum(samp)
+ species<-species[preval>0]
+ Cmatrix<-Cmatrix[species,species]
+ samp<-samp[,colnames(Cmatrix)]
+ }
+
+ # reduce given Cmatrix to the species observed in samp
+ SR<-rowSums(samp)
+ samp<-samp[SR>1,] # prune out locations with <2 species
+ SR<-SR[SR>1]
+
+ #cull the species that are not found in the samp set after all communities with 1 species are removed
+ preval<-colSums(samp)/sum(samp)
+ indexcov<-preval>0
+ Cmatrix<-Cmatrix[indexcov,indexcov]
+ samp<-samp[,indexcov]
+
+ # numbers of locations and species
+ nlocations<-dim(samp)[1]
+ nspecies<-dim(samp)[2]
+
+ ##################################
+ # calculate observed PSCs
+ #
+ PSCs<-NULL
+
+ for(i in 1:nlocations)
+ {
+ index<-seq(1:nrow(Cmatrix))[samp[i,]==1] #species present
+ n<-length(index) #number of species present
+ C<-Cmatrix[index,index] #C for individual locations
+ diag(C)<--1
+ PSC<-sum(apply(C,1,max))/n
+ PSCs<-c(PSCs,PSC)
+ }
+ PSCout<-cbind(PSCs,SR)
+ if (flag==2)
+ {
+ PSCout<-PSCout[-2,]
+ return(PSCout)
+ } else {
+ return(data.frame(PSCout))
+ }
+}
+
+psv.spp<-function(samp,tree){
+ # Make samp matrix a pa matrix
+ samp[samp>0]<-1
+ if (is.null(dim(samp))) #if the samp matrix only has one site
+ {
+ samp<-rbind(samp,samp)
+ }
+ if(is(tree)[1]=="phylo")
+ {
+ tree<-prune.sample(samp,tree)
+ # Make sure that the species line up
+ samp<-samp[,tree$tip.label]
+ # Make a correlation matrix of the species pool phylogeny
+ Cmatrix<-vcv.phylo(tree,cor=TRUE)
+ } else {
+ Cmatrix<-tree
+ species<-colnames(samp)
+ preval<-colSums(samp)/sum(samp)
+ species<-species[preval>0]
+ Cmatrix<-Cmatrix[species,species]
+ samp<-samp[,colnames(Cmatrix)]
+ }
+ # reduce given Cmatrix to the species observed in samp
+ samp<-samp[rowSums(samp)>1,] # prune out locations with <2 species
+
+ #cull the species that are not found in the samp set after all communities with 1 species are removed
+ preval<-colSums(samp)/sum(samp)
+ indexcov<-preval>0
+ Cmatrix<-Cmatrix[indexcov,indexcov]
+ samp<-samp[,indexcov]
+
+ obs.PSV<-mean(psv(samp,Cmatrix,compute.var=FALSE)[,1])
+
+ # numbers of locations and species
+ nlocations<-dim(samp)[1]
+ nspecies<-dim(samp)[2]
+
+ spp.PSVs<-NULL
+ for (j in 1:nspecies)
+ {
+ spp.samp<-samp[,-j]
+ spp.Cmatrix<-Cmatrix[-j,-j]
+ spp.PSV<-mean(psv(spp.samp,spp.Cmatrix,compute.var=FALSE)[,1])
+ spp.PSVs<-c(spp.PSVs,spp.PSV)
+ }
+ spp.PSVout<-(spp.PSVs-obs.PSV)/sum(abs(spp.PSVs-obs.PSV))
+ names(spp.PSVout)<-colnames(samp)
+ return(spp.PSVout)
+}
+
+psd<-function(samp,tree,compute.var=TRUE){
+ if (is.null(dim(samp))) #if the samp matrix only has one site
+ {
+ PSDout<-data.frame(c(psv(samp,tree,compute.var)[1],psc(samp,tree)[1],psr(samp,tree,compute.var)[1],pse(samp,tree)))
+ names(PSDout)<-c("PSV","PSC","PSR","PSE","SR")
+ return(PSDout)
+ } else {
+ if (compute.var==TRUE)
+ {
+ PSDout<-cbind(psv(samp,tree,compute.var)[,c(1,3)],psc(samp,tree)[,1],psr(samp,tree,compute.var)[,c(1,3)],pse(samp,tree))
+ colnames(PSDout)<-c("PSV","var.PSV","PSC","PSR","var.PSR","PSE","SR")
+ } else {
+ PSDout<-cbind(psv(samp,tree,compute.var)[,1],psc(samp,tree)[,1],psr(samp,tree,compute.var)[,1],pse(samp,tree))
+ colnames(PSDout)<-c("PSV","PSC","PSR","PSE","SR")
+ }
+ return(data.frame(PSDout))
+ }
+}
+
+pd<-function(samp,tree){
+
+ # Make sure that the species line up
+ tree<-prune.sample(samp,tree)
+ samp<-samp[,tree$tip.label]
+ species<-colnames(samp)
+
+ # reduce given Cmatrix to the species observed in samp
+ SR<-rowSums(samp)
+ samp<-samp[SR>1,] # prune out locations with <2 species
+ SR<-SR[SR>1]
+
+ # numbers of locations and species
+ nlocations=dim(samp)[1]
+ nspecies=dim(samp)[2]
+
+ ##################################
+ # calculate observed PDs
+ #
+ PDs=NULL
+
+ for(i in 1:nlocations)
+ {
+ index<-species[samp[i,]==0] #species not in sample
+ sub.tree<-drop.tip(tree,index)
+ PDs<-c(PDs,sum(sub.tree$edge.length))
+ }
+
+ PDout<-data.frame(cbind(PDs,SR))
+ rownames(PDout)<-rownames(samp)
+ return(PDout)
+}
+
Copied: pkg/R/specaccum.psr.R (from rev 100, branches/gsoc/R/specaccum.psr.R)
===================================================================
--- pkg/R/specaccum.psr.R (rev 0)
+++ pkg/R/specaccum.psr.R 2008-06-11 21:27:54 UTC (rev 101)
@@ -0,0 +1,43 @@
+specaccum.psr<-function (samp, tree, permutations = 100, method = "random", ...)
+{
+
+#function adapted from the vegan package specaccum
+
+ x <- as.matrix(samp)
+ n <- nrow(x)
+ p <- ncol(x)
+ if (p == 1)
+ {
+ x <- t(x)
+ n <- nrow(x)
+ p <- ncol(x)
+ }
+ accumulator <- function(x,ind,tree)
+ {
+ n <- nrow(x)
+ p <- ncol(x)
+ xx<-x
+ xx[1:n,1:p]<-0
+ xx[apply(x[ind, ], 2, cumsum)>0]<-1
+ PSV<-psv(xx,tree,compute.var=FALSE)
+ PSV[,1]*PSV[,2]
+ }
+ METHODS <- c("collector", "random", "exact", "rarefaction",
+ "coleman")
+ method <- match.arg(method, METHODS)
+
+ specaccum <- sdaccum <- sites <- perm <- NULL
+ perm <- array(dim = c(n, permutations))
+ for (i in 1:permutations)
+ {
+ r.x=0
+ while(length(r.x)<n){r.x <- accumulator(x, sample(n),tree)}
+ perm[, i]<-r.x
+ }
+ sites <- 1:n
+ specaccum <- apply(perm, 1, mean)
+ sdaccum <- apply(perm, 1, sd)
+ out <- list(call = match.call(), method = method, sites = sites, richness = specaccum, sd = sdaccum, perm = perm)
+ class(out) <- "specaccum"
+ out
+}
\ No newline at end of file
Copied: pkg/man/pd.Rd (from rev 100, branches/gsoc/man/pd.Rd)
===================================================================
--- pkg/man/pd.Rd (rev 0)
+++ pkg/man/pd.Rd 2008-06-11 21:27:54 UTC (rev 101)
@@ -0,0 +1,32 @@
+\name{pd}
+\alias{pd}
+
+\title{ Calculate Faith's Phylogenetic Diversity }
+\description{
+ Calculate the sum of the total branch lengths for one or multiple samples.
+}
+\usage{
+pd(samp, tree)
+}
+
+\arguments{
+ \item{samp}{ Community data matrix }
+ \item{tree}{ A phylo tree object }
+}
+
+\value{
+ Returns a dataframe of the PD and species richness values for all samples
+}
+
+\note{This function does not calculate PD for samples with one or zero species. The data sets need not be species-community data sets but may be any sample
+ data set with an associated phylogeny. PD is not statistically independent of species richness, it positively correlates with species richness across samples.
+}
+
+
+\references{ Faith D.P. (1992) Conservation evaluation and phylogenetic diversity. Biological Conservation, 61, 1-10 }
+\author{ Matthew Helmus \email{mrhelmus at gmail.com}\cr
+ Jonathan Davies \email{davies at nceas.ucsb.edu}
+ }
+\seealso{ \code{\link{psr}} }
+
+\keyword{univar}
Copied: pkg/man/psd.Rd (from rev 100, branches/gsoc/man/psd.Rd)
===================================================================
--- pkg/man/psd.Rd (rev 0)
+++ pkg/man/psd.Rd 2008-06-11 21:27:54 UTC (rev 101)
@@ -0,0 +1,68 @@
+\name{psd}
+\alias{psd}
+\alias{psv.spp}
+\alias{psv}
+\alias{psr}
+\alias{pse}
+\alias{psc}
+\title{ Phylogenetic Species Diversity Metrics }
+\description{
+ Calculate the bounded phylogenetic biodiversity metrics: phylogenetic species variability, richness, evenness and clustering for one or multiple samples.
+}
+\usage{
+psv(samp,tree,compute.var=TRUE)
+psr(samp,tree,compute.var=TRUE)
+pse(samp,tree)
+psc(samp,tree)
+psd(samp,tree,compute.var=TRUE)
+psv.spp(samp,tree)
+}
+
+\arguments{
+ \item{samp}{ Community data matrix }
+ \item{tree}{ A phylo tree object or a phylogenetic covariance matrix }
+ \item{compute.var}{ Computes the expected variances for PSV and PSR for each community }
+}
+\details{
+ \emph{Phylogenetic species variability (PSV)} quantifies how phylogenetic relatedness decreases the variance of a hypothetical unselected/neutral trait
+ shared by all species in a community. The expected value of PSV is statistically independent of species richness, is one when all species in a sample
+ are unrelated (i.e., a star phylogeny) and approaches zero as species become more related. PSV is directly related to mean phylogenetic distance. The
+ expected variance around PSV for any sample of a particular species richness can be approximated.
+ To address how individual species contribute to the mean PSV of a data set, the function \code{psv.spp} gives signed proportions of the total deviation
+ from the mean PSV that occurs when all species are removed from the data set one at a time. The absolute values of these \dQuote{species effects}
+ tend to positively correlate with species prevalence.
+ \cr
+ \cr
+ \emph{Phylogenetic species richness (PSR)} is the number of species in a sample multiplied by PSV. It can be considered the species richness of a sample
+ after discounting by species relatedness. The value is maximum at the species richness of the sample, and decreases towards zero as relatedness
+ increases. The expected variance around PSR for any sample of a particular species richness can be approximated.
+ \cr
+ \cr
+ \emph{Phylogenetic species evenness (PSE)} is the metric PSV modified to incorporate relative species abundances. The maximum attainable value of PSE (i.e., 1)
+ occurs only if species abundances are equal and species phylogeny is a star. PSE essentially grafts each individual of a species onto the tip of the
+ phylogeny of its species with branch lengths of zero.
+ \cr
+ \cr
+ \emph{Phylogenetic species clustering (PSC)} is a metric of the branch tip clustering of species across a sample's phylogeny. As PSC increases to 1, species
+ are less related to one another the tips of the phylogeny. PSC is directly related to mean nearest neighbor distance.
+}
+
+\value{
+ Returns a dataframe of the respective phylogenetic species diversity metric values
+}
+
+\note{These metrics are bounded either between zero and one (PSV, PSE, PSC) or zero and species richness (PSR); but the metrics asymptotically
+ approach zero as relatedness increases. Zero can be assigned to communities with less than two species, but conclusions drawn from assigning
+ communities zero values need be carefully explored for any data set. The data sets need not be species-community data sets but may be any sample
+ data set with an associated phylogeny.
+}
+
+
+\references{ Helmus M.R., Bland T.J., Williams C.K. & Ives A.R. (2007) Phylogenetic measures of biodiversity. American Naturalist, 169, E68-E83 }
+\author{ Matthew Helmus \email{mrhelmus at gmail.com} }
+\seealso{ \code{\link{mpd}} ,\code{\link{mnnd}}, \code{\link{specaccum.psr}} }
+
+\keyword{univar}
+
+
+
Copied: pkg/man/specaccum.psr.Rd (from rev 100, branches/gsoc/man/specaccum.psr.Rd)
===================================================================
--- pkg/man/specaccum.psr.Rd (rev 0)
+++ pkg/man/specaccum.psr.Rd 2008-06-11 21:27:54 UTC (rev 101)
@@ -0,0 +1,37 @@
+\name{specaccum.psr}
+\alias{specaccum.psr}
+
+\title{ Phylogenetic Species Richness Sample-Based Rarefaction Curve }
+\description{
+ Finds a sample-based rarefaction curve for phylogentic species richness for a set of samples.
+}
+\usage{
+specaccum.psr(samp, tree, permutations = 100, method = "random", ...)
+}
+
+\arguments{
+ \item{samp}{ Community data matrix }
+ \item{tree}{ A phylo tree object or a phylogenetic covariance matrix}
+ \item{permutations}{ Number of permutations with method \code{method= "random"} }
+ \item{method}{ Species accumulation method, currently only \code{"random"} is supported which adds samples in random order. }
+ \item{\dots}{ Other parameters to functions }
+}
+
+\value{
+The function returns an object of class \code{"specaccum"} with items:
+
+\item{call}{ Function call. }
+\item{method}{ Accumulator method. }
+\item{sites}{ Number of sites/samples. }
+\item{richness}{ The mean phylogenetic species richness corresponding to number of sites/samples. }
+\item{sd}{ The standard deviation of phylogenetic apecies accumulation curve (or its standard error) estimated from permutations in \code{method = "random"}. }
+\item{perm}{ Permutation results with \code{method = "random"} and NULL in other cases. Each column in perm holds one permutation. }
+}
+
+\references{ Gotelli N.J. & Colwell R.K. (2001) Quantifying biodiversity: procedures and pitfalls in the measurement and comparison of species richness. Ecology Letters, 4, 379-391\cr
+ \cr Helmus M.R., Bland T.J., Williams C.K. & Ives A.R. (2007) Phylogenetic measures of biodiversity. American Naturalist, 169, E68-E83 }
+\author{ Matthew Helmus \email{mrhelmus at gmail.com}
+ }
+\seealso{ \code{\link{psr}}, \code{\link[vegan]{specaccum}}}
+
+\keyword{univar}
More information about the Picante-commits
mailing list