[Picante-commits] r213 - in pkg: R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Feb 11 02:33:54 CET 2010


Author: skembel
Date: 2010-02-11 02:33:54 +0100 (Thu, 11 Feb 2010)
New Revision: 213

Added:
   pkg/R/data.checking.R
   pkg/R/raoD.R
   pkg/man/data.checking.Rd
   pkg/man/raoD.Rd
Log:
Added data checking functions and Rao's quadratic entropy function

Added: pkg/R/data.checking.R
===================================================================
--- pkg/R/data.checking.R	                        (rev 0)
+++ pkg/R/data.checking.R	2010-02-11 01:33:54 UTC (rev 213)
@@ -0,0 +1,100 @@
+match.phylo.comm <- function(phy, comm) {
+
+    if(!(is.data.frame(comm) | is.matrix(comm))) {
+        stop("Community data should be a data.frame or matrix with samples in rows and taxa in columns")
+    }
+
+    res <- list()
+
+    phytaxa <- phy$tip.label
+    commtaxa <- colnames(comm)
+
+    if(is.null(commtaxa)) {
+        stop("Community data set lacks taxa (column) names, these are required to match phylogeny and community data")
+    }
+
+    if(!all(commtaxa %in% phytaxa)) {
+        print("Dropping taxa from the community because they are not present in the phylogeny:")
+        print(setdiff(commtaxa,phytaxa))
+        comm <- comm[,intersect(commtaxa,phytaxa)]
+        commtaxa <- colnames(comm)
+    }
+    
+    if(any(!(phytaxa %in% commtaxa))) {
+        print("Dropping tips from the tree because they are not present in the community data:")
+        print(setdiff(phytaxa,commtaxa))
+        res$phy <- prune.sample(comm, phy)
+    } else {
+        res$phy <- phy
+    }
+    
+    res$comm <- comm[,res$phy$tip.label]
+    return(res)
+
+}
+
+match.phylo.data <- function(phy, data) {
+
+    res <- list()
+
+    phytaxa <- phy$tip.label
+
+    if(is.vector(data)) {
+        datataxa <- names(data)
+    
+        if(is.null(datataxa)) {
+            warning("Data set lacks taxa names, these are required to match phylogeny and data. Data are returned unsorted. Assuming that data and phy$tip.label are in the same order!")
+            return(list(phy=phy,data=data))
+        }
+    
+        if(!all(datataxa %in% phytaxa)) {
+            print("Dropping taxa from the data because they are not present in the phylogeny:")
+            print(setdiff(datataxa,phytaxa))
+            data <- data[intersect(datataxa,phytaxa)]
+            datataxa <- names(data)
+        }
+        
+        if(any(!(phytaxa %in% datataxa))) {
+            print("Dropping tips from the tree because they are not present in the data:")
+            print(setdiff(phytaxa,datataxa))
+            res$phy <- drop.tip(phy, setdiff(phytaxa,datataxa))
+        } else {
+            res$phy <- phy 
+        }
+        
+        res$data <- data[res$phy$tip.label]
+        return(res)   
+        
+    } else if(is.data.frame(data) | is.matrix(data)) {
+        dataclass <- class(data)
+        data <- as.matrix(data)
+
+        datataxa <- rownames(data)
+    
+        if(is.null(datataxa)) {
+            warning("Data set lacks taxa (row) names, these are required to match phylogeny and data. Data are returned unsorted. Assuming that data rows and phy$tip.label are in the same order!")
+            return(list(phy=phy,data=data))
+        }
+    
+        if(!all(datataxa %in% phytaxa)) {
+            print("Dropping taxa from the data because they are not present in the phylogeny:")
+            print(setdiff(datataxa,phytaxa))
+            data <- data[intersect(datataxa,phytaxa),]
+            datataxa <- rownames(data)
+        }
+        
+        if(any(!(phytaxa %in% datataxa))) {
+            print("Dropping tips from the tree because they are not present in the data:")
+            print(setdiff(phytaxa,datataxa))
+            res$phy <- drop.tip(phy, setdiff(phytaxa,datataxa))
+        } else {
+            res$phy <- phy
+        }
+        
+        res$data <- data[res$phy$tip.label,]
+        return(res)   
+             
+    } else {
+        stop("Data must be a vector, data.frame, or matrix")
+    }
+}
\ No newline at end of file

Added: pkg/R/raoD.R
===================================================================
--- pkg/R/raoD.R	                        (rev 0)
+++ pkg/R/raoD.R	2010-02-11 01:33:54 UTC (rev 213)
@@ -0,0 +1,62 @@
+raoD <- function (comm, phy=NULL)
+{
+    res <- list()
+
+	if (is.null(phy)) {
+	    tij <- 1-diag(x=rep(1,length(comm[1,])))
+	} else {
+	    if (!is.ultrametric(phy)) {
+	        stop("Phylogeny must be ultrametric")
+	    }
+	    dat <- match.phylo.comm(phy, comm)
+	    comm <- dat$comm
+	    phy <- dat$phy
+	    tij <- cophenetic(phy) / 2
+	}
+
+    x <- as.matrix(comm)    
+    S <- length(x[1,])
+    N <- length(x[,1])
+    total <- apply(x, 1, sum)
+    samp.relabund <- total / sum(x)
+    x.combined <- matrix(apply(x, 2, sum), nrow=1) / sum(x)
+    x <- sweep(x, 1, total, "/")
+
+	D <- vector(length=N)
+    names(D) <- rownames(x)
+	for (k in 1:N)
+		D[k] <-
+			sum ( tij * outer(as.vector(t(x[k,])),as.vector(t(x[k,]))) )
+    res$Dkk <- D
+
+	H <- matrix(nrow=N,ncol=N)    
+	for (k in 1:N) {
+		for (l in 1:N) {
+			H[k,l] <-
+			sum ( tij * outer(as.vector(t(x[k,])),as.vector(t(x[l,]))) )
+		}
+	}
+
+    row.names(H) <- row.names(x)
+    colnames(H) <- row.names(x)
+	Dkl <- H
+	res$Dkl <- Dkl
+
+	for (k in 1:N) {
+		for (l in 1:N) {
+			H[k,l] <- Dkl[k,l] - (Dkl[k,k] + Dkl[l,l]) / 2
+		}
+	}
+    res$H <- H
+    
+    res$total <- sum ( tij * outer(as.vector(t(x.combined)),as.vector(t(x.combined))) )
+
+    res$alpha <- sum(res$D * samp.relabund)
+
+    res$beta <- res$total - res$alpha
+    
+    res$Fst <- res$beta / res$total
+    
+    return(res)
+    
+}

Added: pkg/man/data.checking.Rd
===================================================================
--- pkg/man/data.checking.Rd	                        (rev 0)
+++ pkg/man/data.checking.Rd	2010-02-11 01:33:54 UTC (rev 213)
@@ -0,0 +1,43 @@
+\name{match.phylo.data}
+\alias{match.phylo.data}
+\alias{match.phylo.comm}
+
+\title{ Match taxa in phylogeny and data }
+\description{
+  These functions compare taxa present in phylogenies with community or trait data sets, pruning and sorting the two kinds of data to match one another for subsequent analysis.
+}
+\usage{
+match.phylo.comm(phy, comm)
+match.phylo.data(phy, data)
+}
+
+\arguments{
+  \item{ phy }{ A phylogeny object of class phylo }
+  \item{ comm }{ Community data matrix }
+  \item{ data }{ A data object - a vector (with names matching phy) or a data.frame or matrix (with row names matching phy) }
+}
+
+\value{
+  A list containing the following elements, pruned and sorted to match one another:
+  \item{phy}{ A phylogeny object of class phylo }
+  \item{comm}{ Community data matrix }
+  \item{data}{ A data object (vector, data.frame or matrix) }  
+}
+\details{
+A common pitfall in comparative analyses in R is that taxa labels are assumed to match between phylogenetic and other data sets. These functions prune a phylogeny and community or trait data set to match one another, reporting taxa that are missing from one data set or the other.
+
+Taxa names for phylogeny objects are taken from the phylogeny's tip labels. Taxa names for community data are taken from the column names. Taxa names for trait data are taken from the element names (vector) or row names (data.frame or matrix).
+
+If community data lack taxa names, the function will issue a warning and no result will be returned, since the community-phylogenetic analyses in \code{picante} require named taxa in the community data set.
+
+If trait data lack names, a warning is issued and the data are assumed to be sorted in the same order as the phylogeny's tip labels.
+
+These utility functions are used by several of the functions that assume taxa labels in phylogeny and data match, including \code{\link{Kcalc}}, \code{\link{phylosignal}}, and \code{\link{raoD}}.
+}
+\author{ Steven Kembel <skembel at uoregon.edu> }
+\examples{
+data(phylocom)
+match.phylo.comm(phylocom$phylo, phylocom$sample)
+match.phylo.data(phylocom$phylo, phylocom$traits[1:10,])
+}
+\keyword{univar}

Added: pkg/man/raoD.Rd
===================================================================
--- pkg/man/raoD.Rd	                        (rev 0)
+++ pkg/man/raoD.Rd	2010-02-11 01:33:54 UTC (rev 213)
@@ -0,0 +1,73 @@
+\name{raoD}
+\alias{raoD}
+
+\title{ Rao's quadratic entropy }
+\description{
+  Calculates Rao's quadratic entropy, a measure of within- and among-community diversity taking species dissimilarities into account
+}
+\usage{
+  raoD(comm, phy=NULL)
+}
+
+\arguments{
+  \item{ comm }{ Community data matrix }
+  \item{ phy }{ Object of class phylo - an ultrametric phylogenetic tree (optional) }
+}
+
+\value{
+  A list of results
+  \item{ Dkk }{ Within-community diversity }
+  \item{ Dkl }{ Among-community diversity }
+  \item{ H }{ Among-community diversities excluding within-community diversity }
+  \item{ total }{ Total diversity across all samples }
+  \item{ alpha }{ Alpha diversity - average within-community diversity }
+  \item{ beta }{ Beta diversity - average among-community diversity }
+  \item{ Fst }{ Beta diversity / total diversity }
+}
+
+\details{
+Rao's quadratic entropy (Rao 1982) is a measure of diversity in ecological communities that can optionally take species differences (e.g. phylogenetic dissimilarity) into account. This method is conceptually similar to analyses of genetic diversity among populations (Nei 1973), but instead of diversity of alleles among populations, it measures diversity of species among communities. 
+
+If no phylogeny is supplied, Dkk is equivalent to Simpson's diversity (probability that two individuals drawn from a community are from different taxa), Dkl is a beta-diversity equivalent of Simpson's diversity (probability that individuals drawn from each of two communities belong to different taxa), and H is Dkl standardized to account for within-community diversity.
+
+If an ultrametric phylogeny is supplied, Dkk is equivalent to the mean pairwise phylogenetic distance (distance to MRCA) between two individuals from different species drawn from a community, Dkl is the mean pairwise phylogenetic distance between individuals drawn from each of two communities, and H is Dkl standardized to account for within-community diversity.
+
+\emph{
+    D[kl] = sum(t[ij] * x[ki] * x[lj])
+    
+        where x[ki] is the relative abundance of taxon i in community k
+        
+        t[ij] is a matrix of weights for all pairs of taxa i,j
+        
+            Without a phylogeny, when i=j, t[ij]=0, otherwise t[ij]=1
+            
+            With a phylogeny, t[ij] is the phylogenetic distance to MRCA for taxa i and j
+            
+    H[kl] = D[kl] - (D[kk] + D[ll])/2
+}
+
+Alpha, beta and total measure the average diversity within, among, and across all communities based on Dkk and H values taking variation in number of individuals per community into account. A Fst-like measure is calculated by dividing beta by the total diversity across all samples.
+}
+
+\section{ Warning }{
+Alpha, beta, and total diversity components and Fst should not be interpreted as a measure of relative differentiation among versus within communities. See Jost (2007) for a detailed description of this problem. Hardy and Jost (2008) suggest Fst can be interpreted as 'local species identity excess' or 'local phylogenetic similarity excess' rather than as a measure of among-community differentiation.
+}
+\references{
+Hardy, O.J., and Jost. L. 2008. Interpreting and estimating measures of community phylogenetic structuring. J. Ecol. 96:849-852.
+
+Jost, L. 2007. Partitioning diversity into independent alpha and beta components. Ecology 88: 24272439.
+
+Nei, M. 1973. Analysis of gene diversity in sub-divided populations. Proceedings of the National Academy of Sciences of the USA 70:3321-3323.
+
+Rao, C.R. 1982. Diversity and dissimilarity coefficients: a unified approach. Theoretical Population Biology 21:2443.
+
+Webb, C.O., Ackerly, D.D., and Kembel, S.W. 2008. Phylocom: software for the analysis of phylogenetic community structure and trait evolution. Version 4.0.1. \url{http://www.phylodiversity.net/phylocom/}.
+}
+\seealso{ \code{\link{mpd}}, \code{\link{comdist}}  }
+\author{ Steven Kembel <skembel at uoregon.edu> }
+\examples{
+data(phylocom)
+raoD(phylocom$sample)
+raoD(phylocom$sample, phylocom$phylo)
+}
+\keyword{univar}
\ No newline at end of file



More information about the Picante-commits mailing list