[Picante-commits] r216 - / pkg/R pkg/man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Feb 12 01:00:20 CET 2010
Author: skembel
Date: 2010-02-12 01:00:20 +0100 (Fri, 12 Feb 2010)
New Revision: 216
Added:
pkg/man/unifrac.Rd
Modified:
TODO
pkg/R/evol.distinct.R
pkg/R/phylobeta.R
pkg/R/tax.distinctiveness.R
pkg/man/ses.mpd.Rd
Log:
Added UniFrac function, cleaning up code and documentation in preparation for 0.8 release
Modified: TODO
===================================================================
--- TODO 2010-02-11 22:56:05 UTC (rev 215)
+++ TODO 2010-02-12 00:00:20 UTC (rev 216)
@@ -1,6 +1,5 @@
TODO list for picante package
====
--data name checking and matching - currently many functions expect data to have names that match e.g. phylo$tip.labels, should handle/check lack of names and warn. Write helper function.
-reinstate function to call out to phylocom executable and read results back in to R
-allow trait data to be a vector or a data.frame (single or multiple columns)
-implement contribution index, nodesig, and similar node-based statistics from phylocom
Modified: pkg/R/evol.distinct.R
===================================================================
--- pkg/R/evol.distinct.R 2010-02-11 22:56:05 UTC (rev 215)
+++ pkg/R/evol.distinct.R 2010-02-12 00:00:20 UTC (rev 216)
@@ -6,6 +6,8 @@
evol.distinct<- function(tree, type=c("equal.splits", "fair.proportion"), scale=FALSE, use.branch.lengths=TRUE){
+type <- match.arg(type)
+
if(is.rooted(tree)==FALSE)
warning("A rooted phylogeny is required for meaningful output of this function", call.=FALSE)
Modified: pkg/R/phylobeta.R
===================================================================
--- pkg/R/phylobeta.R 2010-02-11 22:56:05 UTC (rev 215)
+++ pkg/R/phylobeta.R 2010-02-12 00:00:20 UTC (rev 216)
@@ -34,16 +34,16 @@
#mean distance to closest relative between taxa from two communities
comdistnt <- function(comm, dis, abundance.weighted=FALSE, exclude.conspecifics=FALSE) {
- N <- dim(comm)[1]
dat <- match.comm.dist(comm, dis)
comm <- dat$comm
dis <- dat$dist
- comm <- decostand(comm,method="total",MARGIN=1)
+ N <- dim(comm)[1]
+ comm <- decostand(comm, method="total", MARGIN=1)
comdisnt <- matrix(nrow=N,ncol=N)
for (i in 1:(N-1)) {
for (j in (i+1):N) {
- sppInSample1 <- names(comm[i, comm[i, ] > 0, drop=FALSE])
- sppInSample2 <- names(comm[j, comm[j, ] > 0, drop=FALSE])
+ sppInSample1 <- colnames(comm[i, comm[i, ] > 0, drop=FALSE])
+ sppInSample2 <- colnames(comm[j, comm[j, ] > 0, drop=FALSE])
if ((length(sppInSample1) >= 1) && (length(sppInSample2) >= 1)) {
sample.dis <- dis[sppInSample1, sppInSample2, drop=FALSE]
if (exclude.conspecifics) {
@@ -53,7 +53,7 @@
sample1NT <- apply(sample.dis,1,min,na.rm=TRUE)
sample1NT[sample1NT == Inf] <- NA
sample2NT <- apply(sample.dis,2,min,na.rm=TRUE)
- sample2NT[sample2NT == Inf] <- NA
+ sample2NT[sample2NT == Inf] <- NA
if (abundance.weighted) {
sample1.weights <- as.numeric(comm[i,sppInSample1])
sample2.weights <- as.numeric(comm[j,sppInSample2])
@@ -84,5 +84,50 @@
}
}
rownames(comdisnt) <- colnames(comdisnt) <- rownames(comm)
- as.dist(t(comdisnt))
+ return(as.dist(t(comdisnt)))
}
+
+#unifrac
+unifrac <- function (comm, tree)
+{
+ if (is.null(tree$edge.length)) {
+ stop("Tree has no branch lengths, cannot compute UniFrac")
+ }
+
+ if (!is.rooted(tree)) {
+ stop("Rooted phylogeny required for UniFrac calculation")
+ }
+
+ comm <- as.matrix(comm)
+ s <- nrow(comm)
+ phylodist <- matrix(NA, s, s)
+ rownames(phylodist) <- rownames(comm)
+ colnames(phylodist) <- rownames(comm)
+
+ comm_comb<-matrix(NA,s*(s-1)/2,ncol(comm))
+ colnames(comm_comb)<-colnames(comm)
+
+ i<-1
+ for (l in 1:(s - 1))
+ {
+ for (k in (l + 1):s)
+ {comm_comb[i,]<-comm[l, ] + comm[k, ]
+ i<-i+1}
+ }
+
+ pdcomm<-pd(comm, tree)
+ pdcomm_comb<-pd(comm_comb, tree)
+
+ i<-1
+ for (l in 1:(s - 1)) {
+ pdl <- pdcomm[l,"PD"]
+ for (k in (l + 1):s) {
+ pdk <- pdcomm[k,"PD"]
+ pdcomb <- pdcomm_comb[i,"PD"]
+ pdsharedlk <- pdl + pdk - pdcomb
+ phylodist[k, l] = (pdcomb - pdsharedlk) / pdcomb
+ i<-i+1
+ }
+ }
+ return(as.dist(phylodist))
+}
Modified: pkg/R/tax.distinctiveness.R
===================================================================
--- pkg/R/tax.distinctiveness.R 2010-02-11 22:56:05 UTC (rev 215)
+++ pkg/R/tax.distinctiveness.R 2010-02-12 00:00:20 UTC (rev 216)
@@ -1,11 +1,14 @@
#Taxic diversity: Vane-Wright et al., 1991 and May 1990 which accounts for polytomies by counting the number of branches descending from each node that lies on the path from a spp tip to the root (not just counting the number of nodes.
-tax.distinctiveness<- function(tree, type=c("Vane-Wright", "May")){
+tax.distinctiveness<- function(tree, type=c("Vane-Wright", "May")) {
-if(is.rooted(tree)==FALSE)
-warning("A rooted phylogeny is required for meaningful output of this function", call.=FALSE)
+ type <- match.arg(type)
-n.nodes<- switch(type, "Vane-Wright"=.node.number(tree),
+ if(is.rooted(tree)==FALSE) {
+ warning("A rooted phylogeny is required for meaningful output of this function", call.=FALSE)
+ }
+
+ n.nodes<- switch(type, "Vane-Wright"=.node.number(tree),
"May"={edge<- tree$edge
for(i in 1:length(tree$tip.label)){
Modified: pkg/man/ses.mpd.Rd
===================================================================
--- pkg/man/ses.mpd.Rd 2010-02-11 22:56:05 UTC (rev 215)
+++ pkg/man/ses.mpd.Rd 2010-02-12 00:00:20 UTC (rev 216)
@@ -1,7 +1,7 @@
\name{ses.mpd}
\alias{ses.mpd}
-\title{ Standardized effect size of mpd }
+\title{ Standardized effect size of MPD }
\description{
Standardized effect size of mean pairwise distances in communities. When used with a phylogenetic distance matrix, equivalent to -1 times the Nearest Relative Index (NRI).
}
Added: pkg/man/unifrac.Rd
===================================================================
--- pkg/man/unifrac.Rd (rev 0)
+++ pkg/man/unifrac.Rd 2010-02-12 00:00:20 UTC (rev 216)
@@ -0,0 +1,30 @@
+\name{unifrac}
+\alias{unifrac}
+
+\title{ Unweighted UniFrac distance between communities }
+\description{ Calculates unweighted UniFrac, a phylogenetic beta diversity metric of the the unique (non-shared) fraction of total phylogenetic diversity (branch-length) between two communities. }
+\usage{
+unifrac(comm, tree)
+}
+
+\arguments{
+ \item{comm}{ Community data matrix }
+ \item{tree}{ Object of class phylo - a rooted phylogeny}
+}
+\value{A dist object of the unweighted UniFrac distances between communities (the unique (non-shared) fraction of total phylogenetic diversity (branch-length) between two communities).}
+\references{
+Lozupone, C., Hamady, M., and Knight, R. 2006. UniFrac - an online tool for comparing microbial community diversity in a phylogenetic context. BMC Bioinformatics 7:371.
+}
+\author{ Steven Kembel <skembel at uoregon.edu> }
+\seealso{\code{\link{pd}}}
+\note{
+The supplied tree must be rooted. Single-species samples will be assigned a PD value equal to the distance from the root to the present.
+}
+\section{Warning }{
+The UniFrac distance between samples will include the branch length connecting taxa in those samples and the root of the supplied tree. The root of the supplied tree may not be spanned by any taxa in the sample. If you want the root of your tree to correspond to the most recent ancestor of the taxa actually present in your samples, you should prune the tree before running \code{unifrac}:
+\code{prunedTree <- prune.sample(sample,tree)}
+}
+\examples{
+data(phylocom)
+unifrac(phylocom$sample, phylocom$phylo)}
+\keyword{univar}
More information about the Picante-commits
mailing list