[Caic-commits] r82 - in pkg: R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Oct 22 12:33:21 CEST 2008


Author: davidorme
Date: 2008-10-22 12:33:21 +0200 (Wed, 22 Oct 2008)
New Revision: 82

Added:
   pkg/R/pd.bootstrap.R
   pkg/R/pd.calc.R
   pkg/man/pd.bootstrap.Rd
   pkg/man/pd.calc.Rd
Log:
Added pd calculation code

Added: pkg/R/pd.bootstrap.R
===================================================================
--- pkg/R/pd.bootstrap.R	                        (rev 0)
+++ pkg/R/pd.bootstrap.R	2008-10-22 10:33:21 UTC (rev 82)
@@ -0,0 +1,53 @@
+"pd.bootstrap" <-
+function(cm, ntips, reps=1000, method="TBL", tip.weights=NULL){
+    
+	# check we have a valid method
+	method <- match.arg(method, c("TBL", "MST", "UEH", "SBL", "TIP"))
+
+	# check we have a clade matrix and, if not, get one
+	if(class(cm) != "clade.matrix"){
+		if(class(cm) == "phylo"){
+			warning("Converting phylo object to clade.matrix object")
+			cm <- clade.matrix(cm)
+		} else { 
+			stop("pd.calc requires a phylogeny")	
+		}
+	}
+	
+	# check for sensible sample
+	total.nb.tips <- dim(cm$clade.matrix)[2]
+	if(!(ntips %in% 1:(total.nb.tips - 1))){
+		stop("'sample' must be a positive integer lower than the number of tips")}
+	
+	#set up the store
+	pd.store <- numeric(reps)
+	tips <- 1:total.nb.tips
+	
+	# if there are weights make sure they go in the right place...
+	if( ! is.null(tip.weights)){
+	        
+        # if the vector is named then match the order to tips
+        if(! is.null(names(tip.weights))) {
+            wght.match <- match(cm$tip.label, names(tip.weights))
+            
+            if(any(is.na(wght.match))){ # this is not elegant but can't work out how to stop and return
+                warning("The returned tip labels have no matching named element in tip.weights")
+                return(cm$tip.label[is.na(wght.match)])
+                }
+            
+            tip.weights <- tip.weights[wght.match]
+            
+        } else {
+             stop("'weights' must be a vector of weights, named to match the tip labels")
+        }
+    }
+    
+	# get the pd values
+    for(rep in seq(along=pd.store)){
+        which.tips <- sample(tips, ntips, prob=tip.weights)
+        pd.store[rep] <- pd.calc(cm, tip.subset=which.tips, method=method)$pd
+	}
+	
+	return(list(pd.distrib=pd.store, method=method))
+}
+

Added: pkg/R/pd.calc.R
===================================================================
--- pkg/R/pd.calc.R	                        (rev 0)
+++ pkg/R/pd.calc.R	2008-10-22 10:33:21 UTC (rev 82)
@@ -0,0 +1,78 @@
+"pd.calc" <-
+function(cm,  tip.subset=NULL, method="TBL", root.edge=FALSE){
+
+	# check we have a valid method
+	method <- match.arg(method, c("TBL", "MST", "UEH","SBL", "TIP"))
+	
+	# check we have a clade matrix and, if not, get one
+	if(class(cm) != "clade.matrix"){
+		if(class(cm) == "phylo"){
+			warning("Converting phylo object to clade.matrix object")
+			cm <- clade.matrix(cm)
+		} else { 
+			stop("pd.calc requires a phylogeny")	
+		}
+	}
+
+    # if requested, drop the root edge
+    nSpp <- dim(cm$clade.matrix)[2]
+    if(! root.edge) cm$edge.length[nSpp + 1] <- 0
+    	
+	# subset the tips if requested
+	if(!is.null(tip.subset)){
+		# could be names - in which case they must match tip labels
+		# could be numbers - in which case they must be in range
+		switch(mode(tip.subset),
+			"character" = {
+				tip.subset <- match(tip.subset, cm$tip.label)
+				if(any(is.na(tip.subset))){
+					stop("Unmatched names in tip.subset")}},
+			"numeric" = {
+				if(any(tip.subset %in% 1:dim(cm$clade.matrix)[2] == FALSE)){
+					stop("numeric tip.subset contains outside the range 1 to number of tips")}},
+			stop("tip.subset must be either a vector of names or numbers"))
+	} else {
+		tip.subset <- 1:dim(cm$clade.matrix)[2]
+	}
+	
+	#choose method
+	switch(method,
+		"TBL" = {
+			edge.in.matrix <- cm$clade.matrix[,tip.subset]
+			if(is.array(edge.in.matrix)){
+				edge.in.matrix <- rowSums(edge.in.matrix)}
+			edge.in.matrix <- edge.in.matrix > 0 
+		},
+		"MST" = {
+			edge.in.matrix <- cm$clade.matrix[, tip.subset]
+			if(is.array(edge.in.matrix)){
+				edge.in.matrix <- rowSums(edge.in.matrix)}
+			edge.in.matrix <- edge.in.matrix > 0 & edge.in.matrix < length(tip.subset)
+		},
+		"TIP" = {
+			edge.in.matrix <- cm$clade.matrix[,tip.subset]
+			if(is.array(edge.in.matrix)){
+				edge.in.matrix <- rowSums(edge.in.matrix)}
+			edge.in.matrix <- edge.in.matrix > 0 
+			edge.in.matrix[(dim(cm$clade.matrix)[2]+1):length(edge.in.matrix)] <- FALSE
+		},
+		"UEH" = {
+			edge.in.matrix <- cm$clade.matrix[,tip.subset]
+			if(is.array(edge.in.matrix)){
+				edge.in.matrix <- rowSums(edge.in.matrix)}
+			edge.in.matrix <- edge.in.matrix == 1 
+		},
+        "SBL" = {
+			edge.in.matrix <- cm$clade.matrix[,tip.subset]
+			if(is.array(edge.in.matrix)){
+				edge.in.matrix <- rowSums(edge.in.matrix)}
+			edge.in.matrix <- edge.in.matrix > 1 
+		})
+	
+	pd <- sum(cm$edge.len[edge.in.matrix])
+	RET <- list(pd=pd, method=method)
+		
+	return(RET)
+	
+}
+

Added: pkg/man/pd.bootstrap.Rd
===================================================================
--- pkg/man/pd.bootstrap.Rd	                        (rev 0)
+++ pkg/man/pd.bootstrap.Rd	2008-10-22 10:33:21 UTC (rev 82)
@@ -0,0 +1,36 @@
+\name{pd.bootstrap}
+\alias{pd.bootstrap}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Produces a bootstrapped phylogenetic diversity distribution.}
+\description{
+Takes a phylogeny and repeatedly calculates the phylogenetic diversity (PD) of a randomly selected set of tips. The size of the sample of tips and the number of replicates are specified by the user, as is the method used to calculate PD. The probabilities of selecting each tip can be controlled by providing weights.}
+\usage{
+pd.bootstrap(cm, ntips, reps = 1000, method = "TBL", tip.weights = NULL)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{cm}{A 'clade.matrix' object (or a 'phylo' object that will be converted).}
+  \item{ntips}{A single integer giving the number of tips to be selected.}
+  \item{reps}{The number of replicate values to calculate.}
+  \item{method}{The method used to calculate PD (see \code{\link{pd.calc}}).}
+  \item{tip.weights}{A numeric vector containing weights for all the tips in the phylogeny. Each element must be named in order to match weights to the tips.}
+}
+\details{
+If used, weights are passed to \code{\link{sample}} to select a subset of tips. Note that, since this is sampling without replacement, the weights "are applied sequentially,  that is the probability of choosing the next item is proportional to the probabilities amongst the remaining items". The named 'tip.weights' must contain matches to each tip, otherwise the function will exit and return a character vector of the missing tips. A zero weight will prevent a tip from being selected and the number of non-zero weights must be equal to or greater than the number of tips to be sampled.}
+\value{
+A list of:
+  \item{pd.distrib }{A numeric vector of length 'reps' giving the PD distribution for a sample of size 'ntips' from the phylogeny.}
+  \item{method }{The method passed to \code{\link{pd.calc}}.}
+  ...
+}
+\author{David Orme}
+
+\seealso{\code{\link{pd.calc}}}
+\examples{
+tre <- read.tree(text="((((A:1,B:1):1.5,C:2.5):0.5,(D:0.6,E:0.6):2.4):0.5,((F:1.9,G:1.9):0.8,(H:1.6,I:1.6):1.1):0.8):0.2;")
+clmat <- clade.matrix(tre)
+pd.boot <- pd.bootstrap(clmat, 5)
+hist(pd.boot$pd.distrib)
+
+}
+\keyword{ utilities }% at least one, from doc/KEYWORDS

Added: pkg/man/pd.calc.Rd
===================================================================
--- pkg/man/pd.calc.Rd	                        (rev 0)
+++ pkg/man/pd.calc.Rd	2008-10-22 10:33:21 UTC (rev 82)
@@ -0,0 +1,79 @@
+\name{pd.calc}
+\alias{pd.calc}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{Calculates phylogenetic diversity measurements.}
+\description{
+Takes a phylogeny  and returns the phylogenetic diversity (PD) associated with a set of tips. Four different measures of PD are implemented as described in the details below.}
+\usage{
+pd.calc(cm, tip.subset = NULL, method = "TBL", root.edge=FALSE)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{cm}{A object of class 'clade matrix'. Alternatively an object of class 'phylo', which will be converted to a clade.matrix.}
+  \item{tip.subset}{An optional vector identifying the subset of tips to use for PD calculations. If no tip.subset is provided the method is applied to the complete phylogeny [Hmm.. this might be undesirable]. Can either be a character vector, in which case the elements are matched against tip labels, or a vector of positive integers in the range 1 to the number of tips, in which case the tips with those numbers are used.}
+  \item{method}{ One of 'TBL', 'MST', 'UEH', 'SBL', defaulting to 'TBL'. See details.}
+  \item{root.edge}{Logical indicating whether to include the root edge length in calculations, defaulting to FALSE.}
+}
+\details{
+There are five implemented methods. The default is Total Branch Length (TBL), defined as the sum of all the edge lengths in the subtree given by the tip subset. This measure can be partitioned in two ways: Shared Branch Length (SBL) calculates the sum of all edges in the subtree that are shared by more than one tip; Unique Evolutionary History (UEH) calculates the sum of the edge lengths that give rise to only one tip in the subtree. Note that TBL is the sum of SBL and UEH for a particular subtree and that UEH effectively sums the edges that form tips in the _subtree_. The TIP option calculates the sum of the edges leading to the subset tips in the _complete tree_.  Minimum Spanning Tree (MST) calculate the length of the edges for the smallest tree that links the subset tips, excluding any edges below that node. These options are illustrated in the examples below: the red edges are those summed by each method for the tips labeled in bold. 
+}
+\value{
+  A list giving:
+  \item{pd}{The phylogenetic diversity of the tips.}
+  \item{method }{The method used to calculate that PD.}
+}
+\references{There are some references! Insert them...}
+\author{David Orme, Gavin Thomas}
+
+\seealso{ \code{\link{pd.bootstrap}}}
+\examples{
+tre <- read.tree(text="((((A:1,B:1):1.5,C:2.5):0.5,(D:0.6,E:0.6):2.4):0.5,((F:1.9,G:1.9):0.8,(H:1.6,I:1.6):1.1):0.8):0.2;")
+clmat <- clade.matrix(tre)
+tips <- c("A","C","D","E","G","H")
+pd.calc(clmat, tip.subset=tips)
+pd.calc(clmat, tip.subset=c(1,3,4,5,7,8))
+pd.calc(clmat, tip.subset=tips, root.edge=TRUE)
+
+# Methods Illustrations. This is a little slow because 
+# plot.phylo needs to be called repeatedly to 
+# draw the correct horizontal lines only.
+
+
+# demo function to colour correct branches
+pd.plot <- function(tree, edges, tips){
+    tip.font <- rep(1, length=length(tree$tip.label))
+    tip.font[tips] <- 2
+    plot(tree, label.offset=0.1, cex=1.2, font=tip.font, x.lim=3.8)
+    for(edge in edges){
+        col <- rep(NA, 16)
+        col[edge] <- "red"
+        par(new=TRUE)
+        plot(tree, edge.col=col, label.offset=0.1,
+             cex=1.2, font=tip.font, x.lim=3.8)
+    }
+}
+
+par(mfrow=c(2,3), mar=rep(1,4))
+
+# TBL
+pd.plot(tre, c(1,2,3,4,6,7,8,9,10,11,13,14,15), c(1,3,4,5,7,8))
+text(0,9, "TBL", cex=1.5, adj=c(0,0))
+
+# UEH
+pd.plot(tre, c(3,4,6,8,9,11,14,13,15), c(1,3,4,5,7,8))
+text(0,9, "UEH", cex=1.5, adj=c(0,0))
+
+# SBL
+pd.plot(tre, c(1,2,7,10), c(1,3,4,5,7,8))
+text(0,9, "SBL", cex=1.5, adj=c(0,0))
+
+# TIP
+pd.plot(tre, c(4,6,8,9,13,15), c(1,3,4,5,7,8))
+text(0,9, "TIP", cex=1.5, adj=c(0,0))
+
+# MST
+pd.plot(tre, c(3,4,6), c(1,3))
+text(0,9, "MST", cex=1.5, adj=c(0,0))
+
+}
+\keyword{utilities}% at least one, from doc/KEYWORDS



More information about the Caic-commits mailing list