[Picante-commits] r34 - pkg/R

Sun Mar 2 23:52:00 CET 2008

Author: skembel
Date: 2008-03-02 23:52:00 +0100 (Sun, 02 Mar 2008)
New Revision: 34

Added:
   pkg/R/df2vec.R
Removed:
   pkg/R/Kcalc.R
Modified:
   pkg/R/phylosignal.R
Log:
Updated phylosignal calculations and Kcalc - works properly with non-ultrametric trees now

Deleted: pkg/R/Kcalc.R
===================================================================

--- pkg/R/Kcalc.R	2008-02-26 20:11:59 UTC (rev 33)
+++ pkg/R/Kcalc.R	2008-03-02 22:52:00 UTC (rev 34)
@@ -1,47 +0,0 @@
-`Kcalc` <-
-function(x,phy) {
-  
-## Kcalc calculates Blomberg et al's K statistic for continuous
-## traits evolving along a phylogeny. 
-## See: Blomberg, S. P., T. Garland, and A. R. Ives. 2003. Testing for phylogenetic signal in comparative data: Behavioral traits are more labile. Evolution 57:717-745.
-## For more information, please contact Simon Blomberg: 
-## S.Blomberg1_at_uq.edu.au
-##
-## Kcalc wrapper by: David Ackerly, dackerly at berkeley.edu
-## Further hacking by SB 7 August 2007 to do some error-checking and to handle
-## nonultrametric trees.
-## Further further hacking by SK Dec 2007 - minor bugfix
-    
-  if (length(x) != length(phy$tip.label)) stop(
-              "Data vector and tree contain different numbers of taxa.")
-  if (!setequal(names(x), phy$tip.label)) warning(
-            "Taxon names in data vector and names of tip labels do not match.")
-
-  if (!is.ultrametric(phy)) {
-    mat <- vcv.phylo(phy) 
-    vars <- diag(mat)
-    weights <- varFixed(~vars)
-  }
-  else weights <- NULL
-  
-  x <- x[phy$tip.label]  
-  matc <- vcv.phylo(phy, cor=TRUE) # correlation matrix
-  ntax <- length(phy$tip.label)
-  
-# calculate "phylogenetic" mean via gls
-  fit <- gls(x ~ 1,
-             correlation=corSymm(matc[lower.tri(matc)],
-             fixed=TRUE), weights=weights)
-  ahat <- coef(fit)
-
-#observed
-  MSE <- fit$sigma^2
-  MSE0 <- t(x-ahat) %*% (x - ahat)/(ntax-1)
-
-#expected
-  MSE0.MSE <- 1/(ntax-1) * (sum(diag(matc))-ntax/sum(solve(matc)))
-
-  K <- MSE0/MSE / MSE0.MSE
-  K
-}
-

Added: pkg/R/df2vec.R
===================================================================
--- pkg/R/df2vec.R	                        (rev 0)
+++ pkg/R/df2vec.R	2008-03-02 22:52:00 UTC (rev 34)
@@ -0,0 +1,5 @@
+df2vec <- function(x, colID=1) {
+	vec <- x[,colID]
+	names(vec) <- row.names(x)
+	vec
+}

Modified: pkg/R/phylosignal.R
===================================================================
--- pkg/R/phylosignal.R	2008-02-26 20:11:59 UTC (rev 33)
+++ pkg/R/phylosignal.R	2008-03-02 22:52:00 UTC (rev 34)
@@ -1,29 +1,64 @@
-`phylosignal` <-
-function(x,phy,reps=999) {
+Kcalc <- function(x,phy) {
+	mat <- vcv.phylo(phy, cor=TRUE) # correlation matrix
+	ntax = length(phy$tip.label)
+	ntax1 = ntax-1
 
-  if (length(x) != length(phy$tip.label)) stop(
-              "Data vector and tree contain different numbers of taxa.")
-  if (!setequal(names(x), phy$tip.label)) warning(
-            "Taxon names in data vector and names of tip labels do not match.")
+	dat = data.frame(x)
+	names(dat) = 'x'
+	# calculate "phylogenetic" mean via gls
+	fit <- gls(x ~ 1, data = dat, 	
+		correlation=corSymm(mat[lower.tri(mat)], fixed=TRUE))
+	ahat <- coef(fit)
+	
+	#observed
+	MSE <- fit$sigma^2
+	MSE0 <- t(dat$x - ahat) %*% (dat$x - ahat)/ ntax1
 
-	x <- x[phy$tip.label]
-	obs.var.pic.scaled = var.pic(x,phy)
-	K = Kcalc(x,phy)
+	#expected
+	MSE0.MSE <- 1/ ntax1 * 	
+		(sum(diag(mat))- ntax/sum(solve(mat)))
+
+	K <- MSE0/MSE / MSE0.MSE
+	return(K)
+}
+
+df2vec <- function(x, colID=1) {
+	vec <- x[,colID]
+	names(vec) <- row.names(x)
+	vec
+}
+
+pic.variance <- function(x,phy,scaled=TRUE) {
+	pics <- pic(x,phy,scaled)
+	N <- length(pics)
+	sum(pics^2) / (N -1)
+}
+
+phylosignal <- function(x,phy,reps=999,...) {
+
+    K <- Kcalc(x,phy)
+
+    if (!is.vector(x)) {
+        x.orig <- x
+        x <- as.vector(x)
+        names(x) <- row.names(x.orig)
+    }
 	
-	rnd.var.pic.scaled <- vector()
-		
+	obs.var.pic = pic.variance(x,phy,...)
+	    
+	rnd.var.pic <- numeric(reps)
+	
 	#significance based on tip shuffle
 	for (i in 1:reps) {
 		tipsh.x <- sample(x)
-		names(tipsh.x) <- names(x)
-		rnd.var.pic.scaled <- c(rnd.var.pic.scaled,var.pic(tipsh.x,phy))
+	    names(tipsh.x) <- names(x)
+		rnd.var.pic[i] <- pic.variance(tipsh.x,phy,...)
 	}
 
-	var.pics.scaled = c(obs.var.pic.scaled,rnd.var.pic.scaled)
-	var.pics.scaled.p = rank(var.pics.scaled)[1] / (reps + 1)
-	var.pics.scaled.z = (obs.var.pic.scaled - mean(var.pics.scaled))/sd(var.pics.scaled)
+	var.pics = c(obs.var.pic,rnd.var.pic)
+	var.pics.p = rank(var.pics)[1] / (reps + 1)
+	var.pics.z = (obs.var.pic - mean(var.pics))/sd(var.pics)
 
-	data.frame(K,PIC.variance.obs=obs.var.pic.scaled,PIC.variance.random=mean(var.pics.scaled),
-		PIC.variance.P=var.pics.scaled.p, PIC.variance.Z=var.pics.scaled.z)
+	data.frame(K,PIC.variance=obs.var.pic,PIC.variance.P=var.pics.p, PIC.variance.Z=var.pics.z)
+
 }
-