[Mattice-commits] r27 - in pkg: . R misc
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Nov 14 09:04:10 CET 2008
Author: andrew_hipp
Date: 2008-11-14 09:04:10 +0100 (Fri, 14 Nov 2008)
New Revision: 27
Added:
pkg/misc/
pkg/misc/originalApeToOuch_NU.R
pkg/misc/originalBatchHansenFit.R
Removed:
pkg/R/originalApeToOuch_NU.R
pkg/R/originalBatchHansenFit.R
Log:
moved originalApeToOuch.R and originalBatchHansen.R to pkg/misc
Deleted: pkg/R/originalApeToOuch_NU.R
===================================================================
--- pkg/R/originalApeToOuch_NU.R 2008-11-14 08:00:03 UTC (rev 26)
+++ pkg/R/originalApeToOuch_NU.R 2008-11-14 08:04:10 UTC (rev 27)
@@ -1,74 +0,0 @@
-apeToOuch <-
-## Functions for converting from APE tree format to OUCH tree format
-## Andrew Hipp, February 2005
-## Generalized to non-ultrametric trees Sept 2006
-# Converts a "phylo" object from APE to a 3-column matrix ordered for OUCH format
-
-function(tree, scalingFactor = 1) {
- numberOfNodes <- length(tree$edge.length)
- treeMatrix <- c(0*1:(5*(numberOfNodes + 1)))
- dim(treeMatrix) <- c(numberOfNodes + 1, 5)
- species = 1
- ancestor = 2
- times = 3
- apeAncestor = 4
- apeChild = 5
- # totalLengthUnscaled <- distFromRoot(tree, "1")
- # 'totalLengthUnscaled' was used in the original function to determine endtime of each terminal branch; assumes ultrametricity
- nodeCounter <- 1
- treeMatrix[1, 1:3] <- c(NA, 0, 0)
- # This version sets the ancestor of the root as 0, following the older version of ouch; change this by uncommenting the following line:
- # treeMatrix[1, 1:3] <- c(NA, NA, 0)
- for (lineNumber in 1:numberOfNodes) {
- parentNode = tree$edge[lineNumber, 1]
- childNode = tree$edge[lineNumber, 2]
- if (as.integer(childNode) < 0) {
- nodeCounter <- nodeCounter + 1
- treeMatrix[nodeCounter, species] <- NA
- treeMatrix[nodeCounter, times] <- distFromRoot(tree, childNode) * scalingFactor
- treeMatrix[nodeCounter, apeChild] <- childNode
- treeMatrix[nodeCounter, apeAncestor] <- parentNode }}
- numberOfInternalNodes <- nodeCounter
-
- for (lineNumber in 1:numberOfNodes) {
- parentNode = tree$edge[lineNumber, 1]
- childNode = tree$edge[lineNumber, 2]
- if (as.integer(childNode) > 0) {
- nodeCounter <- nodeCounter + 1
- treeMatrix[nodeCounter, species] <- tree$tip.label[as.integer(childNode)]
- treeMatrix[nodeCounter, times] <- distFromRoot(tree, childNode) * scalingFactor
- # This is revised from original function, which just dropped in the maximum time
- treeMatrix[nodeCounter, apeChild] <- childNode
- treeMatrix[nodeCounter, apeAncestor] <- parentNode }}
-
- for (lineNumber in (1:numberOfNodes + 1)) {
- if (treeMatrix[lineNumber, apeAncestor] == "-1") treeMatrix[lineNumber, ancestor] <- 1
- else {treeMatrix[lineNumber, ancestor] <- match(treeMatrix[lineNumber, apeAncestor], treeMatrix[1:numberOfInternalNodes, apeChild])}
- }
-
- matrixLength <- length(treeMatrix) / 5
- treeDataFrame = data.frame(node = I(as.integer(1:matrixLength)),
- species = I(treeMatrix[1:matrixLength,1]),
- ancestor = I(as.integer(treeMatrix[1:matrixLength,2])),
- times = I(as.double(treeMatrix[1:matrixLength,3])))
- return(treeDataFrame) }
-
-distFromRoot <-
-# Finds the distance of a particular node from the root of the tree.
-# Takes an ape tree (class "phylo") and child node label (class "character") as its arguments.
-# GENERALLY ONLY CALLED BY apeToOuch
-function(tree, childNode) {
- if (childNode == "-1") return(0)
- numberOfNodes <- length(tree$edge.length)
- childLine <- match(childNode, tree$edge[1:numberOfNodes, 2])
- ancestor <- tree$edge[childLine, 1]
- distance <- tree$edge.length[childLine] + distFromRoot(tree, ancestor)
- return(distance) }
-
-mergeData <-
-# Merges an OUCH-style tree as output from apeToOuch
-function(ouchTree, dataMatrix) {
- newTree = merge(ouchTree, data.frame(dataMatrix), all.x = T)
- newTree = newTree[order(newTree$node), ]
- row.names(newTree) = 1:length(newTree$times)
- return(newTree) }
\ No newline at end of file
Deleted: pkg/R/originalBatchHansenFit.R
===================================================================
--- pkg/R/originalBatchHansenFit.R 2008-11-14 08:00:03 UTC (rev 26)
+++ pkg/R/originalBatchHansenFit.R 2008-11-14 08:04:10 UTC (rev 27)
@@ -1,89 +0,0 @@
-# ---------------------------------------------------------------------
-# FUNCTIONS FOR PERFORMING A SERIES OF OU ANALYSES ON A BATCH OF TREES
-# ---------------------------------------------------------------------
-# Copied from functions used in Hipp 2007 Evolution paper
-# *** To run a hansen (OU) analysis, call runBatchHansenFit ***
-# Utilizes ouch v 1.2-4
-# This is the original set of functions utilized in Hipp 2007 (Evolution 61: 2175-2194) as modified
-# for Lumbsch, Hipp et al. 2008 (BMC Evolutionary Biology 8: 257). At the time of uploade to r-forge,
-# they have not been checked for compatibility with subsequent versions of ouch.
-
-
-## Project details
-## maticce: Mapping Transitions In Continuous Character Evolution
-## full project name: Continuous Character Shifts on Phylogenies
-## unix name: mattice
-## Project page requested from R-forge on 7 november 2008
-
-## Changes needed:
-## 1. calls should be to hansen rather than hansen.fit
-## 2. measurement error portions need to be fixed
-## 3. Analysis should be conducted over multiple trees, summarizing only over trees for which a given node is present;
-## node presence should be checked on each tree by looking to see whether the defining group is monophyletic,
-## and probably a matrix created for each multiple-tree analysis that makes summarizing quicker.
-## 4. Max number of simultaneous nodes should be set
-## 5. In a better world, allow graphical selection of subtrees to test on a single tree, then extract defining taxa
-## based on those nodes. This shouldn't be too tricky using locator() or something like it.
-## 6. IT statistics should use informationCriterion or something else to clean up the code
-
-runBatchHansenFit <-
-# Runs batchHansenFit and brown.fit over a list of ouchTrees
-# Arguments:
-# "ouchTrees" = list of OUCH-style trees; if a single tree, send as 'list(TREE)'
-# "characterStates" = vector of character states, extracted from an ouch-style tree data.frame
-# "SEM"= standard error of the mean, vector extracted from an ouch-style tree data.frame
-# "scalingFactor" = factor to multiply against (times / max(times)) -- choose based on trial analyses
-# "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree)
-function(ouchTrees, characterStates, cladeMembersList, SEM = rep(0, length(characterStates)), scalingFactor = 1, logData = FALSE) {
- hansenBatch = vector("list", length(ouchTrees))
- treeCounter = 0
- for (i in ouchTrees) {
- treeCounter = treeCounter + 1
- i = list(node = as.character(i$node), ancestor = as.character(i$ancestor), times = as.numeric(i$times), species = as.character(i$species))
- regimesList = regimeVectors(i$node, i$ancestor, i$times, i$species, cladeMembersList)
- write.table(regimesList, 'regimesList.txt')
- if (logData) hansenBatch[[treeCounter]] = batchHansenFit(i$node, i$ancestor, i$times, characterStates, error = SEM, scalingFactor, regimesList, c(as.character(1:length(regimesList)), "brown"), numberOfTermini = sum(!is.na(i$species)))
- else hansenBatch[[treeCounter]] = batchHansenFit(i$node, i$ancestor, i$times, characterStates, error = SEM, scalingFactor, regimesList, c(as.character(1:length(regimesList)), "brown"), numberOfTermini = sum(!is.na(i$species)))
- message(paste("Tree",treeCounter,"of",length(ouchTrees),"complete"))}
- return(list(hansens = hansenBatch, regimes = regimesList)) }
-
-batchHansenFit <-
-# Runs hansen.fit and brown.fit on a tree over a batch of selective regimes
-# Arguments:
-# "node" "ancestor" "times" "data" = the standard tree specification vectors of the OUCH-style tree
-# "regimesList" = list of regime-paintings as output from regimeVectors
-# "scalingFactor" = factor to multiply against (times / max(times)) -- choose based on trial analyses
-# Value: a matrix with nrow = regimes + 1 and columns for u, d.f., all estimated parameters, LRvsBM, AIC, and AIC weight
-# ADDED "error" on 2 march 07 to accomodate the "me" in Pienaar's ouch
-function(node, ancestor, times, data, error, scalingFactor = 1, regimesList, regimeTitles, numberOfTermini = 53) {
- variables = c("loglik", "df", "alpha", "sigma", "theta0", "theta1", "theta2", "theta3", "theta4", "theta5","theta6","theta7","theta8","theta9", "LRvsBM", "AIC", "AICweight", "deltaAIC", "exp(-0.5*deltaAIC)", "AICc", "AICcWeight", "deltaAICc", "exp(-0.5*deltaAICc)")
- treeData = matrix(data = NA, nrow = (length(regimesList) + 1), ncol = length(variables),
- dimnames = list(regimeTitles,variables))
- brownianResults = brown.fit(data, error, node, ancestor, (times/max(times))*scalingFactor)
- treeData[length(regimesList) + 1, "AIC"] = brownianResults$aic
- treeData[length(regimesList) + 1, "loglik"] = brownianResults$loglik
- treeData[length(regimesList) + 1, "sigma"] = brownianResults$sigma
- treeData[length(regimesList) + 1, "theta0"] = brownianResults$theta
- treeData[length(regimesList) + 1, "df"] = brownianResults$df
- treeData[length(regimesList) + 1, "AICc"] = brownianResults$aic + ((2 * brownianResults$df * (brownianResults$df + 1)) / (numberOfTermini - brownianResults$df - 1))
- for (i in 1:length(regimesList)) {
- message(paste("Running regime",i))
- tempHansen = hansen.fit(data, error, node, ancestor, (times/max(times))*scalingFactor, regimesList[[i]])
- treeData[i,"loglik"] = tempHansen$loglik
- treeData[i,"df"] = tempHansen$df
- treeData[i,"alpha"] = tempHansen$alpha
- treeData[i,"sigma"] = tempHansen$sigma
- treeData[i,"LRvsBM"] = 2 * ((tempHansen$loglik / 2) - (treeData[length(regimesList) + 1, "loglik"] / 2))
- treeData[i,"AIC"] = tempHansen$aic
- treeData[i,"AICc"] = tempHansen$aic + ((2 * tempHansen$df * (tempHansen$df + 1)) / (numberOfTermini - tempHansen$df - 1))
- for (j in 0:(length(names(tempHansen$theta))-1)) {
- treeData[i, paste("theta",j,sep = "")] = tempHansen$theta[[j+1]]}}
- for (i in 1:(length(regimesList) + 1)) {
- treeData[i, "deltaAIC"] = treeData[i, "AIC"] - min(treeData[1:(length(regimesList) + 1), "AIC"])
- treeData[i, "exp(-0.5*deltaAIC)"] = exp(-0.5 * treeData[i,"deltaAIC"])
- treeData[i, "deltaAICc"] = treeData[i, "AICc"] - min(treeData[1:(length(regimesList) + 1), "AICc"])
- treeData[i, "exp(-0.5*deltaAICc)"] = exp(-0.5 * treeData[i,"deltaAICc"]) }
- for (i in 1:(length(regimesList) + 1)) {
- treeData[i, "AICweight"] = treeData[i, "exp(-0.5*deltaAIC)"] / sum(treeData[1:(length(regimesList) + 1), "exp(-0.5*deltaAIC)"])
- treeData[i, "AICcWeight"] = treeData[i, "exp(-0.5*deltaAICc)"] / sum(treeData[1:(length(regimesList) + 1), "exp(-0.5*deltaAICc)"]) }
- return(treeData) }
Added: pkg/misc/originalApeToOuch_NU.R
===================================================================
--- pkg/misc/originalApeToOuch_NU.R (rev 0)
+++ pkg/misc/originalApeToOuch_NU.R 2008-11-14 08:04:10 UTC (rev 27)
@@ -0,0 +1,74 @@
+apeToOuch <-
+## Functions for converting from APE tree format to OUCH tree format
+## Andrew Hipp, February 2005
+## Generalized to non-ultrametric trees Sept 2006
+# Converts a "phylo" object from APE to a 3-column matrix ordered for OUCH format
+
+function(tree, scalingFactor = 1) {
+ numberOfNodes <- length(tree$edge.length)
+ treeMatrix <- c(0*1:(5*(numberOfNodes + 1)))
+ dim(treeMatrix) <- c(numberOfNodes + 1, 5)
+ species = 1
+ ancestor = 2
+ times = 3
+ apeAncestor = 4
+ apeChild = 5
+ # totalLengthUnscaled <- distFromRoot(tree, "1")
+ # 'totalLengthUnscaled' was used in the original function to determine endtime of each terminal branch; assumes ultrametricity
+ nodeCounter <- 1
+ treeMatrix[1, 1:3] <- c(NA, 0, 0)
+ # This version sets the ancestor of the root as 0, following the older version of ouch; change this by uncommenting the following line:
+ # treeMatrix[1, 1:3] <- c(NA, NA, 0)
+ for (lineNumber in 1:numberOfNodes) {
+ parentNode = tree$edge[lineNumber, 1]
+ childNode = tree$edge[lineNumber, 2]
+ if (as.integer(childNode) < 0) {
+ nodeCounter <- nodeCounter + 1
+ treeMatrix[nodeCounter, species] <- NA
+ treeMatrix[nodeCounter, times] <- distFromRoot(tree, childNode) * scalingFactor
+ treeMatrix[nodeCounter, apeChild] <- childNode
+ treeMatrix[nodeCounter, apeAncestor] <- parentNode }}
+ numberOfInternalNodes <- nodeCounter
+
+ for (lineNumber in 1:numberOfNodes) {
+ parentNode = tree$edge[lineNumber, 1]
+ childNode = tree$edge[lineNumber, 2]
+ if (as.integer(childNode) > 0) {
+ nodeCounter <- nodeCounter + 1
+ treeMatrix[nodeCounter, species] <- tree$tip.label[as.integer(childNode)]
+ treeMatrix[nodeCounter, times] <- distFromRoot(tree, childNode) * scalingFactor
+ # This is revised from original function, which just dropped in the maximum time
+ treeMatrix[nodeCounter, apeChild] <- childNode
+ treeMatrix[nodeCounter, apeAncestor] <- parentNode }}
+
+ for (lineNumber in (1:numberOfNodes + 1)) {
+ if (treeMatrix[lineNumber, apeAncestor] == "-1") treeMatrix[lineNumber, ancestor] <- 1
+ else {treeMatrix[lineNumber, ancestor] <- match(treeMatrix[lineNumber, apeAncestor], treeMatrix[1:numberOfInternalNodes, apeChild])}
+ }
+
+ matrixLength <- length(treeMatrix) / 5
+ treeDataFrame = data.frame(node = I(as.integer(1:matrixLength)),
+ species = I(treeMatrix[1:matrixLength,1]),
+ ancestor = I(as.integer(treeMatrix[1:matrixLength,2])),
+ times = I(as.double(treeMatrix[1:matrixLength,3])))
+ return(treeDataFrame) }
+
+distFromRoot <-
+# Finds the distance of a particular node from the root of the tree.
+# Takes an ape tree (class "phylo") and child node label (class "character") as its arguments.
+# GENERALLY ONLY CALLED BY apeToOuch
+function(tree, childNode) {
+ if (childNode == "-1") return(0)
+ numberOfNodes <- length(tree$edge.length)
+ childLine <- match(childNode, tree$edge[1:numberOfNodes, 2])
+ ancestor <- tree$edge[childLine, 1]
+ distance <- tree$edge.length[childLine] + distFromRoot(tree, ancestor)
+ return(distance) }
+
+mergeData <-
+# Merges an OUCH-style tree as output from apeToOuch
+function(ouchTree, dataMatrix) {
+ newTree = merge(ouchTree, data.frame(dataMatrix), all.x = T)
+ newTree = newTree[order(newTree$node), ]
+ row.names(newTree) = 1:length(newTree$times)
+ return(newTree) }
\ No newline at end of file
Added: pkg/misc/originalBatchHansenFit.R
===================================================================
--- pkg/misc/originalBatchHansenFit.R (rev 0)
+++ pkg/misc/originalBatchHansenFit.R 2008-11-14 08:04:10 UTC (rev 27)
@@ -0,0 +1,89 @@
+# ---------------------------------------------------------------------
+# FUNCTIONS FOR PERFORMING A SERIES OF OU ANALYSES ON A BATCH OF TREES
+# ---------------------------------------------------------------------
+# Copied from functions used in Hipp 2007 Evolution paper
+# *** To run a hansen (OU) analysis, call runBatchHansenFit ***
+# Utilizes ouch v 1.2-4
+# This is the original set of functions utilized in Hipp 2007 (Evolution 61: 2175-2194) as modified
+# for Lumbsch, Hipp et al. 2008 (BMC Evolutionary Biology 8: 257). At the time of uploade to r-forge,
+# they have not been checked for compatibility with subsequent versions of ouch.
+
+
+## Project details
+## maticce: Mapping Transitions In Continuous Character Evolution
+## full project name: Continuous Character Shifts on Phylogenies
+## unix name: mattice
+## Project page requested from R-forge on 7 november 2008
+
+## Changes needed:
+## 1. calls should be to hansen rather than hansen.fit
+## 2. measurement error portions need to be fixed
+## 3. Analysis should be conducted over multiple trees, summarizing only over trees for which a given node is present;
+## node presence should be checked on each tree by looking to see whether the defining group is monophyletic,
+## and probably a matrix created for each multiple-tree analysis that makes summarizing quicker.
+## 4. Max number of simultaneous nodes should be set
+## 5. In a better world, allow graphical selection of subtrees to test on a single tree, then extract defining taxa
+## based on those nodes. This shouldn't be too tricky using locator() or something like it.
+## 6. IT statistics should use informationCriterion or something else to clean up the code
+
+runBatchHansenFit <-
+# Runs batchHansenFit and brown.fit over a list of ouchTrees
+# Arguments:
+# "ouchTrees" = list of OUCH-style trees; if a single tree, send as 'list(TREE)'
+# "characterStates" = vector of character states, extracted from an ouch-style tree data.frame
+# "SEM"= standard error of the mean, vector extracted from an ouch-style tree data.frame
+# "scalingFactor" = factor to multiply against (times / max(times)) -- choose based on trial analyses
+# "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree)
+function(ouchTrees, characterStates, cladeMembersList, SEM = rep(0, length(characterStates)), scalingFactor = 1, logData = FALSE) {
+ hansenBatch = vector("list", length(ouchTrees))
+ treeCounter = 0
+ for (i in ouchTrees) {
+ treeCounter = treeCounter + 1
+ i = list(node = as.character(i$node), ancestor = as.character(i$ancestor), times = as.numeric(i$times), species = as.character(i$species))
+ regimesList = regimeVectors(i$node, i$ancestor, i$times, i$species, cladeMembersList)
+ write.table(regimesList, 'regimesList.txt')
+ if (logData) hansenBatch[[treeCounter]] = batchHansenFit(i$node, i$ancestor, i$times, characterStates, error = SEM, scalingFactor, regimesList, c(as.character(1:length(regimesList)), "brown"), numberOfTermini = sum(!is.na(i$species)))
+ else hansenBatch[[treeCounter]] = batchHansenFit(i$node, i$ancestor, i$times, characterStates, error = SEM, scalingFactor, regimesList, c(as.character(1:length(regimesList)), "brown"), numberOfTermini = sum(!is.na(i$species)))
+ message(paste("Tree",treeCounter,"of",length(ouchTrees),"complete"))}
+ return(list(hansens = hansenBatch, regimes = regimesList)) }
+
+batchHansenFit <-
+# Runs hansen.fit and brown.fit on a tree over a batch of selective regimes
+# Arguments:
+# "node" "ancestor" "times" "data" = the standard tree specification vectors of the OUCH-style tree
+# "regimesList" = list of regime-paintings as output from regimeVectors
+# "scalingFactor" = factor to multiply against (times / max(times)) -- choose based on trial analyses
+# Value: a matrix with nrow = regimes + 1 and columns for u, d.f., all estimated parameters, LRvsBM, AIC, and AIC weight
+# ADDED "error" on 2 march 07 to accomodate the "me" in Pienaar's ouch
+function(node, ancestor, times, data, error, scalingFactor = 1, regimesList, regimeTitles, numberOfTermini = 53) {
+ variables = c("loglik", "df", "alpha", "sigma", "theta0", "theta1", "theta2", "theta3", "theta4", "theta5","theta6","theta7","theta8","theta9", "LRvsBM", "AIC", "AICweight", "deltaAIC", "exp(-0.5*deltaAIC)", "AICc", "AICcWeight", "deltaAICc", "exp(-0.5*deltaAICc)")
+ treeData = matrix(data = NA, nrow = (length(regimesList) + 1), ncol = length(variables),
+ dimnames = list(regimeTitles,variables))
+ brownianResults = brown.fit(data, error, node, ancestor, (times/max(times))*scalingFactor)
+ treeData[length(regimesList) + 1, "AIC"] = brownianResults$aic
+ treeData[length(regimesList) + 1, "loglik"] = brownianResults$loglik
+ treeData[length(regimesList) + 1, "sigma"] = brownianResults$sigma
+ treeData[length(regimesList) + 1, "theta0"] = brownianResults$theta
+ treeData[length(regimesList) + 1, "df"] = brownianResults$df
+ treeData[length(regimesList) + 1, "AICc"] = brownianResults$aic + ((2 * brownianResults$df * (brownianResults$df + 1)) / (numberOfTermini - brownianResults$df - 1))
+ for (i in 1:length(regimesList)) {
+ message(paste("Running regime",i))
+ tempHansen = hansen.fit(data, error, node, ancestor, (times/max(times))*scalingFactor, regimesList[[i]])
+ treeData[i,"loglik"] = tempHansen$loglik
+ treeData[i,"df"] = tempHansen$df
+ treeData[i,"alpha"] = tempHansen$alpha
+ treeData[i,"sigma"] = tempHansen$sigma
+ treeData[i,"LRvsBM"] = 2 * ((tempHansen$loglik / 2) - (treeData[length(regimesList) + 1, "loglik"] / 2))
+ treeData[i,"AIC"] = tempHansen$aic
+ treeData[i,"AICc"] = tempHansen$aic + ((2 * tempHansen$df * (tempHansen$df + 1)) / (numberOfTermini - tempHansen$df - 1))
+ for (j in 0:(length(names(tempHansen$theta))-1)) {
+ treeData[i, paste("theta",j,sep = "")] = tempHansen$theta[[j+1]]}}
+ for (i in 1:(length(regimesList) + 1)) {
+ treeData[i, "deltaAIC"] = treeData[i, "AIC"] - min(treeData[1:(length(regimesList) + 1), "AIC"])
+ treeData[i, "exp(-0.5*deltaAIC)"] = exp(-0.5 * treeData[i,"deltaAIC"])
+ treeData[i, "deltaAICc"] = treeData[i, "AICc"] - min(treeData[1:(length(regimesList) + 1), "AICc"])
+ treeData[i, "exp(-0.5*deltaAICc)"] = exp(-0.5 * treeData[i,"deltaAICc"]) }
+ for (i in 1:(length(regimesList) + 1)) {
+ treeData[i, "AICweight"] = treeData[i, "exp(-0.5*deltaAIC)"] / sum(treeData[1:(length(regimesList) + 1), "exp(-0.5*deltaAIC)"])
+ treeData[i, "AICcWeight"] = treeData[i, "exp(-0.5*deltaAICc)"] / sum(treeData[1:(length(regimesList) + 1), "exp(-0.5*deltaAICc)"]) }
+ return(treeData) }
More information about the Mattice-commits
mailing list