[Mattice-commits] r28 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Nov 14 09:05:42 CET 2008
Author: andrew_hipp
Date: 2008-11-14 09:05:41 +0100 (Fri, 14 Nov 2008)
New Revision: 28
Added:
pkg/R/batchHansen.R
pkg/R/treeTraversal.R
Removed:
pkg/R/revisedBatchHansen.R
pkg/R/revisedTreeTraversal.R
Log:
renamed revisedBatchHansen.R and revisedTreeTraversal.R ... no need to set them off any longer
Copied: pkg/R/batchHansen.R (from rev 26, pkg/R/revisedBatchHansen.R)
===================================================================
--- pkg/R/batchHansen.R (rev 0)
+++ pkg/R/batchHansen.R 2008-11-14 08:05:41 UTC (rev 28)
@@ -0,0 +1,131 @@
+# ---------------------------------------------------------------------
+# FUNCTIONS FOR PERFORMING A SERIES OF OU ANALYSES ON A BATCH OF TREES
+# ---------------------------------------------------------------------
+# Copied from functions used in Hipp 2007 Evolution paper
+# *** To run a hansen (OU) analysis, call runBatchHansenFit ***
+# Utilizes ouch v 1.2-4
+# This is the original set of functions utilized in Hipp 2007 (Evolution 61: 2175-2194) as modified
+# for Lumbsch, Hipp et al. 2008 (BMC Evolutionary Biology 8: 257). At the time of uploade to r-forge,
+# they have not been checked for compatibility with subsequent versions of ouch.
+
+
+## Project details
+## maticce: Mapping Transitions In Continuous Character Evolution
+## full project name: Continuous Character Shifts on Phylogenies
+## unix name: mattice
+## Project page requested from R-forge on 7 november 2008
+
+## Changes needed:
+## 1. calls should be to hansen rather than hansen.fit
+## 2. measurement error portions need to be fixed
+## 3. Analysis should be conducted over multiple trees, summarizing only over trees for which a given node is present;
+## node presence should be checked on each tree by looking to see whether the defining group is monophyletic,
+## and probably a matrix created for each multiple-tree analysis that makes summarizing quicker.
+## 4. DONE -- Max number of simultaneous nodes should be set
+## 5. In a better world, allow graphical selection of subtrees to test on a single tree, then extract defining taxa
+## based on those nodes, using locator() or something like it.
+## 6. IT statistics should use informationCriterion or something else to clean up the code
+
+runBatchHansen <-
+# 11 nov 08: renamed to runBatchHansen
+# Runs batchHansenFit and brown.fit over a list of ouchTrees
+# Arguments:
+# "ouchTrees" = list of OUCH-style trees
+# "characterStates" = vector of character states, either extracted from an ouch-style tree data.frame or a named vector
+# REMOVED: "SEM"= standard error of the mean, vector extracted from an ouch-style tree data.frame
+# "rescale" = factor to multiply against (times / max(times)) -- choose based on trial analyses; set at <= 0 if you don't want to rescale trees
+# "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree)
+# "brown" = whether to analyse the data under a Brownian motion model
+# "..." = additional arguments to pass along to hansen
+function(ouchTrees, characterStates, cladeMembersList, maxNodes = NULL, regimeTitles = NULL, brown = F, rescale = 1, ...) {
+ ## do all the objects in ouchTrees inherit ouchtree?
+ if(is(ouchTrees,'ouchtree')) ouchTrees <- list(ouchTrees)
+ treeCheck <- unlist(lapply(ouchTrees, function(x) is(x,'ouchtree')))
+ if(F %in% treeCheck)
+ stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
+ '\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
+
+ ## Check character states to make sure that they are either named and match names in the trees, or are the same length as the tips
+ for (i in 1:length(ouchTrees)) {
+ dataFlag <- NULL
+ stopFlag <- F
+ tree <- ouchTrees[[i]]
+ terminals <- tree at nodelabels[(tree at nnodes - tree at nterm + 1):tree at nnodes]
+ if(any(F %in% (terminals %in% names(characterStates)))) {
+ message(paste("Not every terminal branch in tree", i, "has a corresponding name in", sQuote("characterStates")))
+ if(length(characterStates) == tree at nterm) {
+ message("Data assumed to be in the same order as terminals")
+ dataFlag <- 'sameOrderTerminals'
+ }
+ if(length(characterStates) == tree at nnodes) {
+ message("Data assumed to be in the same order as nodes;\nany data not associated with a terminal branch will be ignored")
+ dataFlag <- 'sameOrderNodes'
+ }
+ if(identical(dataFlag, NULL)) stopFlag <- T
+ message("-------------------\n")
+ }
+ else dataFlag <- 'named'
+ if(stopFlag) stop("Correct discrepancies between trees and data and try again!")
+ }
+
+ hansenBatch = vector("list", length(ouchTrees))
+ treeCounter = 0
+ for (i in 1:length(ouchTrees)) {
+ tree <- ouchTrees[[i]]
+ regimesList = regimeVectors(tree, cladeMembersList, maxNodes)
+ if(identical(regimeTitles, NULL)) {
+ regimeTitles <- as.character(1:length(regimesList))
+ if(brown) regimeTitles <- c(regimeTitles, 'brown')
+ }
+
+ ## rescale tree if requested
+ if(rescale>0) tree at times <- rescale * tree at times / max(tree at times)
+
+ ## need to revise regimeVectors so that it only returns regimes for nodes that are supported in the tree
+ ## for now, assume nodes of interest are present in all trees
+
+ ## make sure data fits the tree
+ dataIn <- NULL
+ if(dataFlag == 'sameOrderTerminals') dataIn <- c(rep(NA, tree at nnodes - tree at nterm), characterStates)
+ if(dataFlag == 'sameOrderNodes') dataIn <- characterStates
+ if(dataFlag == 'named') dataIn <- characterStates[match(tree at nodelabels), characterStates]
+ if(identical(dataIn, NULL)) stop(paste("There is a problem with your data that I failed to catch at the outset of", sQuote('runBatchHansen()')))
+ else names(dataIn) <- tree at nodes
+
+ ## send it off to batchHansen and just stick the results in hansenBatch... this won't work as the number of regimes gets large,
+ ## so there should be some option here to just hang onto the coefficients for each run (i.e., hang onto 'coef(hansen(...))' rather than 'hansen(...)')
+ ## there could also be an option to save the entire object as a series of files in addition to hanging onto
+ hansenBatch[[i]] = batchHansen(tree, dataIn, regimesList, regimeTitles, brown, ...)
+ message(paste("Tree",i,"of",length(ouchTrees),"complete"))}
+
+ ## right now no summary is returned; one is needed, summarizing over trees what is summarized for each tree in batchHansen
+
+ return(list(hansens = hansenBatch, regimes = regimesList)) }
+
+batchHansen <-
+# Runs hansen.fit and brown.fit on a tree over a batch of selective regimes
+# Arguments:
+# "node" "ancestor" "times" "data" = the standard tree specification vectors of the OUCH-style tree
+# "regimesList" = list of regime-paintings as output from regimeVectors
+# "scalingFactor" = factor to multiply against (times / max(times)) -- choose based on trial analyses
+# Value: a matrix with nrow = regimes (+ 1 if brownian model is included) and columns for u, d.f., all estimated parameters, LRvsBM, AIC, and AIC weight
+function(tree, data, regimesList, regimeTitles, brown, ...) {
+ n <- tree at nterm
+ ## set up a matrix that returns lnL, K, sigmasq, theta0, and alpha for every model; thetas will go along into a list that is indexed by model
+ hansenOptima <- list(length(regimeTitles))
+ variables <- c("loglik", "dof", "sigma.squared", "theta / alpha") # it's important that 'alpha' go last so that the matrix fills up right when the brownian motion model is used
+ brVars <- c("loglik", "dof", "sigma.squared", "theta")
+ haVars <- c("loglik", "dof", "sigma.squared", "alpha")
+ treeData <- matrix(data = NA, nrow = length(regimeTitles), ncol = length(variables), dimnames = list(regimeTitles,variables))
+ if(brown) {
+ br <- brown(data, tree)
+ treeData["brown", ] <- unlist(summary(br)[brVars])
+ }
+ for (i in seq(regimesList)) {
+ message(paste("Running regime",i))
+ ## at this point, the user has to give an initial alpha and sigma for hansen to search on... this should be relaxed
+ ha = hansen(data, tree, regimesList[[i]], ...)
+ treeData[i, ] <- unlist(summary(ha)[haVars])
+ hansenOptima[[i]] <- summary(ha)$optima[[1]]
+ }
+ return(treeData) }
\ No newline at end of file
Deleted: pkg/R/revisedBatchHansen.R
===================================================================
--- pkg/R/revisedBatchHansen.R 2008-11-14 08:04:10 UTC (rev 27)
+++ pkg/R/revisedBatchHansen.R 2008-11-14 08:05:41 UTC (rev 28)
@@ -1,131 +0,0 @@
-# ---------------------------------------------------------------------
-# FUNCTIONS FOR PERFORMING A SERIES OF OU ANALYSES ON A BATCH OF TREES
-# ---------------------------------------------------------------------
-# Copied from functions used in Hipp 2007 Evolution paper
-# *** To run a hansen (OU) analysis, call runBatchHansenFit ***
-# Utilizes ouch v 1.2-4
-# This is the original set of functions utilized in Hipp 2007 (Evolution 61: 2175-2194) as modified
-# for Lumbsch, Hipp et al. 2008 (BMC Evolutionary Biology 8: 257). At the time of uploade to r-forge,
-# they have not been checked for compatibility with subsequent versions of ouch.
-
-
-## Project details
-## maticce: Mapping Transitions In Continuous Character Evolution
-## full project name: Continuous Character Shifts on Phylogenies
-## unix name: mattice
-## Project page requested from R-forge on 7 november 2008
-
-## Changes needed:
-## 1. calls should be to hansen rather than hansen.fit
-## 2. measurement error portions need to be fixed
-## 3. Analysis should be conducted over multiple trees, summarizing only over trees for which a given node is present;
-## node presence should be checked on each tree by looking to see whether the defining group is monophyletic,
-## and probably a matrix created for each multiple-tree analysis that makes summarizing quicker.
-## 4. DONE -- Max number of simultaneous nodes should be set
-## 5. In a better world, allow graphical selection of subtrees to test on a single tree, then extract defining taxa
-## based on those nodes, using locator() or something like it.
-## 6. IT statistics should use informationCriterion or something else to clean up the code
-
-runBatchHansen <-
-# 11 nov 08: renamed to runBatchHansen
-# Runs batchHansenFit and brown.fit over a list of ouchTrees
-# Arguments:
-# "ouchTrees" = list of OUCH-style trees
-# "characterStates" = vector of character states, either extracted from an ouch-style tree data.frame or a named vector
-# REMOVED: "SEM"= standard error of the mean, vector extracted from an ouch-style tree data.frame
-# "rescale" = factor to multiply against (times / max(times)) -- choose based on trial analyses; set at <= 0 if you don't want to rescale trees
-# "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree)
-# "brown" = whether to analyse the data under a Brownian motion model
-# "..." = additional arguments to pass along to hansen
-function(ouchTrees, characterStates, cladeMembersList, maxNodes = NULL, regimeTitles = NULL, brown = F, rescale = 1, ...) {
- ## do all the objects in ouchTrees inherit ouchtree?
- if(is(ouchTrees,'ouchtree')) ouchTrees <- list(ouchTrees)
- treeCheck <- unlist(lapply(ouchTrees, function(x) is(x,'ouchtree')))
- if(F %in% treeCheck)
- stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
- '\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
-
- ## Check character states to make sure that they are either named and match names in the trees, or are the same length as the tips
- for (i in 1:length(ouchTrees)) {
- dataFlag <- NULL
- stopFlag <- F
- tree <- ouchTrees[[i]]
- terminals <- tree at nodelabels[(tree at nnodes - tree at nterm + 1):tree at nnodes]
- if(any(F %in% (terminals %in% names(characterStates)))) {
- message(paste("Not every terminal branch in tree", i, "has a corresponding name in", sQuote("characterStates")))
- if(length(characterStates) == tree at nterm) {
- message("Data assumed to be in the same order as terminals")
- dataFlag <- 'sameOrderTerminals'
- }
- if(length(characterStates) == tree at nnodes) {
- message("Data assumed to be in the same order as nodes;\nany data not associated with a terminal branch will be ignored")
- dataFlag <- 'sameOrderNodes'
- }
- if(identical(dataFlag, NULL)) stopFlag <- T
- message("-------------------\n")
- }
- else dataFlag <- 'named'
- if(stopFlag) stop("Correct discrepancies between trees and data and try again!")
- }
-
- hansenBatch = vector("list", length(ouchTrees))
- treeCounter = 0
- for (i in 1:length(ouchTrees)) {
- tree <- ouchTrees[[i]]
- regimesList = regimeVectors(tree, cladeMembersList, maxNodes)
- if(identical(regimeTitles, NULL)) {
- regimeTitles <- as.character(1:length(regimesList))
- if(brown) regimeTitles <- c(regimeTitles, 'brown')
- }
-
- ## rescale tree if requested
- if(rescale>0) tree at times <- rescale * tree at times / max(tree at times)
-
- ## need to revise regimeVectors so that it only returns regimes for nodes that are supported in the tree
- ## for now, assume nodes of interest are present in all trees
-
- ## make sure data fits the tree
- dataIn <- NULL
- if(dataFlag == 'sameOrderTerminals') dataIn <- c(rep(NA, tree at nnodes - tree at nterm), characterStates)
- if(dataFlag == 'sameOrderNodes') dataIn <- characterStates
- if(dataFlag == 'named') dataIn <- characterStates[match(tree at nodelabels), characterStates]
- if(identical(dataIn, NULL)) stop(paste("There is a problem with your data that I failed to catch at the outset of", sQuote('runBatchHansen()')))
- else names(dataIn) <- tree at nodes
-
- ## send it off to batchHansen and just stick the results in hansenBatch... this won't work as the number of regimes gets large,
- ## so there should be some option here to just hang onto the coefficients for each run (i.e., hang onto 'coef(hansen(...))' rather than 'hansen(...)')
- ## there could also be an option to save the entire object as a series of files in addition to hanging onto
- hansenBatch[[i]] = batchHansen(tree, dataIn, regimesList, regimeTitles, brown, ...)
- message(paste("Tree",i,"of",length(ouchTrees),"complete"))}
-
- ## right now no summary is returned; one is needed, summarizing over trees what is summarized for each tree in batchHansen
-
- return(list(hansens = hansenBatch, regimes = regimesList)) }
-
-batchHansen <-
-# Runs hansen.fit and brown.fit on a tree over a batch of selective regimes
-# Arguments:
-# "node" "ancestor" "times" "data" = the standard tree specification vectors of the OUCH-style tree
-# "regimesList" = list of regime-paintings as output from regimeVectors
-# "scalingFactor" = factor to multiply against (times / max(times)) -- choose based on trial analyses
-# Value: a matrix with nrow = regimes (+ 1 if brownian model is included) and columns for u, d.f., all estimated parameters, LRvsBM, AIC, and AIC weight
-function(tree, data, regimesList, regimeTitles, brown, ...) {
- n <- tree at nterm
- ## set up a matrix that returns lnL, K, sigmasq, theta0, and alpha for every model; thetas will go along into a list that is indexed by model
- hansenOptima <- list(length(regimeTitles))
- variables <- c("loglik", "dof", "sigma.squared", "theta / alpha") # it's important that 'alpha' go last so that the matrix fills up right when the brownian motion model is used
- brVars <- c("loglik", "dof", "sigma.squared", "theta")
- haVars <- c("loglik", "dof", "sigma.squared", "alpha")
- treeData <- matrix(data = NA, nrow = length(regimeTitles), ncol = length(variables), dimnames = list(regimeTitles,variables))
- if(brown) {
- br <- brown(data, tree)
- treeData["brown", ] <- unlist(summary(br)[brVars])
- }
- for (i in seq(regimesList)) {
- message(paste("Running regime",i))
- ## at this point, the user has to give an initial alpha and sigma for hansen to search on... this should be relaxed
- ha = hansen(data, tree, regimesList[[i]], ...)
- treeData[i, ] <- unlist(summary(ha)[haVars])
- hansenOptima[[i]] <- summary(ha)$optima[[1]]
- }
- return(treeData) }
\ No newline at end of file
Deleted: pkg/R/revisedTreeTraversal.R
===================================================================
--- pkg/R/revisedTreeTraversal.R 2008-11-14 08:04:10 UTC (rev 27)
+++ pkg/R/revisedTreeTraversal.R 2008-11-14 08:05:41 UTC (rev 28)
@@ -1,231 +0,0 @@
-# ---------------------------------------------------------------
-# FUNCTIONS FOR TRAVERSING AN S4 OUCH TREE AND PAINTING REGIMES #
-# ---------------------------------------------------------------
-
-# Modified from functions used in Hipp 2007 Evolution paper
-# Initially written for ouch v 1.2-4
-# 10 November 2008: changed everything to operate on an ouchtree object (ouch >= v2), otherwise functions the same;
-# checked on ouch v2.4-2
-# functions included in this file:
-# 1. paintBranches
-# 2. mrcaOUCH
-# 3. ancestorLine
-# 4. allPossibleRegimes
-# 5. regimeVectors
-
-
-paintBranches <-
-# Paints branches with regimes changing at nodes specified
-# arguments
-# "node" "ancestor" "times" = the standard tree specification vectors of the OUCH-style tree
-# "regimeShiftNodes" = a vector of nodes at which selective regimes shift: root must be included, but tips are meaningless in this context
-# "regimeTitles" = a vector of titles for the regimes that begin at the root and at the nodes indicated in "regimeShiftNodes",
-# in order of description in "regimeShiftNodes", except that the root is listed first in "regimeTitles"
-# but not at all in "regimeShiftNodes"... defaults to "node[x]regime
-# Value: a vector of regimes that can be plopped right into an OUCH-style tree data frame
-function(tree, regimeShiftNodes, regimeTitles) {
- ## ------------------ begin ouchtree block -----------------
- ## check to see if tree inherits 'ouchtree'
- if (!is(tree,'ouchtree'))
- stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
- '\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
- ## get the vectors we need:
- ancestor <- tree at ancestors # class = "character"
- node <- tree at nodes # class = "character"
- species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
- times <- tree at times # class = "numeric"
- ## ------------------ end ouchtree block -------------------
-
- names(regimeTitles) = as.character(regimeShiftNodes)
- colorsVector = character(length(node))
- for (i in 1:length(ancestor)) {
- # First three lines fill up the vector for nodes that are hit in order
- if (is.na(ancestor[i])) {
- colorsVector[i] = regimeTitles["1"]
- next }
- if (as.character(ancestor[i]) %in% as.character(regimeShiftNodes)) {
- colorsVector[i] = regimeTitles[as.character(ancestor[i])]
- next }
- if (colorsVector[as.integer(ancestor[i])] != "") {
- colorsVector[i] = colorsVector[as.integer(ancestor[i])]
- next }
- # These lines fill up the vector for nodes run reached before their immediate ancestor
- nodeQ = integer(length(node))
- ii = i
- repeat {
- nodeQ = c(ii, nodeQ)
- ii = as.numeric(ancestor[ii])
- if (as.character(ancestor[ii]) %in% as.character(regimeShiftNodes)) {
- colorsVector[ii] = colorsVector[as.integer(ancestor[ii])]
- break}
- if (colorsVector[as.integer(ancestor[ii])] != "") {
- colorsVector[ii] = colorsVector[as.integer(ancestor[ii])]
- break} }
-
- for(j in nodeQ) {
- colorsVector[j] = colorsVector[as.integer(ancestor[j])] }
-
- } # closes for(i in 1:length(ancestor)) loop
-
- # a little hack to fix a problem I don't understand... with the undesired side effect that it colors the stem of some subtrees rather than the crown as originally written
- for(i in 1:length(colorsVector)) if(colorsVector[i] == "") colorsVector[i] <- as.character(i)
- return(colorsVector) }
-
-mrcaOUCH <-
-# Finds most recent common ancestor for a vector of tips by:
-# 1. Creating a vector of ancestral nodes leading to each tip
-# 2. Creating an intersection set of ancestral nodes for all taxa by intersecting taxa successively with the last intersection set
-# 3. Returning the node of the final intersection set that has the highest time
-# Arguments:
-# "node" "ancestor" "times" "species" = the standard tree specification vectors of the OUCH-style tree
-# "cladeVector" = vector of species for which you want to find the most recent common ancestor
-# Value: the node number (as an integer) of the most recent common ancestor
-# Works! 3-31-06
-function(cladeVector, tree) {
- ## ------------------ begin ouchtree block -----------------
- ## check to see if tree inherits 'ouchtree'
- if (!is(tree,'ouchtree'))
- stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
- '\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
- ## get the vectors we need:
- ancestor <- tree at ancestors # class = "character"
- node <- tree at nodes # class = "character"
- species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
- times <- tree at times # class = "numeric"
- ## ------------------ end ouchtree block -------------------
-
- tips = match(cladeVector, species)
- listOfAncestorLines = lapply(tips, ancestorLine, tree = tree) # 10 nov 08: this is identical to the appropriate subset of tree at lineages
- latestMatch = listOfAncestorLines[[1]]
- for (i in listOfAncestorLines) {
- latestMatch = i[match(latestMatch, i, nomatch = 0)] }
- timesVector = times[as.integer(latestMatch)]
- if(length(timesVector) == 1) {
- if (is.na(timesVector)) mrca = "1"
- else mrca = timesVector}
- else mrca = latestMatch[match(max(as.double(timesVector), na.rm = TRUE), timesVector)]
- return(mrca) }
-
-ancestorLine <-
-# Creates a vector of ancestral nodes for a tip
-# Arguments:
-# CHANGED to "tree" 10 nov 08: "node" and "ancestor" = the standard tree specification vectors of the OUCH-style tree
-# "tip" = the tip node to trace back
-# Value: a vector of nodes leading from a tip to the root
-# 10 nov 08: changed to just grab the appropriate element from tree at lineages
-function(tip, tree) {
- ## ------------------ begin ouchtree block -----------------
- ## check to see if tree inherits 'ouchtree'
- if (!is(tree,'ouchtree'))
- stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
- '\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
- ## get the vectors we need:
- #ancestor <- tree at ancestors # class = "character"
- #node <- tree at nodes # class = "character"
- #species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
- #times <- tree at times # class = "numeric"
- ## ------------------ end ouchtree block -------------------
- tip <- as.numeric(tip)
- #nodesVector = vector("character")
- #counter = 0
- #repeat {
- # if (is.na(tip)) break
- # counter = counter + 1
- # nodesVector[counter] = ancestor[tip]
- # tip = ancestor[tip] }
- nodesVector <- c(as.character(tree at lineages[[tip]][2:length(tree at lineages[[tip]])]), NA)
- return(nodesVector)
- }
-
-allPossibleRegimes <-
-# Generates a list of vectors of all possible 2^n regimes for a given list of n ancestor nodes, assuming that each node
-# can either be a change node or not, and that all combinations are meaningful.
-# NOTE: This routine returns all possible permutations, ignoring the fact that the root must be included as a changeNode
-# for paintBranches to work properly. Exclude the root when calling this routine.
-# Arguments:
-# "changeNodes" = vector of all change nodes in all possible scenarios, or number of regimes assumed if nodeMatrix = T
-# "maxNodes" = single number indicating the maximum number of nodes at which a regime can change
-# "nodeMatrix" = indicates whether to return a binary table for interp or a list of changeNode vectors for analysis
-# Value:
-# if nodeMatrix = F: a list of changeNode vectors (assumes type = "character"), one for each possible scenario
-# if nodeMatrix = T: a binary table indicating whether a regime node is present or absent based on allPossibleRegimes output;
-# presumes nodes are labelled 1:n
-# 10 nov 08: this function now takes over regimeNodes
-function(changeNodes, maxNodes = NULL, nodeMatrix = F) {
- if(!identical(maxNodes, NULL) && maxNodes > length(changeNodes)) warning(paste(sQuote('maxNodes'), 'cannot be larger than the number of nodes; maxNodes ignored'))
- numberOfRegimes = ifelse(length(changeNodes) == 1, 2, 2^length(changeNodes))
- regime = vector("list", numberOfRegimes)
- for (i in 1:(numberOfRegimes - 1)) {
- remainder = i
- n = NULL
- for (j in as.integer(log2(i)):0) {
- if (2^j > remainder) n[j+1] = NA
- else {
- n[j+1] = changeNodes[j+1]
- remainder = remainder %% 2^j
- }
- }
- regime[[i]] = sort(n[!is.na(n)])
- }
- regime[[numberOfRegimes]] = rep("0", times = as.integer(log2(i)) + 1)
- if(nodeMatrix == T) {
- #n <- ifelse(length(changeNodes) == 1, as.numeric(changeNodes), length(changeNodes))
- regimesNameMatrix = matrix(
- data = NA, nrow = numberOfRegimes, ncol = length(changeNodes), dimnames = list(
- as.character(seq(numberOfRegimes)), as.character(seq(length(changeNodes)))
- )
- )
- for (i in seq(numberOfRegimes)) {
- for (j in seq(length(changeNodes))) {
- if (is.na(match(j,regime[[i]]))) regimesNameMatrix[i,j] = 0
- else regimesNameMatrix[i,j] = 1
- }
- }
- outdata <- regimesNameMatrix
- if(!identical(maxNodes, NULL)) {
- outdata <- outdata[apply(outdata,1,sum) <= maxNodes, ]
- dimnames(outdata)[[1]] = as.character(seq(dim(outdata)[1]))
- }
- }
- else {
- outdata <- regime
- if(!identical(maxNodes, NULL)) {
- outdata <- outdata[sapply(outdata, length) <= maxNodes]
- outdata[[length(outdata) + 1]] <- rep("0", length(changeNodes))
- }
- }
- return(outdata) }
-
-regimeVectors <-
-# Generates the list of painted branches representing all possible selective regimes for OU analyses, taking as argument
-# species vectors that describe the clades at the bases of which regimes are specified to change.
-# Arguments:
-# "node" "ancestor" "times" "species" = the standard tree specification vectors of the OUCH-style tree
-# "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree)
-# Value: list of vectors that can each be plugged directly into OU analysis as the "regimes" argument
-function(tree, cladeMembersList, maxNodes = NULL) {
- ## ------------------ begin ouchtree block -----------------
- ## check to see if tree inherits 'ouchtree'
- if (!is(tree,'ouchtree'))
- stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
- '\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
- ## get the vectors we need:
- ancestor <- tree at ancestors # class = "character"
- node <- tree at nodes # class = "character"
- species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
- times <- tree at times # class = "numeric"
- ## ------------------ end ouchtree block -------------------
-
- changeNodesList <- lapply(cladeMembersList, mrcaOUCH, tree = tree) #Returns a list of length-1 character vectors, each containing a single changeNode -- the fact that this is a list causes problems in paintBranches if not changed to a 1-d vector
- changeNodesVector <- unlist(changeNodesList)
- #changeNodesVector = vector("character", length(changeNodesList))
- #for (i in 1:length(changeNodesList)) # Changing cladeMemberList to a 1-d vector
- # {changeNodesVector[i] = changeNodesList[[i]]}
- allRegimes = allPossibleRegimes(changeNodesVector, maxNodes)
- regimePaintings = vector("list", length(allRegimes))
- for (i in 1:length(allRegimes)) {
- allRegimes[[i]] = c("1", allRegimes[[i]])
- regimePaintings[[i]] = as.factor(paintBranches(tree, allRegimes[[i]], as.character(allRegimes[[i]])))
- names(regimePaintings[[i]]) <- tree at nodes
- message(paste('Created regime',i))}
- return(regimePaintings) }
Copied: pkg/R/treeTraversal.R (from rev 26, pkg/R/revisedTreeTraversal.R)
===================================================================
--- pkg/R/treeTraversal.R (rev 0)
+++ pkg/R/treeTraversal.R 2008-11-14 08:05:41 UTC (rev 28)
@@ -0,0 +1,231 @@
+# ---------------------------------------------------------------
+# FUNCTIONS FOR TRAVERSING AN S4 OUCH TREE AND PAINTING REGIMES #
+# ---------------------------------------------------------------
+
+# Modified from functions used in Hipp 2007 Evolution paper
+# Initially written for ouch v 1.2-4
+# 10 November 2008: changed everything to operate on an ouchtree object (ouch >= v2), otherwise functions the same;
+# checked on ouch v2.4-2
+# functions included in this file:
+# 1. paintBranches
+# 2. mrcaOUCH
+# 3. ancestorLine
+# 4. allPossibleRegimes
+# 5. regimeVectors
+
+
+paintBranches <-
+# Paints branches with regimes changing at nodes specified
+# arguments
+# "node" "ancestor" "times" = the standard tree specification vectors of the OUCH-style tree
+# "regimeShiftNodes" = a vector of nodes at which selective regimes shift: root must be included, but tips are meaningless in this context
+# "regimeTitles" = a vector of titles for the regimes that begin at the root and at the nodes indicated in "regimeShiftNodes",
+# in order of description in "regimeShiftNodes", except that the root is listed first in "regimeTitles"
+# but not at all in "regimeShiftNodes"... defaults to "node[x]regime
+# Value: a vector of regimes that can be plopped right into an OUCH-style tree data frame
+function(tree, regimeShiftNodes, regimeTitles) {
+ ## ------------------ begin ouchtree block -----------------
+ ## check to see if tree inherits 'ouchtree'
+ if (!is(tree,'ouchtree'))
+ stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
+ '\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
+ ## get the vectors we need:
+ ancestor <- tree at ancestors # class = "character"
+ node <- tree at nodes # class = "character"
+ species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
+ times <- tree at times # class = "numeric"
+ ## ------------------ end ouchtree block -------------------
+
+ names(regimeTitles) = as.character(regimeShiftNodes)
+ colorsVector = character(length(node))
+ for (i in 1:length(ancestor)) {
+ # First three lines fill up the vector for nodes that are hit in order
+ if (is.na(ancestor[i])) {
+ colorsVector[i] = regimeTitles["1"]
+ next }
+ if (as.character(ancestor[i]) %in% as.character(regimeShiftNodes)) {
+ colorsVector[i] = regimeTitles[as.character(ancestor[i])]
+ next }
+ if (colorsVector[as.integer(ancestor[i])] != "") {
+ colorsVector[i] = colorsVector[as.integer(ancestor[i])]
+ next }
+ # These lines fill up the vector for nodes run reached before their immediate ancestor
+ nodeQ = integer(length(node))
+ ii = i
+ repeat {
+ nodeQ = c(ii, nodeQ)
+ ii = as.numeric(ancestor[ii])
+ if (as.character(ancestor[ii]) %in% as.character(regimeShiftNodes)) {
+ colorsVector[ii] = colorsVector[as.integer(ancestor[ii])]
+ break}
+ if (colorsVector[as.integer(ancestor[ii])] != "") {
+ colorsVector[ii] = colorsVector[as.integer(ancestor[ii])]
+ break} }
+
+ for(j in nodeQ) {
+ colorsVector[j] = colorsVector[as.integer(ancestor[j])] }
+
+ } # closes for(i in 1:length(ancestor)) loop
+
+ # a little hack to fix a problem I don't understand... with the undesired side effect that it colors the stem of some subtrees rather than the crown as originally written
+ for(i in 1:length(colorsVector)) if(colorsVector[i] == "") colorsVector[i] <- as.character(i)
+ return(colorsVector) }
+
+mrcaOUCH <-
+# Finds most recent common ancestor for a vector of tips by:
+# 1. Creating a vector of ancestral nodes leading to each tip
+# 2. Creating an intersection set of ancestral nodes for all taxa by intersecting taxa successively with the last intersection set
+# 3. Returning the node of the final intersection set that has the highest time
+# Arguments:
+# "node" "ancestor" "times" "species" = the standard tree specification vectors of the OUCH-style tree
+# "cladeVector" = vector of species for which you want to find the most recent common ancestor
+# Value: the node number (as an integer) of the most recent common ancestor
+# Works! 3-31-06
+function(cladeVector, tree) {
+ ## ------------------ begin ouchtree block -----------------
+ ## check to see if tree inherits 'ouchtree'
+ if (!is(tree,'ouchtree'))
+ stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
+ '\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
+ ## get the vectors we need:
+ ancestor <- tree at ancestors # class = "character"
+ node <- tree at nodes # class = "character"
+ species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
+ times <- tree at times # class = "numeric"
+ ## ------------------ end ouchtree block -------------------
+
+ tips = match(cladeVector, species)
+ listOfAncestorLines = lapply(tips, ancestorLine, tree = tree) # 10 nov 08: this is identical to the appropriate subset of tree at lineages
+ latestMatch = listOfAncestorLines[[1]]
+ for (i in listOfAncestorLines) {
+ latestMatch = i[match(latestMatch, i, nomatch = 0)] }
+ timesVector = times[as.integer(latestMatch)]
+ if(length(timesVector) == 1) {
+ if (is.na(timesVector)) mrca = "1"
+ else mrca = timesVector}
+ else mrca = latestMatch[match(max(as.double(timesVector), na.rm = TRUE), timesVector)]
+ return(mrca) }
+
+ancestorLine <-
+# Creates a vector of ancestral nodes for a tip
+# Arguments:
+# CHANGED to "tree" 10 nov 08: "node" and "ancestor" = the standard tree specification vectors of the OUCH-style tree
+# "tip" = the tip node to trace back
+# Value: a vector of nodes leading from a tip to the root
+# 10 nov 08: changed to just grab the appropriate element from tree at lineages
+function(tip, tree) {
+ ## ------------------ begin ouchtree block -----------------
+ ## check to see if tree inherits 'ouchtree'
+ if (!is(tree,'ouchtree'))
+ stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
+ '\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
+ ## get the vectors we need:
+ #ancestor <- tree at ancestors # class = "character"
+ #node <- tree at nodes # class = "character"
+ #species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
+ #times <- tree at times # class = "numeric"
+ ## ------------------ end ouchtree block -------------------
+ tip <- as.numeric(tip)
+ #nodesVector = vector("character")
+ #counter = 0
+ #repeat {
+ # if (is.na(tip)) break
+ # counter = counter + 1
+ # nodesVector[counter] = ancestor[tip]
+ # tip = ancestor[tip] }
+ nodesVector <- c(as.character(tree at lineages[[tip]][2:length(tree at lineages[[tip]])]), NA)
+ return(nodesVector)
+ }
+
+allPossibleRegimes <-
+# Generates a list of vectors of all possible 2^n regimes for a given list of n ancestor nodes, assuming that each node
+# can either be a change node or not, and that all combinations are meaningful.
+# NOTE: This routine returns all possible permutations, ignoring the fact that the root must be included as a changeNode
+# for paintBranches to work properly. Exclude the root when calling this routine.
+# Arguments:
+# "changeNodes" = vector of all change nodes in all possible scenarios, or number of regimes assumed if nodeMatrix = T
+# "maxNodes" = single number indicating the maximum number of nodes at which a regime can change
+# "nodeMatrix" = indicates whether to return a binary table for interp or a list of changeNode vectors for analysis
+# Value:
+# if nodeMatrix = F: a list of changeNode vectors (assumes type = "character"), one for each possible scenario
+# if nodeMatrix = T: a binary table indicating whether a regime node is present or absent based on allPossibleRegimes output;
+# presumes nodes are labelled 1:n
+# 10 nov 08: this function now takes over regimeNodes
+function(changeNodes, maxNodes = NULL, nodeMatrix = F) {
+ if(!identical(maxNodes, NULL) && maxNodes > length(changeNodes)) warning(paste(sQuote('maxNodes'), 'cannot be larger than the number of nodes; maxNodes ignored'))
+ numberOfRegimes = ifelse(length(changeNodes) == 1, 2, 2^length(changeNodes))
+ regime = vector("list", numberOfRegimes)
+ for (i in 1:(numberOfRegimes - 1)) {
+ remainder = i
+ n = NULL
+ for (j in as.integer(log2(i)):0) {
+ if (2^j > remainder) n[j+1] = NA
+ else {
+ n[j+1] = changeNodes[j+1]
+ remainder = remainder %% 2^j
+ }
+ }
+ regime[[i]] = sort(n[!is.na(n)])
+ }
+ regime[[numberOfRegimes]] = rep("0", times = as.integer(log2(i)) + 1)
+ if(nodeMatrix == T) {
+ #n <- ifelse(length(changeNodes) == 1, as.numeric(changeNodes), length(changeNodes))
+ regimesNameMatrix = matrix(
+ data = NA, nrow = numberOfRegimes, ncol = length(changeNodes), dimnames = list(
+ as.character(seq(numberOfRegimes)), as.character(seq(length(changeNodes)))
+ )
+ )
+ for (i in seq(numberOfRegimes)) {
+ for (j in seq(length(changeNodes))) {
+ if (is.na(match(j,regime[[i]]))) regimesNameMatrix[i,j] = 0
+ else regimesNameMatrix[i,j] = 1
+ }
+ }
+ outdata <- regimesNameMatrix
+ if(!identical(maxNodes, NULL)) {
+ outdata <- outdata[apply(outdata,1,sum) <= maxNodes, ]
+ dimnames(outdata)[[1]] = as.character(seq(dim(outdata)[1]))
+ }
+ }
+ else {
+ outdata <- regime
+ if(!identical(maxNodes, NULL)) {
+ outdata <- outdata[sapply(outdata, length) <= maxNodes]
+ outdata[[length(outdata) + 1]] <- rep("0", length(changeNodes))
+ }
+ }
+ return(outdata) }
+
+regimeVectors <-
+# Generates the list of painted branches representing all possible selective regimes for OU analyses, taking as argument
+# species vectors that describe the clades at the bases of which regimes are specified to change.
+# Arguments:
+# "node" "ancestor" "times" "species" = the standard tree specification vectors of the OUCH-style tree
+# "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree)
+# Value: list of vectors that can each be plugged directly into OU analysis as the "regimes" argument
+function(tree, cladeMembersList, maxNodes = NULL) {
+ ## ------------------ begin ouchtree block -----------------
+ ## check to see if tree inherits 'ouchtree'
+ if (!is(tree,'ouchtree'))
+ stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
+ '\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
+ ## get the vectors we need:
+ ancestor <- tree at ancestors # class = "character"
+ node <- tree at nodes # class = "character"
+ species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
+ times <- tree at times # class = "numeric"
+ ## ------------------ end ouchtree block -------------------
+
+ changeNodesList <- lapply(cladeMembersList, mrcaOUCH, tree = tree) #Returns a list of length-1 character vectors, each containing a single changeNode -- the fact that this is a list causes problems in paintBranches if not changed to a 1-d vector
+ changeNodesVector <- unlist(changeNodesList)
+ #changeNodesVector = vector("character", length(changeNodesList))
+ #for (i in 1:length(changeNodesList)) # Changing cladeMemberList to a 1-d vector
+ # {changeNodesVector[i] = changeNodesList[[i]]}
+ allRegimes = allPossibleRegimes(changeNodesVector, maxNodes)
+ regimePaintings = vector("list", length(allRegimes))
+ for (i in 1:length(allRegimes)) {
+ allRegimes[[i]] = c("1", allRegimes[[i]])
+ regimePaintings[[i]] = as.factor(paintBranches(tree, allRegimes[[i]], as.character(allRegimes[[i]])))
+ names(regimePaintings[[i]]) <- tree at nodes
+ message(paste('Created regime',i))}
+ return(regimePaintings) }
More information about the Mattice-commits
mailing list