[Mattice-commits] r28 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Nov 14 09:05:42 CET 2008


Author: andrew_hipp
Date: 2008-11-14 09:05:41 +0100 (Fri, 14 Nov 2008)
New Revision: 28

Added:
   pkg/R/batchHansen.R
   pkg/R/treeTraversal.R
Removed:
   pkg/R/revisedBatchHansen.R
   pkg/R/revisedTreeTraversal.R
Log:
renamed revisedBatchHansen.R and revisedTreeTraversal.R ... no need to set them off any longer

Copied: pkg/R/batchHansen.R (from rev 26, pkg/R/revisedBatchHansen.R)
===================================================================
--- pkg/R/batchHansen.R	                        (rev 0)
+++ pkg/R/batchHansen.R	2008-11-14 08:05:41 UTC (rev 28)
@@ -0,0 +1,131 @@
+# ---------------------------------------------------------------------
+# FUNCTIONS FOR PERFORMING A SERIES OF OU ANALYSES ON A BATCH OF TREES
+# ---------------------------------------------------------------------
+# Copied from functions used in Hipp 2007 Evolution paper
+# *** To run a hansen (OU) analysis, call runBatchHansenFit ***
+# Utilizes ouch v 1.2-4
+# This is the original set of functions utilized in Hipp 2007 (Evolution 61: 2175-2194) as modified
+#  for Lumbsch, Hipp et al. 2008 (BMC Evolutionary Biology 8: 257). At the time of uploade to r-forge, 
+#  they have not been checked for compatibility with subsequent versions of ouch.
+
+
+## Project details
+## maticce: Mapping Transitions In Continuous Character Evolution
+## full project name: Continuous Character Shifts on Phylogenies
+## unix name: mattice
+## Project page requested from R-forge on 7 november 2008
+
+## Changes needed:
+## 1. calls should be to hansen rather than hansen.fit
+## 2. measurement error portions need to be fixed
+## 3. Analysis should be conducted over multiple trees, summarizing only over trees for which a given node is present;
+##    node presence should be checked on each tree by looking to see whether the defining group is monophyletic,
+##    and probably a matrix created for each multiple-tree analysis that makes summarizing quicker.
+## 4. DONE -- Max number of simultaneous nodes should be set 
+## 5. In a better world, allow graphical selection of subtrees to test on a single tree, then extract defining taxa
+##    based on those nodes, using locator() or something like it.
+## 6. IT statistics should use informationCriterion or something else to clean up the code
+
+runBatchHansen <-
+# 11 nov 08: renamed to runBatchHansen
+# Runs batchHansenFit and brown.fit over a list of ouchTrees
+# Arguments:
+#  "ouchTrees" = list of OUCH-style trees
+#  "characterStates" = vector of character states, either extracted from an ouch-style tree data.frame or a named vector
+#  REMOVED: "SEM"= standard error of the mean, vector extracted from an ouch-style tree data.frame
+#  "rescale" = factor to multiply against (times / max(times)) -- choose based on trial analyses; set at <= 0 if you don't want to rescale trees
+#  "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree)
+#  "brown" = whether to analyse the data under a Brownian motion model
+#  "..." = additional arguments to pass along to hansen
+function(ouchTrees, characterStates, cladeMembersList, maxNodes = NULL, regimeTitles = NULL, brown = F, rescale = 1, ...) {
+  ## do all the objects in ouchTrees inherit ouchtree?
+  if(is(ouchTrees,'ouchtree')) ouchTrees <- list(ouchTrees)
+  treeCheck <- unlist(lapply(ouchTrees, function(x) is(x,'ouchtree')))
+  if(F %in% treeCheck) 
+        stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
+	'\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
+  
+  ## Check character states to make sure that they are either named and match names in the trees, or are the same length as the tips
+  for (i in 1:length(ouchTrees)) {
+    dataFlag <- NULL
+    stopFlag <- F
+    tree <- ouchTrees[[i]]
+    terminals <- tree at nodelabels[(tree at nnodes - tree at nterm + 1):tree at nnodes]
+    if(any(F %in% (terminals %in% names(characterStates)))) {
+      message(paste("Not every terminal branch in tree", i, "has a corresponding name in", sQuote("characterStates")))
+      if(length(characterStates) == tree at nterm) {
+        message("Data assumed to be in the same order as terminals")
+        dataFlag <- 'sameOrderTerminals' 
+        }
+      if(length(characterStates) == tree at nnodes) {
+        message("Data assumed to be in the same order as nodes;\nany data not associated with a terminal branch will be ignored")
+        dataFlag <- 'sameOrderNodes'
+        }
+      if(identical(dataFlag, NULL)) stopFlag <- T
+      message("-------------------\n")
+      }
+    else dataFlag <- 'named'
+    if(stopFlag) stop("Correct discrepancies between trees and data and try again!")
+    }
+
+  hansenBatch = vector("list", length(ouchTrees))
+  treeCounter = 0
+  for (i in 1:length(ouchTrees)) {
+    tree <- ouchTrees[[i]]
+    regimesList = regimeVectors(tree, cladeMembersList, maxNodes)
+    if(identical(regimeTitles, NULL)) {
+      regimeTitles <- as.character(1:length(regimesList))
+      if(brown) regimeTitles <- c(regimeTitles, 'brown')
+      }
+    
+    ## rescale tree if requested
+    if(rescale>0) tree at times <- rescale * tree at times / max(tree at times) 
+    
+    ## need to revise regimeVectors so that it only returns regimes for nodes that are supported in the tree
+    ## for now, assume nodes of interest are present in all trees
+
+    ## make sure data fits the tree
+    dataIn <- NULL
+    if(dataFlag == 'sameOrderTerminals') dataIn <- c(rep(NA, tree at nnodes - tree at nterm), characterStates)
+    if(dataFlag == 'sameOrderNodes') dataIn <- characterStates
+    if(dataFlag == 'named') dataIn <- characterStates[match(tree at nodelabels), characterStates]
+    if(identical(dataIn, NULL)) stop(paste("There is a problem with your data that I failed to catch at the outset of", sQuote('runBatchHansen()')))
+    else names(dataIn) <- tree at nodes
+    
+    ## send it off to batchHansen and just stick the results in hansenBatch... this won't work as the number of regimes gets large, 
+    ##   so there should be some option here to just hang onto the coefficients for each run (i.e., hang onto 'coef(hansen(...))' rather than 'hansen(...)')
+    ##   there could also be an option to save the entire object as a series of files in addition to hanging onto 
+    hansenBatch[[i]] = batchHansen(tree, dataIn, regimesList, regimeTitles, brown, ...)
+    message(paste("Tree",i,"of",length(ouchTrees),"complete"))}
+    
+    ## right now no summary is returned; one is needed, summarizing over trees what is summarized for each tree in batchHansen
+  
+  return(list(hansens = hansenBatch, regimes = regimesList)) }
+
+batchHansen <-
+# Runs hansen.fit and brown.fit on a tree over a batch of selective regimes
+# Arguments:
+#  "node" "ancestor" "times" "data" = the standard tree specification vectors of the OUCH-style tree
+#  "regimesList" = list of regime-paintings as output from regimeVectors
+#  "scalingFactor" = factor to multiply against (times / max(times)) -- choose based on trial analyses
+# Value: a matrix with nrow = regimes (+ 1 if brownian model is included) and columns for u, d.f., all estimated parameters, LRvsBM, AIC, and AIC weight
+function(tree, data, regimesList, regimeTitles, brown, ...) {
+  n <- tree at nterm
+  ## set up a matrix that returns lnL, K, sigmasq, theta0, and alpha for every model; thetas will go along into a list that is indexed by model
+  hansenOptima <- list(length(regimeTitles))
+  variables <- c("loglik", "dof", "sigma.squared", "theta / alpha") # it's important that 'alpha' go last so that the matrix fills up right when the brownian motion model is used
+  brVars <- c("loglik", "dof", "sigma.squared", "theta")
+  haVars <- c("loglik", "dof", "sigma.squared", "alpha")
+  treeData <- matrix(data = NA, nrow = length(regimeTitles), ncol = length(variables), dimnames = list(regimeTitles,variables))
+  if(brown) {
+    br <- brown(data, tree)
+    treeData["brown", ] <- unlist(summary(br)[brVars])
+    }
+  for (i in seq(regimesList)) {
+    message(paste("Running regime",i))
+    ## at this point, the user has to give an initial alpha and sigma for hansen to search on... this should be relaxed
+    ha = hansen(data, tree, regimesList[[i]], ...)
+    treeData[i, ] <- unlist(summary(ha)[haVars])
+    hansenOptima[[i]] <- summary(ha)$optima[[1]]
+  }
+  return(treeData) }
\ No newline at end of file

Deleted: pkg/R/revisedBatchHansen.R
===================================================================
--- pkg/R/revisedBatchHansen.R	2008-11-14 08:04:10 UTC (rev 27)
+++ pkg/R/revisedBatchHansen.R	2008-11-14 08:05:41 UTC (rev 28)
@@ -1,131 +0,0 @@
-# ---------------------------------------------------------------------
-# FUNCTIONS FOR PERFORMING A SERIES OF OU ANALYSES ON A BATCH OF TREES
-# ---------------------------------------------------------------------
-# Copied from functions used in Hipp 2007 Evolution paper
-# *** To run a hansen (OU) analysis, call runBatchHansenFit ***
-# Utilizes ouch v 1.2-4
-# This is the original set of functions utilized in Hipp 2007 (Evolution 61: 2175-2194) as modified
-#  for Lumbsch, Hipp et al. 2008 (BMC Evolutionary Biology 8: 257). At the time of uploade to r-forge, 
-#  they have not been checked for compatibility with subsequent versions of ouch.
-
-
-## Project details
-## maticce: Mapping Transitions In Continuous Character Evolution
-## full project name: Continuous Character Shifts on Phylogenies
-## unix name: mattice
-## Project page requested from R-forge on 7 november 2008
-
-## Changes needed:
-## 1. calls should be to hansen rather than hansen.fit
-## 2. measurement error portions need to be fixed
-## 3. Analysis should be conducted over multiple trees, summarizing only over trees for which a given node is present;
-##    node presence should be checked on each tree by looking to see whether the defining group is monophyletic,
-##    and probably a matrix created for each multiple-tree analysis that makes summarizing quicker.
-## 4. DONE -- Max number of simultaneous nodes should be set 
-## 5. In a better world, allow graphical selection of subtrees to test on a single tree, then extract defining taxa
-##    based on those nodes, using locator() or something like it.
-## 6. IT statistics should use informationCriterion or something else to clean up the code
-
-runBatchHansen <-
-# 11 nov 08: renamed to runBatchHansen
-# Runs batchHansenFit and brown.fit over a list of ouchTrees
-# Arguments:
-#  "ouchTrees" = list of OUCH-style trees
-#  "characterStates" = vector of character states, either extracted from an ouch-style tree data.frame or a named vector
-#  REMOVED: "SEM"= standard error of the mean, vector extracted from an ouch-style tree data.frame
-#  "rescale" = factor to multiply against (times / max(times)) -- choose based on trial analyses; set at <= 0 if you don't want to rescale trees
-#  "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree)
-#  "brown" = whether to analyse the data under a Brownian motion model
-#  "..." = additional arguments to pass along to hansen
-function(ouchTrees, characterStates, cladeMembersList, maxNodes = NULL, regimeTitles = NULL, brown = F, rescale = 1, ...) {
-  ## do all the objects in ouchTrees inherit ouchtree?
-  if(is(ouchTrees,'ouchtree')) ouchTrees <- list(ouchTrees)
-  treeCheck <- unlist(lapply(ouchTrees, function(x) is(x,'ouchtree')))
-  if(F %in% treeCheck) 
-        stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
-	'\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
-  
-  ## Check character states to make sure that they are either named and match names in the trees, or are the same length as the tips
-  for (i in 1:length(ouchTrees)) {
-    dataFlag <- NULL
-    stopFlag <- F
-    tree <- ouchTrees[[i]]
-    terminals <- tree at nodelabels[(tree at nnodes - tree at nterm + 1):tree at nnodes]
-    if(any(F %in% (terminals %in% names(characterStates)))) {
-      message(paste("Not every terminal branch in tree", i, "has a corresponding name in", sQuote("characterStates")))
-      if(length(characterStates) == tree at nterm) {
-        message("Data assumed to be in the same order as terminals")
-        dataFlag <- 'sameOrderTerminals' 
-        }
-      if(length(characterStates) == tree at nnodes) {
-        message("Data assumed to be in the same order as nodes;\nany data not associated with a terminal branch will be ignored")
-        dataFlag <- 'sameOrderNodes'
-        }
-      if(identical(dataFlag, NULL)) stopFlag <- T
-      message("-------------------\n")
-      }
-    else dataFlag <- 'named'
-    if(stopFlag) stop("Correct discrepancies between trees and data and try again!")
-    }
-
-  hansenBatch = vector("list", length(ouchTrees))
-  treeCounter = 0
-  for (i in 1:length(ouchTrees)) {
-    tree <- ouchTrees[[i]]
-    regimesList = regimeVectors(tree, cladeMembersList, maxNodes)
-    if(identical(regimeTitles, NULL)) {
-      regimeTitles <- as.character(1:length(regimesList))
-      if(brown) regimeTitles <- c(regimeTitles, 'brown')
-      }
-    
-    ## rescale tree if requested
-    if(rescale>0) tree at times <- rescale * tree at times / max(tree at times) 
-    
-    ## need to revise regimeVectors so that it only returns regimes for nodes that are supported in the tree
-    ## for now, assume nodes of interest are present in all trees
-
-    ## make sure data fits the tree
-    dataIn <- NULL
-    if(dataFlag == 'sameOrderTerminals') dataIn <- c(rep(NA, tree at nnodes - tree at nterm), characterStates)
-    if(dataFlag == 'sameOrderNodes') dataIn <- characterStates
-    if(dataFlag == 'named') dataIn <- characterStates[match(tree at nodelabels), characterStates]
-    if(identical(dataIn, NULL)) stop(paste("There is a problem with your data that I failed to catch at the outset of", sQuote('runBatchHansen()')))
-    else names(dataIn) <- tree at nodes
-    
-    ## send it off to batchHansen and just stick the results in hansenBatch... this won't work as the number of regimes gets large, 
-    ##   so there should be some option here to just hang onto the coefficients for each run (i.e., hang onto 'coef(hansen(...))' rather than 'hansen(...)')
-    ##   there could also be an option to save the entire object as a series of files in addition to hanging onto 
-    hansenBatch[[i]] = batchHansen(tree, dataIn, regimesList, regimeTitles, brown, ...)
-    message(paste("Tree",i,"of",length(ouchTrees),"complete"))}
-    
-    ## right now no summary is returned; one is needed, summarizing over trees what is summarized for each tree in batchHansen
-  
-  return(list(hansens = hansenBatch, regimes = regimesList)) }
-
-batchHansen <-
-# Runs hansen.fit and brown.fit on a tree over a batch of selective regimes
-# Arguments:
-#  "node" "ancestor" "times" "data" = the standard tree specification vectors of the OUCH-style tree
-#  "regimesList" = list of regime-paintings as output from regimeVectors
-#  "scalingFactor" = factor to multiply against (times / max(times)) -- choose based on trial analyses
-# Value: a matrix with nrow = regimes (+ 1 if brownian model is included) and columns for u, d.f., all estimated parameters, LRvsBM, AIC, and AIC weight
-function(tree, data, regimesList, regimeTitles, brown, ...) {
-  n <- tree at nterm
-  ## set up a matrix that returns lnL, K, sigmasq, theta0, and alpha for every model; thetas will go along into a list that is indexed by model
-  hansenOptima <- list(length(regimeTitles))
-  variables <- c("loglik", "dof", "sigma.squared", "theta / alpha") # it's important that 'alpha' go last so that the matrix fills up right when the brownian motion model is used
-  brVars <- c("loglik", "dof", "sigma.squared", "theta")
-  haVars <- c("loglik", "dof", "sigma.squared", "alpha")
-  treeData <- matrix(data = NA, nrow = length(regimeTitles), ncol = length(variables), dimnames = list(regimeTitles,variables))
-  if(brown) {
-    br <- brown(data, tree)
-    treeData["brown", ] <- unlist(summary(br)[brVars])
-    }
-  for (i in seq(regimesList)) {
-    message(paste("Running regime",i))
-    ## at this point, the user has to give an initial alpha and sigma for hansen to search on... this should be relaxed
-    ha = hansen(data, tree, regimesList[[i]], ...)
-    treeData[i, ] <- unlist(summary(ha)[haVars])
-    hansenOptima[[i]] <- summary(ha)$optima[[1]]
-  }
-  return(treeData) }
\ No newline at end of file

Deleted: pkg/R/revisedTreeTraversal.R
===================================================================
--- pkg/R/revisedTreeTraversal.R	2008-11-14 08:04:10 UTC (rev 27)
+++ pkg/R/revisedTreeTraversal.R	2008-11-14 08:05:41 UTC (rev 28)
@@ -1,231 +0,0 @@
-# ---------------------------------------------------------------
-# FUNCTIONS FOR TRAVERSING AN S4 OUCH TREE AND PAINTING REGIMES #
-# ---------------------------------------------------------------
-
-# Modified from functions used in Hipp 2007 Evolution paper
-# Initially written for ouch v 1.2-4
-# 10 November 2008: changed everything to operate on an ouchtree object (ouch >= v2), otherwise functions the same;
-#  checked on ouch v2.4-2
-# functions included in this file:
-# 1. paintBranches
-# 2. mrcaOUCH
-# 3. ancestorLine
-# 4. allPossibleRegimes
-# 5. regimeVectors
-
-
-paintBranches <-
-# Paints branches with regimes changing at nodes specified
-# arguments
-#  "node" "ancestor" "times" = the standard tree specification vectors of the OUCH-style tree
-#  "regimeShiftNodes" = a vector of nodes at which selective regimes shift: root must be included, but tips are meaningless in this context
-#  "regimeTitles" = a vector of titles for the regimes that begin at the root and at the nodes indicated in "regimeShiftNodes",
-#                   in order of description in "regimeShiftNodes", except that the root is listed first in "regimeTitles"
-#                   but not at all in "regimeShiftNodes"... defaults to "node[x]regime
-# Value: a vector of regimes that can be plopped right into an OUCH-style tree data frame
-function(tree, regimeShiftNodes, regimeTitles) {
-  ## ------------------ begin ouchtree block -----------------
-  ## check to see if tree inherits 'ouchtree'
-  if (!is(tree,'ouchtree')) 
-	stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
-	'\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
-  ## get the vectors we need:
-  ancestor <- tree at ancestors # class = "character"
-  node <- tree at nodes # class = "character"
-  species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
-  times <- tree at times # class = "numeric"
-  ## ------------------ end ouchtree block -------------------
-  
-  names(regimeTitles) = as.character(regimeShiftNodes)
-  colorsVector = character(length(node))
-  for (i in 1:length(ancestor)) {
-    # First three lines fill up the vector for nodes that are hit in order
-    if (is.na(ancestor[i])) {
-      colorsVector[i] = regimeTitles["1"]
-      next }
-    if (as.character(ancestor[i]) %in% as.character(regimeShiftNodes)) {
-      colorsVector[i] = regimeTitles[as.character(ancestor[i])]
-      next }
-    if (colorsVector[as.integer(ancestor[i])] != "") {
-      colorsVector[i] = colorsVector[as.integer(ancestor[i])]
-      next }
-    # These lines fill up the vector for nodes run reached before their immediate ancestor
-    nodeQ = integer(length(node))
-    ii = i
-    repeat {
-      nodeQ = c(ii, nodeQ)
-      ii = as.numeric(ancestor[ii])
-      if (as.character(ancestor[ii]) %in% as.character(regimeShiftNodes)) {
-        colorsVector[ii] = colorsVector[as.integer(ancestor[ii])]
-        break}
-      if (colorsVector[as.integer(ancestor[ii])] != "") {
-        colorsVector[ii] = colorsVector[as.integer(ancestor[ii])]
-        break} }
-
-    for(j in nodeQ) {
-      colorsVector[j] = colorsVector[as.integer(ancestor[j])] } 
-      
-      } # closes for(i in 1:length(ancestor)) loop 
-
-      # a little hack to fix a problem I don't understand... with the undesired side effect that it colors the stem of some subtrees rather than the crown as originally written
-      for(i in 1:length(colorsVector)) if(colorsVector[i] == "") colorsVector[i] <- as.character(i) 
-  return(colorsVector) }
-
-mrcaOUCH <-
-# Finds most recent common ancestor for a vector of tips by:
-#  1. Creating a vector of ancestral nodes leading to each tip
-#  2. Creating an intersection set of ancestral nodes for all taxa by intersecting taxa successively with the last intersection set
-#  3. Returning the node of the final intersection set that has the highest time
-# Arguments:
-#  "node" "ancestor" "times" "species" = the standard tree specification vectors of the OUCH-style tree
-#  "cladeVector" = vector of species for which you want to find the most recent common ancestor
-# Value: the node number (as an integer) of the most recent common ancestor
-# Works! 3-31-06
-function(cladeVector, tree) {
-  ## ------------------ begin ouchtree block -----------------
-  ## check to see if tree inherits 'ouchtree'
-  if (!is(tree,'ouchtree')) 
-	stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
-	'\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
-  ## get the vectors we need:
-  ancestor <- tree at ancestors # class = "character"
-  node <- tree at nodes # class = "character"
-  species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
-  times <- tree at times # class = "numeric"
-  ## ------------------ end ouchtree block -------------------
-  
-  tips = match(cladeVector, species) 
-  listOfAncestorLines = lapply(tips, ancestorLine, tree = tree) # 10 nov 08: this is identical to the appropriate subset of tree at lineages
-  latestMatch = listOfAncestorLines[[1]]
-  for (i in listOfAncestorLines) {
-    latestMatch = i[match(latestMatch, i, nomatch = 0)] }
-  timesVector = times[as.integer(latestMatch)]
-  if(length(timesVector) == 1) {
-    if (is.na(timesVector)) mrca = "1"
-      else mrca = timesVector}
-    else mrca = latestMatch[match(max(as.double(timesVector), na.rm = TRUE), timesVector)]
-  return(mrca) }
-
-ancestorLine <-
-# Creates a vector of ancestral nodes for a tip
-# Arguments:
-#  CHANGED to "tree" 10 nov 08: "node" and "ancestor" = the standard tree specification vectors of the OUCH-style tree
-#  "tip" = the tip node to trace back
-# Value: a vector of nodes leading from a tip to the root
-# 10 nov 08: changed to just grab the appropriate element from tree at lineages
-function(tip, tree) {
-  ## ------------------ begin ouchtree block -----------------
-  ## check to see if tree inherits 'ouchtree'
-  if (!is(tree,'ouchtree')) 
-	stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
-	'\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
-  ## get the vectors we need:
-  #ancestor <- tree at ancestors # class = "character"
-  #node <- tree at nodes # class = "character"
-  #species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
-  #times <- tree at times # class = "numeric"
-  ## ------------------ end ouchtree block -------------------
-  tip <- as.numeric(tip)
-  #nodesVector = vector("character")
-  #counter = 0
-  #repeat {
-  #  if (is.na(tip)) break
-  #  counter = counter + 1
-  #  nodesVector[counter] = ancestor[tip]
-  #  tip = ancestor[tip] }
-  nodesVector <- c(as.character(tree at lineages[[tip]][2:length(tree at lineages[[tip]])]), NA)
-  return(nodesVector) 
-  }
-
-allPossibleRegimes <-
-# Generates a list of vectors of all possible 2^n regimes for a given list of n ancestor nodes, assuming that each node
-#  can either be a change node or not, and that all combinations are meaningful.
-# NOTE: This routine returns all possible permutations, ignoring the fact that the root must be included as a changeNode
-#  for paintBranches to work properly. Exclude the root when calling this routine.
-# Arguments:
-#  "changeNodes" = vector of all change nodes in all possible scenarios, or number of regimes assumed if nodeMatrix = T
-#  "maxNodes" = single number indicating the maximum number of nodes at which a regime can change
-#  "nodeMatrix" = indicates whether to return a binary table for interp or a list of changeNode vectors for analysis
-# Value:
-#    if nodeMatrix = F: a list of changeNode vectors (assumes type = "character"), one for each possible scenario
-#    if nodeMatrix = T: a binary table indicating whether a regime node is present or absent based on allPossibleRegimes output; 
-#                       presumes nodes are labelled 1:n
-# 10 nov 08: this function now takes over regimeNodes
-function(changeNodes, maxNodes = NULL, nodeMatrix = F) {
-    if(!identical(maxNodes, NULL) && maxNodes > length(changeNodes)) warning(paste(sQuote('maxNodes'), 'cannot be larger than the number of nodes; maxNodes ignored'))
-    numberOfRegimes = ifelse(length(changeNodes) == 1, 2, 2^length(changeNodes))
-    regime = vector("list", numberOfRegimes)
-    for (i in 1:(numberOfRegimes - 1)) {
-      remainder = i
-      n = NULL
-      for (j in as.integer(log2(i)):0) {
-        if (2^j > remainder) n[j+1] = NA
-        else {
-          n[j+1] = changeNodes[j+1]
-          remainder = remainder %% 2^j 
-        }
-      }
-      regime[[i]] = sort(n[!is.na(n)]) 
-    }
-    regime[[numberOfRegimes]] = rep("0", times = as.integer(log2(i)) + 1) 
-    if(nodeMatrix == T) {
-      #n <- ifelse(length(changeNodes) == 1, as.numeric(changeNodes), length(changeNodes))
-      regimesNameMatrix = matrix(
-        data = NA, nrow = numberOfRegimes, ncol = length(changeNodes), dimnames = list(
-          as.character(seq(numberOfRegimes)), as.character(seq(length(changeNodes)))
-          )
-      )
-      for (i in seq(numberOfRegimes)) {
-        for (j in seq(length(changeNodes))) {
-          if (is.na(match(j,regime[[i]]))) regimesNameMatrix[i,j] = 0
-          else regimesNameMatrix[i,j] = 1 
-        }
-      }
-      outdata <- regimesNameMatrix
-      if(!identical(maxNodes, NULL)) {
-        outdata <- outdata[apply(outdata,1,sum) <= maxNodes, ]
-        dimnames(outdata)[[1]] = as.character(seq(dim(outdata)[1]))
-        }
-    }
-    else {
-      outdata <- regime 
-      if(!identical(maxNodes, NULL)) {
-        outdata <- outdata[sapply(outdata, length) <= maxNodes]
-        outdata[[length(outdata) + 1]] <- rep("0", length(changeNodes))
-        }
-      }
-  return(outdata) }
-
-regimeVectors <-
-# Generates the list of painted branches representing all possible selective regimes for OU analyses, taking as argument
-# species vectors that describe the clades at the bases of which regimes are specified to change.
-# Arguments:
-#  "node" "ancestor" "times" "species" = the standard tree specification vectors of the OUCH-style tree
-#  "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree)
-# Value: list of vectors that can each be plugged directly into OU analysis as the "regimes" argument
-function(tree, cladeMembersList, maxNodes = NULL) {
-  ## ------------------ begin ouchtree block -----------------
-  ## check to see if tree inherits 'ouchtree'
-  if (!is(tree,'ouchtree')) 
-	stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
-	'\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
-  ## get the vectors we need:
-  ancestor <- tree at ancestors # class = "character"
-  node <- tree at nodes # class = "character"
-  species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
-  times <- tree at times # class = "numeric"
-  ## ------------------ end ouchtree block -------------------
-      
-  changeNodesList <- lapply(cladeMembersList, mrcaOUCH, tree = tree) #Returns a list of length-1 character vectors, each containing a single changeNode -- the fact that this is a list causes problems in paintBranches if not changed to a 1-d vector
-  changeNodesVector <- unlist(changeNodesList)
-  #changeNodesVector = vector("character", length(changeNodesList))
-  #for (i in 1:length(changeNodesList)) # Changing cladeMemberList to a 1-d vector
-  #  {changeNodesVector[i] = changeNodesList[[i]]}
-  allRegimes = allPossibleRegimes(changeNodesVector, maxNodes)
-  regimePaintings = vector("list", length(allRegimes))
-  for (i in 1:length(allRegimes)) {
-    allRegimes[[i]] = c("1", allRegimes[[i]])
-    regimePaintings[[i]] = as.factor(paintBranches(tree, allRegimes[[i]], as.character(allRegimes[[i]])))
-    names(regimePaintings[[i]]) <- tree at nodes
-    message(paste('Created regime',i))}
-  return(regimePaintings) }

Copied: pkg/R/treeTraversal.R (from rev 26, pkg/R/revisedTreeTraversal.R)
===================================================================
--- pkg/R/treeTraversal.R	                        (rev 0)
+++ pkg/R/treeTraversal.R	2008-11-14 08:05:41 UTC (rev 28)
@@ -0,0 +1,231 @@
+# ---------------------------------------------------------------
+# FUNCTIONS FOR TRAVERSING AN S4 OUCH TREE AND PAINTING REGIMES #
+# ---------------------------------------------------------------
+
+# Modified from functions used in Hipp 2007 Evolution paper
+# Initially written for ouch v 1.2-4
+# 10 November 2008: changed everything to operate on an ouchtree object (ouch >= v2), otherwise functions the same;
+#  checked on ouch v2.4-2
+# functions included in this file:
+# 1. paintBranches
+# 2. mrcaOUCH
+# 3. ancestorLine
+# 4. allPossibleRegimes
+# 5. regimeVectors
+
+
+paintBranches <-
+# Paints branches with regimes changing at nodes specified
+# arguments
+#  "node" "ancestor" "times" = the standard tree specification vectors of the OUCH-style tree
+#  "regimeShiftNodes" = a vector of nodes at which selective regimes shift: root must be included, but tips are meaningless in this context
+#  "regimeTitles" = a vector of titles for the regimes that begin at the root and at the nodes indicated in "regimeShiftNodes",
+#                   in order of description in "regimeShiftNodes", except that the root is listed first in "regimeTitles"
+#                   but not at all in "regimeShiftNodes"... defaults to "node[x]regime
+# Value: a vector of regimes that can be plopped right into an OUCH-style tree data frame
+function(tree, regimeShiftNodes, regimeTitles) {
+  ## ------------------ begin ouchtree block -----------------
+  ## check to see if tree inherits 'ouchtree'
+  if (!is(tree,'ouchtree')) 
+	stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
+	'\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
+  ## get the vectors we need:
+  ancestor <- tree at ancestors # class = "character"
+  node <- tree at nodes # class = "character"
+  species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
+  times <- tree at times # class = "numeric"
+  ## ------------------ end ouchtree block -------------------
+  
+  names(regimeTitles) = as.character(regimeShiftNodes)
+  colorsVector = character(length(node))
+  for (i in 1:length(ancestor)) {
+    # First three lines fill up the vector for nodes that are hit in order
+    if (is.na(ancestor[i])) {
+      colorsVector[i] = regimeTitles["1"]
+      next }
+    if (as.character(ancestor[i]) %in% as.character(regimeShiftNodes)) {
+      colorsVector[i] = regimeTitles[as.character(ancestor[i])]
+      next }
+    if (colorsVector[as.integer(ancestor[i])] != "") {
+      colorsVector[i] = colorsVector[as.integer(ancestor[i])]
+      next }
+    # These lines fill up the vector for nodes run reached before their immediate ancestor
+    nodeQ = integer(length(node))
+    ii = i
+    repeat {
+      nodeQ = c(ii, nodeQ)
+      ii = as.numeric(ancestor[ii])
+      if (as.character(ancestor[ii]) %in% as.character(regimeShiftNodes)) {
+        colorsVector[ii] = colorsVector[as.integer(ancestor[ii])]
+        break}
+      if (colorsVector[as.integer(ancestor[ii])] != "") {
+        colorsVector[ii] = colorsVector[as.integer(ancestor[ii])]
+        break} }
+
+    for(j in nodeQ) {
+      colorsVector[j] = colorsVector[as.integer(ancestor[j])] } 
+      
+      } # closes for(i in 1:length(ancestor)) loop 
+
+      # a little hack to fix a problem I don't understand... with the undesired side effect that it colors the stem of some subtrees rather than the crown as originally written
+      for(i in 1:length(colorsVector)) if(colorsVector[i] == "") colorsVector[i] <- as.character(i) 
+  return(colorsVector) }
+
+mrcaOUCH <-
+# Finds most recent common ancestor for a vector of tips by:
+#  1. Creating a vector of ancestral nodes leading to each tip
+#  2. Creating an intersection set of ancestral nodes for all taxa by intersecting taxa successively with the last intersection set
+#  3. Returning the node of the final intersection set that has the highest time
+# Arguments:
+#  "node" "ancestor" "times" "species" = the standard tree specification vectors of the OUCH-style tree
+#  "cladeVector" = vector of species for which you want to find the most recent common ancestor
+# Value: the node number (as an integer) of the most recent common ancestor
+# Works! 3-31-06
+function(cladeVector, tree) {
+  ## ------------------ begin ouchtree block -----------------
+  ## check to see if tree inherits 'ouchtree'
+  if (!is(tree,'ouchtree')) 
+	stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
+	'\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
+  ## get the vectors we need:
+  ancestor <- tree at ancestors # class = "character"
+  node <- tree at nodes # class = "character"
+  species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
+  times <- tree at times # class = "numeric"
+  ## ------------------ end ouchtree block -------------------
+  
+  tips = match(cladeVector, species) 
+  listOfAncestorLines = lapply(tips, ancestorLine, tree = tree) # 10 nov 08: this is identical to the appropriate subset of tree at lineages
+  latestMatch = listOfAncestorLines[[1]]
+  for (i in listOfAncestorLines) {
+    latestMatch = i[match(latestMatch, i, nomatch = 0)] }
+  timesVector = times[as.integer(latestMatch)]
+  if(length(timesVector) == 1) {
+    if (is.na(timesVector)) mrca = "1"
+      else mrca = timesVector}
+    else mrca = latestMatch[match(max(as.double(timesVector), na.rm = TRUE), timesVector)]
+  return(mrca) }
+
+ancestorLine <-
+# Creates a vector of ancestral nodes for a tip
+# Arguments:
+#  CHANGED to "tree" 10 nov 08: "node" and "ancestor" = the standard tree specification vectors of the OUCH-style tree
+#  "tip" = the tip node to trace back
+# Value: a vector of nodes leading from a tip to the root
+# 10 nov 08: changed to just grab the appropriate element from tree at lineages
+function(tip, tree) {
+  ## ------------------ begin ouchtree block -----------------
+  ## check to see if tree inherits 'ouchtree'
+  if (!is(tree,'ouchtree')) 
+	stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
+	'\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
+  ## get the vectors we need:
+  #ancestor <- tree at ancestors # class = "character"
+  #node <- tree at nodes # class = "character"
+  #species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
+  #times <- tree at times # class = "numeric"
+  ## ------------------ end ouchtree block -------------------
+  tip <- as.numeric(tip)
+  #nodesVector = vector("character")
+  #counter = 0
+  #repeat {
+  #  if (is.na(tip)) break
+  #  counter = counter + 1
+  #  nodesVector[counter] = ancestor[tip]
+  #  tip = ancestor[tip] }
+  nodesVector <- c(as.character(tree at lineages[[tip]][2:length(tree at lineages[[tip]])]), NA)
+  return(nodesVector) 
+  }
+
+allPossibleRegimes <-
+# Generates a list of vectors of all possible 2^n regimes for a given list of n ancestor nodes, assuming that each node
+#  can either be a change node or not, and that all combinations are meaningful.
+# NOTE: This routine returns all possible permutations, ignoring the fact that the root must be included as a changeNode
+#  for paintBranches to work properly. Exclude the root when calling this routine.
+# Arguments:
+#  "changeNodes" = vector of all change nodes in all possible scenarios, or number of regimes assumed if nodeMatrix = T
+#  "maxNodes" = single number indicating the maximum number of nodes at which a regime can change
+#  "nodeMatrix" = indicates whether to return a binary table for interp or a list of changeNode vectors for analysis
+# Value:
+#    if nodeMatrix = F: a list of changeNode vectors (assumes type = "character"), one for each possible scenario
+#    if nodeMatrix = T: a binary table indicating whether a regime node is present or absent based on allPossibleRegimes output; 
+#                       presumes nodes are labelled 1:n
+# 10 nov 08: this function now takes over regimeNodes
+function(changeNodes, maxNodes = NULL, nodeMatrix = F) {
+    if(!identical(maxNodes, NULL) && maxNodes > length(changeNodes)) warning(paste(sQuote('maxNodes'), 'cannot be larger than the number of nodes; maxNodes ignored'))
+    numberOfRegimes = ifelse(length(changeNodes) == 1, 2, 2^length(changeNodes))
+    regime = vector("list", numberOfRegimes)
+    for (i in 1:(numberOfRegimes - 1)) {
+      remainder = i
+      n = NULL
+      for (j in as.integer(log2(i)):0) {
+        if (2^j > remainder) n[j+1] = NA
+        else {
+          n[j+1] = changeNodes[j+1]
+          remainder = remainder %% 2^j 
+        }
+      }
+      regime[[i]] = sort(n[!is.na(n)]) 
+    }
+    regime[[numberOfRegimes]] = rep("0", times = as.integer(log2(i)) + 1) 
+    if(nodeMatrix == T) {
+      #n <- ifelse(length(changeNodes) == 1, as.numeric(changeNodes), length(changeNodes))
+      regimesNameMatrix = matrix(
+        data = NA, nrow = numberOfRegimes, ncol = length(changeNodes), dimnames = list(
+          as.character(seq(numberOfRegimes)), as.character(seq(length(changeNodes)))
+          )
+      )
+      for (i in seq(numberOfRegimes)) {
+        for (j in seq(length(changeNodes))) {
+          if (is.na(match(j,regime[[i]]))) regimesNameMatrix[i,j] = 0
+          else regimesNameMatrix[i,j] = 1 
+        }
+      }
+      outdata <- regimesNameMatrix
+      if(!identical(maxNodes, NULL)) {
+        outdata <- outdata[apply(outdata,1,sum) <= maxNodes, ]
+        dimnames(outdata)[[1]] = as.character(seq(dim(outdata)[1]))
+        }
+    }
+    else {
+      outdata <- regime 
+      if(!identical(maxNodes, NULL)) {
+        outdata <- outdata[sapply(outdata, length) <= maxNodes]
+        outdata[[length(outdata) + 1]] <- rep("0", length(changeNodes))
+        }
+      }
+  return(outdata) }
+
+regimeVectors <-
+# Generates the list of painted branches representing all possible selective regimes for OU analyses, taking as argument
+# species vectors that describe the clades at the bases of which regimes are specified to change.
+# Arguments:
+#  "node" "ancestor" "times" "species" = the standard tree specification vectors of the OUCH-style tree
+#  "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree)
+# Value: list of vectors that can each be plugged directly into OU analysis as the "regimes" argument
+function(tree, cladeMembersList, maxNodes = NULL) {
+  ## ------------------ begin ouchtree block -----------------
+  ## check to see if tree inherits 'ouchtree'
+  if (!is(tree,'ouchtree')) 
+	stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
+	'\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
+  ## get the vectors we need:
+  ancestor <- tree at ancestors # class = "character"
+  node <- tree at nodes # class = "character"
+  species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
+  times <- tree at times # class = "numeric"
+  ## ------------------ end ouchtree block -------------------
+      
+  changeNodesList <- lapply(cladeMembersList, mrcaOUCH, tree = tree) #Returns a list of length-1 character vectors, each containing a single changeNode -- the fact that this is a list causes problems in paintBranches if not changed to a 1-d vector
+  changeNodesVector <- unlist(changeNodesList)
+  #changeNodesVector = vector("character", length(changeNodesList))
+  #for (i in 1:length(changeNodesList)) # Changing cladeMemberList to a 1-d vector
+  #  {changeNodesVector[i] = changeNodesList[[i]]}
+  allRegimes = allPossibleRegimes(changeNodesVector, maxNodes)
+  regimePaintings = vector("list", length(allRegimes))
+  for (i in 1:length(allRegimes)) {
+    allRegimes[[i]] = c("1", allRegimes[[i]])
+    regimePaintings[[i]] = as.factor(paintBranches(tree, allRegimes[[i]], as.character(allRegimes[[i]])))
+    names(regimePaintings[[i]]) <- tree at nodes
+    message(paste('Created regime',i))}
+  return(regimePaintings) }



More information about the Mattice-commits mailing list