[Mattice-commits] r43 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Nov 20 07:15:50 CET 2008


Author: andrew_hipp
Date: 2008-11-20 07:15:50 +0100 (Thu, 20 Nov 2008)
New Revision: 43

Modified:
   pkg/R/batchHansen.R
   pkg/R/regimes.R
Log:
batchHansen and all regime-painting functions now work over multiple trees

Modified: pkg/R/batchHansen.R
===================================================================
--- pkg/R/batchHansen.R	2008-11-19 22:51:40 UTC (rev 42)
+++ pkg/R/batchHansen.R	2008-11-20 06:15:50 UTC (rev 43)
@@ -1,17 +1,11 @@
 # ---------------------------------------------------------------------
 # FUNCTIONS FOR PERFORMING A SERIES OF OU ANALYSES ON A BATCH OF TREES
 # ---------------------------------------------------------------------
-
 ## Changes needed:
 ## 2. measurement error portions need to be fixed
-## 3. Analysis should be conducted over multiple trees, summarizing only over trees for which a given node is present;
-##    node presence should be checked on each tree by looking to see whether the defining group is monophyletic,
-##    and probably a matrix created for each multiple-tree analysis that makes summarizing quicker.
 ## 5. In a better world, allow graphical selection of subtrees to test on a single tree, then extract defining taxa
 ##    based on those nodes, using locator() or something like it.
 
-## to do: make change to deal with phylogenetic uncertainty, by revising how regimeVectors works. Call a new function regimeMatrix once at the outset of the analysis to create a matrix of change nodes (nodes present or absent for each regime), then once for each tree pass that matrix along with the taxa defining the node into a revisedRegime vectors to (1) check which of the nodes are present in the tree and create a new row in a matrix of nodes (columns) by trees (rows), where 1 indicates the node is present in the tree; and (2) make regime vectors for regimes whose nodes are all present in the tree.
-
 runBatchHansen <-
 # 11 nov 08: renamed to runBatchHansen
 # Runs batchHansenFit and brown over a list of ouchTrees
@@ -23,7 +17,6 @@
 #  "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree)
 #  "brown" = whether to analyse the data under a Brownian motion model
 #  "..." = additional arguments to pass along to hansen
-
 function(ouchTrees, characterStates, cladeMembersList, nodeNames = NULL, maxNodes = NULL, regimeTitles = NULL, brown = F, rescale = 1, ...) {
   ## do all the objects in ouchTrees inherit ouchtree?
   if(is(ouchTrees,'ouchtree')) ouchTrees <- list(ouchTrees)
@@ -55,25 +48,18 @@
     if(stopFlag) stop("Correct discrepancies between trees and data and try again!")
     }
 
-  nnodes <- length(cladeMembersList)
-  regMatrix <- regimeMatrix(nodeNames = ifelse(identical(nodeNames, NULL), seq(nnodes), nodeNames), digits = nnodes) # only make regMatrix once
-  regimeLists <- regimeMaker(ouchTrees = ouchTrees, regMatrix = regMatrix, nodeMembers = cladeMembersList) # new function... get a list of lists
+  ar = regimeVectors(ouchTrees, cladeMembersList, maxNodes)
   hansenBatch <- list(length(ouchTrees))
-  # regimeMatrices <- list(length(ouchTrees))
   for (i in 1:length(ouchTrees)) {
     tree <- ouchTrees[[i]]
-    rl = regimeVectors(tree, cladeMembersList, maxNodes)
     if(identical(regimeTitles, NULL)) {
-      regimeTitles <- as.character(1:length(rl$regimeList))
+      regimeTitles <- as.character(1:length(ar$regList[[i]]))
       if(brown) regimeTitles <- c(regimeTitles, 'brown')
       }
     
     ## rescale tree if requested
     if(rescale>0) tree at times <- rescale * tree at times / max(tree at times) 
     
-    ## need to revise regimeVectors so that it only returns regimes for nodes that are supported in the tree
-    ## for now, assume nodes of interest are present in all trees
-
     ## make sure data fits the tree
     dataIn <- NULL
     if(dataFlag == 'sameOrderTerminals') dataIn <- c(rep(NA, tree at nnodes - tree at nterm), characterStates)
@@ -85,15 +71,13 @@
     ## send it off to batchHansen and just stick the results in hansenBatch... this won't work as the number of regimes gets large, 
     ##   so there should be some option here to just hang onto the coefficients for each run (i.e., hang onto 'coef(hansen(...))' rather than 'hansen(...)')
     ##   there could also be an option to save the entire object as a series of files in addition to hanging onto 
-    hansenBatch[[i]] <- batchHansen(tree, dataIn, rl$regimeList, regimeTitles, brown, ...)
-    regimeLists[[i]] <- rl$regimeList
-    regimeMatrices[[i]] <- rl$regimeMatrix
-    message(paste("Tree",i,"of",length(ouchTrees),"complete"))
+    hansenBatch[[i]] <- batchHansen(tree, dataIn, ar$regList[[i]], regimeTitles, brown, ...)
+    message(paste("Tree",i,"of",length(ouchTrees),"complete", "\n-----------------------------"))
   }
     
     ## right now no summary is returned; one is needed, summarizing over trees what is summarized for each tree in batchHansen
     ## the below returns presuppose a single tree
-  outdata <- list(hansens = hansenBatch, regimeLists = regimeLists, regimeMatrices = regimeMatrices, brown = brown, N = ouchTrees[[i]]@nterm, analysisDate = date(), call = match.call())
+  outdata <- list(hansens = hansenBatch, regList = ar$regList, regMatrix = ar$regMatrix, nodeMatrix = ar$nodeMatrix, brown = brown, N = ouchTrees[[i]]@nterm, analysisDate = date(), call = match.call())
   class(outdata) <- 'hansenBatch'
   return(outdata)}
 

Modified: pkg/R/regimes.R
===================================================================
--- pkg/R/regimes.R	2008-11-19 22:51:40 UTC (rev 42)
+++ pkg/R/regimes.R	2008-11-20 06:15:50 UTC (rev 43)
@@ -7,13 +7,16 @@
 # updated to accommodate multiple trees Nov 2008
 
 regimeVectors <-
+# This is the basic call to get the full range of regimes over a set of trees
 # Generates the list of painted branches representing all possible selective regimes for OU analyses, taking as argument
 # species vectors that describe the clades at the bases of which regimes are specified to change.
 # Arguments:
 #  "tree" = the standard tree specification vectors of the OUCH-style tree
 #  "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree)
 #  "maxNodes" = maximum number of nodes at which regime is permitted to change
-# Value: list of vectors that can each be plugged directly into OU analysis as the "regimes" argument
+# Value: 
+#  "regList" = list of vectors that can each be plugged directly into OU analysis as the "regimes" argument
+#  "nodeMatrix" = matrix of trees (rows) by nodes (columns) indicating what nodes are present on which trees
 # 19 nov 08: changing to accept a list of trees and trimmed down greatly
 function(ouchTrees, cladeMembersList, maxNodes = NULL) {
   ## ------------------ begin ouchtree block -----------------
@@ -30,8 +33,8 @@
   
   nnode <- length(cladeMembersList)
   regMatrix <- regimeMatrix(n = nnode, maxNodes = maxNodes)
-  apr = regimeMaker(ouchTrees, regMatrix, cladeMembersList) ## HOLD IT! NOW REGIME MAKER WORKS ON ALL TREES AT ONCE... RETHINK THIS
-  outdata <- list(regList = apr$regList, nodeMatrix = apr$nodeMatrix)
+  apr = regimeMaker(ouchTrees, regMatrix, cladeMembersList)
+  outdata <- list(regList = apr$regList, regMatrix = apr$regMatrices, nodeMatrix = apr$nodeMatrix)
   return(outdata) }
 
 paintBranches <-
@@ -106,21 +109,30 @@
   nodeMatrix <- matrix(NA, nrow = numTrees, ncol = numNodes, dimnames = list(seq(numTrees), dimnames(regMatrix)[[2]]))
   changeNodes <- list(numTrees)
   regList <- list(numTrees)
+  regMatrices <- list(numTrees)
   
   # fill outdata
   for(i in seq(numNodes)) nodeMatrix[, i] <- unlist(lapply(ouchTrees, isMonophyletic, taxa = nodeMembers[[i]]))
   for(i in seq(numTrees)) {
     tree <- ouchTrees[[i]]
-    treeRegMatrix <- regMatrix * matrix(nodeMatrix[i, ], dim(regMatrix)[1], dim(regMatrix)[2], byrow = T) # multiplies regMatrix by nodes present
-    treeRegMatrix <- treeRegMatrix[which(apply(treeRegMatrix, 1, sum) > 0), ] # subset for regimes that still have nodes
-    treeRegMatrix <- rbind(treeRegMatrix, treeRegMatrix[1,] * 0) # add one all-zero row for the OU1 model
-    numTreeRegs <- dim(treeRegMatrix)[1]
+    regMatrices[[i]] <- regMatrix * as.numeric(matrix(nodeMatrix[i, ], dim(regMatrix)[1], dim(regMatrix)[2], byrow = T)) # multiplies regMatrix by nodes present
+      if(class(regMatrices[[i]]) != "matrix") regMatrices[[i]] <- matrix(regMatrices[[i]], nrow = 1)
+    regMatrices[[i]] <- regMatrices[[i]][which(apply(regMatrices[[i]], 1, sum) > 0), ] # subset for regimes that still have nodes
+      if(class(regMatrices[[i]]) != "matrix") regMatrices[[i]] <- matrix(regMatrices[[i]], nrow = 1)
+    regMatrices[[i]] <- regMatrices[[i]][!duplicated(apply(regMatrices[[i]], 1, as.decimal)), ] ## gets rid of non-unique regimes
+    if(class(regMatrices[[i]]) == "matrix") dimnames(regMatrices[[i]]) <- list(seq(dim(regMatrices[[i]])[1]), dimnames(regMatrices[[i]])[[2]])
+      else regMatrices[[i]] <- matrix(regMatrices[[i]], nrow = 1)
+    regMatrices[[i]] <- rbind(regMatrices[[i]], regMatrices[[i]][1,] * 0) # add one all-zero row for the OU1 model
+    numTreeRegs <- dim(regMatrices[[i]])[1]
     treeRegs <- list(numTreeRegs) # this will be assigned to regList[[i]]
     nodesVector <- unlist(lapply(nodeMembers, mrcaOUCH, tree = ouchTrees[[i]])) # as written, gets the MRCA for even invalid nodes just so indexing stays right
-    for(j in seq(numTreeRegs)) treeRegs[[j]] <- paintBranches(c("1", nodesVector[as.logical(treeRegMatrix[j, ])]), tree)
+    for(j in seq(numTreeRegs)) {
+      treeRegs[[j]] <- as.factor(paintBranches(c("1", nodesVector[as.logical(regMatrices[[i]][j, ])]), tree))
+      names(treeRegs[[j]]) <- tree at nodes
+    }
     regList[[i]] <- treeRegs
   }
-  outdata <- list(regList = regList, nodeMatrix = nodeMatrix)
+  outdata <- list(regList = regList, nodeMatrix = nodeMatrix, regMatrices = regMatrices)
   return(outdata)
 }
 
@@ -144,7 +156,6 @@
 # submitted to R listserv Thu Apr 15 12:27:39 CEST 2004
 # AH added 'digits' to make it work with regimeMatrix
 # https://stat.ethz.ch/pipermail/r-help/2004-April/049419.html
-
 {
    out <- NULL
    while(n > 0) {
@@ -158,4 +169,12 @@
    if(!identical(digits, NULL) && !r) out <- c(rep(0, digits-length(out)), out)
    if(!identical(digits, NULL) && r) out <- c(out, rep(0, digits-length(out)))
    return(out)
+}
+
+as.decimal <- function(n) {
+# takes a binary vector and makes it a decimal
+  digits <- length(n)
+  result <- 0
+  for(i in digits:1) result <- result + n[i] * 2 ^ (digits - i)
+  result
 }
\ No newline at end of file



More information about the Mattice-commits mailing list