[Mattice-commits] r43 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Nov 20 07:15:50 CET 2008
Author: andrew_hipp
Date: 2008-11-20 07:15:50 +0100 (Thu, 20 Nov 2008)
New Revision: 43
Modified:
pkg/R/batchHansen.R
pkg/R/regimes.R
Log:
batchHansen and all regime-painting functions now work over multiple trees
Modified: pkg/R/batchHansen.R
===================================================================
--- pkg/R/batchHansen.R 2008-11-19 22:51:40 UTC (rev 42)
+++ pkg/R/batchHansen.R 2008-11-20 06:15:50 UTC (rev 43)
@@ -1,17 +1,11 @@
# ---------------------------------------------------------------------
# FUNCTIONS FOR PERFORMING A SERIES OF OU ANALYSES ON A BATCH OF TREES
# ---------------------------------------------------------------------
-
## Changes needed:
## 2. measurement error portions need to be fixed
-## 3. Analysis should be conducted over multiple trees, summarizing only over trees for which a given node is present;
-## node presence should be checked on each tree by looking to see whether the defining group is monophyletic,
-## and probably a matrix created for each multiple-tree analysis that makes summarizing quicker.
## 5. In a better world, allow graphical selection of subtrees to test on a single tree, then extract defining taxa
## based on those nodes, using locator() or something like it.
-## to do: make change to deal with phylogenetic uncertainty, by revising how regimeVectors works. Call a new function regimeMatrix once at the outset of the analysis to create a matrix of change nodes (nodes present or absent for each regime), then once for each tree pass that matrix along with the taxa defining the node into a revisedRegime vectors to (1) check which of the nodes are present in the tree and create a new row in a matrix of nodes (columns) by trees (rows), where 1 indicates the node is present in the tree; and (2) make regime vectors for regimes whose nodes are all present in the tree.
-
runBatchHansen <-
# 11 nov 08: renamed to runBatchHansen
# Runs batchHansenFit and brown over a list of ouchTrees
@@ -23,7 +17,6 @@
# "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree)
# "brown" = whether to analyse the data under a Brownian motion model
# "..." = additional arguments to pass along to hansen
-
function(ouchTrees, characterStates, cladeMembersList, nodeNames = NULL, maxNodes = NULL, regimeTitles = NULL, brown = F, rescale = 1, ...) {
## do all the objects in ouchTrees inherit ouchtree?
if(is(ouchTrees,'ouchtree')) ouchTrees <- list(ouchTrees)
@@ -55,25 +48,18 @@
if(stopFlag) stop("Correct discrepancies between trees and data and try again!")
}
- nnodes <- length(cladeMembersList)
- regMatrix <- regimeMatrix(nodeNames = ifelse(identical(nodeNames, NULL), seq(nnodes), nodeNames), digits = nnodes) # only make regMatrix once
- regimeLists <- regimeMaker(ouchTrees = ouchTrees, regMatrix = regMatrix, nodeMembers = cladeMembersList) # new function... get a list of lists
+ ar = regimeVectors(ouchTrees, cladeMembersList, maxNodes)
hansenBatch <- list(length(ouchTrees))
- # regimeMatrices <- list(length(ouchTrees))
for (i in 1:length(ouchTrees)) {
tree <- ouchTrees[[i]]
- rl = regimeVectors(tree, cladeMembersList, maxNodes)
if(identical(regimeTitles, NULL)) {
- regimeTitles <- as.character(1:length(rl$regimeList))
+ regimeTitles <- as.character(1:length(ar$regList[[i]]))
if(brown) regimeTitles <- c(regimeTitles, 'brown')
}
## rescale tree if requested
if(rescale>0) tree at times <- rescale * tree at times / max(tree at times)
- ## need to revise regimeVectors so that it only returns regimes for nodes that are supported in the tree
- ## for now, assume nodes of interest are present in all trees
-
## make sure data fits the tree
dataIn <- NULL
if(dataFlag == 'sameOrderTerminals') dataIn <- c(rep(NA, tree at nnodes - tree at nterm), characterStates)
@@ -85,15 +71,13 @@
## send it off to batchHansen and just stick the results in hansenBatch... this won't work as the number of regimes gets large,
## so there should be some option here to just hang onto the coefficients for each run (i.e., hang onto 'coef(hansen(...))' rather than 'hansen(...)')
## there could also be an option to save the entire object as a series of files in addition to hanging onto
- hansenBatch[[i]] <- batchHansen(tree, dataIn, rl$regimeList, regimeTitles, brown, ...)
- regimeLists[[i]] <- rl$regimeList
- regimeMatrices[[i]] <- rl$regimeMatrix
- message(paste("Tree",i,"of",length(ouchTrees),"complete"))
+ hansenBatch[[i]] <- batchHansen(tree, dataIn, ar$regList[[i]], regimeTitles, brown, ...)
+ message(paste("Tree",i,"of",length(ouchTrees),"complete", "\n-----------------------------"))
}
## right now no summary is returned; one is needed, summarizing over trees what is summarized for each tree in batchHansen
## the below returns presuppose a single tree
- outdata <- list(hansens = hansenBatch, regimeLists = regimeLists, regimeMatrices = regimeMatrices, brown = brown, N = ouchTrees[[i]]@nterm, analysisDate = date(), call = match.call())
+ outdata <- list(hansens = hansenBatch, regList = ar$regList, regMatrix = ar$regMatrix, nodeMatrix = ar$nodeMatrix, brown = brown, N = ouchTrees[[i]]@nterm, analysisDate = date(), call = match.call())
class(outdata) <- 'hansenBatch'
return(outdata)}
Modified: pkg/R/regimes.R
===================================================================
--- pkg/R/regimes.R 2008-11-19 22:51:40 UTC (rev 42)
+++ pkg/R/regimes.R 2008-11-20 06:15:50 UTC (rev 43)
@@ -7,13 +7,16 @@
# updated to accommodate multiple trees Nov 2008
regimeVectors <-
+# This is the basic call to get the full range of regimes over a set of trees
# Generates the list of painted branches representing all possible selective regimes for OU analyses, taking as argument
# species vectors that describe the clades at the bases of which regimes are specified to change.
# Arguments:
# "tree" = the standard tree specification vectors of the OUCH-style tree
# "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree)
# "maxNodes" = maximum number of nodes at which regime is permitted to change
-# Value: list of vectors that can each be plugged directly into OU analysis as the "regimes" argument
+# Value:
+# "regList" = list of vectors that can each be plugged directly into OU analysis as the "regimes" argument
+# "nodeMatrix" = matrix of trees (rows) by nodes (columns) indicating what nodes are present on which trees
# 19 nov 08: changing to accept a list of trees and trimmed down greatly
function(ouchTrees, cladeMembersList, maxNodes = NULL) {
## ------------------ begin ouchtree block -----------------
@@ -30,8 +33,8 @@
nnode <- length(cladeMembersList)
regMatrix <- regimeMatrix(n = nnode, maxNodes = maxNodes)
- apr = regimeMaker(ouchTrees, regMatrix, cladeMembersList) ## HOLD IT! NOW REGIME MAKER WORKS ON ALL TREES AT ONCE... RETHINK THIS
- outdata <- list(regList = apr$regList, nodeMatrix = apr$nodeMatrix)
+ apr = regimeMaker(ouchTrees, regMatrix, cladeMembersList)
+ outdata <- list(regList = apr$regList, regMatrix = apr$regMatrices, nodeMatrix = apr$nodeMatrix)
return(outdata) }
paintBranches <-
@@ -106,21 +109,30 @@
nodeMatrix <- matrix(NA, nrow = numTrees, ncol = numNodes, dimnames = list(seq(numTrees), dimnames(regMatrix)[[2]]))
changeNodes <- list(numTrees)
regList <- list(numTrees)
+ regMatrices <- list(numTrees)
# fill outdata
for(i in seq(numNodes)) nodeMatrix[, i] <- unlist(lapply(ouchTrees, isMonophyletic, taxa = nodeMembers[[i]]))
for(i in seq(numTrees)) {
tree <- ouchTrees[[i]]
- treeRegMatrix <- regMatrix * matrix(nodeMatrix[i, ], dim(regMatrix)[1], dim(regMatrix)[2], byrow = T) # multiplies regMatrix by nodes present
- treeRegMatrix <- treeRegMatrix[which(apply(treeRegMatrix, 1, sum) > 0), ] # subset for regimes that still have nodes
- treeRegMatrix <- rbind(treeRegMatrix, treeRegMatrix[1,] * 0) # add one all-zero row for the OU1 model
- numTreeRegs <- dim(treeRegMatrix)[1]
+ regMatrices[[i]] <- regMatrix * as.numeric(matrix(nodeMatrix[i, ], dim(regMatrix)[1], dim(regMatrix)[2], byrow = T)) # multiplies regMatrix by nodes present
+ if(class(regMatrices[[i]]) != "matrix") regMatrices[[i]] <- matrix(regMatrices[[i]], nrow = 1)
+ regMatrices[[i]] <- regMatrices[[i]][which(apply(regMatrices[[i]], 1, sum) > 0), ] # subset for regimes that still have nodes
+ if(class(regMatrices[[i]]) != "matrix") regMatrices[[i]] <- matrix(regMatrices[[i]], nrow = 1)
+ regMatrices[[i]] <- regMatrices[[i]][!duplicated(apply(regMatrices[[i]], 1, as.decimal)), ] ## gets rid of non-unique regimes
+ if(class(regMatrices[[i]]) == "matrix") dimnames(regMatrices[[i]]) <- list(seq(dim(regMatrices[[i]])[1]), dimnames(regMatrices[[i]])[[2]])
+ else regMatrices[[i]] <- matrix(regMatrices[[i]], nrow = 1)
+ regMatrices[[i]] <- rbind(regMatrices[[i]], regMatrices[[i]][1,] * 0) # add one all-zero row for the OU1 model
+ numTreeRegs <- dim(regMatrices[[i]])[1]
treeRegs <- list(numTreeRegs) # this will be assigned to regList[[i]]
nodesVector <- unlist(lapply(nodeMembers, mrcaOUCH, tree = ouchTrees[[i]])) # as written, gets the MRCA for even invalid nodes just so indexing stays right
- for(j in seq(numTreeRegs)) treeRegs[[j]] <- paintBranches(c("1", nodesVector[as.logical(treeRegMatrix[j, ])]), tree)
+ for(j in seq(numTreeRegs)) {
+ treeRegs[[j]] <- as.factor(paintBranches(c("1", nodesVector[as.logical(regMatrices[[i]][j, ])]), tree))
+ names(treeRegs[[j]]) <- tree at nodes
+ }
regList[[i]] <- treeRegs
}
- outdata <- list(regList = regList, nodeMatrix = nodeMatrix)
+ outdata <- list(regList = regList, nodeMatrix = nodeMatrix, regMatrices = regMatrices)
return(outdata)
}
@@ -144,7 +156,6 @@
# submitted to R listserv Thu Apr 15 12:27:39 CEST 2004
# AH added 'digits' to make it work with regimeMatrix
# https://stat.ethz.ch/pipermail/r-help/2004-April/049419.html
-
{
out <- NULL
while(n > 0) {
@@ -158,4 +169,12 @@
if(!identical(digits, NULL) && !r) out <- c(rep(0, digits-length(out)), out)
if(!identical(digits, NULL) && r) out <- c(out, rep(0, digits-length(out)))
return(out)
+}
+
+as.decimal <- function(n) {
+# takes a binary vector and makes it a decimal
+ digits <- length(n)
+ result <- 0
+ for(i in digits:1) result <- result + n[i] * 2 ^ (digits - i)
+ result
}
\ No newline at end of file
More information about the Mattice-commits
mailing list