[Mattice-commits] r241 - in pkg: R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Mar 11 23:09:11 CET 2011


Author: andrew_hipp
Date: 2011-03-11 23:09:10 +0100 (Fri, 11 Mar 2011)
New Revision: 241

Added:
   pkg/R/OLDregimes.R
Modified:
   pkg/R/regimes.R
   pkg/R/summarizingAnalyses.R
   pkg/man/regimeMaker.Rd
Log:
trying to figure out why the ic weights are not being averaged correctly... tallying trees wrong somewhere? Also changed documentation: regimeMaker does not return an NA where a node is missing, but a 0. NAs are used for rows that are all zeros or duplicates. 

Added: pkg/R/OLDregimes.R
===================================================================
--- pkg/R/OLDregimes.R	                        (rev 0)
+++ pkg/R/OLDregimes.R	2011-03-11 22:09:10 UTC (rev 241)
@@ -0,0 +1,167 @@
+# ---------------------------------------------------
+# FUNCTIONS FOR PAINTING REGIMES ON AN S4 OUCH TREE #
+# ---------------------------------------------------
+# Adapted from functions used in Hipp 2007 Evolution paper
+# Initially written for ouch v 1.2-4
+# updated to ouch >= 2.4-2 Nov 2008
+# updated to accommodate multiple trees Nov 2008
+
+regimeVectors <-
+# This is the basic call to get the full range of regimes over a set of trees
+# Generates the list of painted branches representing all possible selective regimes for OU analyses, taking as argument
+# species vectors that describe the clades at the bases of which regimes are specified to change.
+# Arguments:
+#  "tree" = the standard tree specification vectors of the OUCH-style tree
+#  "cladeMembersList" = list of vectors containing names of the members of each clade (except for the root of the tree)
+#  "maxNodes" = maximum number of nodes at which regime is permitted to change
+# Value: 
+#  "regList" = list of vectors that can each be plugged directly into OU analysis as the "regimes" argument
+#  "nodeMatrix" = matrix of trees (rows) by nodes (columns) indicating what nodes are present on which trees
+# 19 nov 08: changing to accept a list of trees and trimmed down greatly
+function(ouchTrees, cladeMembersList, maxNodes = NULL) {
+  nnode <- length(cladeMembersList)
+  regMatrix <- regimeMatrix(n = nnode, maxNodes = maxNodes)
+  apr = regimeMaker(ouchTrees, regMatrix, cladeMembersList)
+  outdata <- list(regList = apr$regList, regMatrix = apr$regMatrix, nodeMatrix = apr$nodeMatrix)
+  return(outdata) 
+}
+
+paintBranches <-
+# Paints branches with regimes changing at nodes specified
+# arguments
+#  "tree" = OUCH-style (S4) tree
+#  "regimeShiftNodes" = a vector of nodes or a list of taxa defining nodes at which selective regimes shift: 
+#                       root may be included--if not present, will be added--but tips are meaningless in this context
+#  "regimeTitles" = a vector of titles for the regimes that begin at the root and at the nodes indicated in "regimeShiftNodes",
+#                   in order of description in "regimeShiftNodes", except that the root is listed first in "regimeTitles"
+#                   but not at all in "regimeShiftNodes"... defaults to "node[x]regime
+# Value: a vector of regimes that can be handed to hansen
+# this is an especially ugly function, and I'm sure there are prettier ways of doing it. The 'paint' function in
+# ouch could be adapted to this purpose, if one makes a series of consecutive calls going down the tree,
+# and down the road it would probably make sense to turn this into just such a function.
+
+function(regimeShiftNodes, tree, regimeTitles = NULL) {
+  ## ------------------ begin ouchtree block -----------------
+  ## check to see if tree inherits 'ouchtree'
+  if (!is(tree,'ouchtree')) 
+	stop(paste('This function has been rewritten to use the new S4 ', sQuote('ouchtree'), ' class.',
+	'\nYou can generate a tree of this class by calling ', sQuote('ouchtree()'), '.', sep = ""))
+  ## get the vectors we need:
+  ancestor <- tree at ancestors # class = "character"
+  node <- tree at nodes # class = "character"
+  species <- tree at nodelabels # class = "character" -- note that nodelabels is more general than this indicates and the name should be changed throughout at some point
+  times <- tree at times # class = "numeric"
+  ## ------------------ end ouchtree block -------------------
+  
+  if(class(regimeShiftNodes) == "list") regimeShiftNodes <- unlist(lapply(regimeShiftNodes, mrcaOUCH, tree = tree))
+  regimeShiftNodes <- unique(c(as.character(tree at root), regimeShiftNodes))
+  if(identical(regimeTitles, NULL)) regimeTitles <- as.character(regimeShiftNodes)
+  names(regimeTitles) = as.character(regimeShiftNodes)
+  colorsVector = character(length(node))
+  for (i in 1:length(ancestor)) {
+    # First three lines fill up the vector for nodes that are hit in order
+    if (is.na(ancestor[i])) {
+      colorsVector[i] = regimeTitles["1"]
+      next }
+    if (as.character(ancestor[i]) %in% as.character(regimeShiftNodes)) {
+      colorsVector[i] = regimeTitles[as.character(ancestor[i])]
+      next }
+    if (colorsVector[as.integer(ancestor[i])] != "") {
+      colorsVector[i] = colorsVector[as.integer(ancestor[i])]
+      next }
+    # These lines fill up the vector for nodes run reached before their immediate ancestor
+    nodeQ = integer(length(node))
+    ii = i
+    repeat {
+      nodeQ = c(ii, nodeQ)
+      ii = as.numeric(ancestor[ii])
+      if (as.character(ancestor[ii]) %in% as.character(regimeShiftNodes)) {
+        colorsVector[ii] = colorsVector[as.integer(ancestor[ii])]
+        break}
+      if (colorsVector[as.integer(ancestor[ii])] != "") {
+        colorsVector[ii] = colorsVector[as.integer(ancestor[ii])]
+        break} }
+    
+    for(j in nodeQ) {
+      colorsVector[j] = colorsVector[as.integer(ancestor[j])] } 
+      
+      } # closes for(i in 1:length(ancestor)) loop 
+
+      # a hack to fix a problem I don't understand... with the undesired side effect that it colors the stem of some subtrees rather than the crown as originally written
+      for(i in 1:length(colorsVector)) if(colorsVector[i] == "") colorsVector[i] <- as.character(i) 
+      
+      # colors terminal branches if any terminal branches are in the regimeShiftNodes
+      for(i in regimeShiftNodes) if(i %in% tree at term) colorsVector[as.numeric(i)] <- as.character(i)
+      colorsVector <- as.factor(colorsVector)
+      names(colorsVector) <- tree at nodes
+  return(colorsVector) }
+
+regimeMaker <- function(ouchTrees, regMatrix, nodeMembers) {
+## supplants the old 'allPossibleRegimes'
+## takes a list of ouchtree objects, a regimeMatrix ouput, and a list of nodeMembers (the taxa definining each node of interest)
+## Value:
+##  regList = a list of nodes defining the change points for each tree (i.e., a list of lists)
+##  nodeMatrix = a matrix of trees (rows) by nodes (columns) indicating whether the node is present in each tree
+  
+  # set up variables
+  numTrees <- length(ouchTrees)
+  numNodes <- length(nodeMembers)
+  if(numNodes != dim(regMatrix)[2]) stop('Number of nodes (columns) in regMatrix must equal number of items in nodeMembers list')
+  nodeMatrix <- matrix(NA, nrow = numTrees, ncol = numNodes, dimnames = list(seq(numTrees), dimnames(regMatrix)[[2]]))
+  changeNodes <- list(numTrees)
+  regList <- list(numTrees)
+  regMatrices <- list(numTrees)
+  
+  # fill outdata
+  for(i in seq(numNodes)) nodeMatrix[, i] <- unlist(lapply(ouchTrees, isMonophyletic, taxa = nodeMembers[[i]]))
+  for(i in seq(numTrees)) {
+    tree <- ouchTrees[[i]]
+    regMatrices[[i]] <- regMatrix * as.numeric(matrix(nodeMatrix[i, ], dim(regMatrix)[1], dim(regMatrix)[2], byrow = TRUE)) # multiplies regMatrix by nodes present
+    regMatrices[[i]][1:(dim(regMatrices[[i]])[1] - 1), ][which(apply(regMatrices[[i]][1:(dim(regMatrices[[i]])[1] - 1), ], 1, sum) == 0), ] <- rep(NA, numNodes) # set to NA regimes that have no nodes, except for OU1 model
+    regMatrices[[i]][duplicated(apply(regMatrices[[i]], 1, as.decimal)), ] <- rep(NA, numNodes) ## set to NA non-unique regimes
+    dimnames(regMatrices[[i]]) <- list(seq(dim(regMatrices[[i]])[1]), dimnames(regMatrices[[i]])[[2]])
+    numTreeRegs <- dim(regMatrices[[i]])[1]
+    treeRegs <- list(numTreeRegs) # this will be assigned to regList[[i]]
+    nodesVector <- unlist(lapply(nodeMembers, mrcaOUCH, tree = ouchTrees[[i]])) # as written, gets the MRCA for even invalid nodes just so indexing stays right
+    for(j in seq(numTreeRegs)) {
+      if(any(is.na(regMatrices[[i]][j, ]))) treeRegs[[j]] <- NA
+      else {
+        treeRegs[[j]] <- as.factor(paintBranches(c("1", nodesVector[as.logical(regMatrices[[i]][j, ])]), tree))
+        names(treeRegs[[j]]) <- tree at nodes
+      }
+    }
+    regList[[i]] <- treeRegs
+  }
+  regMatrices$overall <- regMatrix # this is the matrix that includes all regimes without regard to any tree
+  outdata <- list(regList = regList, nodeMatrix = nodeMatrix, regMatrix = regMatrices)
+  return(outdata)
+}
+
+regimeMatrix <- function(n, maxNodes) {
+## recursive function that returns the same thing as oldRegimeMatrix, but much more efficient, at least for small maxNodes
+## actually, it appears to be more efficient even at n = maxNodes
+  if(n == 1) return(matrix(1:0, nrow = 2, ncol = 1))
+  outmat <- matrix(NA, nrow = 0, ncol = n)
+  for (i in 1:(n-1)) {
+    temp <- c(rep(0, (i-1)), 1)
+    remainder <- n - i
+    if (maxNodes > 1 && remainder > 0) {
+      nextMat <- regimeMatrix(remainder, maxNodes - 1)
+      temp <- cbind(matrix(temp, dim(nextMat)[1], length(temp), byrow = TRUE), nextMat)
+      }
+    else temp[(i+1):n] <- rep(0, length((i+1):n))
+    outmat <- rbind(outmat, temp)
+  }
+  outmat <- rbind(outmat, c(rep(0, n-1), 1))
+  outmat <- rbind(outmat, rep(0,n))
+  dimnames(outmat) = list(seq(dim(outmat)[1]), seq(dim(outmat)[2]))
+  return(outmat)
+}
+
+as.decimal <- function(n) {
+# takes a binary vector and makes it a decimal
+  digits <- length(n)
+  result <- 0
+  for(i in digits:1) result <- result + n[i] * 2 ^ (digits - i)
+  result
+}

Modified: pkg/R/regimes.R
===================================================================
--- pkg/R/regimes.R	2011-03-11 16:29:30 UTC (rev 240)
+++ pkg/R/regimes.R	2011-03-11 22:09:10 UTC (rev 241)
@@ -113,7 +113,7 @@
   regMatrices <- list(numTrees)
   
   # fill outdata
-  for(i in seq(numNodes)) nodeMatrix[, i] <- unlist(lapply(ouchTrees, isMonophyletic, taxa = nodeMembers[[i]]))
+  for(i in seq(numNodes)) nodeMatrix[, i] <- unlist(lapply(ouchTrees, isMonophyletic, taxa = nodeMembers[[i]])) # each column of nodeMatrix indicates whether a node is present or absent for that clade; works
   for(i in seq(numTrees)) {
     tree <- ouchTrees[[i]]
     regMatrices[[i]] <- regMatrix * as.numeric(matrix(nodeMatrix[i, ], dim(regMatrix)[1], dim(regMatrix)[2], byrow = TRUE)) # multiplies regMatrix by nodes present

Modified: pkg/R/summarizingAnalyses.R
===================================================================
--- pkg/R/summarizingAnalyses.R	2011-03-11 16:29:30 UTC (rev 240)
+++ pkg/R/summarizingAnalyses.R	2011-03-11 22:09:10 UTC (rev 241)
@@ -5,7 +5,7 @@
 # March 2011 - summary.hansenBatch found to give incorrect answers with multiple trees; corrected
 # also changed the weighting so that model averaging can be by AICc, AIC, or BIC
 
-summary.hansenBatch <- function(object, ic = 'AICc', ...){
+summary.hansenBatch <- function(object, ic = 'aicc', ...){
 ## items in output: hansens, regimeList, regimeMatrix
 ## ic = choice of information criterion weight to use in model averaging
   hansenBatch <- object
@@ -16,10 +16,13 @@
   nnodes <- length(nodeSums) # number of nodes being studied
   nodes <- dimnames(hansenBatch$regMatrix$overall)[[2]] # grab the overall regMatrix, which includes all possible nodes
   sigmaSqVector <- numeric(ntrees) # vector to capture model-averaged sigma^2 for each tree
-  sqrt.alphaVector <- numeric(ntrees) # vector to capture model-averaged sqrt.alpha for each tree
+  alphaVector <- numeric(ntrees) # vector to capture model-averaged sqrt.alpha for each tree # s/b sqrt.alpha
   modelsMatrix <- vector('list', ntrees) # list of matrices, indexed by tree, holding the weight for each model
-  matrixRows <- c('AIC.weight', 'AICc.weight', 'BIC.weight') # rows in the matrix
+  matrixRows <- c('AICwi', 'AICcwi', 'BICwi') # rows in the matrix
   nodeWeightsSummed <- matrix(0, nrow = length(matrixRows), ncol = nnodes, dimnames = list(matrixRows, nodes)) # holds node weights summed; zero-filled b/c it is a sum?
+  icMats <- vector('list', length(matrixRows))
+  names(icMats) <- matrixRows
+  for(i in matrixRows) icMats[[i]] <- matrix(NA, nrow = ntrees, ncol = nnodes, dimnames = list(NULL, nodes))
   thetaMatrix <- matrix(NA, nrow = ntrees, 
                             ncol = dim(hansenBatch$thetas[[1]])[2], 
                             dimnames = list(1:ntrees, dimnames(hansenBatch$thetas[[1]])[[2]])
@@ -28,20 +31,25 @@
     modelsMatrix[[tree]] <- cbind(icObject[[tree]]$AICwi, icObject[[tree]]$AICcwi, icObject[[tree]]$BICwi)
     dimnames(modelsMatrix[[tree]]) <- list( dimnames(hansenBatch$hansens[[1]])[[1]], c("AICwi", "AICcwi", "BICwi"))
     icWeight <- switch(ic,
-	              bic = icObject[[tree]]$BICwi,
-				  aic = icObject[[tree]]$AICwi,
-				  aicc = icObject[[tree]]$AICcwi)
-    for(i in seq(nnodes)) {
-      modelsMatrixSubset <- modelsMatrix[[tree]][hansenBatch$regMatrix$overall[, nodes[i]] == 1, ] # subset models that contain node i
-      if(identical(dim(modelsMatrixSubset), NULL)) # is modelsMatrixSubset a 1-d vector? if so then:
-        nodeWeightsSummed[, nodes[i]] <- nodeWeightsSummed[, nodes[i]] + replace.matrix(modelsMatrixSubset, NA, 0) # because extracting a single row yields a vector, and dim returns NULL for a vector
-      else nodeWeightsSummed[, nodes[i]] <- nodeWeightsSummed[, nodes[i]] + colSums(modelsMatrixSubset, na.rm = TRUE)
-	  }
+    			bic = icObject[[tree]]$BICwi,
+			aic = icObject[[tree]]$AICwi,
+			aicc = icObject[[tree]]$AICcwi
+			)
+    for(i in matrixRows) icMats[[i]][tree, ] <-  colSums(replace(matrix(modelsMatrix[[tree]][, i], nmodels, nnodes), NA, 1) * hansenBatch$regMatrix$overall, na.rm = T) # somewhat vectorized
+    nodeWeightsSummed[i, ] <- icMats[[i]][tree, ] + nodeWeightsSummed[i, ] 
+    #OLD:
+    #for(i in seq(nnodes)) {
+    #  modelsMatrixSubset <- modelsMatrix[[tree]][hansenBatch$regMatrix$overall[, nodes[i]] == 1, ] # subset models that contain node i
+    #  if(identical(dim(modelsMatrixSubset), NULL)) # is modelsMatrixSubset a 1-d vector? if so then:
+    #    nodeWeightsSummed[, nodes[i]] <- nodeWeightsSummed[, nodes[i]] + replace.matrix(modelsMatrixSubset, NA, 0) # because extracting a single row yields a vector, and dim returns NULL for a vector
+    #  else nodeWeightsSummed[, nodes[i]] <- nodeWeightsSummed[, nodes[i]] + colSums(modelsMatrixSubset, na.rm = TRUE)
+    #	  }
+    
     sigmaSqVector[tree] <- weighted.mean(hansenBatch$hansens[[tree]][, 'sigma.squared'], icWeight, na.rm = TRUE)
     if(hansenBatch$brown) icOU <- icWeight[1: (length(icWeight) - 1)]
-    sqrt.alphaVector[tree] <- ifelse(hansenBatch$brown, 
-                                weighted.mean(hansenBatch$hansens[[tree]][1:(nmodels - 1), 'theta / sqrt.alpha'], icOU, na.rm = TRUE),
-                                weighted.mean(hansenBatch$hansens[[tree]][ , 'theta / sqrt.alpha'], icWeight, na.rm = TRUE) 
+    alphaVector[tree] <- ifelse(hansenBatch$brown, # s/b sqrt.alpha 
+                                weighted.mean(hansenBatch$hansens[[tree]][1:(nmodels - 1), 'theta / alpha'], icOU, na.rm = TRUE), # s/b sqrt.alpha
+                                weighted.mean(hansenBatch$hansens[[tree]][ , 'theta / alpha'], icWeight, na.rm = TRUE) # s/b sqrt.alpha
                                 )
     if(hansenBatch$brown) w <- icOU else w <- icWeight
     thetaMatrix[tree, ] <- apply(hansenBatch$thetas[[tree]], 2, 
@@ -69,9 +77,9 @@
   #  if(identical(dim(modelsMatrixSubset), NULL)) kMatrix[, i] <- modelsMatrixSubset # is modelsMatrixSubset a 1-d vector?
   #  else kMatrix[, i] <- apply(modelsMatrixSubset, 2, sum) 
   #}
-  modelAvgAlpha <- mean(sqrt.alphaVector, na.rm = TRUE)
+  modelAvgAlpha <- mean(alphaVector, na.rm = TRUE) # s/b sqrt.alpha
   modelAvgSigmaSq <- mean(sigmaSqVector, na.rm = TRUE)
-  outdata <- list(modelsMatrix = modelsMatrix, nodeWeightsMatrix = list(unnormalized = nodeWeightsMatrix.unnormalized, allNodes = nodeWeightsMatrix.allNodes), modelAvgAlpha = modelAvgAlpha, modelAvgSigmaSq = modelAvgSigmaSq, thetaMatrix = thetaMatrix)
+  outdata <- list(modelsMatrix = modelsMatrix, nodeWeightsMatrix = list(unnormalized = nodeWeightsMatrix.unnormalized, allNodes = nodeWeightsMatrix.allNodes), modelAvgAlpha = modelAvgAlpha, modelAvgSigmaSq = modelAvgSigmaSq, thetaMatrix = thetaMatrix, icMats = icMats)
   class(outdata) <- 'hansenSummary'
   return(outdata)
 }

Modified: pkg/man/regimeMaker.Rd
===================================================================
--- pkg/man/regimeMaker.Rd	2011-03-11 16:29:30 UTC (rev 240)
+++ pkg/man/regimeMaker.Rd	2011-03-11 22:09:10 UTC (rev 241)
@@ -36,7 +36,8 @@
     cells indicate by \code{TRUE} or \code{FALSE} whether a node is present in each of the trees being analyzed.
     }
   \item{regMatrix}{
-    A list of \code{regimeMatrix}-format matrices that define the models applicable for each tree in \code{ouchTrees}, with missing nodes designated by NAs.
+    A list of \code{regimeMatrix}-format matrices that define the models applicable for each tree in \code{ouchTrees}, with missing nodes designated by 0s and 
+    duplicate rows blanked out with NA.
     }
 }
 \author{



More information about the Mattice-commits mailing list