[Mattice-commits] r31 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Nov 17 07:33:04 CET 2008


Author: andrew_hipp
Date: 2008-11-17 07:33:03 +0100 (Mon, 17 Nov 2008)
New Revision: 31

Modified:
   pkg/R/batchHansen.R
   pkg/R/informationCriterion.R
   pkg/R/summarizingAnalyses.R
   pkg/R/treeTraversal.R
Log:
fixes to various files so that summary now gives meaningful sums by node and K on a hansenBatch object
Somewhere along the way, the single-optimum OU model is getting an extra summary line... have to remove this

Modified: pkg/R/batchHansen.R
===================================================================
--- pkg/R/batchHansen.R	2008-11-14 21:00:03 UTC (rev 30)
+++ pkg/R/batchHansen.R	2008-11-17 06:33:03 UTC (rev 31)
@@ -68,8 +68,9 @@
     if(stopFlag) stop("Correct discrepancies between trees and data and try again!")
     }
 
-  hansenBatch = vector("list", length(ouchTrees))
-  treeCounter = 0
+  hansenBatch <- list(length(ouchTrees))
+  regimeLists <- list(length(ouchTrees))
+  regimeMatrices <- list(length(ouchTrees))
   for (i in 1:length(ouchTrees)) {
     tree <- ouchTrees[[i]]
     rl = regimeVectors(tree, cladeMembersList, maxNodes)
@@ -95,11 +96,15 @@
     ## send it off to batchHansen and just stick the results in hansenBatch... this won't work as the number of regimes gets large, 
     ##   so there should be some option here to just hang onto the coefficients for each run (i.e., hang onto 'coef(hansen(...))' rather than 'hansen(...)')
     ##   there could also be an option to save the entire object as a series of files in addition to hanging onto 
-    hansenBatch[[i]] = batchHansen(tree, dataIn, rl$regimeList, regimeTitles, brown, ...)
-    message(paste("Tree",i,"of",length(ouchTrees),"complete"))}
+    hansenBatch[[i]] <- batchHansen(tree, dataIn, rl$regimeList, regimeTitles, brown, ...)
+    regimeLists[[i]] <- rl$regimeList
+    regimeMatrices[[i]] <- rl$regimeMatrix
+    message(paste("Tree",i,"of",length(ouchTrees),"complete"))
+  }
     
     ## right now no summary is returned; one is needed, summarizing over trees what is summarized for each tree in batchHansen
-  outdata <- list(hansens = hansenBatch, regimeList = rl$regimeList, regimeMatrix = rl$regimeMatrix)
+    ## the below returns presuppose a single tree
+  outdata <- list(hansens = hansenBatch, regimeLists = regimeLists, regimeMatrices = regimeMatrices, brown = brown, N = ouchTrees[[i]]@nterm, analysisDate = date(), call = match.call())
   class(outdata) <- 'hansenBatch'
   return(outdata)}
 
@@ -114,7 +119,7 @@
   n <- tree at nterm
   ## set up a matrix that returns lnL, K, sigmasq, theta0, and alpha for every model; thetas will go along into a list that is indexed by model
   hansenOptima <- list(length(regimeTitles))
-  variables <- c("loglik", "dof", "sigma.squared", "theta / alpha") # it's important that 'alpha' go last so that the matrix fills up right when the brownian motion model is used
+  variables <- c("loglik", "dof", "sigma.squared", "theta / alpha") # only display variables... set the selecting variables in the next two lines
   brVars <- c("loglik", "dof", "sigma.squared", "theta")
   haVars <- c("loglik", "dof", "sigma.squared", "alpha")
   treeData <- matrix(data = NA, nrow = length(regimeTitles), ncol = length(variables), dimnames = list(regimeTitles,variables))

Modified: pkg/R/informationCriterion.R
===================================================================
--- pkg/R/informationCriterion.R	2008-11-14 21:00:03 UTC (rev 30)
+++ pkg/R/informationCriterion.R	2008-11-17 06:33:03 UTC (rev 31)
@@ -16,7 +16,16 @@
   BICwi <- as.vector(lapply(deltaBIC, function(x, allDelta) {exp(-0.5 * x) / sum(exp(-0.5 * allDelta))}, allDelta = deltaBIC), mode = "numeric")
   return(list(names = names, u = u, K = K, AIC = AIC, AICc = AICc, BIC = BIC, AICwi = AICwi, AICcwi = AICcwi, BICwi = BICwi)) }
 
-informationCriterion.hansenBatch <- function(hansenBatch)
+informationCriterion.hansenBatch <- function(hansenBatch) {
 ## call informationCriterion for a 'hansen.batch' object
 ## recall that there are a bunch of trees in here... summarize over these
-
+## for right now, just returns AIC, AICc, and BIC weights for the trees analyzed in a hansenBatch object
+##       loglik dof sigma.squared theta / alpha
+  outdata <- list(length(hansenBatch$hansens))
+  N = hansenBatch$N
+  for(i in 1:length(outdata)) {
+    temp <- hansenBatch$hansens[[i]]
+    outdata[[i]] <- informationCriterion(lnL = temp[, 'loglik'], K = temp[, 'dof'], n = N, names = row.names(temp))
+    }
+  outdata
+}
\ No newline at end of file

Modified: pkg/R/summarizingAnalyses.R
===================================================================
--- pkg/R/summarizingAnalyses.R	2008-11-14 21:00:03 UTC (rev 30)
+++ pkg/R/summarizingAnalyses.R	2008-11-17 06:33:03 UTC (rev 31)
@@ -1,30 +1,52 @@
 # ---------------------------------------------------------------------
-# FUNCTIONS FOR PERFORMING SUMMARIZING ANALYSES
+# FUNCTIONS FOR SUMMARIZING ANALYSES
 # ---------------------------------------------------------------------
-# Copied from functions used in Hipp 2007 Evolution paper
-# Last checked in ouch v 1.2-4
 # functions included in this file:
-# 1. hansenStats
+# 1. summary.hansenBatch
 
-
-hansenStats <-
-# Returns a list of statistics for a runBatchHansenFit list
-function(hansenRunList) {
-  columnNames = c("maxAlpha","bestFitModel", "maxAICweight")
-  rowNames = as.character(1:length(hansenRunList))
-  hansenStatsTemp = matrix(data = NA, ncol = length(columnNames), nrow = length(hansenRunList), dimnames = list(rowNames, columnNames))
-  counter = 0
-  for (i in hansenRunList) {
-    counter = counter + 1
-    hansenStatsTemp[counter, "maxAlpha"] = max(i[0:16, "alpha"])
-    hansenStatsTemp[counter, "bestFitModel"] = match(max(i[0:16, "AICweight"]), i[0:16, "AICweight"])
-    hansenStatsTemp[counter, "maxAICweight"] = max(i[0:16, "AICweight"])}
-  return(hansenStatsTemp)}
-
-
 summary.hansenBatch <- function(hansenBatch){
 ## items in output: hansens, regimeList, regimeMatrix
 ## the summary will eventually sum weights over all nodes over all trees
 ## for now, only doing first tree
+  # 0. Get information criterion weights for all models
+  icObject <- informationCriterion.hansenBatch(hansenBatch)
+  matrixRows <- c('aic', 'aicc', 'bic')
   
+  tree <- 1 # for now ignore all but the first tree
+  
+  # 1. sum over nodes
+  aicList <- icObject[[tree]]$AICwi
+  aiccList <- icObject[[tree]]$AICcwi
+  bicList <- icObject[[tree]]$BICwi
+  modelsMatrix <- cbind(aicList, aiccList, bicList) # value: modelsMatrix
+  dimnames(modelsMatrix) = list(icObject[[tree]]$names, matrixRows)
+  
+  nonBrownWI <- 1 # defaults to no brownian motion model weights in case none are present
+  if(hansenBatch$brown) {
+    brownWeights <- modelsMatrix['brown', ] # value: brownWeights
+    nonBrownWI <- 1-brownWeights # this is just to make it easy to normalize the non-Brownian OU models
+    }
+
+  nodes <- dimnames(hansenBatch$regimeMatrices[[tree]])[[2]]
+  nodeWeightsMatrix <- matrix(NA, nrow = length(matrixRows), ncol = length(nodes), dimnames = list(matrixRows, nodes)) # value: nodeWeightsMatrix
+  for(i in 1:length(nodes)) {
+    modelsMatrixSubset <- modelsMatrix[hansenBatch$regimeMatrices[[tree]][, i] == 1, ]
+    if(identical(dim(modelsMatrixSubset), NULL)) nodeWeightsMatrix[, i] <- modelsMatrixSubset / nonBrownWI # because extracting a single row yields a vector, and dim returns NULL for a vector
+    else nodeWeightsMatrix[, i] <- apply(modelsMatrixSubset, 2, sum) / nonBrownWI
+    }
+
+  # 2. sum over number of parameters
+  
+  kCats <- sort(unique(hansenBatch$hansens[[tree]][, 'dof']))
+  kMatrix <- matrix(NA, nrow = length(matrixRows), ncol = length(kCats), dimnames = list(matrixRows, as.character(kCats))) #value: kMatrix
+  for(i in as.character(kCats)) {
+    modelsMatrixSubset <- modelsMatrix[hansenBatch$hansens[[tree]][, 'dof'] == i, ]
+    if(identical(dim(modelsMatrixSubset), NULL)) kMatrix[, i] <- modelsMatrixSubset
+    else kMatrix[, i] <- apply(modelsMatrixSubset, 2, sum) 
+    }
+  
+  outdata <- list(brownWeights = brownWeights, modelsMatrix = modelsMatrix, nodeWeightsMatrix = nodeWeightsMatrix, kMatrix = kMatrix)
+  print(brownWeights)
+  print(nodeWeightsMatrix)
+  return(outdata)
   }
\ No newline at end of file

Modified: pkg/R/treeTraversal.R
===================================================================
--- pkg/R/treeTraversal.R	2008-11-14 21:00:03 UTC (rev 30)
+++ pkg/R/treeTraversal.R	2008-11-17 06:33:03 UTC (rev 31)
@@ -112,7 +112,7 @@
 #  CHANGED to "tree" 10 nov 08: "node" and "ancestor" = the standard tree specification vectors of the OUCH-style tree
 #  "tip" = the tip node to trace back
 # Value: a vector of nodes leading from a tip to the root
-# 10 nov 08: changed to just grab the appropriate element from tree at lineages
+# 10 nov 08: changed to just grab tree at lineages and make a vector that fits the old code
 function(tip, tree) {
   ## ------------------ begin ouchtree block -----------------
   ## check to see if tree inherits 'ouchtree'
@@ -147,9 +147,9 @@
 #  "maxNodes" = single number indicating the maximum number of nodes at which a regime can change
 #  "nodeMatrix" = indicates whether to return a binary table for interp or a list of changeNode vectors for analysis
 # Value:
-#    if nodeMatrix = F: a list of changeNode vectors (assumes type = "character"), one for each possible scenario
-#    if nodeMatrix = T: a binary table indicating whether a regime node is present or absent based on allPossibleRegimes output; 
-#                       presumes nodes are labelled 1:n
+#    regimeList = a list of changeNode vectors (assumes type = "character"), one for each possible scenario
+#    regimeMatrix = : a binary table indicating whether a regime node is present or absent based on allPossibleRegimes output; 
+#                     presumes nodes are labeled 1:n
 # 10 nov 08: this function now takes over regimeNodes
 function(changeNodes, maxNodes = NULL, nodeMatrix = F) {
     if(!identical(maxNodes, NULL) && maxNodes > length(changeNodes)) warning(paste(sQuote('maxNodes'), 'cannot be larger than the number of nodes; maxNodes ignored'))
@@ -177,7 +177,7 @@
       )
       for (i in seq(numberOfRegimes)) {
         for (j in seq(length(changeNodes))) {
-          if (is.na(match(j,regime[[i]]))) regimesNameMatrix[i,j] = 0
+          if (is.na(match(changeNodes[j],regime[[i]]))) regimesNameMatrix[i,j] = 0 # changed this so that j indexes changeNodes
           else regimesNameMatrix[i,j] = 1 
         }
       }
@@ -193,8 +193,8 @@
         outlist <- outlist[sapply(outlist, length) <= maxNodes]
         outlist[[length(outlist) + 1]] <- rep("0", length(changeNodes))
         }
-      }
-  outdata <- list(regimeList = outlist, regimeMatrix = outMatrix)
+  #    }
+  outdata <- list(regimeList = outlist, regimeMatrix = outmatrix)
   return(outdata) }
 
 regimeVectors <-
@@ -227,8 +227,8 @@
   regimeMatrix <- apr$regimeMatrix
   regimePaintings = vector("list", length(allRegimes))
   for (i in 1:length(allRegimes)) {
-    allRegimes[[i]] = c("1", allRegimes[[i]])
-    regimePaintings[[i]] = as.factor(paintBranches(tree, allRegimes[[i]], as.character(allRegimes[[i]])))
+    allRegimes[[i]] <- c("1", allRegimes[[i]])
+    regimePaintings[[i]] <- as.factor(paintBranches(tree, allRegimes[[i]], as.character(allRegimes[[i]])))
     names(regimePaintings[[i]]) <- tree at nodes
     message(paste('Created regime',i))}
   outdata <- list(regimeList = regimePaintings, regimeMatrix = regimeMatrix)



More information about the Mattice-commits mailing list