[Mattice-commits] r249 - in pkg: R man

Mon Mar 14 20:53:32 CET 2011

Author: andrew_hipp
Date: 2011-03-14 20:53:28 +0100 (Mon, 14 Mar 2011)
New Revision: 249

Modified:
   pkg/R/batchHansen.R
   pkg/R/regimes.R
   pkg/R/summarizingAnalyses.R
   pkg/man/summary.hansenBatch.Rd
Log:
Everything is working properly now! v1.0-1 is ready for packaging.

Modified: pkg/R/batchHansen.R
===================================================================

--- pkg/R/batchHansen.R	2011-03-14 18:25:41 UTC (rev 248)
+++ pkg/R/batchHansen.R	2011-03-14 19:53:28 UTC (rev 249)
@@ -83,7 +83,7 @@
     thetas[[i]] <- hb$thetas
     message(paste("Tree",i,"of",length(ouchTrees),"complete", "\n-----------------------------"))
   }
-  outdata <- list(hansens = hansenBatch, thetas = thetas, regList = ar$regList, regMatrix = ar$regMatrix, nodeMatrix = ar$nodeMatrix, brown = brown, N = ouchTrees[[i]]@nterm, analysisDate = date(), call = match.call())
+  outdata <- list(hansens = hansenBatch, thetas = thetas, regList = ar$regList, regMatrix = ar$regMatrix, nodeMatrix = ar$nodeMatrix, brown = brown, N = ouchTrees[[i]]@nterm, nodeNames = nodeNames, analysisDate = date(), call = match.call())
   class(outdata) <- 'hansenBatch'
   return(outdata)}
 

Modified: pkg/R/regimes.R
===================================================================
--- pkg/R/regimes.R	2011-03-14 18:25:41 UTC (rev 248)
+++ pkg/R/regimes.R	2011-03-14 19:53:28 UTC (rev 249)
@@ -5,7 +5,7 @@
 # Initially written for ouch v 1.2-4
 # updated to ouch >= 2.4-2 Nov 2008
 # updated to accommodate multiple trees Nov 2008
-# updated to correct scoring of NAs in regimeMaker, 14 mar 2011
+# updated to correct scoring of NAs in regimeMaker and fixed use of alpha vs. sqrt.alpha (ouch2), 14 mar 2011
 
 regimeVectors <-
 # This is the basic call to get the full range of regimes over a set of trees

Modified: pkg/R/summarizingAnalyses.R
===================================================================
--- pkg/R/summarizingAnalyses.R	2011-03-14 18:25:41 UTC (rev 248)
+++ pkg/R/summarizingAnalyses.R	2011-03-14 19:53:28 UTC (rev 249)
@@ -2,20 +2,11 @@
 # FUNCTIONS FOR SUMMARIZING ANALYSES
 # ---------------------------------------------------------------------
 
-# March 2011 - summary.hansenBatch found to give incorrect answers with multiple trees; corrected
-# also changed the weighting so that model averaging can be by AICc, AIC, or BIC
+# March 2011 - summary.hansenBatch found to give incorrect answers with multiple trees; corrected. Also:
+#  - changed the weighting so that model averaging can be by AICc, AIC, or BIC
+#  - added support for nodeNames
 
-## 11 mar 11 note: trying to figure out why the ic weights are not being averaged correctly... tallying trees wrong somewhere? 
-## Also changed documentation: regimeMaker does not return an NA where a node is missing, but a 0. NAs are used for rows that are all zeros or duplicates.
-## It seems that the summary code is giving a probability for every node for every tree, even when the tree does not have the node. Something is wrong here. Also, models are 
-## being treated as though they were commensurate across trees when they are not. Models that are defined for nodes that are not absent for a tree still get a probability, as though the node were 
-## there. This is not right. If a tree has all the nodes in a model, that model should be analyzed. If not, the model should not be analyzed. I think this is what is causing the problem,
-## because I am summing the probabilities for each node based on the assumption that a model that possesses a node has only been analyzed on trees that have that node.
-## In effect, I am changing the model across trees rather than deleting models from trees where they don't apply. This is undoubtedly the problem. Probabilities are consequently
-## inflated across trees. regimeMaker SHOULD return NAs for each model that is missing any nodes, in addition to duplicate models. I need to look back at the logic and see what I was
-## thinking on this.
-
-summary.hansenBatch <- function(object, ic = 'aicc', ...){
+summary.hansenBatch <- function(object, ic = 'AICc', ...){
 ## items in output: hansens, regimeList, regimeMatrix
 ## ic = choice of information criterion weight to use in model averaging
   hansenBatch <- object
@@ -41,25 +32,19 @@
     modelsMatrix[[tree]] <- cbind(icObject[[tree]]$AICwi, icObject[[tree]]$AICcwi, icObject[[tree]]$BICwi)
     dimnames(modelsMatrix[[tree]]) <- list( dimnames(hansenBatch$hansens[[1]])[[1]], c("AICwi", "AICcwi", "BICwi"))
     icWeight <- switch(ic,
-    			bic = icObject[[tree]]$BICwi,
-			aic = icObject[[tree]]$AICwi,
-			aicc = icObject[[tree]]$AICcwi
+    			BIC = icObject[[tree]]$BICwi,
+			AIC = icObject[[tree]]$AICwi,
+			AICc = icObject[[tree]]$AICcwi
 			)
-    for(i in matrixRows) icMats[[i]][tree, ] <-  colSums(replace(matrix(modelsMatrix[[tree]][, i], nmodels, nnodes), NA, 1) * hansenBatch$regMatrix$overall, na.rm = T) # somewhat vectorized
-    nodeWeightsSummed[i, ] <- icMats[[i]][tree, ] + nodeWeightsSummed[i, ] 
-    #OLD:
-    #for(i in seq(nnodes)) {
-    #  modelsMatrixSubset <- modelsMatrix[[tree]][hansenBatch$regMatrix$overall[, nodes[i]] == 1, ] # subset models that contain node i
-    #  if(identical(dim(modelsMatrixSubset), NULL)) # is modelsMatrixSubset a 1-d vector? if so then:
-    #    nodeWeightsSummed[, nodes[i]] <- nodeWeightsSummed[, nodes[i]] + replace.matrix(modelsMatrixSubset, NA, 0) # because extracting a single row yields a vector, and dim returns NULL for a vector
-    #  else nodeWeightsSummed[, nodes[i]] <- nodeWeightsSummed[, nodes[i]] + colSums(modelsMatrixSubset, na.rm = TRUE)
-    #	  }
-    
+     for(i in matrixRows) {
+	  icMats[[i]][tree, ] <- colSums(modelsMatrix[[tree]][, i] * hansenBatch$regMatrix$overall, na.rm = T)
+      nodeWeightsSummed[i, ] <- icMats[[i]][tree, ] + nodeWeightsSummed[i, ]
+	  } # close i
     sigmaSqVector[tree] <- weighted.mean(hansenBatch$hansens[[tree]][, 'sigma.squared'], icWeight, na.rm = TRUE)
     if(hansenBatch$brown) icOU <- icWeight[1: (length(icWeight) - 1)]
-    alphaVector[tree] <- ifelse(hansenBatch$brown, # s/b sqrt.alpha 
-                                weighted.mean(hansenBatch$hansens[[tree]][1:(nmodels - 1), 'theta / alpha'], icOU, na.rm = TRUE), # s/b sqrt.alpha
-                                weighted.mean(hansenBatch$hansens[[tree]][ , 'theta / alpha'], icWeight, na.rm = TRUE) # s/b sqrt.alpha
+    alphaVector[tree] <- ifelse(hansenBatch$brown, 
+                                weighted.mean(hansenBatch$hansens[[tree]][1:(nmodels - 1), 'theta / alpha'], icOU, na.rm = TRUE),
+                                weighted.mean(hansenBatch$hansens[[tree]][ , 'theta / alpha'], icWeight, na.rm = TRUE)
                                 )
     if(hansenBatch$brown) w <- icOU else w <- icWeight
     thetaMatrix[tree, ] <- apply(hansenBatch$thetas[[tree]], 2, 
@@ -74,22 +59,11 @@
   # in this matrix, the weight for each node is averaged over all trees
   nodeWeightsMatrix.allNodes <- nodeWeightsSummed / ntrees 
   
-  # sum over number of parameters
-  # create a vector of sums that tells us how many categories there are for each model: dof = sum(nodes) + 1 [because a node indicates a change in 
-  #   regime, thus the total number of thetas = nodes + 1] + sqrt.alpha + sigma = sum(nodes) + 3; for Brownian motion model, dof = 2
-  
-  #nodeSums <- apply(hansenBatch$regMatrix$overall, 1, sum) + 3
-  #if(hansenBatch$brown) nodeSums['brown'] <- 2
-  #kCats <- sort(unique(nodeSums)) # and just give us the unique degree-of-freedom categories, sorted
-  #kMatrix <- matrix(NA, nrow = length(matrixRows), ncol = length(kCats), dimnames = list(matrixRows, as.character(kCats))) # make the empty kMatrix
-  #for(i in as.character(kCats)) {
-  #  modelsMatrixSubset <- weightsMatrix.allNodes[nodeSums == i, ] 
-  #  if(identical(dim(modelsMatrixSubset), NULL)) kMatrix[, i] <- modelsMatrixSubset # is modelsMatrixSubset a 1-d vector?
-  #  else kMatrix[, i] <- apply(modelsMatrixSubset, 2, sum) 
-  #}
-  modelAvgAlpha <- mean(alphaVector, na.rm = TRUE) # s/b sqrt.alpha
-  modelAvgSigmaSq <- mean(sigmaSqVector, na.rm = TRUE)
-  outdata <- list(modelsMatrix = modelsMatrix, nodeWeightsMatrix = list(unnormalized = nodeWeightsMatrix.unnormalized, allNodes = nodeWeightsMatrix.allNodes), modelAvgAlpha = modelAvgAlpha, modelAvgSigmaSq = modelAvgSigmaSq, thetaMatrix = thetaMatrix, icMats = icMats)
+  if(!identical(object$nodeNames, NULL)) dimnames(nodeWeightsMatrix.unnormalized)[[2]] <- dimnames(nodeWeightsMatrix.allNodes)[[2]] <- object$nodeNames
+  modelAvgAlpha <- c(mean(alphaVector, na.rm = TRUE), quantile(alphaVector, c(0.025, 0.975)))
+  modelAvgSigmaSq <- c(mean(sigmaSqVector, na.rm = TRUE),quantile(sigmaSqVector, c(0.025, 0.975)))
+  names(modelAvgAlpha) <- names(modelAvgSigmaSq) <- c('mean', 'lower.CI', 'upper.CI')
+  outdata <- list(modelsMatrix = modelsMatrix, nodeWeightsMatrix = list(unnormalized = nodeWeightsMatrix.unnormalized, allNodes = nodeWeightsMatrix.allNodes), modelAvgAlpha = modelAvgAlpha, modelAvgSigmaSq = modelAvgSigmaSq, thetaMatrix = thetaMatrix, icMats = icMats, ic = ic)
   class(outdata) <- 'hansenSummary'
   return(outdata)
 }
@@ -110,12 +84,11 @@
   print(hansenSummary$nodeWeightsMatrix$allNodes)
   cat("\nSupport conditioned on trees that possess the node\n")
   print(hansenSummary$nodeWeightsMatrix$unnormalized)
-  #message("\nESTIMATED SUPPORT FOR NUMBER OF PARAMETERS IN THE MODEL")
-  #message("The properties of this support value have not been studied and are likely to be biased strongly toward the median value of\nK, as K is largest at the median values (they are distributed according to Stirling numbers of the first kind).")
-  #print(hansenSummary$kMatrix)
-  cat("\nMODEL-AVERAGED PARAMETERS")
-  cat("\nsqrt.alpha =", hansenSummary$modelAvgAlpha)
-  cat("\nsigma^2 =", hansenSummary$modelAvgSigmaSq)
+  cat("\nMODEL-AVERAGED PARAMETERS BASED ON", x$ic, "WEIGHTS\n")
+  cat("\nalpha:\n")
+  print(hansenSummary$modelAvgAlpha)
+  cat("\nsigma^2:\n")
+  print(hansenSummary$modelAvgSigmaSq)
   if(any(dim(hansenSummary$thetaMatrix) > 12)) message(paste("\ntheta matrix is too long to display; access through the summary object"))
   else {
     cat("\ntheta matrix, with branches as the columns and trees as the rows:\n")

Modified: pkg/man/summary.hansenBatch.Rd
===================================================================
--- pkg/man/summary.hansenBatch.Rd	2011-03-14 18:25:41 UTC (rev 248)
+++ pkg/man/summary.hansenBatch.Rd	2011-03-14 19:53:28 UTC (rev 249)
@@ -7,7 +7,7 @@
   \code{print.hansenSummary} is a print method for a \code{hansenSummary} object.
 }
 \usage{
-  \method{summary}{hansenBatch}(object, ic = 'aicc', ...)
+  \method{summary}{hansenBatch}(object, ic = 'AICc', ...)
   \method{print}{hansenSummary}(x, ...)
 }
 \arguments{
@@ -15,7 +15,7 @@
   Output from \code{runBatchHansen}.
   }
   \item{ic}{
-  Choice of information criterion to use in model-averaging of parameters. Choices are \code{aicc} (default), \code{aic}, or \code{bic}.
+  Choice of information criterion to use in model-averaging of parameters. Choices are \code{AICc} (default), \code{AIC}, or \code{BIC}.
   }
   \item{x}{
   Output from \code{summary.hansenBatch}.