[Mattice-commits] r52 - pkg/R

Fri Nov 21 18:37:46 CET 2008

Author: andrew_hipp
Date: 2008-11-21 18:37:46 +0100 (Fri, 21 Nov 2008)
New Revision: 52

Modified:
   pkg/R/regimes.R
   pkg/R/summarizingAnalyses.R
Log:
summarizingAnalyses.R now works correctly with phylogenetic uncertainty, though the estimate of support partitioned by number of parameters in the model is not corrected.

Modified: pkg/R/regimes.R
===================================================================

--- pkg/R/regimes.R	2008-11-21 16:44:14 UTC (rev 51)
+++ pkg/R/regimes.R	2008-11-21 17:37:46 UTC (rev 52)
@@ -34,7 +34,7 @@
   nnode <- length(cladeMembersList)
   regMatrix <- regimeMatrix(n = nnode, maxNodes = maxNodes)
   apr = regimeMaker(ouchTrees, regMatrix, cladeMembersList)
-  outdata <- list(regList = apr$regList, regMatrix = apr$regMatrices, nodeMatrix = apr$nodeMatrix)
+  outdata <- list(regList = apr$regList, regMatrix = apr$regMatrix, nodeMatrix = apr$nodeMatrix)
   return(outdata) }
 
 paintBranches <-
@@ -131,7 +131,8 @@
     }
     regList[[i]] <- treeRegs
   }
-  outdata <- list(regList = regList, nodeMatrix = nodeMatrix, regMatrices = regMatrices)
+  regMatrices$overall <- regMatrix # this is the matrix that includes all regimes without regard to any tree
+  outdata <- list(regList = regList, nodeMatrix = nodeMatrix, regMatrix = regMatrices)
   return(outdata)
 }
 

Modified: pkg/R/summarizingAnalyses.R
===================================================================
--- pkg/R/summarizingAnalyses.R	2008-11-21 16:44:14 UTC (rev 51)
+++ pkg/R/summarizingAnalyses.R	2008-11-21 17:37:46 UTC (rev 52)
@@ -12,10 +12,11 @@
   # 0. Get information criterion weights for all models
   icObject <- informationCriterion.hansenBatch(hansenBatch)
   nmodels <- dim(hansenBatch$hansens[[1]])[1]
+  ntrees <- length(hansenBatch$hansens)
   matrixRows <- c('AIC.weight', 'AICc.weight', 'BIC.weight')
   outMatrix <- matrix(0, nrow = length(icObject[[1]]$AICwi), ncol = length(matrixRows), dimnames = list(icObject[[1]]$names, matrixRows))
   nmodelsMatrix <- outMatrix # a matrix to divide outMatrix by so that average BICwi are only based on trees that have a node
-  for(tree in 1:length(hansenBatch$hansens)) {
+  for(tree in 1:ntrees) {
     aicList <- icObject[[tree]]$AICwi
     aiccList <- icObject[[tree]]$AICcwi
     bicList <- icObject[[tree]]$BICwi
@@ -34,30 +35,41 @@
     #   nonBrownWI <- 1-brownWeights # this is just to make it easy to normalize the non-Brownian OU models
     # }
   }
-  outMatrix <- outMatrix / nmodelsMatrix
-  print(nmodelsMatrix)
+  weightsMatrix.unnormalized <- outMatrix / nmodelsMatrix # in this matrix, the weight for each model is averaged only over trees
+                                                          # that possess that node; weights will not sum to 1.0
+  weightsMatrix.allNodes <- outMatrix / ntrees # in this matrix, the weight for each model is averaged over all trees, setting weight
+                                               # equal to zero in any trees that lack that model. Weights sum to 1.0, and they
+                                               # factor in the posterior probability (or bootstrap proportion) for each model. 
+                                               # Which to use? they are both informative, but this one has the desirable property of
+                                               # acting being a proper probability distribution, albeit one that confounds clade support
+                                               # with model support.
   
   # 1. sum over nodes
   nodes <- dimnames(hansenBatch$regMatrix[[tree]])[[2]]
-    nodeWeightsMatrix <- matrix(NA, nrow = length(matrixRows), ncol = length(nodes), dimnames = list(matrixRows, nodes)) # value: nodeWeightsMatrix
-    for(i in 1:length(nodes)) {
-      modelsMatrixSubset <- modelsMatrix[hansenBatch$regimeMatrices[[tree]][, i] == 1, ]
-      if(identical(dim(modelsMatrixSubset), NULL)) nodeWeightsMatrix[, i] <- modelsMatrixSubset # because extracting a single row yields a vector, and dim returns NULL for a vector
-      else nodeWeightsMatrix[, i] <- apply(modelsMatrixSubset, 2, sum)
-      }
+  nodeWeightsMatrix.unnormalized <- matrix(NA, nrow = length(matrixRows), ncol = length(nodes), dimnames = list(matrixRows, nodes)) # value: nodeWeightsMatrix
+  nodeWeightsMatrix.allNodes <- nodeWeightsMatrix.unnormalized
+  for(i in 1:length(nodes)) {
+    modelsMatrixSubset <- weightsMatrix.allNodes[hansenBatch$regMatrix$overall[, i] == 1, ]
+    if(identical(dim(modelsMatrixSubset), NULL)) nodeWeightsMatrix.allNodes[, i] <- modelsMatrixSubset # because extracting a single row yields a vector, and dim returns NULL for a vector
+    else nodeWeightsMatrix.allNodes[, i] <- apply(modelsMatrixSubset, 2, sum)
+    
+    modelsMatrixSubset <- weightsMatrix.unnormalized[hansenBatch$regMatrix$overall[, i] == 1, ]
+    if(identical(dim(modelsMatrixSubset), NULL)) nodeWeightsMatrix.unnormalized[, i] <- modelsMatrixSubset # because extracting a single row yields a vector, and dim returns NULL for a vector
+    else nodeWeightsMatrix.unnormalized[, i] <- apply(modelsMatrixSubset, 2, sum)
+  }
 
-    # 2. sum over number of parameters
-  
-    kCats <- sort(unique(hansenBatch$hansens[[tree]][, 'dof']))
-    kMatrix <- matrix(NA, nrow = length(matrixRows), ncol = length(kCats), dimnames = list(matrixRows, as.character(kCats))) #value: kMatrix
-    for(i in as.character(kCats)) {
-      modelsMatrixSubset <- modelsMatrix[hansenBatch$hansens[[tree]][, 'dof'] == i, ]
-      if(identical(dim(modelsMatrixSubset), NULL)) kMatrix[, i] <- modelsMatrixSubset
-      else kMatrix[, i] <- apply(modelsMatrixSubset, 2, sum) 
-      }
-  outdata <- list(modelsMatrix = modelsMatrix, meanWeights = outMatrix, nodeWeightsMatrix = nodeWeightsMatrix, kMatrix = kMatrix)
+  # 2. sum over number of parameters
+  # currently only works when there is no phylogenetic uncertainty
+  kCats <- sort(unique(hansenBatch$hansens[[tree]][, 'dof']))
+  kMatrix <- matrix(NA, nrow = length(matrixRows), ncol = length(kCats), dimnames = list(matrixRows, as.character(kCats))) #value: kMatrix
+  for(i in as.character(kCats)) {
+    modelsMatrixSubset <- modelsMatrix[hansenBatch$hansens[[tree]][, 'dof'] == i, ]
+    if(identical(dim(modelsMatrixSubset), NULL)) kMatrix[, i] <- modelsMatrixSubset
+    else kMatrix[, i] <- apply(modelsMatrixSubset, 2, sum) 
+  }
+  outdata <- list(modelsMatrix = modelsMatrix, weightsMatrix = list(unnormalized = weightsMatrix.unnormalized, allNodes = weightsMatrix.allNodes), nodeWeightsMatrix = list(unnormalized = nodeWeightsMatrix.unnormalized, allNodes = nodeWeightsMatrix.allNodes), kMatrix = kMatrix)
   return(outdata)
-  }
+}
 
 replace.matrix <- function (x, oldValue, newValue) {
   if(is.na(oldValue)) x[is.na(x)] <- newValue