[Mattice-commits] r249 - in pkg: R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Mar 14 20:53:32 CET 2011
Author: andrew_hipp
Date: 2011-03-14 20:53:28 +0100 (Mon, 14 Mar 2011)
New Revision: 249
Modified:
pkg/R/batchHansen.R
pkg/R/regimes.R
pkg/R/summarizingAnalyses.R
pkg/man/summary.hansenBatch.Rd
Log:
Everything is working properly now! v1.0-1 is ready for packaging.
Modified: pkg/R/batchHansen.R
===================================================================
--- pkg/R/batchHansen.R 2011-03-14 18:25:41 UTC (rev 248)
+++ pkg/R/batchHansen.R 2011-03-14 19:53:28 UTC (rev 249)
@@ -83,7 +83,7 @@
thetas[[i]] <- hb$thetas
message(paste("Tree",i,"of",length(ouchTrees),"complete", "\n-----------------------------"))
}
- outdata <- list(hansens = hansenBatch, thetas = thetas, regList = ar$regList, regMatrix = ar$regMatrix, nodeMatrix = ar$nodeMatrix, brown = brown, N = ouchTrees[[i]]@nterm, analysisDate = date(), call = match.call())
+ outdata <- list(hansens = hansenBatch, thetas = thetas, regList = ar$regList, regMatrix = ar$regMatrix, nodeMatrix = ar$nodeMatrix, brown = brown, N = ouchTrees[[i]]@nterm, nodeNames = nodeNames, analysisDate = date(), call = match.call())
class(outdata) <- 'hansenBatch'
return(outdata)}
Modified: pkg/R/regimes.R
===================================================================
--- pkg/R/regimes.R 2011-03-14 18:25:41 UTC (rev 248)
+++ pkg/R/regimes.R 2011-03-14 19:53:28 UTC (rev 249)
@@ -5,7 +5,7 @@
# Initially written for ouch v 1.2-4
# updated to ouch >= 2.4-2 Nov 2008
# updated to accommodate multiple trees Nov 2008
-# updated to correct scoring of NAs in regimeMaker, 14 mar 2011
+# updated to correct scoring of NAs in regimeMaker and fixed use of alpha vs. sqrt.alpha (ouch2), 14 mar 2011
regimeVectors <-
# This is the basic call to get the full range of regimes over a set of trees
Modified: pkg/R/summarizingAnalyses.R
===================================================================
--- pkg/R/summarizingAnalyses.R 2011-03-14 18:25:41 UTC (rev 248)
+++ pkg/R/summarizingAnalyses.R 2011-03-14 19:53:28 UTC (rev 249)
@@ -2,20 +2,11 @@
# FUNCTIONS FOR SUMMARIZING ANALYSES
# ---------------------------------------------------------------------
-# March 2011 - summary.hansenBatch found to give incorrect answers with multiple trees; corrected
-# also changed the weighting so that model averaging can be by AICc, AIC, or BIC
+# March 2011 - summary.hansenBatch found to give incorrect answers with multiple trees; corrected. Also:
+# - changed the weighting so that model averaging can be by AICc, AIC, or BIC
+# - added support for nodeNames
-## 11 mar 11 note: trying to figure out why the ic weights are not being averaged correctly... tallying trees wrong somewhere?
-## Also changed documentation: regimeMaker does not return an NA where a node is missing, but a 0. NAs are used for rows that are all zeros or duplicates.
-## It seems that the summary code is giving a probability for every node for every tree, even when the tree does not have the node. Something is wrong here. Also, models are
-## being treated as though they were commensurate across trees when they are not. Models that are defined for nodes that are not absent for a tree still get a probability, as though the node were
-## there. This is not right. If a tree has all the nodes in a model, that model should be analyzed. If not, the model should not be analyzed. I think this is what is causing the problem,
-## because I am summing the probabilities for each node based on the assumption that a model that possesses a node has only been analyzed on trees that have that node.
-## In effect, I am changing the model across trees rather than deleting models from trees where they don't apply. This is undoubtedly the problem. Probabilities are consequently
-## inflated across trees. regimeMaker SHOULD return NAs for each model that is missing any nodes, in addition to duplicate models. I need to look back at the logic and see what I was
-## thinking on this.
-
-summary.hansenBatch <- function(object, ic = 'aicc', ...){
+summary.hansenBatch <- function(object, ic = 'AICc', ...){
## items in output: hansens, regimeList, regimeMatrix
## ic = choice of information criterion weight to use in model averaging
hansenBatch <- object
@@ -41,25 +32,19 @@
modelsMatrix[[tree]] <- cbind(icObject[[tree]]$AICwi, icObject[[tree]]$AICcwi, icObject[[tree]]$BICwi)
dimnames(modelsMatrix[[tree]]) <- list( dimnames(hansenBatch$hansens[[1]])[[1]], c("AICwi", "AICcwi", "BICwi"))
icWeight <- switch(ic,
- bic = icObject[[tree]]$BICwi,
- aic = icObject[[tree]]$AICwi,
- aicc = icObject[[tree]]$AICcwi
+ BIC = icObject[[tree]]$BICwi,
+ AIC = icObject[[tree]]$AICwi,
+ AICc = icObject[[tree]]$AICcwi
)
- for(i in matrixRows) icMats[[i]][tree, ] <- colSums(replace(matrix(modelsMatrix[[tree]][, i], nmodels, nnodes), NA, 1) * hansenBatch$regMatrix$overall, na.rm = T) # somewhat vectorized
- nodeWeightsSummed[i, ] <- icMats[[i]][tree, ] + nodeWeightsSummed[i, ]
- #OLD:
- #for(i in seq(nnodes)) {
- # modelsMatrixSubset <- modelsMatrix[[tree]][hansenBatch$regMatrix$overall[, nodes[i]] == 1, ] # subset models that contain node i
- # if(identical(dim(modelsMatrixSubset), NULL)) # is modelsMatrixSubset a 1-d vector? if so then:
- # nodeWeightsSummed[, nodes[i]] <- nodeWeightsSummed[, nodes[i]] + replace.matrix(modelsMatrixSubset, NA, 0) # because extracting a single row yields a vector, and dim returns NULL for a vector
- # else nodeWeightsSummed[, nodes[i]] <- nodeWeightsSummed[, nodes[i]] + colSums(modelsMatrixSubset, na.rm = TRUE)
- # }
-
+ for(i in matrixRows) {
+ icMats[[i]][tree, ] <- colSums(modelsMatrix[[tree]][, i] * hansenBatch$regMatrix$overall, na.rm = T)
+ nodeWeightsSummed[i, ] <- icMats[[i]][tree, ] + nodeWeightsSummed[i, ]
+ } # close i
sigmaSqVector[tree] <- weighted.mean(hansenBatch$hansens[[tree]][, 'sigma.squared'], icWeight, na.rm = TRUE)
if(hansenBatch$brown) icOU <- icWeight[1: (length(icWeight) - 1)]
- alphaVector[tree] <- ifelse(hansenBatch$brown, # s/b sqrt.alpha
- weighted.mean(hansenBatch$hansens[[tree]][1:(nmodels - 1), 'theta / alpha'], icOU, na.rm = TRUE), # s/b sqrt.alpha
- weighted.mean(hansenBatch$hansens[[tree]][ , 'theta / alpha'], icWeight, na.rm = TRUE) # s/b sqrt.alpha
+ alphaVector[tree] <- ifelse(hansenBatch$brown,
+ weighted.mean(hansenBatch$hansens[[tree]][1:(nmodels - 1), 'theta / alpha'], icOU, na.rm = TRUE),
+ weighted.mean(hansenBatch$hansens[[tree]][ , 'theta / alpha'], icWeight, na.rm = TRUE)
)
if(hansenBatch$brown) w <- icOU else w <- icWeight
thetaMatrix[tree, ] <- apply(hansenBatch$thetas[[tree]], 2,
@@ -74,22 +59,11 @@
# in this matrix, the weight for each node is averaged over all trees
nodeWeightsMatrix.allNodes <- nodeWeightsSummed / ntrees
- # sum over number of parameters
- # create a vector of sums that tells us how many categories there are for each model: dof = sum(nodes) + 1 [because a node indicates a change in
- # regime, thus the total number of thetas = nodes + 1] + sqrt.alpha + sigma = sum(nodes) + 3; for Brownian motion model, dof = 2
-
- #nodeSums <- apply(hansenBatch$regMatrix$overall, 1, sum) + 3
- #if(hansenBatch$brown) nodeSums['brown'] <- 2
- #kCats <- sort(unique(nodeSums)) # and just give us the unique degree-of-freedom categories, sorted
- #kMatrix <- matrix(NA, nrow = length(matrixRows), ncol = length(kCats), dimnames = list(matrixRows, as.character(kCats))) # make the empty kMatrix
- #for(i in as.character(kCats)) {
- # modelsMatrixSubset <- weightsMatrix.allNodes[nodeSums == i, ]
- # if(identical(dim(modelsMatrixSubset), NULL)) kMatrix[, i] <- modelsMatrixSubset # is modelsMatrixSubset a 1-d vector?
- # else kMatrix[, i] <- apply(modelsMatrixSubset, 2, sum)
- #}
- modelAvgAlpha <- mean(alphaVector, na.rm = TRUE) # s/b sqrt.alpha
- modelAvgSigmaSq <- mean(sigmaSqVector, na.rm = TRUE)
- outdata <- list(modelsMatrix = modelsMatrix, nodeWeightsMatrix = list(unnormalized = nodeWeightsMatrix.unnormalized, allNodes = nodeWeightsMatrix.allNodes), modelAvgAlpha = modelAvgAlpha, modelAvgSigmaSq = modelAvgSigmaSq, thetaMatrix = thetaMatrix, icMats = icMats)
+ if(!identical(object$nodeNames, NULL)) dimnames(nodeWeightsMatrix.unnormalized)[[2]] <- dimnames(nodeWeightsMatrix.allNodes)[[2]] <- object$nodeNames
+ modelAvgAlpha <- c(mean(alphaVector, na.rm = TRUE), quantile(alphaVector, c(0.025, 0.975)))
+ modelAvgSigmaSq <- c(mean(sigmaSqVector, na.rm = TRUE),quantile(sigmaSqVector, c(0.025, 0.975)))
+ names(modelAvgAlpha) <- names(modelAvgSigmaSq) <- c('mean', 'lower.CI', 'upper.CI')
+ outdata <- list(modelsMatrix = modelsMatrix, nodeWeightsMatrix = list(unnormalized = nodeWeightsMatrix.unnormalized, allNodes = nodeWeightsMatrix.allNodes), modelAvgAlpha = modelAvgAlpha, modelAvgSigmaSq = modelAvgSigmaSq, thetaMatrix = thetaMatrix, icMats = icMats, ic = ic)
class(outdata) <- 'hansenSummary'
return(outdata)
}
@@ -110,12 +84,11 @@
print(hansenSummary$nodeWeightsMatrix$allNodes)
cat("\nSupport conditioned on trees that possess the node\n")
print(hansenSummary$nodeWeightsMatrix$unnormalized)
- #message("\nESTIMATED SUPPORT FOR NUMBER OF PARAMETERS IN THE MODEL")
- #message("The properties of this support value have not been studied and are likely to be biased strongly toward the median value of\nK, as K is largest at the median values (they are distributed according to Stirling numbers of the first kind).")
- #print(hansenSummary$kMatrix)
- cat("\nMODEL-AVERAGED PARAMETERS")
- cat("\nsqrt.alpha =", hansenSummary$modelAvgAlpha)
- cat("\nsigma^2 =", hansenSummary$modelAvgSigmaSq)
+ cat("\nMODEL-AVERAGED PARAMETERS BASED ON", x$ic, "WEIGHTS\n")
+ cat("\nalpha:\n")
+ print(hansenSummary$modelAvgAlpha)
+ cat("\nsigma^2:\n")
+ print(hansenSummary$modelAvgSigmaSq)
if(any(dim(hansenSummary$thetaMatrix) > 12)) message(paste("\ntheta matrix is too long to display; access through the summary object"))
else {
cat("\ntheta matrix, with branches as the columns and trees as the rows:\n")
Modified: pkg/man/summary.hansenBatch.Rd
===================================================================
--- pkg/man/summary.hansenBatch.Rd 2011-03-14 18:25:41 UTC (rev 248)
+++ pkg/man/summary.hansenBatch.Rd 2011-03-14 19:53:28 UTC (rev 249)
@@ -7,7 +7,7 @@
\code{print.hansenSummary} is a print method for a \code{hansenSummary} object.
}
\usage{
- \method{summary}{hansenBatch}(object, ic = 'aicc', ...)
+ \method{summary}{hansenBatch}(object, ic = 'AICc', ...)
\method{print}{hansenSummary}(x, ...)
}
\arguments{
@@ -15,7 +15,7 @@
Output from \code{runBatchHansen}.
}
\item{ic}{
- Choice of information criterion to use in model-averaging of parameters. Choices are \code{aicc} (default), \code{aic}, or \code{bic}.
+ Choice of information criterion to use in model-averaging of parameters. Choices are \code{AICc} (default), \code{AIC}, or \code{BIC}.
}
\item{x}{
Output from \code{summary.hansenBatch}.
More information about the Mattice-commits
mailing list