[Mattice-commits] r252 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed May 2 23:34:37 CEST 2012
Author: andrew_hipp
Date: 2012-05-02 23:34:37 +0200 (Wed, 02 May 2012)
New Revision: 252
Modified:
pkg/R/batchHansen.R
pkg/R/multiModel.R
pkg/R/ouSim.hansenBatch.R
pkg/R/ouSim.ouchtree.R
pkg/R/ouSim.phylo.R
pkg/R/summarizingAnalyses.R
Log:
refixing the alpha problem in a variety of functions... simulations still don't work correctly
Modified: pkg/R/batchHansen.R
===================================================================
--- pkg/R/batchHansen.R 2011-03-22 16:53:41 UTC (rev 251)
+++ pkg/R/batchHansen.R 2012-05-02 21:34:37 UTC (rev 252)
@@ -48,11 +48,10 @@
if(stopFlag) stop("Correct discrepancies between trees and data and try again!")
}
if(!identical(di, NULL)) dir.create(di)
- if(class(try(sqrt.alpha, silent = TRUE)) == 'try-error') sqrt.alpha = 1
- if(class(try(sigma, silent = TRUE)) == 'try-error') sigma = 1
+ if(class(try(sqrt.alpha, silent = TRUE)) == 'try-error') sqrt.alpha = 1 # sets sqrt.alpha to 1 if it has not been assigned already
+ if(class(try(sigma, silent = TRUE)) == 'try-error') sigma = 1 # sets sigma to 1 if it has not been assigned already
ar = regimeVectors(ouchTrees, cladeMembersList, maxNodes)
- hansenBatch <- list(length(ouchTrees))
- thetas <- list(length(ouchTrees))
+ hansenBatch <- thetas <- vector('list',length(ouchTrees))
for (i in 1:length(ouchTrees)) {
fP <- NULL
if(!identical(filePrefix, NULL)) fP <- paste(filePrefix, ".t", i, ".", sep = "")
@@ -81,7 +80,8 @@
# return(hb) ### ONLY FOR DEBUGGING
hansenBatch[[i]] <- hb$treeData
thetas[[i]] <- hb$thetas
- message(paste("Tree",i,"of",length(ouchTrees),"complete", "\n-----------------------------"))
+ # thetas[[i]] <- coef(hb)$theta[[1]] ## assumes only a univariate case... maticce is not currently set up for multivariate datasets
+ message(paste("Tree",i,"of",length(ouchTrees),"complete", "\n-----------------------------"))
}
outdata <- list(hansens = hansenBatch, thetas = thetas, regList = ar$regList, regMatrix = ar$regMatrix, nodeMatrix = ar$nodeMatrix, brown = brown, N = ouchTrees[[i]]@nterm, nodeNames = nodeNames, analysisDate = date(), call = match.call())
class(outdata) <- 'hansenBatch'
@@ -123,7 +123,7 @@
message(paste("Running regime",i))
## at this point, the user has to give an initial sqrt.alpha and sigma for hansen to search on... this should be relaxed
ha = hansen(data = data, tree = tree, regimes = regimesList[[i]], sqrt.alpha = sqrt.alpha, sigma = sigma, ...)
- # return(ha) # ONLY FOR DEBUGGING
+ #return(ha) # ONLY FOR DEBUGGING
treeData[i, ] <- unlist(summary(ha)[haVars])
thetas[i, ] <- ha@theta$data[ha@regimes[[1]]]
if(!identical(filePrefix, NULL)) save(ha, file = paste(filePrefix, 'r', i, '.Rdata', sep = ""))
Modified: pkg/R/multiModel.R
===================================================================
--- pkg/R/multiModel.R 2011-03-22 16:53:41 UTC (rev 251)
+++ pkg/R/multiModel.R 2012-05-02 21:34:37 UTC (rev 252)
@@ -2,11 +2,11 @@
# test the support for alternative models on simple and partitioned trees
# currently only works on one tree; eventually should be modified so it runs on a set of trees, conditioned on those trees
# that have the node of interest and returning percent of trees possessing that node as an additional value
- paramHeader <- c('loglik', 'dof', 'sigma.squared', 'sqrt.alpha', 'theta', 'optimum', 'optimum.uptree', 'optimum.downtree')
+ paramHeader <- c('loglik', 'dof', 'sigma.squared', 'alpha', 'theta', 'optimum', 'optimum.uptree', 'optimum.downtree')
paramsAll <- c('loglik', 'dof', 'sigma.squared')
paramSets <- list(brown = c(paramsAll, 'theta'),
- ou1 = c(paramsAll, 'sqrt.alpha', 'optimum'),
- ou2 = c(paramsAll, 'sqrt.alpha', 'optimum.uptree', 'optimum.downtree')
+ ou1 = c(paramsAll, 'alpha', 'optimum'),
+ ou2 = c(paramsAll, 'alpha', 'optimum.uptree', 'optimum.downtree')
)
modelsAll = c('whole.ou2', 'whole.ou1', 'whole.brown', 'part.ou.uptree', 'part.ou.downtree', 'part.ou.summed', 'part.brown.uptree', 'part.brown.downtree', 'part.brown.summed')
pSum <- c('loglik', 'dof') # parameters to sum for evaluating partitioned trees
Modified: pkg/R/ouSim.hansenBatch.R
===================================================================
--- pkg/R/ouSim.hansenBatch.R 2011-03-22 16:53:41 UTC (rev 251)
+++ pkg/R/ouSim.hansenBatch.R 2012-05-02 21:34:37 UTC (rev 252)
@@ -1,9 +1,9 @@
ouSim.hansenSummary <- function(object, tree, treeNum = 1, rootState = NULL, ...) {
-## runs ouSim.ouchtree for a hansenBatch or hansenSummary object, using the model-averaged alpha, sigma.squared, and theta vector from one tree
+## runs ouSim.ouchtree for a hansenBatch or hansenSummary object, using the model-averaged sqrt.alpha, sigma.squared, and theta vector from one tree
analysis <- object
# if(class(analysis) == "hansenBatch") analysis <- summary(analysis)
if(identical(rootState, NULL)) rootState <- analysis$thetaMatrix[treeNum, ][tree@root] # rootstate taken to be the optimum at the root
- outdata <- ouSim(tree, rootState, alpha = analysis$modelAvgAlpha, variance = analysis$modelAvgSigmaSq, theta = analysis$thetaMatrix[treeNum, ], ...)
+ outdata <- ouSim(tree, rootState, sqrt.alpha = analysis$modelAvgAlpha, variance = analysis$modelAvgSigmaSq, theta = analysis$thetaMatrix[treeNum, ], ...)
class(outdata) <- "ouSim"
return(outdata)
}
@@ -14,13 +14,13 @@
analysis <- object
su <- summary(analysis)
if(length(analysis@regimes) > 1) warning("Theta is based on analysis@regimes[[1]]")
- if(dim(su$alpha)[1] != 1) stop("This is a one-character simulation; analysis appears to be based on > 1 character")
- alpha <- as.vector(su$alpha)
+ if(dim(su$sqrt.alpha)[1] != 1) stop("This is a one-character simulation; analysis appears to be based on > 1 character")
+ sqrt.alpha <- as.vector(su$sqrt.alpha)
theta <- su$optima[[1]][analysis@regimes[[1]]]
rootState <- theta[analysis@root] # rootstate taken to be the optimum at the root
variance <- as.vector(su$sigma.squared)
tree <- ouchtree(analysis@nodes, analysis@ancestors, analysis@times)
- outdata <- ouSim.ouchtree(tree, rootState, alpha, variance, theta, ...)
+ outdata <- ouSim.ouchtree(tree, rootState, sqrt.alpha, variance, theta, ...)
outdata$colors <- analysis@regimes[[1]]
class(outdata) <- "ouSim"
return(outdata)
@@ -30,13 +30,13 @@
analysis <- object
su <- summary(analysis)
if(length(analysis@regimes) > 1) warning("Theta is based on analysis@regimes[[1]]")
- if(dim(su$alpha)[1] != 1) stop("This is a one-character simulation; analysis appears to be based on > 1 character")
- alpha <- 0
+ if(dim(su$sqrt.alpha)[1] != 1) stop("This is a one-character simulation; analysis appears to be based on > 1 character")
+ sqrt.alpha <- 0
theta <- 0
rootState <- su$theta[[1]]
variance <- as.vector(su$sigma.squared)
tree <- ouchtree(analysis@nodes, analysis@ancestors, analysis@times)
- outdata <- ouSim.ouchtree(tree, rootState, alpha, variance, theta, ...)
+ outdata <- ouSim.ouchtree(tree, rootState, sqrt.alpha, variance, theta, ...)
outdata$colors <- analysis@regimes[[1]]
class(outdata) <- "ouSim"
return(outdata)
Modified: pkg/R/ouSim.ouchtree.R
===================================================================
--- pkg/R/ouSim.ouchtree.R 2011-03-22 16:53:41 UTC (rev 251)
+++ pkg/R/ouSim.ouchtree.R 2012-05-02 21:34:37 UTC (rev 252)
@@ -1,14 +1,14 @@
-ouSim.ouchtree <- function(object, rootState = 0, alpha = 0, variance = 1, theta = rootState, steps = 1000, ...) {
+ouSim.ouchtree <- function(object, rootState = 0, sqrt.alpha = 0, variance = 1, theta = rootState, steps = 1000, ...) {
## function to plot a simulated dataset under brownian motion or Ornstein-Uhlenbeck (OU) model
## Arguments:
## object is an ouch-style (S4) tree
-## alpha and theta are either single values or vectors of length (length(branchList))
+## sqrt.alpha and theta are either single values or vectors of length (length(branchList))
tree <- object
-message(paste("running sim with root =", rootState, ", alpha =", mean(alpha), ", var =", variance, "theta =", mean(theta)))
+message(paste("running sim with root =", rootState, ", sqrt.alpha =", mean(sqrt.alpha), ", var =", variance, "theta =", mean(theta)))
## embedded function---------------------
## could be released to the wild, but more arguments would need to be passed around
- preorderOU <- function(branchList, tree, startNode, startState, alpha, theta) {
+ preorderOU <- function(branchList, tree, startNode, startState, sqrt.alpha, theta) {
## Recursive function to generate the data under a Brownian motion or OU model
## modified for ouchtree (s4) Dec 08
## branch times back from each tip are in tree@epochs, indexed by tip number
@@ -21,24 +21,24 @@
else {
for (brStep in 1:length(workingBranch)) {
workingBranch[brStep] <-
- startState + workingBranch[brStep] + alpha[startBranch] / steps * (theta[startBranch] - startState) # denom was mult'd by steps... should be?
+ startState + workingBranch[brStep] + sqrt.alpha[startBranch] / steps * (theta[startBranch] - startState) # denom was mult'd by steps... should be?
startState <- workingBranch[brStep]
}
branchList[[startBranch]] <- workingBranch
endState <- branchList[[startBranch]][length(branchList[[startBranch]])]
}
if(!identical(as.integer(daughterBranches), integer(0))) {
- for(i in daughterBranches) branchList <- preorderOU(branchList, tree, i, endState, alpha, theta) }
+ for(i in daughterBranches) branchList <- preorderOU(branchList, tree, i, endState, sqrt.alpha, theta) }
return(branchList)
}
## --------------------------------------
## 1. initialize
- if(length(alpha) == 1) alpha <- rep(alpha, tree@nnodes)
+ if(length(sqrt.alpha) == 1) sqrt.alpha <- rep(sqrt.alpha, tree@nnodes)
if(length(theta) == 1) theta <- rep(theta, tree@nnodes)
brLengths <- c(0, unlist(lapply(2:tree@nnodes, branchLength, tree = tree))) # assumes first node is root; this should be relaxed
names(brLengths) <- tree@nodes # branches are indexed by end node
- names(alpha) <- tree@nodes
+ names(sqrt.alpha) <- tree@nodes
names(theta) <- tree@nodes
## 2. The following creates a list of random draws from the normal distribution, with standard deviation scaled by total
@@ -60,10 +60,10 @@
## 3. traverse
for(i in which(tree@ancestors == tree@root)) { ## calls preorderOU for each descendent from the root.
- branchList <- preorderOU(branchList, tree, tree@nodes[i], rootState, alpha, theta)
+ branchList <- preorderOU(branchList, tree, tree@nodes[i], rootState, sqrt.alpha, theta)
}
- value <- list(branchList = branchList, timesList = timesList, steps = steps, parameters = list(rootState = rootState, alpha = alpha, variance = variance, theta = theta))
+ value <- list(branchList = branchList, timesList = timesList, steps = steps, parameters = list(rootState = rootState, sqrt.alpha = sqrt.alpha, variance = variance, theta = theta))
class(value) <- "ouSim"
return(value)
}
Modified: pkg/R/ouSim.phylo.R
===================================================================
--- pkg/R/ouSim.phylo.R 2011-03-22 16:53:41 UTC (rev 251)
+++ pkg/R/ouSim.phylo.R 2012-05-02 21:34:37 UTC (rev 252)
@@ -1,19 +1,19 @@
-ouSim.phylo <- function(object, rootState = 0, shiftBranches = NULL, shiftStates = NULL, alpha = 0, variance = 1, theta = rootState, model = "OU", branchMeans = NULL, steps = 1000, ...) {
+ouSim.phylo <- function(object, rootState = 0, shiftBranches = NULL, shiftStates = NULL, sqrt.alpha = 0, variance = 1, theta = rootState, model = "OU", branchMeans = NULL, steps = 1000, ...) {
## function to plot a simulated dataset under brownian motion or Ornstein-Uhlenbeck (OU) model
## Arguments:
## phy is an ape-style tree
-## alpha and theta are either single values or vectors of length (length(branchList))
+## sqrt.alpha and theta are either single values or vectors of length (length(branchList))
## shiftBranches is a vector indicating any branches at which an OU or brownian motion model has a determined shift in ancestral state
## shiftStates is a vector of length = length(shiftBranches) indicaing the ancestral states for the determined break points
## Models:
## "OU" is a brownian motion or OU model
## "meanVar" is a model in which the only phylogenetic effect is the mean and variance for a given branch
## Andrew Hipp (ahipp at mortonarb.org), January 2008
-## July 2008: modified to accomodate a vector of alpha and theta corresponding to branches
+## July 2008: modified to accomodate a vector of sqrt.alpha and theta corresponding to branches
## Dec 2008: This function I'm leaving as is for the time being and just letting the phylo method be as raw as always.
## New developments will be in the ouchtree, brown, hansen, and hansenBatch methods
phy <- object
-preorderOU <- function(branchList, phy, startNode, startState, alpha, theta) {
+preorderOU <- function(branchList, phy, startNode, startState, sqrt.alpha, theta) {
## Recursive function to generate the data under a Brownian motion or OU model (not needed in the Platt model)
startBranch = which(phy$edge[,2] == startNode)
if(!identical(shiftStates, NULL)) {
@@ -21,16 +21,16 @@
message(paste('Working on branch',startBranch,'with starting state',startState))
branchList[[startBranch]][1] <- startState
for (i in 2:length(branchList[[startBranch]])) {
- branchList[[startBranch]][i] <- branchList[[startBranch]][i - 1] + branchList[[startBranch]][i] + alpha[startBranch] / steps * (theta[startBranch] - branchList[[startBranch]][i - 1]) }
+ branchList[[startBranch]][i] <- branchList[[startBranch]][i - 1] + branchList[[startBranch]][i] + sqrt.alpha[startBranch] / steps * (theta[startBranch] - branchList[[startBranch]][i - 1]) }
endState = branchList[[startBranch]][length(branchList[[startBranch]])]
daughterBranches <- phy$edge[which(phy$edge[, 1] == startNode), 2]
if(!identical(as.integer(daughterBranches), integer(0))) {
- for(i in daughterBranches) branchList <- preorderOU(branchList, phy, i, endState, alpha, theta) }
+ for(i in daughterBranches) branchList <- preorderOU(branchList, phy, i, endState, sqrt.alpha, theta) }
return(branchList) }
## 1. initialize
-if(length(alpha) == 1) alpha <- rep(alpha, length(phy$edge.length))
+if(length(sqrt.alpha) == 1) sqrt.alpha <- rep(sqrt.alpha, length(phy$edge.length))
if(length(theta) == 1) theta <- rep(theta, length(phy$edge.length))
## The following creates a list of random draws from the normal distribution, with standard deviation scaled by total tree length and the number of draws for each branch equal to the number of steps in that branch. If there is a separate variance for each branch, I assume the variance is expressed in tree-length units, not branch-length units, so the scaling is the same for all branches (viz., sd = sqrt(variance / steps))
if(model == "OU") {
@@ -58,10 +58,10 @@
## 3. traverse
if(model == "OU") {
for(i in which(phy$edge[, 1] == rootNode)) {
- branchList <- preorderOU(branchList, phy, phy$edge[i,2], rootState, alpha, theta) }}
+ branchList <- preorderOU(branchList, phy, phy$edge[i,2], rootState, sqrt.alpha, theta) }}
if(model == "meanVar") branchList <- branchList
-value <- (list(branchList = branchList, timesList = timesList, steps = steps, parameters = list(rootState = rootState, alpha = alpha, variance = variance, theta = theta)))
+value <- (list(branchList = branchList, timesList = timesList, steps = steps, parameters = list(rootState = rootState, sqrt.alpha = sqrt.alpha, variance = variance, theta = theta)))
class(value) <- "ouSim"
return(value)
}
\ No newline at end of file
Modified: pkg/R/summarizingAnalyses.R
===================================================================
--- pkg/R/summarizingAnalyses.R 2011-03-22 16:53:41 UTC (rev 251)
+++ pkg/R/summarizingAnalyses.R 2012-05-02 21:34:37 UTC (rev 252)
@@ -17,7 +17,7 @@
nnodes <- length(nodeSums) # number of nodes being studied
nodes <- dimnames(hansenBatch$regMatrix$overall)[[2]] # grab the overall regMatrix, which includes all possible nodes
sigmaSqVector <- numeric(ntrees) # vector to capture model-averaged sigma^2 for each tree
- alphaVector <- numeric(ntrees) # vector to capture model-averaged alpha for each tree
+ alphaVector <- numeric(ntrees) # vector to capture model-averaged sqrt.alpha for each tree # s/b sqrt.alpha
modelsMatrix <- vector('list', ntrees) # list of matrices, indexed by tree, holding the weight for each model
matrixRows <- c('AICwi', 'AICcwi', 'BICwi') # rows in the matrix
nodeWeightsSummed <- matrix(0, nrow = length(matrixRows), ncol = nnodes, dimnames = list(matrixRows, nodes)) # holds node weights summed; zero-filled b/c it is a sum?
More information about the Mattice-commits
mailing list