[Mattice-commits] r252 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed May 2 23:34:37 CEST 2012


Author: andrew_hipp
Date: 2012-05-02 23:34:37 +0200 (Wed, 02 May 2012)
New Revision: 252

Modified:
   pkg/R/batchHansen.R
   pkg/R/multiModel.R
   pkg/R/ouSim.hansenBatch.R
   pkg/R/ouSim.ouchtree.R
   pkg/R/ouSim.phylo.R
   pkg/R/summarizingAnalyses.R
Log:
refixing the alpha problem in a variety of functions... simulations still don't work correctly

Modified: pkg/R/batchHansen.R
===================================================================
--- pkg/R/batchHansen.R	2011-03-22 16:53:41 UTC (rev 251)
+++ pkg/R/batchHansen.R	2012-05-02 21:34:37 UTC (rev 252)
@@ -48,11 +48,10 @@
     if(stopFlag) stop("Correct discrepancies between trees and data and try again!")
     }
   if(!identical(di, NULL)) dir.create(di)
-  if(class(try(sqrt.alpha, silent = TRUE)) == 'try-error') sqrt.alpha = 1
-  if(class(try(sigma, silent = TRUE)) == 'try-error') sigma = 1
+  if(class(try(sqrt.alpha, silent = TRUE)) == 'try-error') sqrt.alpha = 1 # sets sqrt.alpha to 1 if it has not been assigned already
+  if(class(try(sigma, silent = TRUE)) == 'try-error') sigma = 1 # sets sigma to 1 if it has not been assigned already
   ar = regimeVectors(ouchTrees, cladeMembersList, maxNodes)
-  hansenBatch <- list(length(ouchTrees))
-  thetas <- list(length(ouchTrees))
+  hansenBatch <- thetas <- vector('list',length(ouchTrees))
   for (i in 1:length(ouchTrees)) {
     fP <- NULL
     if(!identical(filePrefix, NULL)) fP <- paste(filePrefix, ".t", i, ".", sep = "")
@@ -81,7 +80,8 @@
 	# return(hb) ### ONLY FOR DEBUGGING
     hansenBatch[[i]] <- hb$treeData
     thetas[[i]] <- hb$thetas
-    message(paste("Tree",i,"of",length(ouchTrees),"complete", "\n-----------------------------"))
+    # thetas[[i]] <- coef(hb)$theta[[1]] ## assumes only a univariate case... maticce is not currently set up for multivariate datasets
+	message(paste("Tree",i,"of",length(ouchTrees),"complete", "\n-----------------------------"))
   }
   outdata <- list(hansens = hansenBatch, thetas = thetas, regList = ar$regList, regMatrix = ar$regMatrix, nodeMatrix = ar$nodeMatrix, brown = brown, N = ouchTrees[[i]]@nterm, nodeNames = nodeNames, analysisDate = date(), call = match.call())
   class(outdata) <- 'hansenBatch'
@@ -123,7 +123,7 @@
       message(paste("Running regime",i))
       ## at this point, the user has to give an initial sqrt.alpha and sigma for hansen to search on... this should be relaxed
       ha = hansen(data = data, tree = tree, regimes = regimesList[[i]], sqrt.alpha = sqrt.alpha, sigma = sigma, ...)
-	  # return(ha) # ONLY FOR DEBUGGING
+	  #return(ha) # ONLY FOR DEBUGGING
       treeData[i, ] <- unlist(summary(ha)[haVars])
       thetas[i, ] <- ha@theta$data[ha@regimes[[1]]]
       if(!identical(filePrefix, NULL)) save(ha, file = paste(filePrefix, 'r', i, '.Rdata', sep = ""))

Modified: pkg/R/multiModel.R
===================================================================
--- pkg/R/multiModel.R	2011-03-22 16:53:41 UTC (rev 251)
+++ pkg/R/multiModel.R	2012-05-02 21:34:37 UTC (rev 252)
@@ -2,11 +2,11 @@
 # test the support for alternative models on simple and partitioned trees
 # currently only works on one tree; eventually should be modified so it runs on a set of trees, conditioned on those trees 
 #   that have the node of interest and returning percent of trees possessing that node as an additional value
-  paramHeader <- c('loglik', 'dof', 'sigma.squared', 'sqrt.alpha', 'theta', 'optimum', 'optimum.uptree', 'optimum.downtree')
+  paramHeader <- c('loglik', 'dof', 'sigma.squared', 'alpha', 'theta', 'optimum', 'optimum.uptree', 'optimum.downtree')
   paramsAll <- c('loglik', 'dof', 'sigma.squared')
   paramSets <- list(brown = c(paramsAll, 'theta'), 
-                  ou1 = c(paramsAll, 'sqrt.alpha', 'optimum'), 
-                  ou2 = c(paramsAll, 'sqrt.alpha', 'optimum.uptree', 'optimum.downtree')
+                  ou1 = c(paramsAll, 'alpha', 'optimum'), 
+                  ou2 = c(paramsAll, 'alpha', 'optimum.uptree', 'optimum.downtree')
                   )
   modelsAll = c('whole.ou2', 'whole.ou1', 'whole.brown', 'part.ou.uptree', 'part.ou.downtree', 'part.ou.summed', 'part.brown.uptree', 'part.brown.downtree', 'part.brown.summed')
   pSum <- c('loglik', 'dof') # parameters to sum for evaluating partitioned trees

Modified: pkg/R/ouSim.hansenBatch.R
===================================================================
--- pkg/R/ouSim.hansenBatch.R	2011-03-22 16:53:41 UTC (rev 251)
+++ pkg/R/ouSim.hansenBatch.R	2012-05-02 21:34:37 UTC (rev 252)
@@ -1,9 +1,9 @@
 ouSim.hansenSummary <- function(object, tree, treeNum = 1, rootState = NULL, ...) {
-## runs ouSim.ouchtree for a hansenBatch or hansenSummary object, using the model-averaged alpha, sigma.squared, and theta vector from one tree
+## runs ouSim.ouchtree for a hansenBatch or hansenSummary object, using the model-averaged sqrt.alpha, sigma.squared, and theta vector from one tree
   analysis <- object
   # if(class(analysis) == "hansenBatch") analysis <- summary(analysis)
   if(identical(rootState, NULL)) rootState <- analysis$thetaMatrix[treeNum, ][tree@root] # rootstate taken to be the optimum at the root
-  outdata <- ouSim(tree, rootState, alpha = analysis$modelAvgAlpha, variance = analysis$modelAvgSigmaSq, theta = analysis$thetaMatrix[treeNum, ], ...)
+  outdata <- ouSim(tree, rootState, sqrt.alpha = analysis$modelAvgAlpha, variance = analysis$modelAvgSigmaSq, theta = analysis$thetaMatrix[treeNum, ], ...)
   class(outdata) <- "ouSim"
   return(outdata)
 }
@@ -14,13 +14,13 @@
   analysis <- object
   su <- summary(analysis)
   if(length(analysis@regimes) > 1) warning("Theta is based on analysis@regimes[[1]]")
-  if(dim(su$alpha)[1] != 1) stop("This is a one-character simulation; analysis appears to be based on > 1 character")
-  alpha <- as.vector(su$alpha)
+  if(dim(su$sqrt.alpha)[1] != 1) stop("This is a one-character simulation; analysis appears to be based on > 1 character")
+  sqrt.alpha <- as.vector(su$sqrt.alpha)
   theta <- su$optima[[1]][analysis@regimes[[1]]]
   rootState <- theta[analysis@root] # rootstate taken to be the optimum at the root
   variance <- as.vector(su$sigma.squared)
   tree <- ouchtree(analysis@nodes, analysis@ancestors, analysis@times) 
-  outdata <- ouSim.ouchtree(tree, rootState, alpha, variance, theta, ...)
+  outdata <- ouSim.ouchtree(tree, rootState, sqrt.alpha, variance, theta, ...)
   outdata$colors <- analysis@regimes[[1]]
   class(outdata) <- "ouSim"
   return(outdata)
@@ -30,13 +30,13 @@
   analysis <- object
   su <- summary(analysis)
   if(length(analysis@regimes) > 1) warning("Theta is based on analysis@regimes[[1]]")
-  if(dim(su$alpha)[1] != 1) stop("This is a one-character simulation; analysis appears to be based on > 1 character")
-  alpha <- 0
+  if(dim(su$sqrt.alpha)[1] != 1) stop("This is a one-character simulation; analysis appears to be based on > 1 character")
+  sqrt.alpha <- 0
   theta <- 0
   rootState <- su$theta[[1]]
   variance <- as.vector(su$sigma.squared)
   tree <- ouchtree(analysis@nodes, analysis@ancestors, analysis@times) 
-  outdata <- ouSim.ouchtree(tree, rootState, alpha, variance, theta, ...)
+  outdata <- ouSim.ouchtree(tree, rootState, sqrt.alpha, variance, theta, ...)
   outdata$colors <- analysis@regimes[[1]]
   class(outdata) <- "ouSim"
   return(outdata)

Modified: pkg/R/ouSim.ouchtree.R
===================================================================
--- pkg/R/ouSim.ouchtree.R	2011-03-22 16:53:41 UTC (rev 251)
+++ pkg/R/ouSim.ouchtree.R	2012-05-02 21:34:37 UTC (rev 252)
@@ -1,14 +1,14 @@
-ouSim.ouchtree <- function(object, rootState = 0, alpha = 0, variance = 1, theta = rootState, steps = 1000, ...) {
+ouSim.ouchtree <- function(object, rootState = 0, sqrt.alpha = 0, variance = 1, theta = rootState, steps = 1000, ...) {
 ## function to plot a simulated dataset under brownian motion or Ornstein-Uhlenbeck (OU) model
 ## Arguments:
 ##   object is an ouch-style (S4) tree
-##   alpha and theta are either single values or vectors of length (length(branchList))
+##   sqrt.alpha and theta are either single values or vectors of length (length(branchList))
 tree <- object
-message(paste("running sim with root =", rootState, ", alpha =", mean(alpha), ", var =", variance, "theta =", mean(theta)))
+message(paste("running sim with root =", rootState, ", sqrt.alpha =", mean(sqrt.alpha), ", var =", variance, "theta =", mean(theta)))
 
 	## embedded function---------------------
 	## could be released to the wild, but more arguments would need to be passed around
-	preorderOU <- function(branchList, tree, startNode, startState, alpha, theta) {
+	preorderOU <- function(branchList, tree, startNode, startState, sqrt.alpha, theta) {
 	  ## Recursive function to generate the data under a Brownian motion or OU model
 	  ## modified for ouchtree (s4) Dec 08
 	  ## branch times back from each tip are in tree@epochs, indexed by tip number
@@ -21,24 +21,24 @@
 	  else {
 	    for (brStep in 1:length(workingBranch)) {
 	      workingBranch[brStep] <- 
-	        startState + workingBranch[brStep] + alpha[startBranch] / steps * (theta[startBranch] - startState) # denom was mult'd by steps... should be? 
+	        startState + workingBranch[brStep] + sqrt.alpha[startBranch] / steps * (theta[startBranch] - startState) # denom was mult'd by steps... should be? 
 	      startState <- workingBranch[brStep] 
 	      }
 	    branchList[[startBranch]] <- workingBranch
 	    endState <- branchList[[startBranch]][length(branchList[[startBranch]])]
 	    }	  
 	  if(!identical(as.integer(daughterBranches), integer(0))) {
-	    for(i in daughterBranches) branchList <- preorderOU(branchList, tree, i, endState, alpha, theta) } 
+	    for(i in daughterBranches) branchList <- preorderOU(branchList, tree, i, endState, sqrt.alpha, theta) } 
 	  return(branchList) 
 	}  
 	## --------------------------------------
 
   ## 1. initialize
-  if(length(alpha) == 1) alpha <- rep(alpha, tree@nnodes)
+  if(length(sqrt.alpha) == 1) sqrt.alpha <- rep(sqrt.alpha, tree@nnodes)
   if(length(theta) == 1) theta <- rep(theta, tree@nnodes)
   brLengths <- c(0, unlist(lapply(2:tree@nnodes, branchLength, tree = tree))) # assumes first node is root; this should be relaxed
   names(brLengths) <- tree@nodes # branches are indexed by end node
-  names(alpha) <- tree@nodes
+  names(sqrt.alpha) <- tree@nodes
   names(theta) <- tree@nodes
 
   ## 2. The following creates a list of random draws from the normal distribution, with standard deviation scaled by total 
@@ -60,10 +60,10 @@
 
   ## 3. traverse
   for(i in which(tree@ancestors == tree@root)) { ## calls preorderOU for each descendent from the root.
-    branchList <- preorderOU(branchList, tree, tree@nodes[i], rootState, alpha, theta) 
+    branchList <- preorderOU(branchList, tree, tree@nodes[i], rootState, sqrt.alpha, theta) 
     }
 		
-  value <- list(branchList = branchList, timesList = timesList, steps = steps, parameters = list(rootState = rootState, alpha = alpha, variance = variance, theta = theta))
+  value <- list(branchList = branchList, timesList = timesList, steps = steps, parameters = list(rootState = rootState, sqrt.alpha = sqrt.alpha, variance = variance, theta = theta))
   class(value) <- "ouSim"
   return(value)
 }

Modified: pkg/R/ouSim.phylo.R
===================================================================
--- pkg/R/ouSim.phylo.R	2011-03-22 16:53:41 UTC (rev 251)
+++ pkg/R/ouSim.phylo.R	2012-05-02 21:34:37 UTC (rev 252)
@@ -1,19 +1,19 @@
-ouSim.phylo <- function(object, rootState = 0, shiftBranches = NULL, shiftStates = NULL, alpha = 0, variance = 1, theta = rootState, model = "OU", branchMeans = NULL, steps = 1000, ...) {
+ouSim.phylo <- function(object, rootState = 0, shiftBranches = NULL, shiftStates = NULL, sqrt.alpha = 0, variance = 1, theta = rootState, model = "OU", branchMeans = NULL, steps = 1000, ...) {
 ## function to plot a simulated dataset under brownian motion or Ornstein-Uhlenbeck (OU) model
 ## Arguments:
 ##   phy is an ape-style tree
-##   alpha and theta are either single values or vectors of length (length(branchList))
+##   sqrt.alpha and theta are either single values or vectors of length (length(branchList))
 ##   shiftBranches is a vector indicating any branches at which an OU or brownian motion model has a determined shift in ancestral state
 ##   shiftStates is a vector of length = length(shiftBranches) indicaing the ancestral states for the determined break points
 ## Models:
 ##  "OU" is a brownian motion or OU model 
 ##  "meanVar" is a model in which the only phylogenetic effect is the mean and variance for a given branch
 ## Andrew Hipp (ahipp at mortonarb.org), January 2008 
-## July 2008: modified to accomodate a vector of alpha and theta corresponding to branches
+## July 2008: modified to accomodate a vector of sqrt.alpha and theta corresponding to branches
 ## Dec 2008: This function I'm leaving as is for the time being and just letting the phylo method be as raw as always.
 ##           New developments will be in the ouchtree, brown, hansen, and hansenBatch methods
 phy <- object
-preorderOU <- function(branchList, phy, startNode, startState, alpha, theta) {
+preorderOU <- function(branchList, phy, startNode, startState, sqrt.alpha, theta) {
 ## Recursive function to generate the data under a Brownian motion or OU model (not needed in the Platt model)
   startBranch = which(phy$edge[,2] == startNode)
   if(!identical(shiftStates, NULL)) {
@@ -21,16 +21,16 @@
   message(paste('Working on branch',startBranch,'with starting state',startState))
   branchList[[startBranch]][1] <- startState
   for (i in 2:length(branchList[[startBranch]])) {
-    branchList[[startBranch]][i] <- branchList[[startBranch]][i - 1] + branchList[[startBranch]][i] + alpha[startBranch] / steps * (theta[startBranch] - branchList[[startBranch]][i - 1]) }
+    branchList[[startBranch]][i] <- branchList[[startBranch]][i - 1] + branchList[[startBranch]][i] + sqrt.alpha[startBranch] / steps * (theta[startBranch] - branchList[[startBranch]][i - 1]) }
   endState = branchList[[startBranch]][length(branchList[[startBranch]])]
   daughterBranches <- phy$edge[which(phy$edge[, 1] == startNode), 2]
   if(!identical(as.integer(daughterBranches), integer(0))) {
-    for(i in daughterBranches) branchList <- preorderOU(branchList, phy, i, endState, alpha, theta) }
+    for(i in daughterBranches) branchList <- preorderOU(branchList, phy, i, endState, sqrt.alpha, theta) }
   return(branchList) }  
 
 ## 1. initialize
 
-if(length(alpha) == 1) alpha <- rep(alpha, length(phy$edge.length))
+if(length(sqrt.alpha) == 1) sqrt.alpha <- rep(sqrt.alpha, length(phy$edge.length))
 if(length(theta) == 1) theta <- rep(theta, length(phy$edge.length))
 ## The following creates a list of random draws from the normal distribution, with standard deviation scaled by total tree length and the number of draws for each branch equal to the number of steps in that branch. If there is a separate variance for each branch, I assume the variance is expressed in tree-length units, not branch-length units, so the scaling is the same for all branches (viz., sd = sqrt(variance / steps))
 if(model == "OU") {
@@ -58,10 +58,10 @@
 ## 3. traverse
 if(model == "OU") {
 	for(i in which(phy$edge[, 1] == rootNode)) {
-	  branchList <- preorderOU(branchList, phy, phy$edge[i,2], rootState, alpha, theta) }}
+	  branchList <- preorderOU(branchList, phy, phy$edge[i,2], rootState, sqrt.alpha, theta) }}
 if(model == "meanVar") branchList <- branchList
 
-value <- (list(branchList = branchList, timesList = timesList, steps = steps, parameters = list(rootState = rootState, alpha = alpha, variance = variance, theta = theta))) 
+value <- (list(branchList = branchList, timesList = timesList, steps = steps, parameters = list(rootState = rootState, sqrt.alpha = sqrt.alpha, variance = variance, theta = theta))) 
 class(value) <- "ouSim"
 return(value)
 }
\ No newline at end of file

Modified: pkg/R/summarizingAnalyses.R
===================================================================
--- pkg/R/summarizingAnalyses.R	2011-03-22 16:53:41 UTC (rev 251)
+++ pkg/R/summarizingAnalyses.R	2012-05-02 21:34:37 UTC (rev 252)
@@ -17,7 +17,7 @@
   nnodes <- length(nodeSums) # number of nodes being studied
   nodes <- dimnames(hansenBatch$regMatrix$overall)[[2]] # grab the overall regMatrix, which includes all possible nodes
   sigmaSqVector <- numeric(ntrees) # vector to capture model-averaged sigma^2 for each tree
-  alphaVector <- numeric(ntrees) # vector to capture model-averaged alpha for each tree
+  alphaVector <- numeric(ntrees) # vector to capture model-averaged sqrt.alpha for each tree # s/b sqrt.alpha
   modelsMatrix <- vector('list', ntrees) # list of matrices, indexed by tree, holding the weight for each model
   matrixRows <- c('AICwi', 'AICcwi', 'BICwi') # rows in the matrix
   nodeWeightsSummed <- matrix(0, nrow = length(matrixRows), ncol = nnodes, dimnames = list(matrixRows, nodes)) # holds node weights summed; zero-filled b/c it is a sum?



More information about the Mattice-commits mailing list