[Mattice-commits] r71 - in pkg: . R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Dec 3 17:23:13 CET 2008
Author: andrew_hipp
Date: 2008-12-03 17:23:13 +0100 (Wed, 03 Dec 2008)
New Revision: 71
Added:
pkg/R/ouSimHead.R
pkg/R/ouchSim.R
Modified:
pkg/TODO
Log:
beginning to fix up simulation and split off functions for different object classes
Added: pkg/R/ouSimHead.R
===================================================================
--- pkg/R/ouSimHead.R (rev 0)
+++ pkg/R/ouSimHead.R 2008-12-03 16:23:13 UTC (rev 71)
@@ -0,0 +1,10 @@
+ouSim <- function(tree, ...) {
+ switch(class(tree),
+ phylo = ouSim.phylo(tree, ...),
+ ouchtree = ouSim.ouchtree(tree, ...),
+ brown = ouSim.brown(tree, ...),
+ hansen = ouSim.hansen(tree, ...),
+ batchHansen = ouSim.batchHansen(tree, ...),
+ "Unrecognized tree class"
+ )
+}
\ No newline at end of file
Added: pkg/R/ouchSim.R
===================================================================
--- pkg/R/ouchSim.R (rev 0)
+++ pkg/R/ouchSim.R 2008-12-03 16:23:13 UTC (rev 71)
@@ -0,0 +1,73 @@
+ouchSim <- function(tree, rootState = 0, shiftBranches = NULL, shiftStates = NULL, alpha = 0, variance = 1, theta = rootState, steps = 1000) {
+## function to plot a simulated dataset under brownian motion or Ornstein-Uhlenbeck (OU) model
+## Arguments:
+## tree is an ouch-style tree
+## alpha and theta are either single values or vectors of length (length(branchList))
+## shiftBranches is a vector indicating any branches at which an OU or brownian motion model has a determined shift in ancestral state
+## shiftStates is a vector of length = length(shiftBranches) indicaing the ancestral states for the determined break points
+## Models:
+## "OU" is a brownian motion or OU model
+## "platt" is the Platt-Molitor model, in which the only phylogenetic effect is the mean and variance for a given branch
+## Andrew Hipp (ahipp at mortonarb.org), January 2008
+## July 2008: modified to accomodate a vector of alpha and theta corresponding to branches
+
+preorderOU <- function(branchList, phy, startNode, startState, alpha, theta) {
+## Recursive function to generate the data under a Brownian motion or OU model (not needed in the Platt model)
+ startBranch = which(phy$edge[,2] == startNode)
+ if(!identical(shiftStates, NULL)) {
+ if(startBranch %in% shiftBranches) startState <- shiftStates[match(startBranch, shiftBranches)] }
+ message(paste('Working on branch',startBranch,'with starting state',startState))
+ branchList[[startBranch]][1] <- startState
+ for (i in 2:length(branchList[[startBranch]])) {
+ branchList[[startBranch]][i] <- branchList[[startBranch]][i - 1] + branchList[[startBranch]][i] + alpha[startBranch] / steps * (theta[startBranch] - branchList[[startBranch]][i - 1]) }
+ endState = branchList[[startBranch]][length(branchList[[startBranch]])]
+ daughterBranches <- phy$edge[which(phy$edge[, 1] == startNode), 2]
+ if(!identical(as.integer(daughterBranches), integer(0))) {
+ for(i in daughterBranches) branchList <- preorderOU(branchList, phy, i, endState, alpha, theta) }
+ return(branchList) }
+
+## 1. initialize
+
+if(length(alpha) == 1) alpha <- rep(alpha, length(phy$edge.length))
+if(length(theta) == 1) theta <- rep(theta, length(phy$edge.length))
+## The following creates a list of random draws from the normal distribution, with standard deviation scaled by total tree length and the number of draws for each branch equal to the number of steps in that branch. If there is a separate variance for each branch, I assume the variance is expressed in tree-length units, not branch-length units, so the scaling is the same for all branches (viz., sd = sqrt(variance / steps))
+if(model == "OU") {
+ if(length(variance) == 1) branchList <- lapply(round(phy$edge.length*steps), rnorm, sd = sqrt(variance/steps))
+ else {
+ branchList <- vector("list", length(phy$edge.length))
+ brLengths <- round(phy$edge.length*steps)
+ brSD <- sqrt(variance/steps)
+ for(i in seq(length(brLengths))) branchList[[i]] <- rnorm(n = brLengths[i], mean = 0, sd = brSD[i]) }}
+if(model == "platt") {
+ if(length(variance) == 1) variance <- rep(variance, length(phy$edge.length))
+ branchList <- vector("list", length(phy$edge.length))
+ if(length(branchMeans) == 1) branchMeans <- rep(branchMeans, length(phy$edge))
+ brLengths <- round(phy$edge.length*steps)
+ brSD <- sqrt(variance)
+ for(i in seq(length(brLengths))) branchList[[i]] <- rnorm(n = brLengths[i], mean = branchMeans[i], sd = brSD[i])
+ }
+timesList <- lapply(branchList, seq)
+rootNode = length(phy$tip.label) + 1
+
+startTimes <- max(branching.times(phy)) - branching.times(phy) ## WORKS, BUT ASSUMES ULTRAMETRICITY
+startTimes <- round(startTimes[match(phy$edge[,1], names(startTimes))] * steps)
+for (i in 1:length(timesList)) timesList[[i]] <- timesList[[i]] + startTimes[i]
+
+## 3. traverse
+if(model == "OU") {
+ for(i in which(phy$edge[, 1] == rootNode)) {
+ branchList <- preorderOU(branchList, phy, phy$edge[i,2], rootState, alpha, theta) }}
+if(model == "platt") branchList <- branchList
+
+value <- (list(branchList = branchList, timesList = timesList, steps = steps, parameters = list(rootState = rootState, alpha = alpha, variance = variance, theta = theta)))
+class(value) <- "ouSim"
+return(value)}
+
+plot.ouSim <- function(ouSim, nodeColor = "blue", nodeDotSize = 1.4, colors = rep("black", length(ouSim$branchList)), ...) {
+## Plot the data by calling plot(ouSimObject)
+## To plot different clades, set the colors vector according to the branches in the original
+## only passes the ... along to lines
+ branches = length(ouSim$branchList)
+ plot(1:ouSim$steps, ylim = range(unlist(ouSim$branchList)), type = "n", ylab = "Trait value", xlab = "Time")
+ for(i in 1:branches) lines(ouSim$timesList[[i]], ouSim$branchList[[i]], col = colors[i], ...)
+ for(i in 1:branches) points(ouSim$timesList[[i]][1], ouSim$branchList[[i]][1], pch = 19, col = nodeColor, cex = nodeDotSize) }
Modified: pkg/TODO
===================================================================
--- pkg/TODO 2008-12-03 16:22:39 UTC (rev 70)
+++ pkg/TODO 2008-12-03 16:23:13 UTC (rev 71)
@@ -1 +1,2 @@
-make allPossibleRegimes more efficient when maxNodes < length(nodes)
\ No newline at end of file
+1. make allPossibleRegimes more efficient when maxNodes < length(nodes)
+2. is the k-matrix a sensible statistic? fix it so it works on the revised batchHansen
\ No newline at end of file
More information about the Mattice-commits
mailing list