[adegenet-commits] r628 - in pkg: R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu May 13 13:28:55 CEST 2010
Author: jombart
Date: 2010-05-13 13:28:55 +0200 (Thu, 13 May 2010)
New Revision: 628
Modified:
pkg/R/haploGen.R
pkg/man/haploGen.Rd
Log:
Getting there...
Modified: pkg/R/haploGen.R
===================================================================
--- pkg/R/haploGen.R 2010-05-13 09:04:02 UTC (rev 627)
+++ pkg/R/haploGen.R 2010-05-13 11:28:55 UTC (rev 628)
@@ -8,7 +8,7 @@
## mean.gen.time, sd.gen.time: average time for transmission and its standard deviation (normal dist)
## mean.repro, sd.repro: average number of transmissions and its standard deviation (normal dist)
##
-haploGen <- function(seq.length=1000, mu=0.0001, t.max=50,
+haploGen <- function(seq.length=1000, mu=0.0001, t.max=20,
gen.time=function(){round(rnorm(1,5,1))},
repro=function(){round(rnorm(1,2,1))}, max.nb.haplo=1e3,
geo.sim=TRUE, grid.size=5, lambda.xy=0.5,
@@ -60,13 +60,14 @@
}
## duplicate a sequence (including possible mutations)
- seq.dupli <- function(seq){
- toChange <- as.logical(rbinom(n=seq.length, size=1, prob=mu()))
- res <- seq
+ seq.dupli <- function(seq, T){ # T is the number of time units between ancestor and decendent
+ ## toChange <- as.logical(rbinom(n=seq.length, size=1, prob=mu())) # can be faster
+ temp <- rbinom(n=1, size=seq.length*T, prob=mu()) # total number of mutations
+ toChange <- sample(1:seq.length, size=temp, replace=FALSE)
if(sum(toChange)>0) {
- res[toChange] <- substi(res[toChange])
+ seq[toChange] <- substi(seq[toChange])
}
- return(res)
+ return(seq)
}
## what is the name of the new sequences?
@@ -173,7 +174,7 @@
newDates <- newDates[newDates <= t.max] # don't store future sequences
nbDes <- length(newDates)
if(nbDes==0) return(NULL) # stop if no suitable date
- newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq)) # generate new sequences
+ newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq, newDates[i]-date)) # generate new sequences
class(newSeq) <- "DNAbin" # lists of DNAbin vectors must also have class "DNAbin"
newSeq <- as.matrix(newSeq) # list DNAbin -> matrix DNAbin with nbDes rows
rownames(newSeq) <- seqname.gen(nbDes) # find new labels for these new sequences
@@ -194,7 +195,7 @@
newDates <- newDates[newDates <= t.max] # don't store future sequences
nbDes <- length(newDates)
if(nbDes==0) return(NULL) # stop if no suitable date
- newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq)) # generate new sequences
+ newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq, newDates[i]-date)) # generate new sequences
class(newSeq) <- "DNAbin" # lists of DNAbin vectors must also have class "DNAbin"
newSeq <- as.matrix(newSeq) # list DNAbin -> matrix DNAbin with nbDes rows
rownames(newSeq) <- seqname.gen(nbDes) # find new labels for these new sequences
@@ -231,6 +232,7 @@
res$id <- as.character(1:length(res$ances))
res$ances <- as.character(res$ances)
names(res$dates) <- rownames(res$seq)
+ res$call <- match.call()
class(res) <- "haploGen"
return(res)
Modified: pkg/man/haploGen.Rd
===================================================================
--- pkg/man/haploGen.Rd 2010-05-13 09:04:02 UTC (rev 627)
+++ pkg/man/haploGen.Rd 2010-05-13 11:28:55 UTC (rev 628)
@@ -13,33 +13,46 @@
\alias{sample.haploGen}
%\alias{as,haploGen,graphNEL-method}
\alias{coerce,haploGen,graphNEL-method}
-\title{Simulation of populations of haplotypes}
+\title{Simulation of genealogies of haplotypes}
\description{
- Important: these functions are parts of a publication currently under
- review. They will be documented once accepted for publication.
- Please email the author if you are interested in using it.
+ The function \code{haploGen} implements simulations of genealogies of
+ haplotypes. This forward-time, individual-based simulation tool allow
+ haplotypes to replicate and mutate according to specified parameters,
+ and keeps track of entire genealogies.
+
+ Simulations can be spatially explicit or not (see \code{geo.sim}
+ argument). In the first case, haplotypes are assigned to locations on
+ a regular grip. New haplotypes disperse from their ancestor's location
+ according to a random Poisson diffusion, or alternatively according to
+ a pre-specified migration scheme. This tool does not allow for
+ simulating selection or linkage disequilibrium.
+
+ Produced objects are lists with the class \code{haploGen}; see 'value'
+ section for more information on this class. Other functions are
+ available to print, plot, subset, sample or convert \code{haploGen}
+ objects. A seqTrack method is also provided for analysing
+ \code{haploGen} objects.
}
\usage{
-haploGen(seq.length=1000, mu=0.0001, t.max=50,
- gen.time=function(){round(rnorm(1,5,1))},
- repro=function(){round(rnorm(1,2,1))}, max.nb.haplo=1e3,
- geo.sim=TRUE, grid.size=5, lambda.xy=0.5,
- mat.connect=NULL,
- ini.n=1, ini.xy=NULL)
+haploGen(seq.length=1000, mu=0.0001, t.max=20,
+ gen.time=function(){round(rnorm(1,5,1))},
+ repro=function(){round(rnorm(1,2,1))}, max.nb.haplo=1e3,
+ geo.sim=TRUE, grid.size=5, lambda.xy=0.5,
+ mat.connect=NULL, ini.n=1, ini.xy=NULL)
-print.haploGen(x, ...)
+print.haploGen(x, \ldots)
\method{[}{haploGen}(x, i, j, drop=FALSE)
-\method{labels}{haploGen}(object, ...)
+\method{labels}{haploGen}(object, \ldots)
-as.POSIXct.haploGen(x, tz="", origin=as.POSIXct("2000/01/01"), ...)
+as.POSIXct.haploGen(x, tz="", origin=as.POSIXct("2000/01/01"), \ldots)
-\method{seqTrack}{haploGen}(x, best=c("min","max"), prox.mat=NULL, ...)
+\method{seqTrack}{haploGen}(x, best=c("min","max"), prox.mat=NULL, \ldots)
as.seqTrack.haploGen(x)
-plotHaploGen(x, annot=FALSE, date.range=NULL, col=NULL, bg="grey", add=FALSE, ...)
+plotHaploGen(x, annot=FALSE, date.range=NULL, col=NULL, bg="grey", add=FALSE, \ldots)
sample.haploGen(x, n)
@@ -47,24 +60,105 @@
}
\arguments{
- \item{seq.length}{}
- \item{mu}{}
- \item{t.max}{}
- \item{gen.time}{}
- \item{repro}{}
- \item{max.nb.haplo}{}
- \item{geo.sim}{}
- \item{grid.size}{}
- \item{lambda.xy}{}
- \item{mat.connect}{}
- \item{ini.n}{}
- \item{ini.xy}{}
- \item{x,object}{}
- \item{i,j}{}
- \item{best}{}
- \item{prox.mat}{}
- \item{annot,date.range,col,bg,add}{}
- \item{n}{}
-\item{from,to}{}
+ \item{seq.length}{an integer indicating the length of the simulated
+haplotypes, in number of nucleotides.}
+ \item{mu}{the mutation rate, in number of mutation per site and per
+time unit. Can be a (fixed) number or a function returning a number
+(then called for each replication event).}
+ \item{t.max}{an integer indicating the maximum number of time units to
+ run the simulation for.}
+ \item{gen.time}{an integer indicating the generation time, in number
+of time units. Can be a (fixed) number or a function returning a number
+(then called for each reproduction event).}
+ \item{repro}{an integer indicating the number of descendents per
+haplotype. Can be a (fixed) number or a function returning a number
+(then called for each reproduction event).}
+ \item{max.nb.haplo}{an integer indicating the maximum number of
+haplotypes handled at any time of the simulation, used to control the
+size of the produced object. Larger number will lead to slower
+simulations. If this number is exceeded, the genealogy is prunded to as
+to keep this number of haplotypes.}
+ \item{geo.sim}{a logical stating whether simulations should be
+spatially explicit (TRUE, default) or not (FALSE). Spatially-explicit
+simulations are slightly slower than their non-spatial counterpart.}
+ \item{grid.size}{the size of the square grid of possible locations for
+spatial simulations. The total number of locations will be this number
+squared.}
+ \item{lambda.xy}{the parameter of the Poisson distribution used to
+determine dispersion in x and y axes.}
+ \item{mat.connect}{a matrix of connectivity describing migration
+amongts all pairs of locations. \code{mat.connect[i,j]} indicates the
+probability, being in 'i', to migrate to 'j'. The rows of this matrix
+thus sum to 1. It has as many rows and columns as there are locations,
+with row 'i' / column 'j' corresponding to locations number 'i' and 'j'.
+Locations are numbered as in a matrix in which rows and columns are
+respectively x and y coordinates. For instance, in a 5x5 grid, locations
+are numbered as in \code{matrix(1:25,5,5)}.
+}
+ \item{ini.n}{an integer specifying the number of (identical)
+haplotypes to initiate the simulation}
+ \item{ini.xy}{a vector of two integers giving the x/y coordinates of the initial haplotype.}
+ \item{x,object}{\code{haploGen} objects.}
+ \item{i,j, drop}{\code{i} is a vector used for subsetting the object. For
+instance, \code{i=1:3} will retain only the first three haplotypes of the
+genealogy. \code{j} and \code{drop} are only provided for compatibility,
+but not used.}
+ \item{best, prox.mat}{arguments to be passed to the
+\code{\link{seqTrack}} function. See documentation of
+\code{\link{seqTrack}} for more information.}
+ \item{annot,date.range,col,bg,add}{arguments to be passed to \code{\link{plotSeqTrack}}.}
+ \item{n}{an integer indicating the number of haplotypes to be retained
+in the sample}
+ \item{from,to}{arguments of the conversion function, for converting a
+\code{haploGen} object into a \linkS4class{graphNEL}.}
}
\author{Thibaut Jombart \email{t.jombart at imperial.ac.uk}}
+\references{
+Jombart T, Eggo R, Dodd P, Balloux F (accepted) Reconstructing disease
+outbreaks from genetic data: a graph approach. Heredity.
+}
+\value{
+=== haploGen class ===\cr
+\code{haploGen} objects are lists containing the following slots:\cr
+- seq: DNA sequences in the DNAbin matrix format\cr
+- dates: dates of appearance of the haplotypes\cr
+- ances: a vector of integers giving the index of each haplotype's
+ancestor\cr
+- id: a vector of integers giving the index of each haplotype\cr
+- xy: (optional) a matrix of spatial coordinates of haplotypes\cr
+- call: the matched call
+
+
+=== misc functions ===\cr
+- as.POSIXct: returns a vector of dates with POSIXct format\cr
+- labels: returns the labels of the haplotypes\cr
+- as.seqTrack: returns a seqTrack object. Note that this object is not a
+proper seqTrack analysis, but just a format conversion convenient for
+plotting \code{haploGen} objects.
+}
+\details{
+=== Dependences with other packages ===\cr
+- ape package is required as it implements efficient handling of DNA
+sequences used in \code{haploGen} objects. To install this package,
+simply type:\cr
+\code{install.packages("ape")}
+
+- for various purposes including plotting, converting genealogies to
+graphs (\linkS4class{graphNEL} class) can be useful. This requires the
+packages graph, and possibly Rgraphviz for plotting. These packages are
+not on CRAN, but on Bioconductor. To install them, use:\cr
+source("http://bioconductor.org/biocLite.R")\cr
+biocLite("graph")\cr
+biocLite("Rgraphviz")
+
+See the respective vignettes for more information on using these packages.
+
+
+ === Converting haploGen objects to graphs ===\cr
+ \code{haploGen} objects can be converted to \linkS4class{graphNEL}
+ objects, which can in turn be plotted and manipulated using classical
+ graph tools. Simply use 'as(x, "graphNEL")' where 'x' is a
+ \code{haploGen} object. This functionality requires the \code{graph}
+ package (see 'details').
+}
+
More information about the adegenet-commits
mailing list