[adegenet-commits] r570 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Feb 17 17:58:58 CET 2010
Author: jombart
Date: 2010-02-17 17:58:58 +0100 (Wed, 17 Feb 2010)
New Revision: 570
Added:
pkg/R/haploGen.R
Removed:
pkg/R/haploSim.R
Log:
renamed file
Copied: pkg/R/haploGen.R (from rev 569, pkg/R/haploSim.R)
===================================================================
--- pkg/R/haploGen.R (rev 0)
+++ pkg/R/haploGen.R 2010-02-17 16:58:58 UTC (rev 570)
@@ -0,0 +1,599 @@
+############
+## haploGen
+############
+##
+## N: number of sequences to simulate
+## mu: mutation rate per nucleotid per year
+## Tmax: periode of time to simulate
+## mean.gen.time, sd.gen.time: average time for transmission and its standard deviation (normal dist)
+## mean.repro, sd.repro: average number of transmissions and its standard deviation (normal dist)
+##
+haploGen <- function(seq.length=1000, mu=0.0001,
+ Tmax=50, mean.gen.time=5, sd.gen.time=1,
+ mean.repro=2, sd.repro=1, max.nb.haplo=1e3,
+ geo.sim=TRUE, grid.size=5, lambda.xy=0.5, matConnect=NULL,
+ ini.n=1, ini.xy=NULL){
+
+ ## CHECKS ##
+ if(!require(ape)) stop("The ape package is required.")
+
+
+ ## GENERAL VARIABLES ##
+ NUCL <- as.DNAbin(c("a","t","c","g"))
+ res <- list(seq=as.matrix(as.DNAbin(character(0))), dates=integer(), ances=character())
+ toExpand <- logical()
+ mu <- mu/365 # mutation rate by day
+ myGrid <- matrix(1:grid.size^2, ncol=grid.size, nrow=grid.size)
+
+
+ ## AUXILIARY FUNCTIONS ##
+ ## generate sequence from scratch
+ seq.gen <- function(){
+ res <- list(sample(NUCL, size=seq.length, replace=TRUE))
+ class(res) <- "DNAbin"
+ return(res)
+ }
+
+ ## create substitutions for defined SNPs
+ substi <- function(snp){
+ res <- sapply(snp, function(e) sample(setdiff(NUCL,e),1))
+ class(res) <- "DNAbin"
+ return(res)
+ }
+
+ ## duplicate a sequence (including possible mutations)
+ seq.dupli <- function(seq){
+ toChange <- as.logical(rbinom(n=seq.length, size=1, prob=mu))
+ res <- seq
+ if(sum(toChange)>0) {
+ res[toChange] <- substi(res[toChange])
+ }
+ return(res)
+ }
+
+ ## what is the name of the new sequences?
+ seqname.gen <- function(nb.new.seq){
+ res <- max(as.integer(rownames(res$seq)), 0) + 1:nb.new.seq
+ return(as.character(res))
+ }
+
+ ## how many days before duplication occurs ?
+ time.dupli <- function(){
+ res <- round(rnorm(1, mean=mean.gen.time, sd=sd.gen.time))
+ res[res<0] <- 0
+ return(res)
+ }
+
+ ## when duplication occurs?
+ date.dupli <- function(curDate){
+ res <- curDate + time.dupli()
+ return(res)
+ }
+
+ ## how many duplication/transmission occur?
+ nb.desc <- function(){
+ res <- round(rnorm(1, mean=mean.repro, sd=sd.repro))
+ res[res<0] <- 0
+ return(res)
+ }
+
+ ## where does an haplotype emerges in the first place?
+ xy.gen <- function(){
+ return(sample(1:grid.size, size=2, replace=TRUE))
+ }
+
+ ## where does a transmission occur (destination)?
+ if(is.null(matConnect)){ # use universal lambda param
+ xy.dupli <- function(cur.xy, nbLoc){
+ mvt <- rpois(2*nbLoc, lambda.xy) * sample(c(-1,1), size=2*nbLoc, replace=TRUE)
+ res <- t(matrix(mvt, nrow=2) + as.vector(cur.xy))
+ res[res < 1] <- 1
+ res[res > grid.size] <- grid.size
+ return(res)
+ }
+ } else { # use location-dependent proba of dispersal between locations
+ if(any(matConnect < 0)) stop("Negative values in matConnect (probabilities expected!)")
+ matConnect <- prop.table(matConnect,1)
+ xy.dupli <- function(cur.xy, nbLoc){
+ ## lambda.xy <- matConnect[cur.xy[1] , cur.xy[2]]
+ ## mvt <- rpois(2*nbLoc, lambda.xy) * sample(c(-1,1), size=2*nbLoc, replace=TRUE)
+ ## res <- t(matrix(mvt, nrow=2) + as.vector(cur.xy))
+ idxAncesLoc <- myGrid[cur.xy[1], cur.xy[2]]
+ newLoc <- sample(1:grid.size^2, size=nbLoc, prob=matConnect[idxAncesLoc,], replace=TRUE) # get new locations
+ res <- cbind(row(myGrid)[newLoc] , col(myGrid)[newLoc]) # get coords of new locations
+ return(res)
+ }
+ }
+
+
+ ## check result size and resize it if needed
+ resize.result <- function(){
+ curSize <- length(res$dates)
+ if(curSize <= max.nb.haplo) return(NULL)
+ toKeep <- rep(FALSE, curSize)
+ toKeep[sample(1:curSize, size=max.nb.haplo, replace=FALSE)] <- TRUE
+ removed.strains <- rownames(res$seq)[!toKeep]
+ res$seq <<- res$seq[toKeep,]
+ res$dates <<- res$dates[toKeep]
+ res$ances <<- res$ances[toKeep]
+ toExpand <<- toExpand[toKeep]
+ temp <- as.character(res$ances) %in% removed.strains
+ if(any(temp)) {
+ res$ances[temp] <<- NA
+ }
+
+ return(NULL)
+ }
+
+ ## check result size and resize it if needed - spatial version
+ resize.result.xy <- function(){
+ curSize <- length(res$dates)
+ if(curSize <= max.nb.haplo) return(NULL)
+ toKeep <- rep(FALSE, curSize)
+ toKeep[sample(1:curSize, size=max.nb.haplo, replace=FALSE)] <- TRUE
+ removed.strains <- rownames(res$seq)[!toKeep]
+ res$seq <<- res$seq[toKeep,]
+ res$dates <<- res$dates[toKeep]
+ res$ances <<- res$ances[toKeep]
+ res$xy <<- res$xy[toKeep,,drop=FALSE]
+ toExpand <<- toExpand[toKeep]
+ temp <- as.character(res$ances) %in% removed.strains
+ if(any(temp)) {
+ res$ances[temp] <<- NA
+ }
+
+ return(NULL)
+ }
+
+
+
+ ## MAIN SUB-FUNCTION: EXPANDING FROM ONE SEQUENCE - NON SPATIAL ##
+ expand.one.strain <- function(seq, date, idx){
+ toExpand[idx] <<- FALSE # this one is no longer to expand
+ nbDes <- nb.desc()
+ if(nbDes==0) return(NULL) # stop if no descendant
+ newDates <- sapply(1:nbDes, function(i) date.dupli(date)) # find dates for descendants
+ newDates <- newDates[newDates <= Tmax] # don't store future sequences
+ nbDes <- length(newDates)
+ if(nbDes==0) return(NULL) # stop if no suitable date
+ newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq)) # generate new sequences
+ class(newSeq) <- "DNAbin" # lists of DNAbin vectors must also have class "DNAbin"
+ newSeq <- as.matrix(newSeq) # list DNAbin -> matrix DNAbin with nbDes rows
+ rownames(newSeq) <- seqname.gen(nbDes) # find new labels for these new sequences
+ res$seq <<- rbind(res$seq, newSeq) # append to general output
+ res$dates <<- c(res$dates, newDates) # append to general output
+ res$ances <<- c(res$ances, rep(rownames(res$seq)[idx], nbDes)) # append to general output
+ toExpand <<- c(toExpand, rep(TRUE, nbDes))
+ return(NULL)
+ }
+
+
+ ## 2nd MAIN SUB-FUNCTION: EXPANDING FROM ONE SEQUENCE - SPATIAL ##
+ expand.one.strain.xy <- function(seq, date, idx, cur.xy){
+ toExpand[idx] <<- FALSE # this one is no longer to expand
+ nbDes <- nb.desc()
+ if(nbDes==0) return(NULL) # stop if no descendant
+ newDates <- sapply(1:nbDes, function(i) date.dupli(date)) # find dates for descendants
+ newDates <- newDates[newDates <= Tmax] # don't store future sequences
+ nbDes <- length(newDates)
+ if(nbDes==0) return(NULL) # stop if no suitable date
+ newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq)) # generate new sequences
+ class(newSeq) <- "DNAbin" # lists of DNAbin vectors must also have class "DNAbin"
+ newSeq <- as.matrix(newSeq) # list DNAbin -> matrix DNAbin with nbDes rows
+ rownames(newSeq) <- seqname.gen(nbDes) # find new labels for these new sequences
+ res$seq <<- rbind(res$seq, newSeq) # append to general output
+ res$dates <<- c(res$dates, newDates) # append to general output
+ res$ances <<- c(res$ances, rep(rownames(res$seq)[idx], nbDes)) # append to general output
+ res$xy <<- rbind(res$xy, xy.dupli(cur.xy, nbDes))
+ toExpand <<- c(toExpand, rep(TRUE, nbDes))
+ return(NULL)
+ }
+
+
+
+
+
+ ## PERFORM SIMULATIONS - NON SPATIAL CASE ##
+ if(!geo.sim){
+ ## initialization
+ res$seq <- as.matrix(seq.gen())
+ res$seq <- matrix(rep(res$seq, ini.n), byrow=TRUE, nrow=ini.n)
+ class(res$seq) <- "DNAbin"
+ rownames(res$seq) <- 1:ini.n
+ res$dates[1:ini.n] <- rep(0,ini.n)
+ res$ances[1:ini.n] <- rep(NA,ini.n)
+ toExpand <- rep(TRUE,ini.n)
+
+ ## simulations: isn't simplicity beautiful?
+ while(any(toExpand)){
+ idx <- min(which(toExpand))
+ expand.one.strain(res$seq[idx,], res$dates[idx], idx)
+ resize.result()
+ }
+
+
+ ## SHAPE AND RETURN OUTPUT ##
+ res$ances <- as.character(res$ances)
+ names(res$dates) <- rownames(res$seq)
+ class(res) <- "haploGen"
+ return(res)
+
+ } # END NON-SPATIAL SIMULATIONS
+
+
+
+
+ ## PERFORM SIMULATIONS - SPATIAL CASE ##
+ if(geo.sim){
+ ## some checks
+ if(!is.null(matConnect)) {
+ if(nrow(matConnect) != ncol(matConnect)) stop("matConnect is not a square matrix")
+ if(nrow(matConnect) != grid.size^2) stop("dimension of matConnect does not match grid size")
+ }
+
+ ## initialization
+ res$seq <- as.matrix(seq.gen())
+ res$seq <- matrix(rep(res$seq, ini.n), byrow=TRUE, nrow=ini.n)
+ class(res$seq) <- "DNAbin"
+ rownames(res$seq) <- 1:ini.n
+ res$dates[1:ini.n] <- rep(0,ini.n)
+ res$ances[1:ini.n] <- rep(NA,ini.n)
+ toExpand <- rep(TRUE,ini.n)
+
+ if(is.null(ini.xy)){
+ locStart <- xy.gen()
+ } else{
+ locStart <- as.vector(ini.xy)[1:2]
+ }
+ res$xy <- matrix(rep(locStart, ini.n), byrow=TRUE, nrow=ini.n)
+ colnames(res$xy) <- c("x","y")
+
+ ##cat("nb.strains","iteration.time",file="haploGenTime.out") # for debugging
+
+
+ ## simulations: isn't simplicity beautiful?
+ while(any(toExpand)){
+ ##time.previous <- Sys.time() # FOR DEBUGGING
+ idx <- min(which(toExpand))
+ expand.one.strain.xy(res$seq[idx,], res$dates[idx], idx, res$xy[idx,])
+ resize.result.xy()
+ ## VERBOSE OUTPUT FOR DEBUGGING ##
+ ## cat("\nNb strains:",length(res$ances),"/",max.nb.haplo)
+ ## cat("\nLatest date:", max(res$dates),"/",Tmax)
+ ## cat("\nRemaining strains to duplicate", sum(toExpand))
+ ## cat("\n",append=TRUE,file="haploGenTime.out")
+ ## iter.time <- as.numeric(difftime(Sys.time(),time.previous,unit="sec"))
+ ## time.previous <- Sys.time()
+ ## cat(c(length(res$ances), iter.time),append=
+ ##TRUE,file="haploGenTime.out")
+ ## END DEBUGGING VERBOSE ##
+ }
+
+ ## VERBOSE OUTPUT FOR DEBUGGING ##
+ ##cat("\nSimulation time stored in haploGenTime.out\n")
+
+ ## SHAPE AND RETURN OUTPUT ##
+ res$ances <- as.character(res$ances)
+ names(res$dates) <- rownames(res$seq)
+
+ class(res) <- "haploGen"
+ res$call <- match.call()
+ return(res)
+
+ } # end SPATIAL SIMULATIONS
+
+
+} # end haploGen
+
+
+
+
+
+
+
+
+##################
+## print.haploGen
+##################
+print.haploGen <- function(x, ...){
+
+ cat("\t\n========================")
+ cat("\t\n= simulated haplotypes =")
+ cat("\t\n= (haploGen object) =")
+ cat("\t\n========================\n")
+
+ cat("\nSize :", length(x$ances),"haplotypes")
+ cat("\nHaplotype length :", ncol(x$seq),"nucleotids")
+ cat("\nProportion of NA ancestors :", signif(mean(is.na(x$ances)),5))
+ cat("\nNumber of known ancestors :", sum(!is.na(x$ances)))
+ nbAncInSamp <- sum(x$ances %in% labels(x))
+ cat("\nNumber of ancestors within the sample :", nbAncInSamp)
+ cat("\nDate range :", min(x$dates,na.rm=TRUE),"-",max(x$dates,na.rm=TRUE))
+ ##nUniqSeq <- length(unique(apply(as.character(x$seq),1,paste,collapse="")))
+ ##cat("\nNumber of unique haplotypes :", nUniqSeq)
+
+ cat("\n\n= Content =")
+ for(i in 1:length(x)){
+ cat("\n")
+
+ cat(paste("$", names(x)[i], sep=""),"\n")
+ if(names(x)[i] %in% c("seq","call")) {
+ print(x[[i]])
+ } else if(names(x)[i]=="xy"){
+ print(head(x[[i]]))
+ if(nrow(x[[i]]>6)) cat(" ...\n")
+ } else cat(head(x[[i]],6), ifelse(length(x[[i]])>6,"...",""),"\n")
+ }
+
+
+ return(NULL)
+} # end print.haploGen
+
+
+
+
+
+
+##############
+## [.haploGen
+##############
+"[.haploGen" <- function(x,i,j,drop=FALSE){
+ res <- x
+ res$seq <- res$seq[i,]
+ res$ances <- res$ances[i]
+ res$dates <- res$dates[i]
+ if(!is.null(res$xy)) res$xy <- res$xy[i,,drop=FALSE]
+
+ return(res)
+}
+
+
+
+
+
+####################
+## na.omit.haploGen
+####################
+##
+## ACTUALLY THIS FUNCTION MAKES NO SENSE FOR NOW
+## AS STRAINS WITH NO ANCESTOR MAY BE ANCESTORS OF
+## OTHER STRAINS.
+##
+## na.omit.haploGen <- function(object, ...){
+## res <- object
+## isNA <- is.na(res$ances)
+## res$seq <- res$seq[!isNA,]
+## res$ances <- res$ances[!isNA]
+## res$dates <- res$dates[!isNA]
+## if(!is.null(res$xy)) res$xy <- res$xy[!isNA,]
+
+## return(res)
+## }
+
+
+
+
+##################
+## labels.haploGen
+##################
+labels.haploGen <- function(object, ...){
+ return(rownames(object$seq))
+}
+
+
+
+#######################
+## as.POSIXct.haploGen
+#######################
+as.POSIXct.haploGen <- function(x, tz="", origin=as.POSIXct("2000/01/01"), ...){
+ res <- as.POSIXct(x$dates*24*3600, origin=origin)
+ return(res)
+}
+
+
+
+
+#####################
+## seqTrack.haploGen
+#####################
+seqTrack.haploGen <- function(x, optim=c("min","max"), prox.mat=NULL, ...){
+ myX <- dist.dna(x$seq, model="raw")
+ x.names <- labels(x)
+ x.dates <- as.POSIXct(x)
+ seq.length <- ncol(x$seq)
+ myX <- myX * seq.length
+ prevCall <- as.list(x$call)
+ if(is.null(prevCall$mu)){
+ mu0 <- 0.0001
+ } else {
+ mu0 <- eval(prevCall$mu)
+ }
+ res <- seqTrack(myX, x.names=x.names, x.dates=x.dates, optim=optim, prox.mat=prox.mat,...)
+ return(res)
+}
+
+
+
+
+
+
+#####################
+## seqTrackG.haploGen
+#####################
+seqTrackG.haploGen <- function(x, optim=c("min","max"), ...){
+ myX <- dist.dna(x$seq, model="raw")
+ x.names <- labels(x)
+ x.dates <- as.POSIXct(x)
+ seq.length <- ncol(x$seq)
+ myX <- myX * seq.length
+ prevCall <- as.list(x$call)
+ if(is.null(prevCall$mu)){
+ mu0 <- 0.0001
+ } else {
+ mu0 <- eval(prevCall$mu)
+ }
+ res <- seqTrackG(myX, x.names=x.names, x.dates=x.dates, best=optim,...)
+ return(res)
+}
+
+
+
+
+
+
+##############################
+## optimize.seqTrack.haploGen
+##############################
+optimize.seqTrack.haploGen <- function(x, thres=0.2, optim=c("min","max"),
+ typed.chr=NULL, mu0=NULL, chr.length=NULL,
+ prox.mat=NULL, nstep=10, step.size=1e3,
+ rDate=.rTimeSeq, arg.rDate=NULL, rMissDate=.rUnifTimeSeq, ...){
+
+ x.names <- labels(x)
+ x.dates <- as.POSIXct(x)
+ seq.length <- ncol(x$seq)
+ myX <- dist.dna(x$seq, model="raw") * seq.length
+ prevCall <- as.list(x$call)
+ if(is.null(prevCall$mu)){
+ mu0 <- 0.0001
+ } else {
+ mu0 <- eval(prevCall$mu)
+ }
+
+ res <- optimize.seqTrack.default(x=myX, x.names=x.names, x.dates=x.dates,
+ typed.chr=typed.chr, mu0=mu0, chr.length=chr.length,
+ thres=thres, optim=optim, prox.mat=prox.mat,
+ nstep=nstep, step.size=step.size,
+ rDate=rDate, arg.rDate=arg.rDate, rMissDate=rMissDate, ...)
+} # end optimize.seqTrack.haploGen
+
+
+
+
+
+########################
+## as.seqTrack.haploGen
+########################
+as.seqTrack.haploGen <- function(x){
+ ## x.ori <- x
+ ## x <- na.omit(x)
+ toSetToNA <- x$dates==min(x$dates)
+ res <- list()
+ res$id <- labels(x)
+ res <- as.data.frame(res)
+ res$ances <- x$ances
+ res$ances[toSetToNA] <- NA
+ res$weight <- 1 # ??? have to recompute that...
+ res$weight[toSetToNA] <- NA
+ res$date <- as.POSIXct(x)[labels(x)]
+ res$ances.date <- as.POSIXct(x)[x$ances]
+
+ ## set results as indices rather than labels
+ res$ances <- match(res$ances, res$id)
+ res$id <- 1:length(res$id)
+
+ ## SET CLASS
+ class(res) <- c("seqTrack", "data.frame")
+
+ return(res)
+}
+
+
+
+
+################
+## plotHaploSim
+################
+plotHaploSim <- function(x, annot=FALSE, dateRange=NULL, col=NULL, bg="grey", add=FALSE, ...){
+
+ ## SOME CHECKS ##
+ if(class(x)!="haploGen") stop("x is not a haploGen object")
+ if(is.null(x$xy)) stop("x does not contain xy coordinates; try to simulate date")
+
+
+ ## ## CONVERSION TO A SEQTRACK-LIKE OBJECT ##
+ xy <- x$xy
+ res <- as.seqTrack.haploGen(x)
+
+ ## res <- list()
+ ## res$id <- labels(x)
+ ## res <- as.data.frame(res)
+ ## res$ances <- x$ances
+ ## res$ances[toSetToNA] <- NA
+ ## res$weight <- 1 # ??? have to recompute that...
+ ## res$weight[toSetToNA] <- NA
+ ## res$date <- as.POSIXct(x.ori)[labels(x)]
+ ## res$ances.date <- as.POSIXct(x.ori)[x$ances]
+ ## ## set results as indices rather than labels
+ ## res$ances <- match(res$ances, res$id)
+ ## res$id <- 1:length(res$id)
+
+
+ ## CALL TO PLOTSEQTRACK ##
+ plotSeqTrack(x=res, xy=xy, annot=annot, dateRange=dateRange,
+ col=col, bg=bg, add=add, ...)
+
+ return(invisible(res))
+
+} # end plotHaploSim
+
+
+
+
+
+
+###################
+## sample.haploGen
+###################
+sample.haploGen <- function(x, n, rDate=.rTimeSeq, arg.rDate=NULL){
+ ## EXTRACT THE SAMPLE ##
+ res <- x[sample(1:nrow(x$seq), n, replace=FALSE)]
+
+
+ ## RETRIEVE SOME PARAMETERS FROM HAPLOSIM CALL
+ prevCall <- as.list(x$call)
+ if(!is.null(prevCall$mu)){
+ mu0 <- eval(prevCall$mu)
+ } else {
+ mu0 <- 1e-04
+ }
+
+ if(!is.null(prevCall$seq.length)){
+ L <- eval(prevCall$seq.length)
+ } else {
+ L <- 1000
+ }
+
+ truedates <- res$dates
+ daterange <- diff(range(res$dates,na.rm=TRUE))
+
+ if(identical(rDate,.rTimeSeq)){
+ sampdates <- .rTimeSeq(mu0=mu0, L=L, n=length(truedates), maxNbDays=daterange/2)
+ } else{
+ arg.rDate$n <- n
+ sampdates <- do.call(rDate, arg.rDate)
+ }
+ sampdates <- truedates + abs(sampdates)
+
+ res$dates <- sampdates
+
+ return(res)
+} # end sample.haploGen
+
+
+
+
+
+
+
+
+##########################
+## as("haploGen", "graphNEL")
+##########################
+setOldClass("haploGen")
+setAs("haploGen", "graphNEL", def=function(from){
+ N <- length(from$ances)
+ areNA <- is.na(from$ances)
+ res <- ftM2graphNEL(cbind(from$ances[!areNA], (1:N)[!areNA]))
+ return(res)
+})
Deleted: pkg/R/haploSim.R
===================================================================
--- pkg/R/haploSim.R 2010-02-17 15:03:46 UTC (rev 569)
+++ pkg/R/haploSim.R 2010-02-17 16:58:58 UTC (rev 570)
@@ -1,599 +0,0 @@
-############
-## haploGen
-############
-##
-## N: number of sequences to simulate
-## mu: mutation rate per nucleotid per year
-## Tmax: periode of time to simulate
-## mean.gen.time, sd.gen.time: average time for transmission and its standard deviation (normal dist)
-## mean.repro, sd.repro: average number of transmissions and its standard deviation (normal dist)
-##
-haploGen <- function(seq.length=1000, mu=0.0001,
- Tmax=50, mean.gen.time=5, sd.gen.time=1,
- mean.repro=2, sd.repro=1, max.nb.haplo=1e3,
- geo.sim=TRUE, grid.size=5, lambda.xy=0.5, matConnect=NULL,
- ini.n=1, ini.xy=NULL){
-
- ## CHECKS ##
- if(!require(ape)) stop("The ape package is required.")
-
-
- ## GENERAL VARIABLES ##
- NUCL <- as.DNAbin(c("a","t","c","g"))
- res <- list(seq=as.matrix(as.DNAbin(character(0))), dates=integer(), ances=character())
- toExpand <- logical()
- mu <- mu/365 # mutation rate by day
- myGrid <- matrix(1:grid.size^2, ncol=grid.size, nrow=grid.size)
-
-
- ## AUXILIARY FUNCTIONS ##
- ## generate sequence from scratch
- seq.gen <- function(){
- res <- list(sample(NUCL, size=seq.length, replace=TRUE))
- class(res) <- "DNAbin"
- return(res)
- }
-
- ## create substitutions for defined SNPs
- substi <- function(snp){
- res <- sapply(snp, function(e) sample(setdiff(NUCL,e),1))
- class(res) <- "DNAbin"
- return(res)
- }
-
- ## duplicate a sequence (including possible mutations)
- seq.dupli <- function(seq){
- toChange <- as.logical(rbinom(n=seq.length, size=1, prob=mu))
- res <- seq
- if(sum(toChange)>0) {
- res[toChange] <- substi(res[toChange])
- }
- return(res)
- }
-
- ## what is the name of the new sequences?
- seqname.gen <- function(nb.new.seq){
- res <- max(as.integer(rownames(res$seq)), 0) + 1:nb.new.seq
- return(as.character(res))
- }
-
- ## how many days before duplication occurs ?
- time.dupli <- function(){
- res <- round(rnorm(1, mean=mean.gen.time, sd=sd.gen.time))
- res[res<0] <- 0
- return(res)
- }
-
- ## when duplication occurs?
- date.dupli <- function(curDate){
- res <- curDate + time.dupli()
- return(res)
- }
-
- ## how many duplication/transmission occur?
- nb.desc <- function(){
- res <- round(rnorm(1, mean=mean.repro, sd=sd.repro))
- res[res<0] <- 0
- return(res)
- }
-
- ## where does an haplotype emerges in the first place?
- xy.gen <- function(){
- return(sample(1:grid.size, size=2, replace=TRUE))
- }
-
- ## where does a transmission occur (destination)?
- if(is.null(matConnect)){ # use universal lambda param
- xy.dupli <- function(cur.xy, nbLoc){
- mvt <- rpois(2*nbLoc, lambda.xy) * sample(c(-1,1), size=2*nbLoc, replace=TRUE)
- res <- t(matrix(mvt, nrow=2) + as.vector(cur.xy))
- res[res < 1] <- 1
- res[res > grid.size] <- grid.size
- return(res)
- }
- } else { # use location-dependent proba of dispersal between locations
- if(any(matConnect < 0)) stop("Negative values in matConnect (probabilities expected!)")
- matConnect <- prop.table(matConnect,1)
- xy.dupli <- function(cur.xy, nbLoc){
- ## lambda.xy <- matConnect[cur.xy[1] , cur.xy[2]]
- ## mvt <- rpois(2*nbLoc, lambda.xy) * sample(c(-1,1), size=2*nbLoc, replace=TRUE)
- ## res <- t(matrix(mvt, nrow=2) + as.vector(cur.xy))
- idxAncesLoc <- myGrid[cur.xy[1], cur.xy[2]]
- newLoc <- sample(1:grid.size^2, size=nbLoc, prob=matConnect[idxAncesLoc,], replace=TRUE) # get new locations
- res <- cbind(row(myGrid)[newLoc] , col(myGrid)[newLoc]) # get coords of new locations
- return(res)
- }
- }
-
-
- ## check result size and resize it if needed
- resize.result <- function(){
- curSize <- length(res$dates)
- if(curSize <= max.nb.haplo) return(NULL)
- toKeep <- rep(FALSE, curSize)
- toKeep[sample(1:curSize, size=max.nb.haplo, replace=FALSE)] <- TRUE
- removed.strains <- rownames(res$seq)[!toKeep]
- res$seq <<- res$seq[toKeep,]
- res$dates <<- res$dates[toKeep]
- res$ances <<- res$ances[toKeep]
- toExpand <<- toExpand[toKeep]
- temp <- as.character(res$ances) %in% removed.strains
- if(any(temp)) {
- res$ances[temp] <<- NA
- }
-
- return(NULL)
- }
-
- ## check result size and resize it if needed - spatial version
- resize.result.xy <- function(){
- curSize <- length(res$dates)
- if(curSize <= max.nb.haplo) return(NULL)
- toKeep <- rep(FALSE, curSize)
- toKeep[sample(1:curSize, size=max.nb.haplo, replace=FALSE)] <- TRUE
- removed.strains <- rownames(res$seq)[!toKeep]
- res$seq <<- res$seq[toKeep,]
- res$dates <<- res$dates[toKeep]
- res$ances <<- res$ances[toKeep]
- res$xy <<- res$xy[toKeep,,drop=FALSE]
- toExpand <<- toExpand[toKeep]
- temp <- as.character(res$ances) %in% removed.strains
- if(any(temp)) {
- res$ances[temp] <<- NA
- }
-
- return(NULL)
- }
-
-
-
- ## MAIN SUB-FUNCTION: EXPANDING FROM ONE SEQUENCE - NON SPATIAL ##
- expand.one.strain <- function(seq, date, idx){
- toExpand[idx] <<- FALSE # this one is no longer to expand
- nbDes <- nb.desc()
- if(nbDes==0) return(NULL) # stop if no descendant
- newDates <- sapply(1:nbDes, function(i) date.dupli(date)) # find dates for descendants
- newDates <- newDates[newDates <= Tmax] # don't store future sequences
- nbDes <- length(newDates)
- if(nbDes==0) return(NULL) # stop if no suitable date
- newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq)) # generate new sequences
- class(newSeq) <- "DNAbin" # lists of DNAbin vectors must also have class "DNAbin"
- newSeq <- as.matrix(newSeq) # list DNAbin -> matrix DNAbin with nbDes rows
- rownames(newSeq) <- seqname.gen(nbDes) # find new labels for these new sequences
- res$seq <<- rbind(res$seq, newSeq) # append to general output
- res$dates <<- c(res$dates, newDates) # append to general output
- res$ances <<- c(res$ances, rep(rownames(res$seq)[idx], nbDes)) # append to general output
- toExpand <<- c(toExpand, rep(TRUE, nbDes))
- return(NULL)
- }
-
-
- ## 2nd MAIN SUB-FUNCTION: EXPANDING FROM ONE SEQUENCE - SPATIAL ##
- expand.one.strain.xy <- function(seq, date, idx, cur.xy){
- toExpand[idx] <<- FALSE # this one is no longer to expand
- nbDes <- nb.desc()
- if(nbDes==0) return(NULL) # stop if no descendant
- newDates <- sapply(1:nbDes, function(i) date.dupli(date)) # find dates for descendants
- newDates <- newDates[newDates <= Tmax] # don't store future sequences
- nbDes <- length(newDates)
- if(nbDes==0) return(NULL) # stop if no suitable date
- newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq)) # generate new sequences
- class(newSeq) <- "DNAbin" # lists of DNAbin vectors must also have class "DNAbin"
- newSeq <- as.matrix(newSeq) # list DNAbin -> matrix DNAbin with nbDes rows
- rownames(newSeq) <- seqname.gen(nbDes) # find new labels for these new sequences
- res$seq <<- rbind(res$seq, newSeq) # append to general output
- res$dates <<- c(res$dates, newDates) # append to general output
- res$ances <<- c(res$ances, rep(rownames(res$seq)[idx], nbDes)) # append to general output
- res$xy <<- rbind(res$xy, xy.dupli(cur.xy, nbDes))
- toExpand <<- c(toExpand, rep(TRUE, nbDes))
- return(NULL)
- }
-
-
-
-
-
- ## PERFORM SIMULATIONS - NON SPATIAL CASE ##
- if(!geo.sim){
- ## initialization
- res$seq <- as.matrix(seq.gen())
- res$seq <- matrix(rep(res$seq, ini.n), byrow=TRUE, nrow=ini.n)
- class(res$seq) <- "DNAbin"
- rownames(res$seq) <- 1:ini.n
- res$dates[1:ini.n] <- rep(0,ini.n)
- res$ances[1:ini.n] <- rep(NA,ini.n)
- toExpand <- rep(TRUE,ini.n)
-
- ## simulations: isn't simplicity beautiful?
- while(any(toExpand)){
- idx <- min(which(toExpand))
- expand.one.strain(res$seq[idx,], res$dates[idx], idx)
- resize.result()
- }
-
-
- ## SHAPE AND RETURN OUTPUT ##
- res$ances <- as.character(res$ances)
- names(res$dates) <- rownames(res$seq)
- class(res) <- "haploGen"
- return(res)
-
- } # END NON-SPATIAL SIMULATIONS
-
-
-
-
- ## PERFORM SIMULATIONS - SPATIAL CASE ##
- if(geo.sim){
- ## some checks
- if(!is.null(matConnect)) {
- if(nrow(matConnect) != ncol(matConnect)) stop("matConnect is not a square matrix")
- if(nrow(matConnect) != grid.size^2) stop("dimension of matConnect does not match grid size")
- }
-
- ## initialization
- res$seq <- as.matrix(seq.gen())
- res$seq <- matrix(rep(res$seq, ini.n), byrow=TRUE, nrow=ini.n)
- class(res$seq) <- "DNAbin"
- rownames(res$seq) <- 1:ini.n
- res$dates[1:ini.n] <- rep(0,ini.n)
- res$ances[1:ini.n] <- rep(NA,ini.n)
- toExpand <- rep(TRUE,ini.n)
-
- if(is.null(ini.xy)){
- locStart <- xy.gen()
- } else{
- locStart <- as.vector(ini.xy)[1:2]
- }
- res$xy <- matrix(rep(locStart, ini.n), byrow=TRUE, nrow=ini.n)
- colnames(res$xy) <- c("x","y")
-
- ##cat("nb.strains","iteration.time",file="haploGenTime.out") # for debugging
-
-
- ## simulations: isn't simplicity beautiful?
- while(any(toExpand)){
- ##time.previous <- Sys.time() # FOR DEBUGGING
- idx <- min(which(toExpand))
- expand.one.strain.xy(res$seq[idx,], res$dates[idx], idx, res$xy[idx,])
- resize.result.xy()
- ## VERBOSE OUTPUT FOR DEBUGGING ##
- ## cat("\nNb strains:",length(res$ances),"/",max.nb.haplo)
- ## cat("\nLatest date:", max(res$dates),"/",Tmax)
- ## cat("\nRemaining strains to duplicate", sum(toExpand))
- ## cat("\n",append=TRUE,file="haploGenTime.out")
- ## iter.time <- as.numeric(difftime(Sys.time(),time.previous,unit="sec"))
- ## time.previous <- Sys.time()
- ## cat(c(length(res$ances), iter.time),append=
- ##TRUE,file="haploGenTime.out")
- ## END DEBUGGING VERBOSE ##
- }
-
- ## VERBOSE OUTPUT FOR DEBUGGING ##
- ##cat("\nSimulation time stored in haploGenTime.out\n")
-
- ## SHAPE AND RETURN OUTPUT ##
- res$ances <- as.character(res$ances)
- names(res$dates) <- rownames(res$seq)
-
- class(res) <- "haploGen"
- res$call <- match.call()
- return(res)
-
- } # end SPATIAL SIMULATIONS
-
-
-} # end haploGen
-
-
-
-
-
-
-
-
-##################
-## print.haploGen
-##################
-print.haploGen <- function(x, ...){
-
- cat("\t\n========================")
- cat("\t\n= simulated haplotypes =")
- cat("\t\n= (haploGen object) =")
- cat("\t\n========================\n")
-
- cat("\nSize :", length(x$ances),"haplotypes")
- cat("\nHaplotype length :", ncol(x$seq),"nucleotids")
- cat("\nProportion of NA ancestors :", signif(mean(is.na(x$ances)),5))
- cat("\nNumber of known ancestors :", sum(!is.na(x$ances)))
- nbAncInSamp <- sum(x$ances %in% labels(x))
- cat("\nNumber of ancestors within the sample :", nbAncInSamp)
- cat("\nDate range :", min(x$dates,na.rm=TRUE),"-",max(x$dates,na.rm=TRUE))
- ##nUniqSeq <- length(unique(apply(as.character(x$seq),1,paste,collapse="")))
- ##cat("\nNumber of unique haplotypes :", nUniqSeq)
-
- cat("\n\n= Content =")
- for(i in 1:length(x)){
- cat("\n")
-
- cat(paste("$", names(x)[i], sep=""),"\n")
- if(names(x)[i] %in% c("seq","call")) {
- print(x[[i]])
- } else if(names(x)[i]=="xy"){
- print(head(x[[i]]))
- if(nrow(x[[i]]>6)) cat(" ...\n")
- } else cat(head(x[[i]],6), ifelse(length(x[[i]])>6,"...",""),"\n")
- }
-
-
- return(NULL)
-} # end print.haploGen
-
-
-
-
-
-
-##############
-## [.haploGen
-##############
-"[.haploGen" <- function(x,i,j,drop=FALSE){
- res <- x
- res$seq <- res$seq[i,]
- res$ances <- res$ances[i]
- res$dates <- res$dates[i]
- if(!is.null(res$xy)) res$xy <- res$xy[i,,drop=FALSE]
-
- return(res)
-}
-
-
-
-
-
-####################
-## na.omit.haploGen
-####################
-##
-## ACTUALLY THIS FUNCTION MAKES NO SENSE FOR NOW
-## AS STRAINS WITH NO ANCESTOR MAY BE ANCESTORS OF
-## OTHER STRAINS.
-##
-## na.omit.haploGen <- function(object, ...){
-## res <- object
-## isNA <- is.na(res$ances)
-## res$seq <- res$seq[!isNA,]
-## res$ances <- res$ances[!isNA]
-## res$dates <- res$dates[!isNA]
-## if(!is.null(res$xy)) res$xy <- res$xy[!isNA,]
-
-## return(res)
-## }
-
-
-
-
-##################
-## labels.haploGen
-##################
-labels.haploGen <- function(object, ...){
- return(rownames(object$seq))
-}
-
-
-
-#######################
-## as.POSIXct.haploGen
-#######################
-as.POSIXct.haploGen <- function(x, tz="", origin=as.POSIXct("2000/01/01"), ...){
- res <- as.POSIXct(x$dates*24*3600, origin=origin)
- return(res)
-}
-
-
-
-
-#####################
-## seqTrack.haploGen
-#####################
-seqTrack.haploGen <- function(x, optim=c("min","max"), prox.mat=NULL, ...){
- myX <- dist.dna(x$seq, model="raw")
- x.names <- labels(x)
- x.dates <- as.POSIXct(x)
- seq.length <- ncol(x$seq)
- myX <- myX * seq.length
- prevCall <- as.list(x$call)
- if(is.null(prevCall$mu)){
- mu0 <- 0.0001
- } else {
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/adegenet -r 570
More information about the adegenet-commits
mailing list