[adegenet-commits] r570 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Feb 17 17:58:58 CET 2010


Author: jombart
Date: 2010-02-17 17:58:58 +0100 (Wed, 17 Feb 2010)
New Revision: 570

Added:
   pkg/R/haploGen.R
Removed:
   pkg/R/haploSim.R
Log:
renamed file


Copied: pkg/R/haploGen.R (from rev 569, pkg/R/haploSim.R)
===================================================================
--- pkg/R/haploGen.R	                        (rev 0)
+++ pkg/R/haploGen.R	2010-02-17 16:58:58 UTC (rev 570)
@@ -0,0 +1,599 @@
+############
+## haploGen
+############
+##
+## N: number of sequences to simulate
+## mu: mutation rate per nucleotid per year
+## Tmax: periode of time to simulate
+## mean.gen.time, sd.gen.time: average time for transmission and its standard deviation (normal dist)
+## mean.repro, sd.repro: average number of transmissions and its standard deviation (normal dist)
+##
+haploGen <- function(seq.length=1000, mu=0.0001,
+                     Tmax=50, mean.gen.time=5, sd.gen.time=1,
+                     mean.repro=2, sd.repro=1, max.nb.haplo=1e3,
+                     geo.sim=TRUE, grid.size=5, lambda.xy=0.5, matConnect=NULL,
+                     ini.n=1, ini.xy=NULL){
+
+    ## CHECKS ##
+    if(!require(ape)) stop("The ape package is required.")
+
+
+    ## GENERAL VARIABLES ##
+    NUCL <- as.DNAbin(c("a","t","c","g"))
+    res <- list(seq=as.matrix(as.DNAbin(character(0))), dates=integer(), ances=character())
+    toExpand <- logical()
+    mu <- mu/365 # mutation rate by day
+    myGrid <- matrix(1:grid.size^2, ncol=grid.size, nrow=grid.size)
+
+
+    ## AUXILIARY FUNCTIONS ##
+    ## generate sequence from scratch
+    seq.gen <- function(){
+        res <- list(sample(NUCL, size=seq.length, replace=TRUE))
+        class(res) <- "DNAbin"
+        return(res)
+    }
+
+    ## create substitutions for defined SNPs
+    substi <- function(snp){
+        res <- sapply(snp, function(e) sample(setdiff(NUCL,e),1))
+        class(res) <- "DNAbin"
+        return(res)
+    }
+
+    ## duplicate a sequence (including possible mutations)
+    seq.dupli <- function(seq){
+        toChange <- as.logical(rbinom(n=seq.length, size=1, prob=mu))
+        res <- seq
+        if(sum(toChange)>0) {
+            res[toChange] <- substi(res[toChange])
+        }
+        return(res)
+    }
+
+    ## what is the name of the new sequences?
+    seqname.gen <- function(nb.new.seq){
+        res <- max(as.integer(rownames(res$seq)), 0) + 1:nb.new.seq
+        return(as.character(res))
+    }
+
+    ## how many days before duplication occurs ?
+    time.dupli <- function(){
+        res <- round(rnorm(1, mean=mean.gen.time, sd=sd.gen.time))
+        res[res<0] <- 0
+        return(res)
+    }
+
+    ## when duplication occurs?
+    date.dupli <- function(curDate){
+        res <- curDate + time.dupli()
+        return(res)
+    }
+
+    ## how many duplication/transmission occur?
+    nb.desc <- function(){
+        res <- round(rnorm(1, mean=mean.repro, sd=sd.repro))
+        res[res<0] <- 0
+        return(res)
+    }
+
+    ## where does an haplotype emerges in the first place?
+    xy.gen <- function(){
+        return(sample(1:grid.size, size=2, replace=TRUE))
+    }
+
+    ## where does a transmission occur (destination)?
+    if(is.null(matConnect)){ # use universal lambda param
+        xy.dupli <- function(cur.xy, nbLoc){
+            mvt <- rpois(2*nbLoc, lambda.xy) * sample(c(-1,1), size=2*nbLoc, replace=TRUE)
+            res <- t(matrix(mvt, nrow=2) + as.vector(cur.xy))
+            res[res < 1] <- 1
+            res[res > grid.size] <- grid.size
+            return(res)
+        }
+    } else { # use location-dependent proba of dispersal between locations
+        if(any(matConnect < 0)) stop("Negative values in matConnect (probabilities expected!)")
+        matConnect <- prop.table(matConnect,1)
+        xy.dupli <- function(cur.xy, nbLoc){
+            ## lambda.xy <- matConnect[cur.xy[1] , cur.xy[2]]
+            ##             mvt <- rpois(2*nbLoc, lambda.xy) * sample(c(-1,1), size=2*nbLoc, replace=TRUE)
+            ##             res <- t(matrix(mvt, nrow=2) + as.vector(cur.xy))
+            idxAncesLoc <- myGrid[cur.xy[1], cur.xy[2]]
+            newLoc <- sample(1:grid.size^2, size=nbLoc, prob=matConnect[idxAncesLoc,], replace=TRUE) # get new locations
+            res <- cbind(row(myGrid)[newLoc] , col(myGrid)[newLoc]) # get coords of new locations
+            return(res)
+        }
+    }
+
+
+    ## check result size and resize it if needed
+    resize.result <- function(){
+        curSize <- length(res$dates)
+        if(curSize <= max.nb.haplo) return(NULL)
+        toKeep <- rep(FALSE, curSize)
+        toKeep[sample(1:curSize, size=max.nb.haplo, replace=FALSE)] <- TRUE
+        removed.strains <- rownames(res$seq)[!toKeep]
+        res$seq <<- res$seq[toKeep,]
+        res$dates <<- res$dates[toKeep]
+        res$ances <<- res$ances[toKeep]
+        toExpand <<- toExpand[toKeep]
+        temp <- as.character(res$ances) %in% removed.strains
+        if(any(temp)) {
+            res$ances[temp] <<- NA
+        }
+
+        return(NULL)
+    }
+
+    ## check result size and resize it if needed - spatial version
+    resize.result.xy <- function(){
+        curSize <- length(res$dates)
+        if(curSize <= max.nb.haplo) return(NULL)
+        toKeep <- rep(FALSE, curSize)
+        toKeep[sample(1:curSize, size=max.nb.haplo, replace=FALSE)] <- TRUE
+        removed.strains <- rownames(res$seq)[!toKeep]
+        res$seq <<- res$seq[toKeep,]
+        res$dates <<- res$dates[toKeep]
+        res$ances <<- res$ances[toKeep]
+        res$xy <<- res$xy[toKeep,,drop=FALSE]
+        toExpand <<- toExpand[toKeep]
+        temp <- as.character(res$ances) %in% removed.strains
+        if(any(temp)) {
+            res$ances[temp] <<- NA
+        }
+
+        return(NULL)
+    }
+
+
+
+    ## MAIN SUB-FUNCTION: EXPANDING FROM ONE SEQUENCE - NON SPATIAL ##
+    expand.one.strain <- function(seq, date, idx){
+        toExpand[idx] <<- FALSE # this one is no longer to expand
+        nbDes <- nb.desc()
+        if(nbDes==0) return(NULL) # stop if no descendant
+        newDates <- sapply(1:nbDes, function(i) date.dupli(date)) # find dates for descendants
+        newDates <- newDates[newDates <= Tmax] # don't store future sequences
+        nbDes <- length(newDates)
+        if(nbDes==0) return(NULL) # stop if no suitable date
+        newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq)) # generate new sequences
+        class(newSeq) <- "DNAbin" # lists of DNAbin vectors must also have class "DNAbin"
+        newSeq <- as.matrix(newSeq) # list DNAbin -> matrix DNAbin with nbDes rows
+        rownames(newSeq) <- seqname.gen(nbDes) # find new labels for these new sequences
+        res$seq <<- rbind(res$seq, newSeq) # append to general output
+        res$dates <<- c(res$dates, newDates) # append to general output
+        res$ances <<- c(res$ances, rep(rownames(res$seq)[idx], nbDes)) # append to general output
+        toExpand <<- c(toExpand, rep(TRUE, nbDes))
+        return(NULL)
+    }
+
+
+    ## 2nd MAIN SUB-FUNCTION: EXPANDING FROM ONE SEQUENCE - SPATIAL ##
+    expand.one.strain.xy <- function(seq, date, idx, cur.xy){
+        toExpand[idx] <<- FALSE # this one is no longer to expand
+        nbDes <- nb.desc()
+        if(nbDes==0) return(NULL) # stop if no descendant
+        newDates <- sapply(1:nbDes, function(i) date.dupli(date)) # find dates for descendants
+        newDates <- newDates[newDates <= Tmax] # don't store future sequences
+        nbDes <- length(newDates)
+        if(nbDes==0) return(NULL) # stop if no suitable date
+        newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq)) # generate new sequences
+        class(newSeq) <- "DNAbin" # lists of DNAbin vectors must also have class "DNAbin"
+        newSeq <- as.matrix(newSeq) # list DNAbin -> matrix DNAbin with nbDes rows
+        rownames(newSeq) <- seqname.gen(nbDes) # find new labels for these new sequences
+        res$seq <<- rbind(res$seq, newSeq) # append to general output
+        res$dates <<- c(res$dates, newDates) # append to general output
+        res$ances <<- c(res$ances, rep(rownames(res$seq)[idx], nbDes)) # append to general output
+        res$xy <<- rbind(res$xy, xy.dupli(cur.xy, nbDes))
+        toExpand <<- c(toExpand, rep(TRUE, nbDes))
+        return(NULL)
+    }
+
+
+
+
+
+    ## PERFORM SIMULATIONS - NON SPATIAL CASE ##
+    if(!geo.sim){
+        ## initialization
+        res$seq <- as.matrix(seq.gen())
+        res$seq <- matrix(rep(res$seq, ini.n), byrow=TRUE, nrow=ini.n)
+        class(res$seq) <- "DNAbin"
+        rownames(res$seq) <- 1:ini.n
+        res$dates[1:ini.n] <- rep(0,ini.n)
+        res$ances[1:ini.n] <- rep(NA,ini.n)
+        toExpand <- rep(TRUE,ini.n)
+
+        ## simulations: isn't simplicity beautiful?
+        while(any(toExpand)){
+            idx <- min(which(toExpand))
+            expand.one.strain(res$seq[idx,], res$dates[idx], idx)
+            resize.result()
+        }
+
+
+        ## SHAPE AND RETURN OUTPUT ##
+        res$ances <- as.character(res$ances)
+        names(res$dates) <- rownames(res$seq)
+        class(res) <- "haploGen"
+        return(res)
+
+    } # END NON-SPATIAL SIMULATIONS
+
+
+
+
+    ## PERFORM SIMULATIONS - SPATIAL CASE ##
+    if(geo.sim){
+        ## some checks
+        if(!is.null(matConnect)) {
+            if(nrow(matConnect) != ncol(matConnect)) stop("matConnect is not a square matrix")
+            if(nrow(matConnect) != grid.size^2) stop("dimension of matConnect does not match grid size")
+        }
+
+        ## initialization
+        res$seq <- as.matrix(seq.gen())
+        res$seq <- matrix(rep(res$seq, ini.n), byrow=TRUE, nrow=ini.n)
+        class(res$seq) <- "DNAbin"
+        rownames(res$seq) <- 1:ini.n
+        res$dates[1:ini.n] <- rep(0,ini.n)
+        res$ances[1:ini.n] <- rep(NA,ini.n)
+        toExpand <- rep(TRUE,ini.n)
+
+        if(is.null(ini.xy)){
+            locStart <- xy.gen()
+        } else{
+            locStart <- as.vector(ini.xy)[1:2]
+        }
+        res$xy <- matrix(rep(locStart, ini.n), byrow=TRUE, nrow=ini.n)
+        colnames(res$xy) <- c("x","y")
+
+        ##cat("nb.strains","iteration.time",file="haploGenTime.out") # for debugging
+
+
+        ## simulations: isn't simplicity beautiful?
+        while(any(toExpand)){
+            ##time.previous <- Sys.time() # FOR DEBUGGING
+            idx <- min(which(toExpand))
+            expand.one.strain.xy(res$seq[idx,], res$dates[idx], idx, res$xy[idx,])
+            resize.result.xy()
+            ## VERBOSE OUTPUT FOR DEBUGGING ##
+            ## cat("\nNb strains:",length(res$ances),"/",max.nb.haplo)
+            ##             cat("\nLatest date:", max(res$dates),"/",Tmax)
+            ##             cat("\nRemaining strains to duplicate", sum(toExpand))
+            ##             cat("\n",append=TRUE,file="haploGenTime.out")
+            ##             iter.time <- as.numeric(difftime(Sys.time(),time.previous,unit="sec"))
+            ##             time.previous <- Sys.time()
+            ##             cat(c(length(res$ances), iter.time),append=
+            ##TRUE,file="haploGenTime.out")
+        ## END DEBUGGING VERBOSE ##
+        }
+
+        ## VERBOSE OUTPUT FOR DEBUGGING ##
+        ##cat("\nSimulation time stored in haploGenTime.out\n")
+
+        ## SHAPE AND RETURN OUTPUT ##
+        res$ances <- as.character(res$ances)
+        names(res$dates) <- rownames(res$seq)
+
+        class(res) <- "haploGen"
+        res$call <- match.call()
+        return(res)
+
+    } # end SPATIAL SIMULATIONS
+
+
+} # end haploGen
+
+
+
+
+
+
+
+
+##################
+## print.haploGen
+##################
+print.haploGen <- function(x, ...){
+
+    cat("\t\n========================")
+    cat("\t\n= simulated haplotypes =")
+    cat("\t\n=  (haploGen object)   =")
+    cat("\t\n========================\n")
+
+    cat("\nSize :", length(x$ances),"haplotypes")
+    cat("\nHaplotype length :", ncol(x$seq),"nucleotids")
+    cat("\nProportion of NA ancestors :", signif(mean(is.na(x$ances)),5))
+    cat("\nNumber of known ancestors :", sum(!is.na(x$ances)))
+    nbAncInSamp <- sum(x$ances %in% labels(x))
+    cat("\nNumber of ancestors within the sample :", nbAncInSamp)
+    cat("\nDate range :", min(x$dates,na.rm=TRUE),"-",max(x$dates,na.rm=TRUE))
+    ##nUniqSeq <- length(unique(apply(as.character(x$seq),1,paste,collapse="")))
+    ##cat("\nNumber of unique haplotypes :", nUniqSeq)
+
+    cat("\n\n= Content =")
+    for(i in 1:length(x)){
+        cat("\n")
+
+        cat(paste("$", names(x)[i], sep=""),"\n")
+        if(names(x)[i] %in% c("seq","call")) {
+            print(x[[i]])
+        } else if(names(x)[i]=="xy"){
+            print(head(x[[i]]))
+            if(nrow(x[[i]]>6)) cat("    ...\n")
+        } else cat(head(x[[i]],6), ifelse(length(x[[i]])>6,"...",""),"\n")
+    }
+
+
+    return(NULL)
+} # end print.haploGen
+
+
+
+
+
+
+##############
+## [.haploGen
+##############
+"[.haploGen" <- function(x,i,j,drop=FALSE){
+    res <- x
+    res$seq <- res$seq[i,]
+    res$ances <- res$ances[i]
+    res$dates <- res$dates[i]
+    if(!is.null(res$xy)) res$xy <- res$xy[i,,drop=FALSE]
+
+    return(res)
+}
+
+
+
+
+
+####################
+## na.omit.haploGen
+####################
+##
+## ACTUALLY THIS FUNCTION MAKES NO SENSE FOR NOW
+## AS STRAINS WITH NO ANCESTOR MAY BE ANCESTORS OF
+## OTHER STRAINS.
+##
+## na.omit.haploGen <- function(object, ...){
+##     res <- object
+##     isNA <- is.na(res$ances)
+##     res$seq <- res$seq[!isNA,]
+##     res$ances <- res$ances[!isNA]
+##     res$dates <- res$dates[!isNA]
+##     if(!is.null(res$xy)) res$xy <- res$xy[!isNA,]
+
+##     return(res)
+## }
+
+
+
+
+##################
+## labels.haploGen
+##################
+labels.haploGen <- function(object, ...){
+    return(rownames(object$seq))
+}
+
+
+
+#######################
+## as.POSIXct.haploGen
+#######################
+as.POSIXct.haploGen <- function(x, tz="", origin=as.POSIXct("2000/01/01"), ...){
+    res <- as.POSIXct(x$dates*24*3600, origin=origin)
+    return(res)
+}
+
+
+
+
+#####################
+## seqTrack.haploGen
+#####################
+seqTrack.haploGen <- function(x, optim=c("min","max"), prox.mat=NULL, ...){
+    myX <- dist.dna(x$seq, model="raw")
+    x.names <- labels(x)
+    x.dates <- as.POSIXct(x)
+    seq.length <- ncol(x$seq)
+    myX <- myX * seq.length
+    prevCall <- as.list(x$call)
+    if(is.null(prevCall$mu)){
+        mu0 <- 0.0001
+    } else {
+        mu0 <- eval(prevCall$mu)
+    }
+    res <- seqTrack(myX, x.names=x.names, x.dates=x.dates, optim=optim, prox.mat=prox.mat,...)
+    return(res)
+}
+
+
+
+
+
+
+#####################
+## seqTrackG.haploGen
+#####################
+seqTrackG.haploGen <- function(x, optim=c("min","max"), ...){
+    myX <- dist.dna(x$seq, model="raw")
+    x.names <- labels(x)
+    x.dates <- as.POSIXct(x)
+    seq.length <- ncol(x$seq)
+    myX <- myX * seq.length
+    prevCall <- as.list(x$call)
+    if(is.null(prevCall$mu)){
+        mu0 <- 0.0001
+    } else {
+        mu0 <- eval(prevCall$mu)
+    }
+    res <- seqTrackG(myX, x.names=x.names, x.dates=x.dates, best=optim,...)
+    return(res)
+}
+
+
+
+
+
+
+##############################
+## optimize.seqTrack.haploGen
+##############################
+optimize.seqTrack.haploGen <- function(x, thres=0.2, optim=c("min","max"),
+                                       typed.chr=NULL, mu0=NULL, chr.length=NULL,
+                                       prox.mat=NULL, nstep=10, step.size=1e3,
+                                       rDate=.rTimeSeq, arg.rDate=NULL, rMissDate=.rUnifTimeSeq, ...){
+
+    x.names <- labels(x)
+    x.dates <- as.POSIXct(x)
+    seq.length <- ncol(x$seq)
+    myX <- dist.dna(x$seq, model="raw") * seq.length
+    prevCall <- as.list(x$call)
+    if(is.null(prevCall$mu)){
+        mu0 <- 0.0001
+    } else {
+        mu0 <- eval(prevCall$mu)
+    }
+
+    res <- optimize.seqTrack.default(x=myX, x.names=x.names, x.dates=x.dates,
+                                     typed.chr=typed.chr, mu0=mu0, chr.length=chr.length,
+                                     thres=thres, optim=optim, prox.mat=prox.mat,
+                                     nstep=nstep, step.size=step.size,
+                                     rDate=rDate, arg.rDate=arg.rDate, rMissDate=rMissDate, ...)
+} # end optimize.seqTrack.haploGen
+
+
+
+
+
+########################
+## as.seqTrack.haploGen
+########################
+as.seqTrack.haploGen <- function(x){
+    ## x.ori <- x
+    ## x <- na.omit(x)
+    toSetToNA <- x$dates==min(x$dates)
+    res <- list()
+    res$id <- labels(x)
+    res <- as.data.frame(res)
+    res$ances <- x$ances
+    res$ances[toSetToNA] <- NA
+    res$weight <- 1 # ??? have to recompute that...
+    res$weight[toSetToNA] <- NA
+    res$date <- as.POSIXct(x)[labels(x)]
+    res$ances.date <- as.POSIXct(x)[x$ances]
+
+    ## set results as indices rather than labels
+    res$ances <- match(res$ances, res$id)
+    res$id <- 1:length(res$id)
+
+    ## SET CLASS
+    class(res) <- c("seqTrack", "data.frame")
+
+    return(res)
+}
+
+
+
+
+################
+## plotHaploSim
+################
+plotHaploSim <- function(x, annot=FALSE, dateRange=NULL, col=NULL, bg="grey", add=FALSE, ...){
+
+    ## SOME CHECKS ##
+    if(class(x)!="haploGen") stop("x is not a haploGen object")
+    if(is.null(x$xy)) stop("x does not contain xy coordinates; try to simulate date")
+
+
+    ## ## CONVERSION TO A SEQTRACK-LIKE OBJECT ##
+    xy <- x$xy
+    res <- as.seqTrack.haploGen(x)
+
+    ##     res <- list()
+    ##     res$id <- labels(x)
+    ##     res <- as.data.frame(res)
+    ##     res$ances <- x$ances
+    ##     res$ances[toSetToNA] <- NA
+    ##     res$weight <- 1 # ??? have to recompute that...
+    ##     res$weight[toSetToNA] <- NA
+    ##     res$date <- as.POSIXct(x.ori)[labels(x)]
+    ##     res$ances.date <- as.POSIXct(x.ori)[x$ances]
+    ##     ## set results as indices rather than labels
+    ##     res$ances <- match(res$ances, res$id)
+    ##     res$id <- 1:length(res$id)
+
+
+    ## CALL TO PLOTSEQTRACK ##
+    plotSeqTrack(x=res, xy=xy, annot=annot, dateRange=dateRange,
+                        col=col, bg=bg, add=add, ...)
+
+    return(invisible(res))
+
+} # end plotHaploSim
+
+
+
+
+
+
+###################
+## sample.haploGen
+###################
+sample.haploGen <- function(x, n, rDate=.rTimeSeq, arg.rDate=NULL){
+    ## EXTRACT THE SAMPLE ##
+    res <- x[sample(1:nrow(x$seq), n, replace=FALSE)]
+
+
+    ## RETRIEVE SOME PARAMETERS FROM HAPLOSIM CALL
+    prevCall <- as.list(x$call)
+    if(!is.null(prevCall$mu)){
+        mu0 <- eval(prevCall$mu)
+    } else {
+        mu0 <- 1e-04
+    }
+
+    if(!is.null(prevCall$seq.length)){
+        L <- eval(prevCall$seq.length)
+    } else {
+        L <- 1000
+    }
+
+    truedates <- res$dates
+    daterange <- diff(range(res$dates,na.rm=TRUE))
+
+    if(identical(rDate,.rTimeSeq)){
+        sampdates <- .rTimeSeq(mu0=mu0, L=L, n=length(truedates), maxNbDays=daterange/2)
+    } else{
+        arg.rDate$n <- n
+        sampdates <- do.call(rDate, arg.rDate)
+    }
+    sampdates <- truedates + abs(sampdates)
+
+    res$dates <- sampdates
+
+    return(res)
+} # end sample.haploGen
+
+
+
+
+
+
+
+
+##########################
+## as("haploGen", "graphNEL")
+##########################
+setOldClass("haploGen")
+setAs("haploGen", "graphNEL", def=function(from){
+    N <- length(from$ances)
+    areNA <- is.na(from$ances)
+    res <- ftM2graphNEL(cbind(from$ances[!areNA], (1:N)[!areNA]))
+    return(res)
+})

Deleted: pkg/R/haploSim.R
===================================================================
--- pkg/R/haploSim.R	2010-02-17 15:03:46 UTC (rev 569)
+++ pkg/R/haploSim.R	2010-02-17 16:58:58 UTC (rev 570)
@@ -1,599 +0,0 @@
-############
-## haploGen
-############
-##
-## N: number of sequences to simulate
-## mu: mutation rate per nucleotid per year
-## Tmax: periode of time to simulate
-## mean.gen.time, sd.gen.time: average time for transmission and its standard deviation (normal dist)
-## mean.repro, sd.repro: average number of transmissions and its standard deviation (normal dist)
-##
-haploGen <- function(seq.length=1000, mu=0.0001,
-                     Tmax=50, mean.gen.time=5, sd.gen.time=1,
-                     mean.repro=2, sd.repro=1, max.nb.haplo=1e3,
-                     geo.sim=TRUE, grid.size=5, lambda.xy=0.5, matConnect=NULL,
-                     ini.n=1, ini.xy=NULL){
-
-    ## CHECKS ##
-    if(!require(ape)) stop("The ape package is required.")
-
-
-    ## GENERAL VARIABLES ##
-    NUCL <- as.DNAbin(c("a","t","c","g"))
-    res <- list(seq=as.matrix(as.DNAbin(character(0))), dates=integer(), ances=character())
-    toExpand <- logical()
-    mu <- mu/365 # mutation rate by day
-    myGrid <- matrix(1:grid.size^2, ncol=grid.size, nrow=grid.size)
-
-
-    ## AUXILIARY FUNCTIONS ##
-    ## generate sequence from scratch
-    seq.gen <- function(){
-        res <- list(sample(NUCL, size=seq.length, replace=TRUE))
-        class(res) <- "DNAbin"
-        return(res)
-    }
-
-    ## create substitutions for defined SNPs
-    substi <- function(snp){
-        res <- sapply(snp, function(e) sample(setdiff(NUCL,e),1))
-        class(res) <- "DNAbin"
-        return(res)
-    }
-
-    ## duplicate a sequence (including possible mutations)
-    seq.dupli <- function(seq){
-        toChange <- as.logical(rbinom(n=seq.length, size=1, prob=mu))
-        res <- seq
-        if(sum(toChange)>0) {
-            res[toChange] <- substi(res[toChange])
-        }
-        return(res)
-    }
-
-    ## what is the name of the new sequences?
-    seqname.gen <- function(nb.new.seq){
-        res <- max(as.integer(rownames(res$seq)), 0) + 1:nb.new.seq
-        return(as.character(res))
-    }
-
-    ## how many days before duplication occurs ?
-    time.dupli <- function(){
-        res <- round(rnorm(1, mean=mean.gen.time, sd=sd.gen.time))
-        res[res<0] <- 0
-        return(res)
-    }
-
-    ## when duplication occurs?
-    date.dupli <- function(curDate){
-        res <- curDate + time.dupli()
-        return(res)
-    }
-
-    ## how many duplication/transmission occur?
-    nb.desc <- function(){
-        res <- round(rnorm(1, mean=mean.repro, sd=sd.repro))
-        res[res<0] <- 0
-        return(res)
-    }
-
-    ## where does an haplotype emerges in the first place?
-    xy.gen <- function(){
-        return(sample(1:grid.size, size=2, replace=TRUE))
-    }
-
-    ## where does a transmission occur (destination)?
-    if(is.null(matConnect)){ # use universal lambda param
-        xy.dupli <- function(cur.xy, nbLoc){
-            mvt <- rpois(2*nbLoc, lambda.xy) * sample(c(-1,1), size=2*nbLoc, replace=TRUE)
-            res <- t(matrix(mvt, nrow=2) + as.vector(cur.xy))
-            res[res < 1] <- 1
-            res[res > grid.size] <- grid.size
-            return(res)
-        }
-    } else { # use location-dependent proba of dispersal between locations
-        if(any(matConnect < 0)) stop("Negative values in matConnect (probabilities expected!)")
-        matConnect <- prop.table(matConnect,1)
-        xy.dupli <- function(cur.xy, nbLoc){
-            ## lambda.xy <- matConnect[cur.xy[1] , cur.xy[2]]
-            ##             mvt <- rpois(2*nbLoc, lambda.xy) * sample(c(-1,1), size=2*nbLoc, replace=TRUE)
-            ##             res <- t(matrix(mvt, nrow=2) + as.vector(cur.xy))
-            idxAncesLoc <- myGrid[cur.xy[1], cur.xy[2]]
-            newLoc <- sample(1:grid.size^2, size=nbLoc, prob=matConnect[idxAncesLoc,], replace=TRUE) # get new locations
-            res <- cbind(row(myGrid)[newLoc] , col(myGrid)[newLoc]) # get coords of new locations
-            return(res)
-        }
-    }
-
-
-    ## check result size and resize it if needed
-    resize.result <- function(){
-        curSize <- length(res$dates)
-        if(curSize <= max.nb.haplo) return(NULL)
-        toKeep <- rep(FALSE, curSize)
-        toKeep[sample(1:curSize, size=max.nb.haplo, replace=FALSE)] <- TRUE
-        removed.strains <- rownames(res$seq)[!toKeep]
-        res$seq <<- res$seq[toKeep,]
-        res$dates <<- res$dates[toKeep]
-        res$ances <<- res$ances[toKeep]
-        toExpand <<- toExpand[toKeep]
-        temp <- as.character(res$ances) %in% removed.strains
-        if(any(temp)) {
-            res$ances[temp] <<- NA
-        }
-
-        return(NULL)
-    }
-
-    ## check result size and resize it if needed - spatial version
-    resize.result.xy <- function(){
-        curSize <- length(res$dates)
-        if(curSize <= max.nb.haplo) return(NULL)
-        toKeep <- rep(FALSE, curSize)
-        toKeep[sample(1:curSize, size=max.nb.haplo, replace=FALSE)] <- TRUE
-        removed.strains <- rownames(res$seq)[!toKeep]
-        res$seq <<- res$seq[toKeep,]
-        res$dates <<- res$dates[toKeep]
-        res$ances <<- res$ances[toKeep]
-        res$xy <<- res$xy[toKeep,,drop=FALSE]
-        toExpand <<- toExpand[toKeep]
-        temp <- as.character(res$ances) %in% removed.strains
-        if(any(temp)) {
-            res$ances[temp] <<- NA
-        }
-
-        return(NULL)
-    }
-
-
-
-    ## MAIN SUB-FUNCTION: EXPANDING FROM ONE SEQUENCE - NON SPATIAL ##
-    expand.one.strain <- function(seq, date, idx){
-        toExpand[idx] <<- FALSE # this one is no longer to expand
-        nbDes <- nb.desc()
-        if(nbDes==0) return(NULL) # stop if no descendant
-        newDates <- sapply(1:nbDes, function(i) date.dupli(date)) # find dates for descendants
-        newDates <- newDates[newDates <= Tmax] # don't store future sequences
-        nbDes <- length(newDates)
-        if(nbDes==0) return(NULL) # stop if no suitable date
-        newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq)) # generate new sequences
-        class(newSeq) <- "DNAbin" # lists of DNAbin vectors must also have class "DNAbin"
-        newSeq <- as.matrix(newSeq) # list DNAbin -> matrix DNAbin with nbDes rows
-        rownames(newSeq) <- seqname.gen(nbDes) # find new labels for these new sequences
-        res$seq <<- rbind(res$seq, newSeq) # append to general output
-        res$dates <<- c(res$dates, newDates) # append to general output
-        res$ances <<- c(res$ances, rep(rownames(res$seq)[idx], nbDes)) # append to general output
-        toExpand <<- c(toExpand, rep(TRUE, nbDes))
-        return(NULL)
-    }
-
-
-    ## 2nd MAIN SUB-FUNCTION: EXPANDING FROM ONE SEQUENCE - SPATIAL ##
-    expand.one.strain.xy <- function(seq, date, idx, cur.xy){
-        toExpand[idx] <<- FALSE # this one is no longer to expand
-        nbDes <- nb.desc()
-        if(nbDes==0) return(NULL) # stop if no descendant
-        newDates <- sapply(1:nbDes, function(i) date.dupli(date)) # find dates for descendants
-        newDates <- newDates[newDates <= Tmax] # don't store future sequences
-        nbDes <- length(newDates)
-        if(nbDes==0) return(NULL) # stop if no suitable date
-        newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq)) # generate new sequences
-        class(newSeq) <- "DNAbin" # lists of DNAbin vectors must also have class "DNAbin"
-        newSeq <- as.matrix(newSeq) # list DNAbin -> matrix DNAbin with nbDes rows
-        rownames(newSeq) <- seqname.gen(nbDes) # find new labels for these new sequences
-        res$seq <<- rbind(res$seq, newSeq) # append to general output
-        res$dates <<- c(res$dates, newDates) # append to general output
-        res$ances <<- c(res$ances, rep(rownames(res$seq)[idx], nbDes)) # append to general output
-        res$xy <<- rbind(res$xy, xy.dupli(cur.xy, nbDes))
-        toExpand <<- c(toExpand, rep(TRUE, nbDes))
-        return(NULL)
-    }
-
-
-
-
-
-    ## PERFORM SIMULATIONS - NON SPATIAL CASE ##
-    if(!geo.sim){
-        ## initialization
-        res$seq <- as.matrix(seq.gen())
-        res$seq <- matrix(rep(res$seq, ini.n), byrow=TRUE, nrow=ini.n)
-        class(res$seq) <- "DNAbin"
-        rownames(res$seq) <- 1:ini.n
-        res$dates[1:ini.n] <- rep(0,ini.n)
-        res$ances[1:ini.n] <- rep(NA,ini.n)
-        toExpand <- rep(TRUE,ini.n)
-
-        ## simulations: isn't simplicity beautiful?
-        while(any(toExpand)){
-            idx <- min(which(toExpand))
-            expand.one.strain(res$seq[idx,], res$dates[idx], idx)
-            resize.result()
-        }
-
-
-        ## SHAPE AND RETURN OUTPUT ##
-        res$ances <- as.character(res$ances)
-        names(res$dates) <- rownames(res$seq)
-        class(res) <- "haploGen"
-        return(res)
-
-    } # END NON-SPATIAL SIMULATIONS
-
-
-
-
-    ## PERFORM SIMULATIONS - SPATIAL CASE ##
-    if(geo.sim){
-        ## some checks
-        if(!is.null(matConnect)) {
-            if(nrow(matConnect) != ncol(matConnect)) stop("matConnect is not a square matrix")
-            if(nrow(matConnect) != grid.size^2) stop("dimension of matConnect does not match grid size")
-        }
-
-        ## initialization
-        res$seq <- as.matrix(seq.gen())
-        res$seq <- matrix(rep(res$seq, ini.n), byrow=TRUE, nrow=ini.n)
-        class(res$seq) <- "DNAbin"
-        rownames(res$seq) <- 1:ini.n
-        res$dates[1:ini.n] <- rep(0,ini.n)
-        res$ances[1:ini.n] <- rep(NA,ini.n)
-        toExpand <- rep(TRUE,ini.n)
-
-        if(is.null(ini.xy)){
-            locStart <- xy.gen()
-        } else{
-            locStart <- as.vector(ini.xy)[1:2]
-        }
-        res$xy <- matrix(rep(locStart, ini.n), byrow=TRUE, nrow=ini.n)
-        colnames(res$xy) <- c("x","y")
-
-        ##cat("nb.strains","iteration.time",file="haploGenTime.out") # for debugging
-
-
-        ## simulations: isn't simplicity beautiful?
-        while(any(toExpand)){
-            ##time.previous <- Sys.time() # FOR DEBUGGING
-            idx <- min(which(toExpand))
-            expand.one.strain.xy(res$seq[idx,], res$dates[idx], idx, res$xy[idx,])
-            resize.result.xy()
-            ## VERBOSE OUTPUT FOR DEBUGGING ##
-            ## cat("\nNb strains:",length(res$ances),"/",max.nb.haplo)
-            ##             cat("\nLatest date:", max(res$dates),"/",Tmax)
-            ##             cat("\nRemaining strains to duplicate", sum(toExpand))
-            ##             cat("\n",append=TRUE,file="haploGenTime.out")
-            ##             iter.time <- as.numeric(difftime(Sys.time(),time.previous,unit="sec"))
-            ##             time.previous <- Sys.time()
-            ##             cat(c(length(res$ances), iter.time),append=
-            ##TRUE,file="haploGenTime.out")
-        ## END DEBUGGING VERBOSE ##
-        }
-
-        ## VERBOSE OUTPUT FOR DEBUGGING ##
-        ##cat("\nSimulation time stored in haploGenTime.out\n")
-
-        ## SHAPE AND RETURN OUTPUT ##
-        res$ances <- as.character(res$ances)
-        names(res$dates) <- rownames(res$seq)
-
-        class(res) <- "haploGen"
-        res$call <- match.call()
-        return(res)
-
-    } # end SPATIAL SIMULATIONS
-
-
-} # end haploGen
-
-
-
-
-
-
-
-
-##################
-## print.haploGen
-##################
-print.haploGen <- function(x, ...){
-
-    cat("\t\n========================")
-    cat("\t\n= simulated haplotypes =")
-    cat("\t\n=  (haploGen object)   =")
-    cat("\t\n========================\n")
-
-    cat("\nSize :", length(x$ances),"haplotypes")
-    cat("\nHaplotype length :", ncol(x$seq),"nucleotids")
-    cat("\nProportion of NA ancestors :", signif(mean(is.na(x$ances)),5))
-    cat("\nNumber of known ancestors :", sum(!is.na(x$ances)))
-    nbAncInSamp <- sum(x$ances %in% labels(x))
-    cat("\nNumber of ancestors within the sample :", nbAncInSamp)
-    cat("\nDate range :", min(x$dates,na.rm=TRUE),"-",max(x$dates,na.rm=TRUE))
-    ##nUniqSeq <- length(unique(apply(as.character(x$seq),1,paste,collapse="")))
-    ##cat("\nNumber of unique haplotypes :", nUniqSeq)
-
-    cat("\n\n= Content =")
-    for(i in 1:length(x)){
-        cat("\n")
-
-        cat(paste("$", names(x)[i], sep=""),"\n")
-        if(names(x)[i] %in% c("seq","call")) {
-            print(x[[i]])
-        } else if(names(x)[i]=="xy"){
-            print(head(x[[i]]))
-            if(nrow(x[[i]]>6)) cat("    ...\n")
-        } else cat(head(x[[i]],6), ifelse(length(x[[i]])>6,"...",""),"\n")
-    }
-
-
-    return(NULL)
-} # end print.haploGen
-
-
-
-
-
-
-##############
-## [.haploGen
-##############
-"[.haploGen" <- function(x,i,j,drop=FALSE){
-    res <- x
-    res$seq <- res$seq[i,]
-    res$ances <- res$ances[i]
-    res$dates <- res$dates[i]
-    if(!is.null(res$xy)) res$xy <- res$xy[i,,drop=FALSE]
-
-    return(res)
-}
-
-
-
-
-
-####################
-## na.omit.haploGen
-####################
-##
-## ACTUALLY THIS FUNCTION MAKES NO SENSE FOR NOW
-## AS STRAINS WITH NO ANCESTOR MAY BE ANCESTORS OF
-## OTHER STRAINS.
-##
-## na.omit.haploGen <- function(object, ...){
-##     res <- object
-##     isNA <- is.na(res$ances)
-##     res$seq <- res$seq[!isNA,]
-##     res$ances <- res$ances[!isNA]
-##     res$dates <- res$dates[!isNA]
-##     if(!is.null(res$xy)) res$xy <- res$xy[!isNA,]
-
-##     return(res)
-## }
-
-
-
-
-##################
-## labels.haploGen
-##################
-labels.haploGen <- function(object, ...){
-    return(rownames(object$seq))
-}
-
-
-
-#######################
-## as.POSIXct.haploGen
-#######################
-as.POSIXct.haploGen <- function(x, tz="", origin=as.POSIXct("2000/01/01"), ...){
-    res <- as.POSIXct(x$dates*24*3600, origin=origin)
-    return(res)
-}
-
-
-
-
-#####################
-## seqTrack.haploGen
-#####################
-seqTrack.haploGen <- function(x, optim=c("min","max"), prox.mat=NULL, ...){
-    myX <- dist.dna(x$seq, model="raw")
-    x.names <- labels(x)
-    x.dates <- as.POSIXct(x)
-    seq.length <- ncol(x$seq)
-    myX <- myX * seq.length
-    prevCall <- as.list(x$call)
-    if(is.null(prevCall$mu)){
-        mu0 <- 0.0001
-    } else {
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/adegenet -r 570


More information about the adegenet-commits mailing list