[adegenet-commits] r366 - pkg/R

Thu Jun 4 21:49:30 CEST 2009

Author: jombart
Date: 2009-06-04 21:49:30 +0200 (Thu, 04 Jun 2009)
New Revision: 366

Modified:
   pkg/R/haploSim.R
Log:
first complete version with spatial simulations added as a new module; lots of code duplicated to go faster (i.e. spatial and non-spatial versions)


Modified: pkg/R/haploSim.R
===================================================================

--- pkg/R/haploSim.R	2009-06-04 17:53:47 UTC (rev 365)
+++ pkg/R/haploSim.R	2009-06-04 19:49:30 UTC (rev 366)
@@ -9,9 +9,9 @@
 ## mean.repro, sd.repro: average number of transmissions and its standard deviation (normal dist)
 ##
 haploSim <- function(seq.length=1000, mu=0.0001,
-                    Tmax=50, mean.gen.time=5, sd.gen.time=1,
-                    mean.repro=2, sd.repro=1,
-                    max.nb.haplo=1e3){
+                     Tmax=50, mean.gen.time=5, sd.gen.time=1,
+                     mean.repro=2, sd.repro=1, max.nb.haplo=1e3,
+                     geo.sim=TRUE, grid.size=10, lambda.xy=0.5, lambdaMat.xy=NULL){
 
     ## CHECKS ##
     if(!require(ape)) stop("The ape package is required.")
@@ -23,6 +23,7 @@
     toExpand <- logical()
     mu <- mu/365 # mutation rate by day
 
+
     ## AUXILIARY FUNCTIONS ##
     ## generate sequence from scratch
     gen.seq <- function(){
@@ -93,8 +94,55 @@
         return(NULL)
     }
 
+    ## check result size and resize it if needed - spatial version
+    resize.result.xy <- function(){
+        curSize <- length(res$dates)
+        if(curSize <= max.nb.haplo) return(NULL)
+        toKeep <- rep(FALSE, curSize)
+        toKeep[sample(1:curSize, size=max.nb.haplo, replace=FALSE)] <- TRUE
+        removed.strains <- rownames(res$seq)[!toKeep]
+        res$seq <<- res$seq[toKeep,]
+        res$dates <<- res$dates[toKeep]
+        res$ances <<- res$ances[toKeep]
+        res$xy <<- res$xy[toKeep,,drop=FALSE]
+        toExpand <<- toExpand[toKeep]
+        temp <- as.character(res$ances) %in% removed.strains
+        if(any(temp)) {
+            res$ances[temp] <<- NA
+        }
 
-    ## MAIN SUB-FUNCTION: EXPANDING FROM ONE SEQUENCE ##
+        return(NULL)
+    }
+
+
+    ## where does an haplotype emerges in the first place?
+    gen.xy <- function(){
+        return(sample(1:grid.size, size=2, replace=TRUE))
+    }
+
+    ## where does a transmission occur (destination)?
+    if(is.null(lambdaMat.xy)){ # use universal lambda param
+        dupli.xy <- function(cur.xy, nbLoc){
+            mvt <- rpois(2*nbLoc, lambda.xy) * sample(c(-1,1), size=2*nbLoc, replace=TRUE)
+            res <- t(matrix(mvt, nrow=2) + as.vector(cur.xy))
+            res[res < 1] <- 1
+            res[res > grid.size] <- grid.size
+            return(res)
+        }
+    } else { # use location-dependent lambda param
+        dupli.xy <- function(cur.xy){
+            lambda.xy <- lambdaMat.xy[cur.xy[1] , cur.xy[2]]
+            mvt <- rpois(2, lambda.xy) * sample(c(-1,1), size=2, replace=TRUE)
+            res <- cur.xy + mvt
+            res[res < 1] <- 1
+            res[res > grid.size] <- grid.size
+            return(res)
+        }
+    }
+
+
+
+    ## MAIN SUB-FUNCTION: EXPANDING FROM ONE SEQUENCE - NON SPATIAL ##
     expand.one.strain <- function(seq, date, idx){
         toExpand[idx] <<- FALSE # this one is no longer to expand
         nbDes <- nb.desc()
@@ -115,39 +163,101 @@
     }
 
 
-    ## PERFORM SIMULATIONS ##
+    ## 2nd MAIN SUB-FUNCTION: EXPANDING FROM ONE SEQUENCE - SPATIAL ##
+    expand.one.strain.xy <- function(seq, date, idx, cur.xy){
+        toExpand[idx] <<- FALSE # this one is no longer to expand
+        nbDes <- nb.desc()
+        if(nbDes==0) return(NULL) # stop if no descendant
+        newDates <- sapply(1:nbDes, function(i) date.dupli(date)) # find dates for descendants
+        newDates <- newDates[newDates <= Tmax] # don't store future sequences
+        nbDes <- length(newDates)
+        if(nbDes==0) return(NULL) # stop if no suitable date
+        newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq)) # generate new sequences
+        class(newSeq) <- "DNAbin" # lists of DNAbin vectors must also have class "DNAbin"
+        newSeq <- as.matrix(newSeq) # list DNAbin -> matrix DNAbin with nbDes rows
+        rownames(newSeq) <- seqname.gen(nbDes) # find new labels for these new sequences
+        res$seq <<- rbind(res$seq, newSeq) # append to general output
+        res$dates <<- c(res$dates, newDates) # append to general output
+        res$ances <<- c(res$ances, rep(rownames(res$seq)[idx], nbDes)) # append to general output
+        res$xy <<- rbind(res$xy, dupli.xy(cur.xy, nbDes))
+        toExpand <<- c(toExpand, rep(TRUE, nbDes))
+        return(NULL)
+    }
 
-    ## initialization
-    res$seq <- as.matrix(gen.seq())
-    rownames(res$seq) <- "1"
-    res$dates[1] <- 0
-    res$ances[1] <- NA
-    toExpand <- TRUE
 
-    ## simulations: isn't simplicity beautiful?
-    while(any(toExpand)){
-        idx <- min(which(toExpand))
-        expand.one.strain(res$seq[idx,], res$dates[idx], idx)
-        resize.result()
-    }
 
 
-    ## SHAPE AND RETURN OUTPUT ##
-    ## shift ances as characters to indices in others slots
-    ##     cat("\nres$ances\n")
-    ##     print(res$ances)
-    ##     cat("\nres$seq names\n")
-    ##     print(rownames(res$seq))
 
-    nbAncesNAOk <- sum(is.na(res$ances))
-    res$ances <- match(res$ances, rownames(res$seq))
-    if(sum(is.na(res$ances))> nbAncesNAOk){ # in case non-NA ancestors are not in res$seq
-        warning("NA introduced when converting ances to indices, likely indicating a bug")
-    }
+    ## PERFORM SIMULATIONS - NON SPATIAL CASE ##
+    if(!geo.sim){
+        ## initialization
+        res$seq <- as.matrix(gen.seq())
+        rownames(res$seq) <- "1"
+        res$dates[1] <- 0
+        res$ances[1] <- NA
+        toExpand <- TRUE
 
-    class(res) <- "haploSim"
-    return(res)
+        ## simulations: isn't simplicity beautiful?
+        while(any(toExpand)){
+            idx <- min(which(toExpand))
+            expand.one.strain(res$seq[idx,], res$dates[idx], idx)
+            resize.result()
+        }
 
+
+        ## SHAPE AND RETURN OUTPUT ##
+        nbAncesNAOk <- sum(is.na(res$ances))
+        res$ances <- match(res$ances, rownames(res$seq))
+        if(sum(is.na(res$ances))> nbAncesNAOk){ # in case non-NA ancestors are not in res$seq
+            warning("NA introduced when converting ances to indices, likely indicating a bug")
+        }
+
+        class(res) <- "haploSim"
+        return(res)
+
+    } # END NON-SPATIAL SIMULATIONS
+
+
+
+
+    ## PERFORM SIMULATIONS - SPATIAL CASE ##
+    if(geo.sim){
+        ## some checks
+        if(!is.null(lambdaMat.xy)) {
+            if(nrow(lambdaMax.xy) != ncol(lambdaMat.xy)) stop("lambdaMat.xy is not a square matrix")
+            if(nrow(lambdaMax.xy) != grid.size) stop("dimension of lambdaMat.xy does not match grid size")
+        }
+
+        ## initialization
+        res$seq <- as.matrix(gen.seq())
+        rownames(res$seq) <- "1"
+        res$dates[1] <- 0
+        res$ances[1] <- NA
+        res$xy <- matrix(gen.xy(), nrow=1)
+        colnames(res$xy) <- xy
+        toExpand <- TRUE
+
+        ## simulations: isn't simplicity beautiful?
+        while(any(toExpand)){
+            idx <- min(which(toExpand))
+            expand.one.strain.xy(res$seq[idx,], res$dates[idx], idx, res$xy[idx,])
+            resize.result.xy()
+        }
+
+
+        ## SHAPE AND RETURN OUTPUT ##
+        nbAncesNAOk <- sum(is.na(res$ances))
+        res$ances <- match(res$ances, rownames(res$seq))
+        if(sum(is.na(res$ances))> nbAncesNAOk){ # in case non-NA ancestors are not in res$seq
+            warning("NA introduced when converting ances to indices, likely indicating a bug")
+        }
+
+        class(res) <- "haploSim"
+        return(res)
+
+    } # end SPATIAL SIMULATIONS
+
+
 } # end haploSim
 
 
@@ -155,6 +265,8 @@
 
 
 
+
+
 ##################
 ## print.haploSim
 ##################
@@ -182,4 +294,20 @@
 
 
     return(NULL)
-}
+} # end print.haploSim
+
+
+
+
+
+
+##################
+## assign.coords
+##################
+assign.coords <- function(x, ){
+    ## CHECKS ##
+
+
+
+
+} # end assign.coords