[adegenet-commits] r333 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu May 28 16:30:11 CEST 2009


Author: jombart
Date: 2009-05-28 16:30:11 +0200 (Thu, 28 May 2009)
New Revision: 333

Modified:
   pkg/R/seqTrack.R
Log:
added a function to draw randomly dates between two dates.


Modified: pkg/R/seqTrack.R
===================================================================
--- pkg/R/seqTrack.R	2009-05-28 10:30:57 UTC (rev 332)
+++ pkg/R/seqTrack.R	2009-05-28 14:30:11 UTC (rev 333)
@@ -295,29 +295,43 @@
 
 
 
+#################
+## .rUnifTimeSeq
+#################
+.rUnifTimeSeq <- function(n, dateMin, dateMax){
+    rangeSize <- as.integer(round(diff(c(dateA,dateB))))
+    nbDays <- sample(1:nbDays, n, replace=TRUE)
+    res <- dateMin + nbDays*3600*24
+    res <- round(res, units="days")
+    return(res)
+}
 
+
+
+
+
 ###############
 ## .ambigDates
 ###############
 ## x: result of seqTrack
 ## mu0: mutation rate / site / year
-## p: threshold probability for a sequence not to mutate
-.ambigDates <- function(x, mu0, seq.length, p=0.99){
-    mu <- mu0/365 # mutation rate / site / day
-    t <- 0:1000 # in days
-    Pt <- (1-mu)^(t*seq.length)
-    ##plot(Pt,ylim=c(0,1))
-    nbDays <- which.min(Pt>p)-1
+## ## p: threshold probability for a sequence not to mutate
+## .ambigDates <- function(x, mu0, seq.length, p=0.99){
+##     mu <- mu0/365 # mutation rate / site / day
+##     t <- 0:1000 # in days
+##     Pt <- (1-mu)^(t*seq.length)
+##     ##plot(Pt,ylim=c(0,1))
+##     nbDays <- which.min(Pt>p)-1
 
-    isNA <- is.na(x[,2])
-    date.to <- x$date
-    date.from <- x$ances.date
+##     isNA <- is.na(x[,2])
+##     date.to <- x$date
+##     date.from <- x$ances.date
 
-    res <- (date.to-date.from) < nbDays*2
-    res[is.na(res)] <- FALSE
-    return(res)
+##     res <- (date.to-date.from) < nbDays*2
+##     res[is.na(res)] <- FALSE
+##     return(res)
 
-} # end checkTime
+## } # end checkTime
 
 
 
@@ -360,7 +374,7 @@
 ## optimize.seqTrack
 #####################
 optimize.seqTrack <- function(nsim, seq.names, seq.dates, W, optim=c("min","max"),
-                              proxMat=NULL, mu0, seq.length, ...){
+                              proxMat=NULL, mu0, seq.length, rMissDate=NULL, ...){
 
     ## CHECKS ##
     optim <- match.arg(optim)
@@ -381,6 +395,9 @@
         stop(msg)
     }
 
+    isMissDate <- is.na(seq.dates)
+
+
     N <- length(seq.names)
     id <- 1:N
 
@@ -399,21 +416,7 @@
 
 
     ## AUXILIARY FUNCTIONS ##
-    ## valRes <- function(res){
-    ##         return(sum(unlist(res[,3]), na.rm=TRUE))
-    ##     }
 
-    ##     useNewRes <- function(oldRes,newRes, optim){
-    ##         if(optim=="min"){
-    ##             res <- valRes(oldRes) > valRes(newRes)
-    ##         } else {
-    ##             res <- valRes(oldRes) < valRes(newRes)
-    ##         }
-
-    ##         return(res)
-    ##     }
-
-
     ## FIND INITIAL SEQTRACK RESULT ##
     res.ini <- seqTrack(seq.names, seq.dates, W, optim=c("min","max"))
 
@@ -428,8 +431,8 @@
 
 
     ## DO THE OPTIMISATION ##
-    RANGE.DATES <- diff(range(seq.dates)) # time window of the sample, in days
-    NB.DATES.TO.SIM <- length(seq.dates)
+    RANGE.DATES <- as.integer(round(diff(range(seq.dates, na.rm=TRUE)))) # time window of the sample, in days
+    NB.DATES.TO.SIM <- sum(!isMissDate)
 
 
     ## for loop is not to slow for < 1e6 rep
@@ -439,16 +442,61 @@
     res.best <- res.ini # initialization
     valRes <- numeric(nsim)
 
-    for(i in 1:nsim){
-        myDates <- seq.dates + .rTimeSeq(n=NB.DATES.TO.SIM, mu0=mu0, L=seq.length, maxNbDays=2*RANGE.DATES)
-        res.new <- seqTrack(seq.names=seq.names, seq.dates=myDates, W=W, optim=optim, proxMat=proxMat, ...)
-        valRes[i] <- sum(res.new$weight,na.rm=TRUE)
-        if(use.new.res(res.best, res.new)){
-            res.best <- res.new
+
+    ## DEFAULT CASE: NO MISSING DATES
+    if(!any(isMissDate)){
+        for(i in 1:nsim){
+            myDates <- seq.dates + .rTimeSeq(n=NB.DATES.TO.SIM, mu0=mu0, L=seq.length, maxNbDays=2*RANGE.DATES)
+            res.new <- seqTrack(seq.names=seq.names, seq.dates=myDates, W=W, optim=optim, proxMat=proxMat, ...)
+            valRes[i] <- sum(res.new$weight,na.rm=TRUE)
+            if(use.new.res(res.best, res.new)){
+                res.best <- res.new
+            }
         }
     }
 
 
+    ## OTHER CASE: HANDLE MISSING DATES
+    if(any(isMissDate)){
+
+        ## Handle distribution and its parameters ##
+        argList <- list(...)
+        if(is.null(rMissDate)) {
+            rMissDate <- runif # distribution function
+        }
+        if(is.null(argList$min) & identical(rMissDate, runif)){ # earliest date
+            argList$min <- min(seq.dates,na.rm=TRUE)
+        } else {
+            argList$min[is.na(argList$min)] <- min(seq.dates,na.rm=TRUE) - RANGE.DATES*0.5
+        }
+        if(is.null(argList$max) & identical(rMissDate, runif)){ # latest date
+            argList$max <- max(seq.dates,na.rm=TRUE)
+        } else {
+            argList$max[is.na(argList$max)] <- max(seq.dates,na.rm=TRUE) + RANGE.DATES*0.5
+        }
+
+        argList$n <- sum(isMissDate)
+
+        ## Make simulations ##
+        for(i in 1:nsim){
+            myDates <- seq.dates
+            ## distribution for available dates
+            myDates[!isMissDate] <- myDates[!isMissDate] +
+                .rTimeSeq(n=NB.DATES.TO.SIM, mu0=mu0, L=seq.length, maxNbDays=2*RANGE.DATES)
+            ## distribution for missing dates
+            myDates[isMissDate] <-  min(seq.dates,na.rm=TRUE) +
+                do.call(rMissDate, argList) * 24*3600
+
+            res.new <- seqTrack(seq.names=seq.names, seq.dates=myDates, W=W, optim=optim, proxMat=proxMat, ...)
+
+            valRes[i] <- sum(res.new$weight,na.rm=TRUE)
+            if(use.new.res(res.best, res.new)){
+                res.best <- res.new
+            }
+        }
+    }
+
+
     ## RESULT ##
     res <- list(best=res.best, valsim=valRes)
     return(res)



More information about the adegenet-commits mailing list