[adegenet-commits] r402 - pkg/R

Mon Jun 15 16:56:04 CEST 2009

Author: jombart
Date: 2009-06-15 16:56:03 +0200 (Mon, 15 Jun 2009)
New Revision: 402

Modified:
   pkg/R/seqTrack.R
Log:
Hanlding of several segments everywhere. Have to test it.


Modified: pkg/R/seqTrack.R
===================================================================

--- pkg/R/seqTrack.R	2009-06-15 14:01:47 UTC (rev 401)
+++ pkg/R/seqTrack.R	2009-06-15 14:56:03 UTC (rev 402)
@@ -15,7 +15,7 @@
 #############
 ## seqTrack
 #############
-seqTrack.default <- function(x, seq.names, seq.dates, optim=c("min","max"),
+seqTrack.default <- function(x, x.names, x.dates, optim=c("min","max"),
                      prox.mat=NULL, ...){
 
     ## CHECKS ##
@@ -28,12 +28,12 @@
         which.optim <- which.max
     }
 
-    if(length(seq.names) != length(seq.dates)){
-        stop("inconsistent length for seq.dates")
+    if(length(x.names) != length(x.dates)){
+        stop("inconsistent length for x.dates")
     }
 
-    if(is.character(seq.dates)){
-        msg <- paste("seq.dates is a character vector; " ,
+    if(is.character(x.dates)){
+        msg <- paste("x.dates is a character vector; " ,
                      "please convert it as dates using 'as.POSIXct'" ,
                      "\n(making sure dates are given as 'YYYY/MM/DD' or 'YYYY-MM-DD').", sep="")
         stop(msg)
@@ -45,10 +45,10 @@
         stop("prox.mat is provided but its dimensions are inconsistent with that of x")
     }
 
-    N <- length(seq.names)
+    N <- length(x.names)
     id <- 1:N
 
-    seq.dates <- as.POSIXct(round.POSIXt(seq.dates,units="days")) # round dates to the day
+    x.dates <- as.POSIXct(round.POSIXt(x.dates,units="days")) # round dates to the day
 
     x <- as.matrix(x)
     ## rename dimensions using id
@@ -57,7 +57,7 @@
         colnames(prox.mat) <- rownames(prox.mat) <- id
     }
 
-    if(length(seq.names) != nrow(x)){
+    if(length(x.names) != nrow(x)){
         stop("inconsistent dimension for x")
     }
 
@@ -87,7 +87,7 @@
 
         ## If several ancestors remain, take the oldest.
         if(length(ances)>1){
-            ances <- ances[which.min(seq.dates[ances])]
+            ances <- ances[which.min(x.dates[ances])]
         }
 
         return(ances)
@@ -96,7 +96,7 @@
 
     ## findAncestor
     findAncestor <- function(idx){ # returns the index of one seq's ancestor
-        candid <- which(seq.dates < seq.dates[idx])
+        candid <- which(x.dates < x.dates[idx])
         if(length(candid)==0) return(list(ances=NA, weight=NA))
         if(length(candid)==1) return(list(ances=candid, weight=x[idx, candid]))
         ancesId <- as.numeric(which.is.optim(x[idx, candid]))
@@ -110,9 +110,9 @@
     ## BUILD THE OUTPUT ##
     res <- sapply(id, findAncestor)
     res <- data.frame(ances=unlist(res[1,]), weight=unlist(res[2,]))
-    ances.date <- seq.dates[res[,1]]
-    res <- cbind.data.frame(id,res, date=seq.dates, ances.date)
-    rownames(res) <- seq.names
+    ances.date <- x.dates[res[,1]]
+    res <- cbind.data.frame(id,res, date=x.dates, ances.date)
+    rownames(res) <- x.names
 
     return(res)
 } # end seqTrack
@@ -127,7 +127,7 @@
 ################
 plotSeqTrack <- function(x, xy, useArrows=TRUE, annot=TRUE, dateRange=NULL,
                          col=NULL, bg="grey", add=FALSE, quiet=FALSE,
-                         showAmbiguous=TRUE, mu0=NULL, seq.length=NULL, prob=0.75,
+                         showAmbiguous=TRUE, mu0=NULL, chr.length=NULL, prob=0.75,
                          plot=TRUE,...){
 
     ## CHECKS ##
@@ -135,8 +135,8 @@
     if(ncol(x) != 5) stop("x does not have five columns")
     if(ncol(xy) != 2) stop("xy does not have two columns")
     if(nrow(xy) != nrow(x)) stop("x and xy have inconsistent dimensions")
-    if(showAmbiguous & (is.null(mu0) | is.null(seq.length)) ){
-        stop("showAmbiguous is TRUE, but mu0 and seq.length are not all provided.")
+    if(showAmbiguous & (is.null(mu0) | is.null(chr.length)) ){
+        stop("showAmbiguous is TRUE, but mu0 and chr.length are not all provided.")
     }
 
     isAmbig <- NULL
@@ -151,7 +151,7 @@
 
     ## FIND AMBIGUOUS TEMPORAL ORDERING ##
     if(showAmbiguous){
-        temp <- .pAbeforeB(x$ances.date, x$date, mu0, seq.length)
+        temp <- .pAbeforeB(x$ances.date, x$date, mu0, chr.length)
         isAmbig <- temp < prob
     }
 
@@ -168,10 +168,10 @@
 
     ## FIND THE COLOR FOR EDGES ##
     if(is.null(col)){
-        if(is.null(mu0) & is.null(seq.length)) {
+        if(is.null(mu0) & is.null(chr.length)) {
             col <- "black"
         } else {
-            ##  w <- .pAbeforeB(x$ances.date, x$date, mu0, seq.length, 200) # beware, lots of steps take time
+            ##  w <- .pAbeforeB(x$ances.date, x$date, mu0, chr.length, 200) # beware, lots of steps take time
             ##             isAmbig <-  w < prob
             ##             w[w<.5] <- .5
             ##             w <- (1 - w)
@@ -340,10 +340,10 @@
 ## x: result of seqTrack
 ## mu0: mutation rate / site / year
 ## ## p: threshold probability for a sequence not to mutate
-## .ambigDates <- function(x, mu0, seq.length, p=0.99){
+## .ambigDates <- function(x, mu0, chr.length, p=0.99){
 ##     mu <- mu0/365 # mutation rate / site / day
 ##     t <- 0:1000 # in days
-##     Pt <- (1-mu)^(t*seq.length)
+##     Pt <- (1-mu)^(t*chr.length)
 ##     ##plot(Pt,ylim=c(0,1))
 ##     nbDays <- which.min(Pt>p)-1
 
@@ -407,14 +407,15 @@
 #####################
 ##
 ## TODO:
-## 1) Change the output to retain xxx simulations | ok.
-## 2) VECTORIZE mu0 and seq.length, recycle if needed with a warning
+## 1) Change the output to retain xxx simulations | ok. -- done.
+## 2) VECTORIZE mu0 and chr.length, recycle if needed with a warning
 ## 3) uncomment, adapt, and test code for missing data
 ##
-optimize.seqTrack.default <- function(x, seq.names, seq.dates, thres=0.2, optim=c("min","max"),
-                              prox.mat=NULL, nstep=10, step.size=1e3, mu0, seq.length,
-                              rMissDate=.rUnifTimeSeq, ...){
+optimize.seqTrack.default <- function(x, x.names, x.dates, typed.chr, mu0, chr.length,
+                                      thres=0.2, optim=c("min","max"), prox.mat=NULL, nstep=10, step.size=1e3,
+                                      rMissDate=.rUnifTimeSeq, ...){
 
+
     ## CHECKS ##
     optim <- match.arg(optim)
     if(optim=="min"){
@@ -423,31 +424,42 @@
         which.optim <- which.max
     }
 
-    if(length(seq.names) != length(seq.dates)){
-        stop("inconsistent length for seq.dates")
+    if(length(x.names) != length(x.dates)){
+        stop("inconsistent length for x.dates")
     }
 
-    if(is.character(seq.dates)){
-        msg <- paste("seq.dates is a character vector; " ,
+    if(is.character(x.dates)){
+        msg <- paste("x.dates is a character vector; " ,
                      "please convert it as dates using 'as.POSIXct'" ,
                      "\n(making sure dates are given as 'YYYY/MM/DD' or 'YYYY-MM-DD').", sep="")
         stop(msg)
     }
 
-    isMissDate <- is.na(seq.dates)
+    isMissDate <- is.na(x.dates)
 
 
-    N <- length(seq.names)
+    N <- length(x.names)
     id <- 1:N
-    if(length(mu0) < N) { # recycle mu0
-        mu0 <- rep(mu0, length=N)
+    ## if(length(mu0) < N) { # recycle mu0
+    ##         mu0 <- rep(mu0, length=N)
+    ##     }
+    ##     if(length(chr.length) < N) {# recycle chr.length
+    ##         chr.length <- rep(chr.length, length=N)
+    ##     }
+
+
+    ## handle typed.chr, mu0, chr.length
+    if(!is.list(typed.chr)) {
+        stop("typed.chr must be a list")
     }
-    if(length(seq.length) < N) {# recycle seq.length
-        seq.length <- rep(seq.length, length=N)
+    if(length(typed.chr)!=N) {
+        stop("typed.chr has an inconsistent length")
     }
 
+    list.mu0 <- lapply(typed.chr, function(e) mu0[e])
+    list.chr.length <- lapply(typed.chr, function(e) chr.length[e])
 
-    seq.dates <- as.POSIXct(round.POSIXt(seq.dates,units="days")) # round dates to the day
+    x.dates <- as.POSIXct(round.POSIXt(x.dates,units="days")) # round dates to the day
 
     x <- as.matrix(x)
 
@@ -455,17 +467,18 @@
         stop("prox.mat is provided but its dimensions are inconsistent with that of x")
     }
 
+
     ## rename dimensions using id
     colnames(x) <- rownames(x) <- id
 
-    if(length(seq.names) != nrow(x)){
+    if(length(x.names) != nrow(x)){
         stop("inconsistent dimension for x")
     }
 
 
     ## SET THRESHOLD IF NEEDED ## ## NO LONGER USED
     ##  if(is.null(thres)){
-    ##         thres <- sum(seqTrack(seq.names=seq.names, seq.dates=seq.dates, W=W,
+    ##         thres <- sum(seqTrack(x.names=x.names, x.dates=x.dates, W=W,
     ##                                     optim=optim, prox.mat=prox.mat, ...)$weight, na.rm=TRUE)
     ##     }
 
@@ -479,7 +492,7 @@
 
 
     ## DO THE OPTIMISATION ##
-    RANGE.DATES <- as.integer(round(diff(range(seq.dates, na.rm=TRUE)))) # time window of the sample, in days
+    RANGE.DATES <- as.integer(round(diff(range(x.dates, na.rm=TRUE)))) # time window of the sample, in days
     NB.DATES.TO.SIM <- sum(!isMissDate)
 
 
@@ -501,8 +514,8 @@
     if(!any(isMissDate)){
         ## dates initialisation, taken from initial prior
         newDates <- sapply(1:N, function(i)
-                           .rTimeSeq(n=step.size, mu0=mu0[i], L=seq.length[i], maxNbDays=RANGE.DATES))
-        newDates <- t(newDates)*24*3600 + seq.dates
+                           .rTimeSeq(n=step.size, mu0=list.mu0[[i]], L=list.chr.length[[i]], maxNbDays=RANGE.DATES))
+        newDates <- t(newDates)*24*3600 + x.dates
 
         ## >> one step of 'step.size' simulations, all with same prior << ##
         for(i in 1:nstep){
@@ -510,7 +523,7 @@
             for(j in 1:step.size){
                 myDates <- as.POSIXct(newDates[,j])
 
-                res.new <- seqTrack(x, seq.names=seq.names, seq.dates=myDates,
+                res.new <- seqTrack(x, x.names=x.names, x.dates=myDates,
                                     optim=optim, prox.mat=prox.mat, ...)
 
                 ##ances <- cbind(ances, res.new$ances) # not needed now
@@ -572,28 +585,28 @@
     ##         argList <- list(...)
 
     ##         if(is.null(argList$dateMin) & identical(rMissDate, .rUnifTimeSeq)){ # earliest date
-    ##             argList$dateMin <- min(seq.dates,na.rm=TRUE)
+    ##             argList$dateMin <- min(x.dates,na.rm=TRUE)
     ##         } else {
-    ##             argList$dateMin[is.na(argList$dateMin)] <- min(seq.dates,na.rm=TRUE)
+    ##             argList$dateMin[is.na(argList$dateMin)] <- min(x.dates,na.rm=TRUE)
     ##         }
     ##         if(is.null(argList$dateMax) & identical(rMissDate, .rUnifTimeSeq)){ # latest date
-    ##             argList$dateMax <- max(seq.dates,na.rm=TRUE)
+    ##             argList$dateMax <- max(x.dates,na.rm=TRUE)
     ##         } else {
-    ##             argList$dateMax[is.na(argList$dateMax)] <- max(seq.dates,na.rm=TRUE)
+    ##             argList$dateMax[is.na(argList$dateMax)] <- max(x.dates,na.rm=TRUE)
     ##         }
 
     ##         argList$n <- sum(isMissDate)
 
     ##         ## Make simulations ##
     ##         for(i in 1:nstep){
-    ##             myDates <- seq.dates
+    ##             myDates <- x.dates
     ##             ## distribution for available dates
     ##             myDates[!isMissDate] <- myDates[!isMissDate] +
-    ##                 .rTimeSeq(n=NB.DATES.TO.SIM, mu0=mu0, L=seq.length, maxNbDays=2*RANGE.DATES)
+    ##                 .rTimeSeq(n=NB.DATES.TO.SIM, mu0=mu0, L=chr.length, maxNbDays=2*RANGE.DATES)
     ##             ## distribution for missing dates
     ##             myDates[isMissDate] <- do.call(rMissDate, argList)
 
-    ##             res.new <- seqTrack(seq.names=seq.names, seq.dates=myDates, W=W, optim=optim, prox.mat=prox.mat, ...)
+    ##             res.new <- seqTrack(x.names=x.names, x.dates=myDates, W=W, optim=optim, prox.mat=prox.mat, ...)
 
     ##             valRes[i] <- sum(res.new$weight,na.rm=TRUE)
     ##             if(use.new.res(res.best, res.new)){
@@ -607,7 +620,7 @@
 
     ## reconstruct the result with new dates
     res <- lapply(1:ncol(date), function(i)
-                   seqTrack(x=x, seq.names=seq.names, seq.dates=as.POSIXct(date[,i]),
+                   seqTrack(x=x, x.names=x.names, x.dates=as.POSIXct(date[,i]),
                                     optim=optim, prox.mat=prox.mat, ...))
     ances <- data.frame(lapply(res, function(e) e$ances))
     ances <- matrix(as.integer(unlist(ances)), nrow=nrow(ances))
@@ -742,7 +755,7 @@
 ###############
 ## seqTrack.ml
 ###############
-seqTrack.ml <- function(aln, seq.dates, optim=c("min","max"), ...){
+seqTrack.ml <- function(aln, x.dates, optim=c("min","max"), ...){
 
 } # end seqTrack.ml
 
@@ -807,22 +820,22 @@
 ## #############
 ## ## seqTrack
 ## #############
-## seqTrack <- function(seq.names, seq.dates, D, k=5, lag=3, ...){
+## seqTrack <- function(x.names, x.dates, D, k=5, lag=3, ...){
 
 ##     ## CHECKS ##
-##     if(length(seq.names) != length(seq.dates)){
-##         stop("inconsistent length for seq.dates")
+##     if(length(x.names) != length(x.dates)){
+##         stop("inconsistent length for x.dates")
 ##     }
 
 ##     D <- as.matrix(D)
 
-##     if(length(seq.names) != nrow(D)){
+##     if(length(x.names) != nrow(D)){
 ##         stop("inconsistent dimension for D")
 ##     }
 
 
 ##     ## ASSIGNEMENTS ##
-##     N <- length(seq.names)
+##     N <- length(x.names)
 ##     STARTPOINTS <- 1:N # global variable, modified thoughout
 ##     id <- 1:N
 ##     INPATH <- NULL
@@ -837,13 +850,13 @@
 ##     ## AUXILIARY FUNCTIONS ##
 ##     ## choose a starting sequence, as recent as possible
 ##     chooseStartPoint <- function(){
-##         return(STARTPOINTS[which.max(seq.dates[STARTPOINTS])])
+##         return(STARTPOINTS[which.max(x.dates[STARTPOINTS])])
 ##     }
 
 
 ##     ## find k closest ancestors: returns a list(id=[id of the ancestors], d=[distances to ancestors])
 ##     findAncestors <- function(currentPoint){
-##         candidates <- id[seq.dates < seq.dates[currentPoint]]
+##         candidates <- id[x.dates < x.dates[currentPoint]]
 ##         if(length(candidates)==0) return(NULL) # this will indicate the end of the path
 ##         res <- D[currentPoint,candidates]
 ##         if(length(res) <= k) return(list(id=as.integer(names(res)), d=res)) # if less than k candidates
@@ -898,7 +911,7 @@
 ##         isEnded <- function(onePath){
 ##             oldest <- onePath[length(onePath)]
 ##             temp <- setdiff(id,oldest)
-##             if(all(seq.dates[temp] < seq.dates[oldest])) return(TRUE)
+##             if(all(x.dates[temp] < x.dates[oldest])) return(TRUE)
 ##             return(FALSE)
 ##         }