[adegenet-commits] r318 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu May 21 20:16:56 CEST 2009
Author: jombart
Date: 2009-05-21 20:16:56 +0200 (Thu, 21 May 2009)
New Revision: 318
Modified:
pkg/R/seqTrack.R
Log:
New optimization routine is now time-stochastic. Not tested.
Modified: pkg/R/seqTrack.R
===================================================================
--- pkg/R/seqTrack.R 2009-05-21 16:52:34 UTC (rev 317)
+++ pkg/R/seqTrack.R 2009-05-21 18:16:56 UTC (rev 318)
@@ -223,7 +223,7 @@
##############
-## .pAbeforeB
+## .pAbeforeB # no longer used, never totally finished
##############
##
## proba that a sequence A, sampled a time Ta,
@@ -232,30 +232,55 @@
## - Ta, Tb: sampling times (Ta < Tb)
## - mu0: mutation rate / site / year
-.pAbeforeB <- function(Ta, Tb, mu0){
+## .pAbeforeB <- function(Ta, Tb, mu0){
+## mu <- mu0/365 # mutation rate / site / day
+## t <- 1:1000 # in days
+## Pt <- (1-mu)^(t*seq.length)
+
+## ## define the range of surrounding days
+## timeSep <- Tb-Ta
+## rangeSize <- timeSep + 2000 -1 + 2
+## t <- 1:rangeSize # idx of Ta = 1001; idx of Tb = 1001 + timeSep
+
+## ## define probabilities to observe A (resp, B) by days
+## Pa <- rep(0, rangeSize)
+## Pa[1:2001] <- c(sort(Pt),1,Pt) # proba to observe A for surrounding days
+## Pb <- rep(0, rangeSize)
+## Pb[timeSep+(1:2001)] <- c(sort(Pt),1,Pt) # proba to observe B for surrounding days
+
+## ## compute proba A preceeded B for a given day
+## f1 <- function(day){
+## L <- rangeSize
+## if(day==L) return(0)
+## idSum <- seq(day+1, L, by=1)
+## res <- Pa[day] * sum(Pb[idSum])
+## return(res)
+## } # end f1
+## }
+
+
+
+#############
+## .dTimeSeq
+#############
+.dTimeSeq <- function(mu0, L, maxNbDays=100){
mu <- mu0/365 # mutation rate / site / day
- t <- 1:1000 # in days
+ t <- 0:maxNbDays # in days added / substracted
Pt <- (1-mu)^(t*seq.length)
+ t <- c(-rev(t[-1]), t)
+ Pt <- c(rev(Pt[-1]), Pt)
+ return(list(t, Pt))
+}
- ## define the range of surrounding days
- timeSep <- Tb-Ta
- rangeSize <- timeSep + 2000 -1 + 2
- t <- 1:rangeSize # idx of Ta = 1001; idx of Tb = 1001 + timeSep
- ## define probabilities to observe A (resp, B) by days
- Pa <- rep(0, rangeSize)
- Pa[1:2001] <- c(sort(Pt),1,Pt) # proba to observe A for surrounding days
- Pb <- rep(0, rangeSize)
- Pb[timeSep+(1:2001)] <- c(sort(Pt),1,Pt) # proba to observe B for surrounding days
-
- ## compute proba A preceeded B for a given day
- f1 <- function(day){
- L <- rangeSize
- if(day==L) return(0)
- idSum <- seq(day+1, L, by=1)
- res <- Pa[day] * sum(Pb[idSum])
- return(res)
- } # end f1
+#############
+## .rTimeSeq
+#############
+.rTimeSeq <- function(n, mu0, L, maxNbDays=100){
+ temp <- .dTimeSeq(mu0, L, maxNbDays)
+ p <- temp[[2]]/sum(temp[[2]])
+ res <- sample(temp[[1]], size=n, replace=TRUE, prob=p)
+ return(res)
}
@@ -263,7 +288,6 @@
-
###############
## .ambigDates
###############
@@ -295,8 +319,8 @@
#####################
## optimize.seqTrack
#####################
-optimize.seqTrack <- function(seq.names, seq.dates, W, optim=c("min","max"),
- mu0, seq.length, p=0.99, ...){
+optimize.seqTrack <- function(nsim, seq.names, seq.dates, W, optim=c("min","max"),
+ proxMat=NULL, mu0, seq.length, p=0.99, ...){
## CHECKS ##
optim <- match.arg(optim)
@@ -321,6 +345,11 @@
id <- 1:N
W <- as.matrix(W)
+
+ if(!is.null(proxMat) && !identical(dim(proxMat),dim(W))) {
+ stop("proxMat is provided but its dimensions are inconsistent with that of W")
+ }
+
## rename dimensions using id
colnames(W) <- rownames(W) <- id
@@ -330,65 +359,77 @@
## AUXILIARY FUNCTIONS ##
- valRes <- function(res){
- return(sum(unlist(res[,3]), na.rm=TRUE))
- }
+ ## valRes <- function(res){
+ ## return(sum(unlist(res[,3]), na.rm=TRUE))
+ ## }
- useNewRes <- function(oldRes,newRes, optim){
- if(optim=="min"){
- res <- valRes(oldRes) > valRes(newRes)
- } else {
- res <- valRes(oldRes) < valRes(newRes)
- }
+ ## useNewRes <- function(oldRes,newRes, optim){
+ ## if(optim=="min"){
+ ## res <- valRes(oldRes) > valRes(newRes)
+ ## } else {
+ ## res <- valRes(oldRes) < valRes(newRes)
+ ## }
- return(res)
- }
+ ## return(res)
+ ## }
## FIND INITIAL SEQTRACK RESULT ##
res.ini <- seqTrack(seq.names, seq.dates, W, optim=c("min","max"))
-
- ## LOOK FOR AMBIGUOUS DATES ##
- isAmbig <- .ambigDates(res.ini, mu0, seq.dates, seq.length, p=0.99)
- if(!any(isAmbig)){
- cat("\nNo ambiguity in dates was found; unique solution returned.\n")
- return(res)
+ ## to compare results
+ use.new.res <- function(res.old, res.new){
+ if(optim=="min"){
+ return(sum(res.old$weight, na.rm=TRUE) > sum(res.new$weight, na.rm=TRUE))
+ } else {
+ return(sum(res.old$weight, na.rm=TRUE) < sum(res.new$weight, na.rm=TRUE))
+ }
}
- ## DEFINE NEW TEMPORAL ORDER ##
- permutDate <- function(res, id, newDates){ # id: segment whose vertices are permuted
- idAnc <- unlist(res[2,id])
- idDes <- unlist(res[1,id])
- temp <- newDates[idAnc]
- newDates[idAnc] <- newDates[idDes]
- newDates[idDes] <- temp
- return(newDates)
- }
+ ## ## LOOK FOR AMBIGUOUS DATES ##
+ ## isAmbig <- .ambigDates(res.ini, mu0, seq.length, p=0.99)
+ ## if(!any(isAmbig)){
+ ## cat("\nNo ambiguity in dates was found; unique solution returned.\n")
+ ## return(res)
+ ## }
+ ## ## DEFINE NEW TEMPORAL ORDER ##
+ ## permutDate <- function(res, id, newDates){ # id: segment whose vertices are permuted
+ ## idAnc <- unlist(res[2,id])
+ ## idDes <- unlist(res[1,id])
+ ## temp <- newDates[idAnc]
+ ## newDates[idAnc] <- newDates[idDes]
+ ## newDates[idDes] <- temp
+ ## return(newDates)
+ ## }
+
+
## DO THE OPTIMISATION ##
- res.cur <- res.ini # initialization
- idToPermut <- which(isAmbig) # indices of segments to permut
- dates.cur <- order(seq.dates) # order matching initial one
- permuted <- integer(0) # keeps track of permutated segments (by the index of the descendent)
+ RANGE.DATES <- diff(range(seq.dates)) # time window of the sample, in days
+ NB.DATES.TO.SIM <- length(seq.dates)
- for(i in idToPermut){
- dates.new <- permutDate(res.cur, i, dates.cur) # new temporal order
- res.new <- seqTrack(seq.names, newDates, W, optim=c("min","max")) # new result
- if(useNewRes(res.cur, res.new, optim)){ # keep the best result
- res.cur <- res.new
- dates.cur <- dates.new
- permuted <- c(permuted, i)
+
+ ## for loop is not to slow for < 1e6 rep
+ ## and allows not to handle huge objects
+ ## (which would grow exponentially)
+
+ res.best <- res.cur # initialization
+ valRes <- numeric(nsim)
+
+ for(i in 1:nsim){
+ myDates <- .rTimeSeq(n=NB.DATES.TO.SIM, mu0=mu0, L=seq.length, maxNbDays=2*RANGE.DATES)
+ res.new <- seqTrack(seq.names=seq.names, seq.dates=myDates, W=W, optim=optim, proxMat=proxMat, ...)
+ valRes[i] <- sum(res.new$weight)
+ if(use.new.res(res.best, res.new)){
+ res.best <- res.new
}
}
## RESULT ##
- res <- list(res=res.cur, permutID=permuted, newDates=dates.cur)
- nperm <- length(permuted)
- cat("\nBest result found after",nperm,"permutations of ambiguous dates\n")
+ res <- list(best=res.best, valsim=valRes)
return(res)
} # optimize.seqTrack
@@ -411,6 +452,23 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
#########################
## OLD ADD-HOC VERSION
#########################
More information about the adegenet-commits
mailing list