[adegenet-commits] r357 - in pkg: . R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Jun 4 12:21:31 CEST 2009
Author: jombart
Date: 2009-06-04 12:21:31 +0200 (Thu, 04 Jun 2009)
New Revision: 357
Added:
pkg/R/haploSim.R
Removed:
pkg/R/simuFlu.R
Modified:
pkg/DESCRIPTION
Log:
simuFlu renamed to haploSim.
Started size limitation for results.
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2009-06-03 22:46:35 UTC (rev 356)
+++ pkg/DESCRIPTION 2009-06-04 10:21:31 UTC (rev 357)
@@ -9,4 +9,4 @@
Description: Classes and functions for genetic data analysis within the multivariate framework.
License: GPL (>=2)
LazyLoad: yes
-Collate: classes.R auxil.R handling.R genind2genpop.R propTyped.R basicMethods.R old2new.R makefreq.R chooseCN.R dist.genpop.R export.R setAs.R gstat.randtest.R HWE.R import.R monmonier.R coords.monmonier.R spca.R spca.rtests.R zzz.R hybridize.R fstat.R propShared.R scale.R colorplot.R loadingplot.R sequences.R seqTrack.R simuFlu.R
+Collate: classes.R auxil.R handling.R genind2genpop.R propTyped.R basicMethods.R old2new.R makefreq.R chooseCN.R dist.genpop.R export.R setAs.R gstat.randtest.R HWE.R import.R monmonier.R coords.monmonier.R spca.R spca.rtests.R zzz.R hybridize.R fstat.R propShared.R scale.R colorplot.R loadingplot.R sequences.R seqTrack.R haploSim.R
Copied: pkg/R/haploSim.R (from rev 356, pkg/R/simuFlu.R)
===================================================================
--- pkg/R/haploSim.R (rev 0)
+++ pkg/R/haploSim.R 2009-06-04 10:21:31 UTC (rev 357)
@@ -0,0 +1,107 @@
+############
+## haploSim
+############
+##
+## N: number of sequences to simulate
+## mu: mutation rate per nucleotid per year
+## Tmax: periode of time to simulatet
+## mean.gen.time, sd.gen.time: average time for transmission and its standard deviation (normal dist)
+## mean.repro, sd.repro: average number of transmissions and its standard deviation (normal dist)
+##
+haploSim <- function(seq.length=1500, mu=0.0035,
+ Tmax=30, mean.gen.time=2.5, sd.gen.time=0.5,
+ mean.repro=1.5, sd.repro=1,
+ max.nb.strain=1e4){
+
+ ## GENERAL VARIABLES ##
+ NUCL <- c("a","t","c","g")
+ res <- list(seq=list(), dates=integer(), ances=integer())
+ toExpand <- logical()
+ mu <- mu/365 # mutation rate by day
+
+ ## AUXILIARY FUNCTIONS ##
+ ## generate sequence from scratch
+ gen.seq <- function(){
+ return(sample(NUCL, size=seq.length, replace=TRUE))
+ }
+
+ ## create substitutions for defined SNPs
+ substi <- function(snp){
+ res <- sapply(snp, function(e) sample(setdiff(NUCL,e),1))
+ return(res)
+ }
+
+ ## duplicate a sequence (including possible mutations)
+ seq.dupli <- function(seq){
+ toChange <- as.logical(rbinom(n=seq.length, size=1, prob=mu))
+ res <- seq
+ if(sum(toChange)>0) {
+ res[toChange] <- substi(res[toChange])
+ }
+ return(res)
+ }
+
+ ## how many days before duplication occurs ?
+ time.dupli <- function(){
+ res <- round(rnorm(1, mean=mean.gen.time, sd=sd.gen.time))
+ res[res<0] <- 0
+ return(res)
+ }
+
+ ## when duplication occurs?
+ date.dupli <- function(curDate){
+ res <- curDate + time.dupli()
+ return(res)
+ }
+
+ ## how many duplication/transmission occur?
+ nb.desc <- function(){
+ res <- round(rnorm(1, mean=mean.repro, sd=sd.repro))
+ res[res<0] <- 0
+ return(res)
+ }
+
+ ## check result size and bound it
+ resize.result <- function(){
+ if(length(res$date) < max.nb.strain) return(NULL)
+
+ }
+
+
+ ## MAIN SUB-FUNCTION: EXPANDING FROM ONE SEQUENCE ##
+ expand.one.strain <- function(seq, date, idx){
+ toExpand[idx] <<- FALSE # this one is no longer to expand
+ nbDes <- nb.desc()
+ if(nbDes==0) return(NULL) # stop if no descendant
+ newDates <- sapply(1:nbDes, function(i) date.dupli(date)) # find dates for descendants
+ newDates <- newDates[newDates <= Tmax] # don't store future sequences
+ nbDes <- length(newDates)
+ if(nbDes==0) return(NULL) # stop if no suitable date
+ newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq)) # generate new sequences
+ res$seq <<- c(res$seq, newSeq) # append to general output
+ res$dates <<- c(res$dates, newDates) # append to general output
+ res$ances <<- c(res$ances, rep(idx, nbDes)) # append to general output
+ toExpand <<- c(toExpand, rep(TRUE, nbDes))
+ return(NULL)
+ }
+
+
+ ## PERFORM SIMULATIONS ##
+
+ ## initialization
+ res$seq[[1]] <- gen.seq()
+ res$dates[1] <- 0
+ res$ances[1] <- NA
+ toExpand <- TRUE
+
+ ## simulations: isn't simplicity beautiful?
+ while(any(toExpand)){
+ idx <- min(which(toExpand))
+ expand.one.strain(res$seq[[idx]], res$dates[idx], idx)
+ }
+
+
+ ## RETURN OUTPUT ##
+ return(res)
+
+} # end haploSim
Property changes on: pkg/R/haploSim.R
___________________________________________________________________
Name: svn:mergeinfo
+
Deleted: pkg/R/simuFlu.R
===================================================================
--- pkg/R/simuFlu.R 2009-06-03 22:46:35 UTC (rev 356)
+++ pkg/R/simuFlu.R 2009-06-04 10:21:31 UTC (rev 357)
@@ -1,99 +0,0 @@
-###########
-## simuFlu
-###########
-##
-## N: number of sequences to simulate
-## Tmax: periode of time to simulatet
-## mean.gen.time, sd.gen.time: average time for transmission and its standard deviation (normal dist)
-## mean.repro, sd.repro: average number of transmissions and its standard deviation (normal dist)
-##
-simuFlu <- function(seq.length=1500, mu=0.0035,
- Tmax=30, mean.gen.time=2.5, sd.gen.time=0.5,
- mean.repro=1.5, sd.repro=1){
-
- ## GENERAL VARIABLES ##
- NUCL <- c("a","t","c","g")
- res <- list(seq=list(), dates=integer(), ances=integer())
- toExpand <- logical()
-
-
- ## AUXILIARY FUNCTIONS ##
- ## generate sequence from scratch
- gen.seq <- function(){
- return(sample(NUCL, size=seq.length, replace=TRUE))
- }
-
- ## create substitutions for defined SNPs
- substi <- function(snp){
- res <- sapply(snp, function(e) sample(setdiff(NUCL,e),1))
- return(res)
- }
-
- ## duplicate a sequence (including possible mutations)
- seq.dupli <- function(seq){
- toChange <- as.logical(rbinom(n=seq.length, size=1, prob=mu))
- res <- seq
- if(sum(toChange)>0) {
- res[toChange] <- substi(res[toChange])
- }
- return(res)
- }
-
- ## how many days before duplication occurs ?
- time.dupli <- function(){
- res <- round(rnorm(1, mean=mean.gen.time, sd=sd.gen.time))
- res[res<0] <- 0
- return(res)
- }
-
- ## when duplication occurs?
- date.dupli <- function(curDate){
- res <- curDate + time.dupli()
- return(res)
- }
-
- ## how many duplication/transmission occur?
- nb.desc <- function(){
- res <- round(rnorm(1, mean=mean.repro, sd=sd.repro))
- res[res<0] <- 0
- return(res)
- }
-
-
- ## MAIN SUB-FUNCTION: EXPANDING FROM ONE SEQUENCE ##
- expand.one.strain <- function(seq, date, idx){
- toExpand[idx] <<- FALSE # this one is no longer to expand
- nbDes <- nb.desc()
- if(nbDes==0) return(NULL) # stop if no descendant
- newDates <- sapply(1:nbDes, function(i) date.dupli(date)) # find dates for descendants
- newDates <- newDates[newDates <= Tmax] # don't store future sequences
- nbDes <- length(newDates)
- if(nbDes==0) return(NULL) # stop if no suitable date
- newSeq <- lapply(1:nbDes, function(i) seq.dupli(seq)) # generate new sequences
- res$seq <<- c(res$seq, newSeq) # append to general output
- res$dates <<- c(res$dates, newDates) # append to general output
- res$ances <<- c(res$ances, rep(idx, nbDes)) # append to general output
- toExpand <<- c(toExpand, rep(TRUE, nbDes))
- return(NULL)
- }
-
-
- ## PERFORM SIMULATIONS ##
-
- ## initialization
- res$seq[[1]] <- gen.seq()
- res$dates[1] <- 0
- res$ances[1] <- NA
- toExpand <- TRUE
-
- ## simulations: isn't simplicity beautiful?
- while(any(toExpand)){
- idx <- min(which(toExpand))
- expand.one.strain(res$seq[[idx]], res$dates[idx], idx)
- }
-
-
- ## RETURN OUTPUT ##
- return(res)
-
-} # end simuFlu
More information about the adegenet-commits
mailing list