[Seqinr-commits] r1664 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Oct 12 18:30:15 CEST 2009
Author: lobry
Date: 2009-10-12 18:30:15 +0200 (Mon, 12 Oct 2009)
New Revision: 1664
Added:
pkg/R/stutterabif.R
Log:
new function for stutter ratio
Added: pkg/R/stutterabif.R
===================================================================
--- pkg/R/stutterabif.R (rev 0)
+++ pkg/R/stutterabif.R 2009-10-12 16:30:15 UTC (rev 1664)
@@ -0,0 +1,109 @@
+stutterabif <- function(abifdata, chanel, poswild,
+ datapointbefore = 70, datapointafter = 20,
+ datapointsigma = 3.5,
+ chanel.names = c(1:4,105),
+ DATA = paste("DATA", chanel.names[chanel], sep = "."),
+ maxrfu = 1000,
+ method = "monoH.FC",
+ pms = 6,
+ fig = FALSE){
+
+ xseq <- floor((poswild-datapointbefore):(poswild+datapointafter))
+ yseq <- abifdata$Data[[DATA]][xseq]
+ #
+ # The baseline for the signal is taken as the most common value:
+ #
+ baseline <- baselineabif(yseq, maxrfu = maxrfu)
+ #
+ # Values below baseline are forced to baseline:
+ #
+ yseq[yseq < baseline] <- baseline
+ #
+ # Set baseline to zero:
+ #
+ yseq <- yseq - baseline
+ #
+ # First pass to fit a gaussian on wild allele:
+ #
+ ytheo.one <- function(p, x) p[1]*dnorm(x, p[2], p[3])
+ obj.one <- function(p) sum(( yseq - ytheo.one(p, xseq))^2)
+ guess.one <- numeric(3)
+ guess.one[1] <- datapointsigma*sqrt(2*pi)*max(yseq)
+ guess.one[2] <- poswild
+ guess.one[3] <- datapointsigma
+ suppressWarnings(nlmres.one <- nlm(obj.one, p = guess.one))
+ #
+ # Second pass to estimate stutter allele parameters. We do
+ # not take into account wild peak allele data.
+ #
+ est <- nlmres.one$estimate
+ censored <- floor(est[2] - 6*est[3]) # cut at mu - 6 sigma
+ ytheo.two <- function(p, x) p[1]*dnorm(x, p[2], p[3])
+ used <- ( xseq < censored )
+ obj.two <- function(p) sum(( yseq[used] - ytheo.two(p, xseq[used]))^2)
+ guess.two <- numeric(3)
+ guess.two[1] <- datapointsigma*sqrt(2*pi)*max(yseq[used])
+ guess.two[2] <- xseq[which.max(yseq[used])]
+ guess.two[3] <- datapointsigma
+ suppressWarnings(nlmres.two <- nlm(obj.two, p = guess.two))
+ est2 <- nlmres.two$estimate
+ #
+ # Fit a spline:
+ #
+ spfun <- splinefun(xseq, yseq, method = method)
+ xx <- seq(min(xseq), max(xseq), le = 500)
+ yy <- spfun(xx)
+ #
+ # Compute output result:
+ #
+
+ x1inf <- est2[2] - pms*est2[3]
+ x1sup <- est2[2] + pms*est2[3]
+ l1 <- optimize(spfun, interval = c(x1inf, x1sup), maximum = TRUE)$maximum
+ h1 <- spfun(l1)
+ s1 <- integrate(spfun, x1inf, x1sup)$value
+
+ x2inf <- est[2] - pms*est[3]
+ x2sup <- est[2] + pms*est[3]
+ l2 <- optimize(spfun, interval = c(x2inf, x2sup), maximum = TRUE)$maximum
+ h2 <- spfun(l2)
+ s2 <- integrate(spfun, x2inf, x2sup)$value
+
+ rh <- h1/h2
+ rs <- s1/s2
+
+ p <- list(p1 = est2[1], mu1 = est2[2], sd1 = est2[3],
+ p2 = est[1], mu2 = est[2], sd2 = est[3],
+ xseq = xseq, yseq = yseq)
+ if(fig){
+ plot(xseq, yseq,
+ main = paste("rh = ", round(rh, 5), "rs =", round(rs, 5)),
+ ylab = "RFU", las = 1,
+ xlab = "Time in datapoint units")
+ abline(h = 0, lty = 3)
+ abline(v = c(l1, l2), lty=2, col = c("red", "blue"))
+ abline(h = c(h1, h2), lty=2, col = c("red", "blue"))
+
+ #lines(xx, yy, col = "red")
+ #
+ # Stutter:
+ #
+ xx <- seq(x1inf, x1sup, le = 500)
+ yy <- spfun(xx)
+ lines(xx, yy, col = "red", lwd = 2)
+ #
+ # Wild Allele:
+ #
+ xx <- seq(x2inf, x2sup, le = 500)
+ yy <- spfun(xx)
+ lines(xx, yy, col = "blue", lwd = 2)
+ #
+ # legend:
+ #
+ legend("topleft", inset = 0.01, legend = c("Stutter product",
+ "Corresponding allele"), lty = 1, lwd = 2, col = c("red", "blue"),
+ bg = "white", title = "Datapoints used for:")
+ }
+ return(list(rh = rh, rs = rs, h1 = h1, h2 = h2, s1 = s1, s2 = s2, p = p))
+}
+
More information about the Seqinr-commits
mailing list