[Robast-commits] r296 - in pkg/RobLoxBioC: . R inst/scripts man

Wed Apr 22 14:26:55 CEST 2009

Author: stamats
Date: 2009-04-22 14:26:55 +0200 (Wed, 22 Apr 2009)
New Revision: 296

Added:
   pkg/RobLoxBioC/inst/scripts/IlluminaSimStudy.R
   pkg/RobLoxBioC/man/SimStudies.Rd
Removed:
   pkg/RobLoxBioC/man/AffySimStudy.Rd
Modified:
   pkg/RobLoxBioC/DESCRIPTION
   pkg/RobLoxBioC/NAMESPACE
   pkg/RobLoxBioC/R/AffySimStudyFunction.R
   pkg/RobLoxBioC/R/IlluminaSimStudyFunction.R
   pkg/RobLoxBioC/inst/scripts/IlluminaExample.R
Log:
after importing the namespace of distr, the package now checks without errors or warnings. Added file for Illumina simulation study and renamed file AffySimStudy.Rd to SimStudies.Rd.

Modified: pkg/RobLoxBioC/DESCRIPTION
===================================================================

--- pkg/RobLoxBioC/DESCRIPTION	2009-04-22 11:08:04 UTC (rev 295)
+++ pkg/RobLoxBioC/DESCRIPTION	2009-04-22 12:26:55 UTC (rev 296)
@@ -1,6 +1,6 @@
 Package: RobLoxBioC
 Version: 0.4
-Date: 2009-04-21
+Date: 2009-04-22
 Title: Infinitesimally robust estimators for preprocessing omics data
 Description: Functions for the determination of optimally robust
   influence curves and estimators for preprocessing omics data,

Modified: pkg/RobLoxBioC/NAMESPACE
===================================================================
--- pkg/RobLoxBioC/NAMESPACE	2009-04-22 11:08:04 UTC (rev 295)
+++ pkg/RobLoxBioC/NAMESPACE	2009-04-22 12:26:55 UTC (rev 296)
@@ -1,4 +1,4 @@
-import("methods")
+import("methods", "distr")
 
 exportMethods("robloxbioc", "KolmogorovMinDist")
-export("AffySimStudy")
+export("AffySimStudy", "IlluminaSimStudy")

Modified: pkg/RobLoxBioC/R/AffySimStudyFunction.R
===================================================================
--- pkg/RobLoxBioC/R/AffySimStudyFunction.R	2009-04-22 11:08:04 UTC (rev 295)
+++ pkg/RobLoxBioC/R/AffySimStudyFunction.R	2009-04-22 12:26:55 UTC (rev 296)
@@ -64,18 +64,18 @@
 
     if(plot3){
         Ergebnis1 <- list(Mean, Median, Tukey[,1], RadMinmax[,1])
-        Ergebnis2 <- list(Sd, Mad, Tukey[,2], RadMinmax[,2])
+        Ergebnis2 <- list(Sd, Mad, RadMinmax[,2])
         myCol <- brewer.pal(4, "Dark2")
         if(plot1 || plot2) dev.new()
         layout(matrix(c(1, 1, 1, 1, 3, 2, 2, 2, 2, 3), ncol = 2))
         boxplot(Ergebnis1, col = myCol, pch = 20, main = "Location")
         abline(h = 0)
-        boxplot(Ergebnis2, col = myCol, pch = 20, main = "Scale")
+        boxplot(Ergebnis2, col = myCol[c(1,2,4)], pch = 20, main = "Scale")
         abline(h = 1)
         op <- par(mar = rep(2, 4))
         plot(c(0,1), c(1, 0), type = "n", axes = FALSE)
         legend("center", c("ML", "Med/MAD", "biweight", "rmx"),
-               fill = myCol, ncol = 5, cex = 1.5)
+               fill = myCol, ncol = 4, cex = 1.5)
         on.exit(par(op))
     }
 

Modified: pkg/RobLoxBioC/R/IlluminaSimStudyFunction.R
===================================================================
--- pkg/RobLoxBioC/R/IlluminaSimStudyFunction.R	2009-04-22 11:08:04 UTC (rev 295)
+++ pkg/RobLoxBioC/R/IlluminaSimStudyFunction.R	2009-04-22 12:26:55 UTC (rev 296)
@@ -82,7 +82,7 @@
         op <- par(mar = rep(2, 4))
         plot(c(0,1), c(1, 0), type = "n", axes = FALSE)
         legend("center", c("ML", "Med/MAD", "Illumina", "rmx"),
-               fill = myCol, ncol = 5, cex = 1.5)
+               fill = myCol, ncol = 4, cex = 1.5)
         on.exit(par(op))
     }
 

Modified: pkg/RobLoxBioC/inst/scripts/IlluminaExample.R
===================================================================
--- pkg/RobLoxBioC/inst/scripts/IlluminaExample.R	2009-04-22 11:08:04 UTC (rev 295)
+++ pkg/RobLoxBioC/inst/scripts/IlluminaExample.R	2009-04-22 12:26:55 UTC (rev 296)
@@ -37,16 +37,79 @@
 #save(spikeInData, compress = TRUE, file = "spikeInData.RData")
 #load(file = "spikeInData.RData")
 
-## takes more than 100 min on Intel P9500 (64bit Linux, 4 GByte RAM)
-system.time(minKD <- KolmogorovMinDist(spikeInData, Norm(), imagesPerArray = 2))
-save(minKD, compress = TRUE, file = "minKD_Illumina.RData")
+## takes about 9 hours on Intel P9500 (64bit Linux, 4 GByte RAM)
+system.time(minKD.Illumina <- KolmogorovMinDist(spikeInData, Norm(), imagesPerArray = 2))
+save(minKD.Illumina, compress = TRUE, file = "minKD_Illumina.RData")
 
+## takes about 9 hours on Intel P9500 (64bit Linux, 4 GByte RAM)
+system.time(minKD.Illumina.log <- KolmogorovMinDist(spikeInData, Norm(), log = TRUE, imagesPerArray = 2))
+save(minKD.Illumina.log, compress = TRUE, file = "minKD_Illumina_log.RData")
+
 ## load the results from R-forge ...
-con <- url("http://robast.r-forge.r-project.org/data/minKD_hgu95a.RData")
+con <- url("http://robast.r-forge.r-project.org/data/minKD_Illumina.RData")
 load(file = con)
 close(con)
+con <- url("http://robast.r-forge.r-project.org/data/minKD_Illumina_log.RData")
+load(file = con)
+close(con)
 
+## takes more than 90 min on Intel P9500 (64bit Linux, 4 GByte RAM)
+ns <- c(10:70)
+M <- length(ns)
+minKD.Illumina.norm <- matrix(NA, nrow = 50000, ncol = M)
+colnames(minKD.Illumina.norm) <- ns
+for(i in seq_len(M)){
+    tm <- proc.time()
+    print(ns[i])
+    temp <- matrix(rnorm(50000*ns[i]), ncol = ns[i])
+    minKD.Illumina.norm[,i] <- KolmogorovMinDist(temp, Norm())$dist
+    cat("Dauer:\t", proc.time()-tm, "\n")
+    save(minKD.Illumina.norm, compress = TRUE, file = "minKD_Illumina_norm.RData")
+}
 
+## load the results from R-forge
+con <- url("http://robast.r-forge.r-project.org/data/minKD_Illumina_norm.RData")
+load(file = con)
+close(con)
+
+#######################################
+## Figure in Kohl and Deigner (2009)
+#######################################
+res1 <- split(as.vector(minKD.Illumina$dist), as.vector(minKD.Illumina$n))[10:70]
+res2 <- split(as.vector(minKD.Illumina.log$dist), as.vector(minKD.Illumina.log$n))[10:70]
+res3 <- lapply(as.data.frame(minKD.Illumina.norm), function(x) x)
+uni.n <- rep(10:70, 3)
+
+postscript(file = "minKDIllumina.eps", height = 6, width = 9, paper = "special", 
+           horizontal = TRUE)
+par(mar = c(4, 4, 3, 1))
+plot(0, 0, type = "n", ylim = c(0, 0.49), xlim = c(0.5, 37.5), 
+     panel.first = abline(h = seq(0, 0.45, by = 0.05), lty = 2, col = "grey"), 
+     main = "Minimum Kolmogorov distance", 
+     ylab = "minimum Kolmogorov distance", 
+     xlab = "sample size", axes = FALSE)
+axis(1, c(1:61, 63:123, 125:185), labels = uni.n, cex.axis = 0.6)
+axis(2, seq(0, 0.4, by = 0.05), labels = seq(0, 0.4, by = 0.05), las = 2,
+     cex.axis = 0.8)
+box()
+boxplot(c(res1, res2, res3), at = c(1:61, 63:123, 125:185), add = TRUE, pch = 20, 
+        names = FALSE, axes = FALSE)
+abline(v = c(62, 124), lwd = 1.5)
+text(c(30, 93, 155), rep(0.48, 3), labels = c("Raw Data", "log Raw Data", "Normal Samples"),
+     font = 2)
+lines(1:61, 1/(2*(10:70)), lwd = 2)
+lines(63:123, 1/(2*(10:70)), lwd = 2)
+lines(125:185, 1/(2*(10:70)), lwd = 2)
+legend("bottomleft", legend = "minimal possible distance", lty = 1, 
+       bg = "white", cex = 0.8)
+dev.off()
+
+## Comparison of median distances
+## Table in Kohl and Deigner (2009)
+round(sapply(res1, median) - sapply(res3, median), 4)
+round(sapply(res2, median) - sapply(res3, median), 4)
+
+
 ###############################################################################
 ## The following example is based on the R code of Mark Dunning and Matt Ritchie
 ## available under

Added: pkg/RobLoxBioC/inst/scripts/IlluminaSimStudy.R
===================================================================
--- pkg/RobLoxBioC/inst/scripts/IlluminaSimStudy.R	                        (rev 0)
+++ pkg/RobLoxBioC/inst/scripts/IlluminaSimStudy.R	2009-04-22 12:26:55 UTC (rev 296)
@@ -0,0 +1,102 @@
+###############################################################################
+## Simulation study comparing Illumina's default method with the rmx estimator
+###############################################################################
+
+library(RobLoxBioC)
+
+## fixed variables
+n <- 11
+M <- 1e5
+eps.lower <- 0
+eps.upper <- 0.05
+seed <- 123 ## due to 1e5 replications the influence of the seed is neglectable
+
+eps <- 0.01
+contD <- Norm(0, 9)
+(res1 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Td(df = 3)
+(res2 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Cauchy()
+(res3 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(3, 1)
+(res4 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(10, 1)
+(res5 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(3)
+(res6 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(1000)
+(res7 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+
+eps <- 0.02
+contD <- Norm(0, 9)
+(res11 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Td(df = 3)
+(res12 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Cauchy()
+(res13 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(3, 1)
+(res14 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(10, 1)
+(res15 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(3)
+(res16 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(1000)
+(res17 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+
+eps <- 0.04
+contD <- Norm(0, 9)
+(res21 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Td(df = 3)
+(res22 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Cauchy()
+(res23 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(3, 1)
+(res24 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(10, 1)
+(res25 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(3)
+(res26 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(1000)
+(res27 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))

Deleted: pkg/RobLoxBioC/man/AffySimStudy.Rd
===================================================================
--- pkg/RobLoxBioC/man/AffySimStudy.Rd	2009-04-22 11:08:04 UTC (rev 295)
+++ pkg/RobLoxBioC/man/AffySimStudy.Rd	2009-04-22 12:26:55 UTC (rev 296)
@@ -1,58 +0,0 @@
-\name{AffySimStudy}
-\Rdversion{1.1}
-\alias{AffySimStudy}
-\title{Perform Monte-Carlo Study comparing Tukey's biweight and rmx estimators.}
-\description{
-  The function can be used to perform Monte-Carlo studies comparing Tukey's 
-  biweight and rmx estimators for normal location and scale. In addition, 
-  maximum likelihood (ML) estimators (mean and sd) and median and MAD are
-  computed. The comparison is based on the empirical MSE.
-}
-\usage{
-AffySimStudy(n, M, eps, seed = 123, eps.lower = 0, eps.upper = 0.05, steps = 3L, 
-             fsCor = TRUE, contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{n}{integer; sample size, should be at least 3.}
-  \item{M}{integer; Monte-Carlo replications.}
-  \item{eps}{amount of contamination in [0, 0.5].}
-  \item{seed}{random seed.}
-  \item{eps.lower}{used by rmx estimator.}
-  \item{eps.upper}{used by rmx estimator.}
-  \item{steps}{integer; steps used for estimator construction.}
-  \item{fsCor}{logical; use finite-sample correction.}
-  \item{contD}{object of class \code{"UnivariateDistribution"}; contaminating distribution.}
-  \item{plot1}{logical; plot cdf of ideal and real distribution.}
-  \item{plot2}{logical; plot 20 (or M if M < 20) randomly selected samples.}
-  \item{plot3}{logical; generate boxplots of the results.}
-}
-\details{
-Normal location and scale with mean = 0 and sd = 1 is used as ideal model (without
-restriction due to equivariance).
-
-Since there is no estimator which yields reliable results if 50 percent or more of the
-observations are contaminated, we use a modification where we re-simulate all samples
-including at least 50 percent contaminated data.
-
-We use funtion \code{\link[RobLox]{rowRoblox}} for the computation of the rmx estimator.
-}
-\value{Data.frame including empirical MSE (standardized by sample size n) and
-relMSE with respect to the rmx estimator.
-}
-\references{
-  Affymetrix, Inc. (2002). \emph{Statistical Algorithms Description Document}.
-  Affymetrix, Santa Clara.
-
-  Kohl M. and Deigner H.P. (2009). Using infinitesimally robust estimators for 
-  preprocessing gene expression data. In preparation.
-}
-\author{Matthias Kohl \email{Matthias.Kohl at stamats.de}}
-%\note{}
-\seealso{\code{\link[RobLox]{rowRoblox}}}
-\examples{
-AffySimStudy(n = 11, M = 100, eps = 0.02, contD = Norm(mean = 0, sd = 3), 
-             plot1 = TRUE, plot2 = TRUE, plot3 = TRUE)
-}
-\concept{Monte-Carlo study}
-\keyword{robust}

Copied: pkg/RobLoxBioC/man/SimStudies.Rd (from rev 294, pkg/RobLoxBioC/man/AffySimStudy.Rd)
===================================================================
--- pkg/RobLoxBioC/man/SimStudies.Rd	                        (rev 0)
+++ pkg/RobLoxBioC/man/SimStudies.Rd	2009-04-22 12:26:55 UTC (rev 296)
@@ -0,0 +1,71 @@
+\name{SimStudies}
+\Rdversion{1.1}
+\alias{AffySimStudy}
+\alias{IlluminaSimStudy}
+\title{Perform Monte-Carlo Study.}
+\description{
+  The function \code{AffySimStudy} can be used to perform Monte-Carlo studies 
+  comparing Tukey's biweight and rmx estimators for normal location and scale. 
+  The function \code{IlluminaSimStudy} can be used to perform Monte-Carlo studies 
+  comparing Illumina's default method - a Huber-type skipped mean and sd 
+  (cf. Hampel (1985)) - and rmx estimators for normal location and scale.
+  In addition, maximum likelihood (ML) estimators (mean and sd) and median and 
+  MAD are computed. The comparison is based on the empirical MSE.
+}
+\usage{
+AffySimStudy(n, M, eps, seed = 123, eps.lower = 0, eps.upper = 0.05, 
+             steps = 3L, fsCor = TRUE, contD, plot1 = FALSE, 
+             plot2 = FALSE, plot3 = FALSE)
+IlluminaSimStudy(n, M, eps, seed = 123, eps.lower = 0, eps.upper = 0.05, 
+                 steps = 3L, fsCor = TRUE, contD, plot1 = FALSE, 
+                 plot2 = FALSE, plot3 = FALSE)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{n}{integer; sample size, should be at least 3.}
+  \item{M}{integer; Monte-Carlo replications.}
+  \item{eps}{amount of contamination in [0, 0.5].}
+  \item{seed}{random seed.}
+  \item{eps.lower}{used by rmx estimator.}
+  \item{eps.upper}{used by rmx estimator.}
+  \item{steps}{integer; steps used for estimator construction.}
+  \item{fsCor}{logical; use finite-sample correction.}
+  \item{contD}{object of class \code{"UnivariateDistribution"}; contaminating distribution.}
+  \item{plot1}{logical; plot cdf of ideal and real distribution.}
+  \item{plot2}{logical; plot 20 (or M if M < 20) randomly selected samples.}
+  \item{plot3}{logical; generate boxplots of the results.}
+}
+\details{
+Normal location and scale with mean = 0 and sd = 1 is used as ideal model (without
+restriction due to equivariance).
+
+Since there is no estimator which yields reliable results if 50 percent or more of the
+observations are contaminated, we use a modification where we re-simulate all samples
+including at least 50 percent contaminated data.
+
+We use funtion \code{\link[RobLox]{rowRoblox}} for the computation of the rmx estimator.
+}
+\value{Data.frame including empirical MSE (standardized by sample size n) and
+relMSE with respect to the rmx estimator.
+}
+\references{
+  Affymetrix, Inc. (2002). \emph{Statistical Algorithms Description Document}.
+  Affymetrix, Santa Clara.
+
+  Hampel F.R. (1985). The breakdown points of the mean combined with some rejection
+  rules. Technometrics, 27(2):95-107.  
+
+  Kohl M. and Deigner H.P. (2009). Using infinitesimally robust estimators for 
+  preprocessing gene expression data. In preparation.
+}
+\author{Matthias Kohl \email{Matthias.Kohl at stamats.de}}
+%\note{}
+\seealso{\code{\link[RobLox]{rowRoblox}}}
+\examples{
+AffySimStudy(n = 11, M = 100, eps = 0.02, contD = Norm(mean = 0, sd = 3), 
+             plot1 = TRUE, plot2 = TRUE, plot3 = TRUE)
+IlluminaSimStudy(n = 30, M = 100, eps = 0.02, contD = Norm(mean = 0, sd = 3), 
+                 plot1 = TRUE, plot2 = TRUE, plot3 = TRUE)
+}
+\concept{Monte-Carlo study}
+\keyword{robust}


Property changes on: pkg/RobLoxBioC/man/SimStudies.Rd
___________________________________________________________________
Name: svn:mergeinfo
   +