[Robast-commits] r296 - in pkg/RobLoxBioC: . R inst/scripts man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Apr 22 14:26:55 CEST 2009
Author: stamats
Date: 2009-04-22 14:26:55 +0200 (Wed, 22 Apr 2009)
New Revision: 296
Added:
pkg/RobLoxBioC/inst/scripts/IlluminaSimStudy.R
pkg/RobLoxBioC/man/SimStudies.Rd
Removed:
pkg/RobLoxBioC/man/AffySimStudy.Rd
Modified:
pkg/RobLoxBioC/DESCRIPTION
pkg/RobLoxBioC/NAMESPACE
pkg/RobLoxBioC/R/AffySimStudyFunction.R
pkg/RobLoxBioC/R/IlluminaSimStudyFunction.R
pkg/RobLoxBioC/inst/scripts/IlluminaExample.R
Log:
after importing the namespace of distr, the package now checks without errors or warnings. Added file for Illumina simulation study and renamed file AffySimStudy.Rd to SimStudies.Rd.
Modified: pkg/RobLoxBioC/DESCRIPTION
===================================================================
--- pkg/RobLoxBioC/DESCRIPTION 2009-04-22 11:08:04 UTC (rev 295)
+++ pkg/RobLoxBioC/DESCRIPTION 2009-04-22 12:26:55 UTC (rev 296)
@@ -1,6 +1,6 @@
Package: RobLoxBioC
Version: 0.4
-Date: 2009-04-21
+Date: 2009-04-22
Title: Infinitesimally robust estimators for preprocessing omics data
Description: Functions for the determination of optimally robust
influence curves and estimators for preprocessing omics data,
Modified: pkg/RobLoxBioC/NAMESPACE
===================================================================
--- pkg/RobLoxBioC/NAMESPACE 2009-04-22 11:08:04 UTC (rev 295)
+++ pkg/RobLoxBioC/NAMESPACE 2009-04-22 12:26:55 UTC (rev 296)
@@ -1,4 +1,4 @@
-import("methods")
+import("methods", "distr")
exportMethods("robloxbioc", "KolmogorovMinDist")
-export("AffySimStudy")
+export("AffySimStudy", "IlluminaSimStudy")
Modified: pkg/RobLoxBioC/R/AffySimStudyFunction.R
===================================================================
--- pkg/RobLoxBioC/R/AffySimStudyFunction.R 2009-04-22 11:08:04 UTC (rev 295)
+++ pkg/RobLoxBioC/R/AffySimStudyFunction.R 2009-04-22 12:26:55 UTC (rev 296)
@@ -64,18 +64,18 @@
if(plot3){
Ergebnis1 <- list(Mean, Median, Tukey[,1], RadMinmax[,1])
- Ergebnis2 <- list(Sd, Mad, Tukey[,2], RadMinmax[,2])
+ Ergebnis2 <- list(Sd, Mad, RadMinmax[,2])
myCol <- brewer.pal(4, "Dark2")
if(plot1 || plot2) dev.new()
layout(matrix(c(1, 1, 1, 1, 3, 2, 2, 2, 2, 3), ncol = 2))
boxplot(Ergebnis1, col = myCol, pch = 20, main = "Location")
abline(h = 0)
- boxplot(Ergebnis2, col = myCol, pch = 20, main = "Scale")
+ boxplot(Ergebnis2, col = myCol[c(1,2,4)], pch = 20, main = "Scale")
abline(h = 1)
op <- par(mar = rep(2, 4))
plot(c(0,1), c(1, 0), type = "n", axes = FALSE)
legend("center", c("ML", "Med/MAD", "biweight", "rmx"),
- fill = myCol, ncol = 5, cex = 1.5)
+ fill = myCol, ncol = 4, cex = 1.5)
on.exit(par(op))
}
Modified: pkg/RobLoxBioC/R/IlluminaSimStudyFunction.R
===================================================================
--- pkg/RobLoxBioC/R/IlluminaSimStudyFunction.R 2009-04-22 11:08:04 UTC (rev 295)
+++ pkg/RobLoxBioC/R/IlluminaSimStudyFunction.R 2009-04-22 12:26:55 UTC (rev 296)
@@ -82,7 +82,7 @@
op <- par(mar = rep(2, 4))
plot(c(0,1), c(1, 0), type = "n", axes = FALSE)
legend("center", c("ML", "Med/MAD", "Illumina", "rmx"),
- fill = myCol, ncol = 5, cex = 1.5)
+ fill = myCol, ncol = 4, cex = 1.5)
on.exit(par(op))
}
Modified: pkg/RobLoxBioC/inst/scripts/IlluminaExample.R
===================================================================
--- pkg/RobLoxBioC/inst/scripts/IlluminaExample.R 2009-04-22 11:08:04 UTC (rev 295)
+++ pkg/RobLoxBioC/inst/scripts/IlluminaExample.R 2009-04-22 12:26:55 UTC (rev 296)
@@ -37,16 +37,79 @@
#save(spikeInData, compress = TRUE, file = "spikeInData.RData")
#load(file = "spikeInData.RData")
-## takes more than 100 min on Intel P9500 (64bit Linux, 4 GByte RAM)
-system.time(minKD <- KolmogorovMinDist(spikeInData, Norm(), imagesPerArray = 2))
-save(minKD, compress = TRUE, file = "minKD_Illumina.RData")
+## takes about 9 hours on Intel P9500 (64bit Linux, 4 GByte RAM)
+system.time(minKD.Illumina <- KolmogorovMinDist(spikeInData, Norm(), imagesPerArray = 2))
+save(minKD.Illumina, compress = TRUE, file = "minKD_Illumina.RData")
+## takes about 9 hours on Intel P9500 (64bit Linux, 4 GByte RAM)
+system.time(minKD.Illumina.log <- KolmogorovMinDist(spikeInData, Norm(), log = TRUE, imagesPerArray = 2))
+save(minKD.Illumina.log, compress = TRUE, file = "minKD_Illumina_log.RData")
+
## load the results from R-forge ...
-con <- url("http://robast.r-forge.r-project.org/data/minKD_hgu95a.RData")
+con <- url("http://robast.r-forge.r-project.org/data/minKD_Illumina.RData")
load(file = con)
close(con)
+con <- url("http://robast.r-forge.r-project.org/data/minKD_Illumina_log.RData")
+load(file = con)
+close(con)
+## takes more than 90 min on Intel P9500 (64bit Linux, 4 GByte RAM)
+ns <- c(10:70)
+M <- length(ns)
+minKD.Illumina.norm <- matrix(NA, nrow = 50000, ncol = M)
+colnames(minKD.Illumina.norm) <- ns
+for(i in seq_len(M)){
+ tm <- proc.time()
+ print(ns[i])
+ temp <- matrix(rnorm(50000*ns[i]), ncol = ns[i])
+ minKD.Illumina.norm[,i] <- KolmogorovMinDist(temp, Norm())$dist
+ cat("Dauer:\t", proc.time()-tm, "\n")
+ save(minKD.Illumina.norm, compress = TRUE, file = "minKD_Illumina_norm.RData")
+}
+## load the results from R-forge
+con <- url("http://robast.r-forge.r-project.org/data/minKD_Illumina_norm.RData")
+load(file = con)
+close(con)
+
+#######################################
+## Figure in Kohl and Deigner (2009)
+#######################################
+res1 <- split(as.vector(minKD.Illumina$dist), as.vector(minKD.Illumina$n))[10:70]
+res2 <- split(as.vector(minKD.Illumina.log$dist), as.vector(minKD.Illumina.log$n))[10:70]
+res3 <- lapply(as.data.frame(minKD.Illumina.norm), function(x) x)
+uni.n <- rep(10:70, 3)
+
+postscript(file = "minKDIllumina.eps", height = 6, width = 9, paper = "special",
+ horizontal = TRUE)
+par(mar = c(4, 4, 3, 1))
+plot(0, 0, type = "n", ylim = c(0, 0.49), xlim = c(0.5, 37.5),
+ panel.first = abline(h = seq(0, 0.45, by = 0.05), lty = 2, col = "grey"),
+ main = "Minimum Kolmogorov distance",
+ ylab = "minimum Kolmogorov distance",
+ xlab = "sample size", axes = FALSE)
+axis(1, c(1:61, 63:123, 125:185), labels = uni.n, cex.axis = 0.6)
+axis(2, seq(0, 0.4, by = 0.05), labels = seq(0, 0.4, by = 0.05), las = 2,
+ cex.axis = 0.8)
+box()
+boxplot(c(res1, res2, res3), at = c(1:61, 63:123, 125:185), add = TRUE, pch = 20,
+ names = FALSE, axes = FALSE)
+abline(v = c(62, 124), lwd = 1.5)
+text(c(30, 93, 155), rep(0.48, 3), labels = c("Raw Data", "log Raw Data", "Normal Samples"),
+ font = 2)
+lines(1:61, 1/(2*(10:70)), lwd = 2)
+lines(63:123, 1/(2*(10:70)), lwd = 2)
+lines(125:185, 1/(2*(10:70)), lwd = 2)
+legend("bottomleft", legend = "minimal possible distance", lty = 1,
+ bg = "white", cex = 0.8)
+dev.off()
+
+## Comparison of median distances
+## Table in Kohl and Deigner (2009)
+round(sapply(res1, median) - sapply(res3, median), 4)
+round(sapply(res2, median) - sapply(res3, median), 4)
+
+
###############################################################################
## The following example is based on the R code of Mark Dunning and Matt Ritchie
## available under
Added: pkg/RobLoxBioC/inst/scripts/IlluminaSimStudy.R
===================================================================
--- pkg/RobLoxBioC/inst/scripts/IlluminaSimStudy.R (rev 0)
+++ pkg/RobLoxBioC/inst/scripts/IlluminaSimStudy.R 2009-04-22 12:26:55 UTC (rev 296)
@@ -0,0 +1,102 @@
+###############################################################################
+## Simulation study comparing Illumina's default method with the rmx estimator
+###############################################################################
+
+library(RobLoxBioC)
+
+## fixed variables
+n <- 11
+M <- 1e5
+eps.lower <- 0
+eps.upper <- 0.05
+seed <- 123 ## due to 1e5 replications the influence of the seed is neglectable
+
+eps <- 0.01
+contD <- Norm(0, 9)
+(res1 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Td(df = 3)
+(res2 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Cauchy()
+(res3 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(3, 1)
+(res4 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(10, 1)
+(res5 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(3)
+(res6 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(1000)
+(res7 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+
+eps <- 0.02
+contD <- Norm(0, 9)
+(res11 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Td(df = 3)
+(res12 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Cauchy()
+(res13 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(3, 1)
+(res14 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(10, 1)
+(res15 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(3)
+(res16 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(1000)
+(res17 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+
+eps <- 0.04
+contD <- Norm(0, 9)
+(res21 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Td(df = 3)
+(res22 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Cauchy()
+(res23 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(3, 1)
+(res24 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(10, 1)
+(res25 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(3)
+(res26 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(1000)
+(res27 <- IlluminaSimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
Deleted: pkg/RobLoxBioC/man/AffySimStudy.Rd
===================================================================
--- pkg/RobLoxBioC/man/AffySimStudy.Rd 2009-04-22 11:08:04 UTC (rev 295)
+++ pkg/RobLoxBioC/man/AffySimStudy.Rd 2009-04-22 12:26:55 UTC (rev 296)
@@ -1,58 +0,0 @@
-\name{AffySimStudy}
-\Rdversion{1.1}
-\alias{AffySimStudy}
-\title{Perform Monte-Carlo Study comparing Tukey's biweight and rmx estimators.}
-\description{
- The function can be used to perform Monte-Carlo studies comparing Tukey's
- biweight and rmx estimators for normal location and scale. In addition,
- maximum likelihood (ML) estimators (mean and sd) and median and MAD are
- computed. The comparison is based on the empirical MSE.
-}
-\usage{
-AffySimStudy(n, M, eps, seed = 123, eps.lower = 0, eps.upper = 0.05, steps = 3L,
- fsCor = TRUE, contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
- \item{n}{integer; sample size, should be at least 3.}
- \item{M}{integer; Monte-Carlo replications.}
- \item{eps}{amount of contamination in [0, 0.5].}
- \item{seed}{random seed.}
- \item{eps.lower}{used by rmx estimator.}
- \item{eps.upper}{used by rmx estimator.}
- \item{steps}{integer; steps used for estimator construction.}
- \item{fsCor}{logical; use finite-sample correction.}
- \item{contD}{object of class \code{"UnivariateDistribution"}; contaminating distribution.}
- \item{plot1}{logical; plot cdf of ideal and real distribution.}
- \item{plot2}{logical; plot 20 (or M if M < 20) randomly selected samples.}
- \item{plot3}{logical; generate boxplots of the results.}
-}
-\details{
-Normal location and scale with mean = 0 and sd = 1 is used as ideal model (without
-restriction due to equivariance).
-
-Since there is no estimator which yields reliable results if 50 percent or more of the
-observations are contaminated, we use a modification where we re-simulate all samples
-including at least 50 percent contaminated data.
-
-We use funtion \code{\link[RobLox]{rowRoblox}} for the computation of the rmx estimator.
-}
-\value{Data.frame including empirical MSE (standardized by sample size n) and
-relMSE with respect to the rmx estimator.
-}
-\references{
- Affymetrix, Inc. (2002). \emph{Statistical Algorithms Description Document}.
- Affymetrix, Santa Clara.
-
- Kohl M. and Deigner H.P. (2009). Using infinitesimally robust estimators for
- preprocessing gene expression data. In preparation.
-}
-\author{Matthias Kohl \email{Matthias.Kohl at stamats.de}}
-%\note{}
-\seealso{\code{\link[RobLox]{rowRoblox}}}
-\examples{
-AffySimStudy(n = 11, M = 100, eps = 0.02, contD = Norm(mean = 0, sd = 3),
- plot1 = TRUE, plot2 = TRUE, plot3 = TRUE)
-}
-\concept{Monte-Carlo study}
-\keyword{robust}
Copied: pkg/RobLoxBioC/man/SimStudies.Rd (from rev 294, pkg/RobLoxBioC/man/AffySimStudy.Rd)
===================================================================
--- pkg/RobLoxBioC/man/SimStudies.Rd (rev 0)
+++ pkg/RobLoxBioC/man/SimStudies.Rd 2009-04-22 12:26:55 UTC (rev 296)
@@ -0,0 +1,71 @@
+\name{SimStudies}
+\Rdversion{1.1}
+\alias{AffySimStudy}
+\alias{IlluminaSimStudy}
+\title{Perform Monte-Carlo Study.}
+\description{
+ The function \code{AffySimStudy} can be used to perform Monte-Carlo studies
+ comparing Tukey's biweight and rmx estimators for normal location and scale.
+ The function \code{IlluminaSimStudy} can be used to perform Monte-Carlo studies
+ comparing Illumina's default method - a Huber-type skipped mean and sd
+ (cf. Hampel (1985)) - and rmx estimators for normal location and scale.
+ In addition, maximum likelihood (ML) estimators (mean and sd) and median and
+ MAD are computed. The comparison is based on the empirical MSE.
+}
+\usage{
+AffySimStudy(n, M, eps, seed = 123, eps.lower = 0, eps.upper = 0.05,
+ steps = 3L, fsCor = TRUE, contD, plot1 = FALSE,
+ plot2 = FALSE, plot3 = FALSE)
+IlluminaSimStudy(n, M, eps, seed = 123, eps.lower = 0, eps.upper = 0.05,
+ steps = 3L, fsCor = TRUE, contD, plot1 = FALSE,
+ plot2 = FALSE, plot3 = FALSE)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{n}{integer; sample size, should be at least 3.}
+ \item{M}{integer; Monte-Carlo replications.}
+ \item{eps}{amount of contamination in [0, 0.5].}
+ \item{seed}{random seed.}
+ \item{eps.lower}{used by rmx estimator.}
+ \item{eps.upper}{used by rmx estimator.}
+ \item{steps}{integer; steps used for estimator construction.}
+ \item{fsCor}{logical; use finite-sample correction.}
+ \item{contD}{object of class \code{"UnivariateDistribution"}; contaminating distribution.}
+ \item{plot1}{logical; plot cdf of ideal and real distribution.}
+ \item{plot2}{logical; plot 20 (or M if M < 20) randomly selected samples.}
+ \item{plot3}{logical; generate boxplots of the results.}
+}
+\details{
+Normal location and scale with mean = 0 and sd = 1 is used as ideal model (without
+restriction due to equivariance).
+
+Since there is no estimator which yields reliable results if 50 percent or more of the
+observations are contaminated, we use a modification where we re-simulate all samples
+including at least 50 percent contaminated data.
+
+We use funtion \code{\link[RobLox]{rowRoblox}} for the computation of the rmx estimator.
+}
+\value{Data.frame including empirical MSE (standardized by sample size n) and
+relMSE with respect to the rmx estimator.
+}
+\references{
+ Affymetrix, Inc. (2002). \emph{Statistical Algorithms Description Document}.
+ Affymetrix, Santa Clara.
+
+ Hampel F.R. (1985). The breakdown points of the mean combined with some rejection
+ rules. Technometrics, 27(2):95-107.
+
+ Kohl M. and Deigner H.P. (2009). Using infinitesimally robust estimators for
+ preprocessing gene expression data. In preparation.
+}
+\author{Matthias Kohl \email{Matthias.Kohl at stamats.de}}
+%\note{}
+\seealso{\code{\link[RobLox]{rowRoblox}}}
+\examples{
+AffySimStudy(n = 11, M = 100, eps = 0.02, contD = Norm(mean = 0, sd = 3),
+ plot1 = TRUE, plot2 = TRUE, plot3 = TRUE)
+IlluminaSimStudy(n = 30, M = 100, eps = 0.02, contD = Norm(mean = 0, sd = 3),
+ plot1 = TRUE, plot2 = TRUE, plot3 = TRUE)
+}
+\concept{Monte-Carlo study}
+\keyword{robust}
Property changes on: pkg/RobLoxBioC/man/SimStudies.Rd
___________________________________________________________________
Name: svn:mergeinfo
+
More information about the Robast-commits
mailing list