[Robast-commits] r285 - pkg/RobLoxBioC/inst/scripts
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sat Apr 18 17:03:15 CEST 2009
Author: stamats
Date: 2009-04-18 17:03:14 +0200 (Sat, 18 Apr 2009)
New Revision: 285
Added:
pkg/RobLoxBioC/inst/scripts/AffySimStudy.R
pkg/RobLoxBioC/inst/scripts/AffySimStudyFunction.R
Modified:
pkg/RobLoxBioC/inst/scripts/AffymetrixExample.R
Log:
scripts updated and some new added
Added: pkg/RobLoxBioC/inst/scripts/AffySimStudy.R
===================================================================
--- pkg/RobLoxBioC/inst/scripts/AffySimStudy.R (rev 0)
+++ pkg/RobLoxBioC/inst/scripts/AffySimStudy.R 2009-04-18 15:03:14 UTC (rev 285)
@@ -0,0 +1,132 @@
+###############################################################################
+## Simulation study comparing Tukey's biweight with the rmx estimator
+###############################################################################
+
+library(affy)
+library(RobLox)
+library(distr)
+library(RColorBrewer)
+
+## Load function AffySimStudy which is in file "AffySimStudyFunction.R"
+source(file = "AffySimStudyFunction.R")
+
+## fixed variables
+n <- 11
+M <- 1e5
+eps.lower <- 0
+eps.upper <- 0.05
+seed <- 123 ## due to 1e5 replications the influence of the seed is neglectable
+
+eps <- 0.01
+contD <- Norm(0, 9)
+(res1 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Td(df = 3)
+(res2 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Cauchy()
+(res3 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(3, 1)
+(res4 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(5, 1)
+(res5 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(10, 1)
+(res6 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(5, 9)
+(res7 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(100)
+(res8 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(1.51)
+(res9 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+
+eps <- 0.025
+contD <- Norm(0, 9)
+(res11 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Td(df = 3)
+(res12 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Cauchy()
+(res13 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(3, 1)
+(res14 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(5, 1)
+(res15 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(10, 1)
+(res16 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(5, 9)
+(res17 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(100)
+(res18 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(1.51)
+(res19 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+
+eps <- 0.05
+contD <- Norm(0, 9)
+(res21 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Td(df = 3)
+(res22 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Cauchy()
+(res23 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(3, 1)
+(res24 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(5, 1)
+(res25 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(10, 1)
+(res26 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(5, 9)
+(res27 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(100)
+(res28 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(1.51)
+(res29 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed,
+ eps.lower = eps.lower, eps.upper = eps.upper,
+ contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
Added: pkg/RobLoxBioC/inst/scripts/AffySimStudyFunction.R
===================================================================
--- pkg/RobLoxBioC/inst/scripts/AffySimStudyFunction.R (rev 0)
+++ pkg/RobLoxBioC/inst/scripts/AffySimStudyFunction.R 2009-04-18 15:03:14 UTC (rev 285)
@@ -0,0 +1,116 @@
+###############################################################################
+## Function to perform simulation study comparing Tukey's biweight with the
+## rmx estimator
+###############################################################################
+
+## n: sample size
+## M: Monte Carlo replications
+## eps: amount of contamination
+## seed: seed for simulations
+## eps.lower: eps.lower for rmx estimator
+## eps.upper: eps.upper for rmx estimator
+## steps: number of steps used for estimator construction
+## contD: contaminating distribution
+## plot1: plot densities of ideal and real situation
+## plot2: plot 20 randomly chosen samples out of the M samples
+## plot3: boxplots of the estimates
+AffySimStudy <- function(n, M, eps, seed = 123, eps.lower = 0, eps.upper = 0.2,
+ steps = 3, fsCor = TRUE, contD,
+ plot1 = TRUE, plot2 = FALSE, plot3 = TRUE){
+ if(plot1){
+ from <- min(-6, q(contD)(1e-15))
+ to <- max(6, q(contD)(1-1e-15))
+ curve(dnorm, from = from, to = to, lwd = 2, n = 201,
+ main = "Comparison: ideal vs. real")
+ fun <- function(x) (1-eps)*dnorm(x)+eps*d(contD)(x)
+ curve(fun, from = from, to = to, add = TRUE, col = "orange",
+ lwd = 2, n = 201)
+ legend("topleft", legend = c("ideal", "real"),
+ fill = c("black", "orange"))
+ }
+
+ set.seed(seed)
+ r <- rbinom(n*M, prob = eps, size = 1)
+ Mid <- rnorm(n*M)
+ Mcont <- r(contD)(n*M)
+ Mre <- matrix((1-r)*Mid + r*Mcont, ncol = n)
+
+ if(plot2){
+ library(lattice)
+ ind <- sample(1:M, 20)
+ if(plot1) dev.new()
+ print(
+ stripplot(rep(1:20, each = 20) ~ as.vector(Mre[ind,]),
+ ylab = "samples", xlab = "x", pch = 20,
+ main = "Randomly chosen samples")
+ )
+ }
+
+ ## ML-estimator: mean and sd
+ Mean <- rowMeans(Mre)
+ Sd <- sqrt(rowMeans((Mre-Mean)^2))
+ ## Median and MAD
+ Median <- rowMedians(Mre)
+ Mad <- rowMedians(abs(Mre - Median))/qnorm(0.75)
+ ## Tukey 1-step + MAD
+ Tukey <- apply(Mre, 1, function(x) tukey.biweight(x))
+ Tukey <- cbind(Tukey, Mad)
+
+ ## Radius-minimax estimator
+ RadMinmax1 <- estimate(rowRoblox(Mre, eps.lower = eps.lower,
+ eps.upper = eps.upper, k = steps,
+ fsCor = fsCor))
+ RadMinmax2 <- estimate(rowRoblox(Mre, sd = Mad, eps.lower = eps.lower,
+ eps.upper = eps.upper, k = steps,
+ fsCor = fsCor))
+ RadMinmax2 <- cbind(RadMinmax2, Mad)
+
+ if(plot3){
+ Ergebnis1 <- list(Mean, Median, Tukey[,1], RadMinmax1[,1], RadMinmax2[,1])
+ Ergebnis2 <- list(Sd, Mad, Tukey[,2], RadMinmax1[,2], RadMinmax2[,2])
+ myCol <- brewer.pal(4, "Dark2")
+ if(plot1 || plot2) dev.new()
+ layout(matrix(c(1, 1, 1, 1, 3, 2, 2, 2, 2, 3), ncol = 2))
+ boxplot(Ergebnis1, col = myCol, pch = 20, main = "Location")
+ abline(h = 0)
+ boxplot(Ergebnis2, col = myCol, pch = 20, main = "Scale")
+ abline(h = 1)
+ op <- par(mar = rep(2, 4))
+ plot(c(0,1), c(1, 0), type = "n", axes = FALSE)
+ legend("center", c("ML", "Med/MAD", "biweight", "rmx", "rmx/MAD"),
+ fill = myCol, ncol = 5, cex = 1.5)
+ par(op)
+ }
+
+ ## ML-estimator
+ MSE1.1 <- n*mean(Mean^2)
+ ## Median + MAD
+ MSE2.1 <- n*mean(Median^2)
+ ## Tukey
+ MSE3.1 <- n*mean(Tukey[,1]^2)
+ ## Radius-minimax
+ MSE4.1 <- n*mean(RadMinmax1[,1]^2)
+ MSE5.1 <- n*mean(RadMinmax2[,1]^2)
+ empMSE <- data.frame(ML = MSE1.1, Med = MSE2.1, Tukey = MSE3.1,
+ "rmx" = MSE4.1, "rmx1" = MSE5.1)
+ rownames(empMSE) <- "n x empMSE (loc)"
+
+ ## ML-estimator
+ MSE1.2 <- n*mean((Sd-1)^2)
+ ## Median + MAD
+ MSE2.2 <- n*mean((Mad-1)^2)
+ ## Tukey
+ MSE3.2 <- MSE2.2
+ ## Radius-minimax
+ MSE4.2 <- n*mean((RadMinmax1[,2]-1)^2)
+ MSE5.2 <- n*mean((RadMinmax2[,2]-1)^2)
+ empMSE <- rbind(empMSE, c(MSE1.2, MSE2.2, MSE3.2, MSE4.2, MSE5.2))
+ rownames(empMSE)[2] <- "n x empMSE (scale)"
+ empMSE <- rbind(empMSE, c(MSE1.1 + MSE1.2, MSE2.1 + MSE2.2, MSE3.1 + MSE3.2,
+ MSE4.1 + MSE4.2, MSE5.1 + MSE5.2))
+ rownames(empMSE)[3] <- "n x empMSE (loc + scale)"
+
+ empMSE
+}
+
+
Modified: pkg/RobLoxBioC/inst/scripts/AffymetrixExample.R
===================================================================
--- pkg/RobLoxBioC/inst/scripts/AffymetrixExample.R 2009-04-18 09:09:51 UTC (rev 284)
+++ pkg/RobLoxBioC/inst/scripts/AffymetrixExample.R 2009-04-18 15:03:14 UTC (rev 285)
@@ -93,7 +93,7 @@
###########################################################
## takes more than 90 min on Intel P9500 (64bit Linux, 4 GByte RAM)
-ns <- 5:20
+ns <- c(5:20, 69)
M <- length(ns)
minKD.norm <- matrix(NA, nrow = 50000, ncol = M)
for(i in seq_len(M)){
@@ -129,7 +129,43 @@
lines(1:length(ns), 1/(2*ns), col = "orange", lwd = 2)
legend("topright", legend = "minimal possible distance", fill = "orange")
+#######################################
+## Figure 1 in Kohl and Deigner (2009)
+#######################################
+res1 <- split(as.vector(minKD.hgu95a$dist), as.vector(minKD.hgu95a$n))
+res2 <- split(as.vector(minKD.hgu133a$dist), as.vector(minKD.hgu133a$n))
+res3 <- lapply(as.data.frame(minKD.norm[,c(2:12,16,17)]), function(x) x)
+uni.n <- c(as.integer(names(res1)), as.integer(names(res2)), as.integer(names(res3)))
+postscript(file = "minKDAffy.eps", height = 6, width = 9, paper = "special",
+ horizontal = TRUE)
+par(mar = c(4, 4, 3, 1))
+plot(0, 0, type = "n", ylim = c(0, 0.49), xlim = c(0.5, 37.5),
+ panel.first = abline(h = seq(0, 0.45, by = 0.05), lty = 2, col = "grey"),
+ main = "Minimum Kolmogorov distance",
+ ylab = "minimum Kolmogorov distance",
+ xlab = "sample size", axes = FALSE)
+axis(1, c(1:13, 15:23, 25:37), labels = uni.n, cex.axis = 0.6)
+axis(2, seq(0, 0.45, by = 0.05), labels = seq(0, 0.45, by = 0.05), las = 2,
+ cex.axis = 0.8)
+box()
+boxplot(c(res1, res2, res3), at = c(1:13, 15:23, 25:37), add = TRUE, pch = 20,
+ names = FALSE, axes = FALSE)
+abline(v = c(14, 24), lwd = 1.5)
+text(c(7, 19, 31), rep(0.48, 3), labels = c("HGU95A", "HGU133A", "Normal Samples"),
+ font = 2)
+lines(1:13, 1/(2*uni.n[1:13]), lwd = 2)
+lines(15:23, 1/(2*uni.n[14:22]), lwd = 2)
+lines(25:37, 1/(2*uni.n[23:35]), lwd = 2)
+legend("bottomleft", legend = "minimal possible distance", lty = 1,
+ bg = "white", cex = 0.8)
+dev.off()
+
+## Comparison of median distances
+abs(sapply(res1, median) - sapply(res3, median))
+abs(sapply(res2, median) - sapply(res3, median)[-c(1,2,4,7)])
+
+
###########################################################
## assessments for MAS 5.0 and RMA including dilution data from package affycomp
###########################################################
More information about the Robast-commits
mailing list