[Robast-commits] r285 - pkg/RobLoxBioC/inst/scripts

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sat Apr 18 17:03:15 CEST 2009


Author: stamats
Date: 2009-04-18 17:03:14 +0200 (Sat, 18 Apr 2009)
New Revision: 285

Added:
   pkg/RobLoxBioC/inst/scripts/AffySimStudy.R
   pkg/RobLoxBioC/inst/scripts/AffySimStudyFunction.R
Modified:
   pkg/RobLoxBioC/inst/scripts/AffymetrixExample.R
Log:
scripts updated and some new added

Added: pkg/RobLoxBioC/inst/scripts/AffySimStudy.R
===================================================================
--- pkg/RobLoxBioC/inst/scripts/AffySimStudy.R	                        (rev 0)
+++ pkg/RobLoxBioC/inst/scripts/AffySimStudy.R	2009-04-18 15:03:14 UTC (rev 285)
@@ -0,0 +1,132 @@
+###############################################################################
+## Simulation study comparing Tukey's biweight with the rmx estimator
+###############################################################################
+
+library(affy)
+library(RobLox)
+library(distr)
+library(RColorBrewer)
+
+## Load function AffySimStudy which is in file "AffySimStudyFunction.R"
+source(file = "AffySimStudyFunction.R")
+
+## fixed variables
+n <- 11
+M <- 1e5
+eps.lower <- 0
+eps.upper <- 0.05
+seed <- 123 ## due to 1e5 replications the influence of the seed is neglectable
+
+eps <- 0.01
+contD <- Norm(0, 9)
+(res1 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Td(df = 3)
+(res2 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Cauchy()
+(res3 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(3, 1)
+(res4 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(5, 1)
+(res5 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(10, 1)
+(res6 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(5, 9)
+(res7 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(100)
+(res8 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(1.51)
+(res9 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+
+eps <- 0.025
+contD <- Norm(0, 9)
+(res11 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Td(df = 3)
+(res12 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Cauchy()
+(res13 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(3, 1)
+(res14 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(5, 1)
+(res15 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(10, 1)
+(res16 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(5, 9)
+(res17 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(100)
+(res18 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(1.51)
+(res19 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+
+eps <- 0.05
+contD <- Norm(0, 9)
+(res21 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Td(df = 3)
+(res22 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Cauchy()
+(res23 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(3, 1)
+(res24 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(5, 1)
+(res25 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(10, 1)
+(res26 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(5, 9)
+(res27 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Dirac(100)
+(res28 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))
+contD <- Norm(1.51)
+(res29 <- AffySimStudy(n = n, M = M, eps = eps, seed = seed, 
+                     eps.lower = eps.lower, eps.upper = eps.upper, 
+                     contD = contD, plot1 = FALSE, plot2 = FALSE, plot3 = FALSE))

Added: pkg/RobLoxBioC/inst/scripts/AffySimStudyFunction.R
===================================================================
--- pkg/RobLoxBioC/inst/scripts/AffySimStudyFunction.R	                        (rev 0)
+++ pkg/RobLoxBioC/inst/scripts/AffySimStudyFunction.R	2009-04-18 15:03:14 UTC (rev 285)
@@ -0,0 +1,116 @@
+###############################################################################
+## Function to perform simulation study comparing Tukey's biweight with the 
+## rmx estimator
+###############################################################################
+
+## n: sample size
+## M: Monte Carlo replications
+## eps: amount of contamination
+## seed: seed for simulations
+## eps.lower: eps.lower for rmx estimator
+## eps.upper: eps.upper for rmx estimator
+## steps: number of steps used for estimator construction
+## contD: contaminating distribution
+## plot1: plot densities of ideal and real situation
+## plot2: plot 20 randomly chosen samples out of the M samples
+## plot3: boxplots of the estimates
+AffySimStudy <- function(n, M, eps, seed = 123, eps.lower = 0, eps.upper = 0.2, 
+                         steps = 3, fsCor = TRUE, contD, 
+                         plot1 = TRUE, plot2 = FALSE, plot3 = TRUE){
+    if(plot1){
+        from <- min(-6, q(contD)(1e-15))
+        to <- max(6, q(contD)(1-1e-15))
+        curve(dnorm, from = from, to = to, lwd = 2, n = 201, 
+              main = "Comparison: ideal vs. real")
+        fun <- function(x) (1-eps)*dnorm(x)+eps*d(contD)(x)
+        curve(fun, from = from, to = to, add = TRUE, col = "orange", 
+              lwd = 2, n = 201)
+        legend("topleft", legend = c("ideal", "real"), 
+              fill = c("black", "orange"))
+    }
+
+    set.seed(seed)
+    r <- rbinom(n*M, prob = eps, size = 1)
+    Mid <- rnorm(n*M)
+    Mcont <- r(contD)(n*M)
+    Mre <- matrix((1-r)*Mid + r*Mcont, ncol = n)
+
+    if(plot2){
+        library(lattice)
+        ind <- sample(1:M, 20)
+        if(plot1) dev.new()
+        print(
+          stripplot(rep(1:20, each = 20) ~ as.vector(Mre[ind,]), 
+          ylab = "samples", xlab = "x", pch = 20,
+          main = "Randomly chosen samples")
+        )
+    }
+
+    ## ML-estimator: mean and sd
+    Mean <- rowMeans(Mre)
+    Sd <- sqrt(rowMeans((Mre-Mean)^2))
+    ## Median and MAD
+    Median <- rowMedians(Mre)
+    Mad <- rowMedians(abs(Mre - Median))/qnorm(0.75)
+    ## Tukey 1-step + MAD
+    Tukey <- apply(Mre, 1, function(x) tukey.biweight(x))
+    Tukey <- cbind(Tukey, Mad)
+
+    ## Radius-minimax estimator
+    RadMinmax1 <- estimate(rowRoblox(Mre, eps.lower = eps.lower, 
+                                    eps.upper = eps.upper, k = steps,
+                                    fsCor = fsCor))
+    RadMinmax2 <- estimate(rowRoblox(Mre, sd = Mad, eps.lower = eps.lower, 
+                                    eps.upper = eps.upper, k = steps,
+                                    fsCor = fsCor))
+    RadMinmax2 <- cbind(RadMinmax2, Mad)
+
+    if(plot3){
+        Ergebnis1 <- list(Mean, Median, Tukey[,1], RadMinmax1[,1], RadMinmax2[,1])
+        Ergebnis2 <- list(Sd, Mad, Tukey[,2], RadMinmax1[,2], RadMinmax2[,2])
+        myCol <- brewer.pal(4, "Dark2")
+        if(plot1 || plot2) dev.new()
+        layout(matrix(c(1, 1, 1, 1, 3, 2, 2, 2, 2, 3), ncol = 2))
+        boxplot(Ergebnis1, col = myCol, pch = 20, main = "Location")
+        abline(h = 0)
+        boxplot(Ergebnis2, col = myCol, pch = 20, main = "Scale")
+        abline(h = 1)
+        op <- par(mar = rep(2, 4))
+        plot(c(0,1), c(1, 0), type = "n", axes = FALSE)
+        legend("center", c("ML", "Med/MAD", "biweight", "rmx", "rmx/MAD"),
+               fill = myCol, ncol = 5, cex = 1.5)
+        par(op)
+    }
+
+    ## ML-estimator
+    MSE1.1 <- n*mean(Mean^2)
+    ## Median + MAD
+    MSE2.1 <- n*mean(Median^2)
+    ## Tukey
+    MSE3.1 <- n*mean(Tukey[,1]^2)
+    ## Radius-minimax
+    MSE4.1 <- n*mean(RadMinmax1[,1]^2)
+    MSE5.1 <- n*mean(RadMinmax2[,1]^2)
+    empMSE <- data.frame(ML = MSE1.1, Med = MSE2.1, Tukey = MSE3.1, 
+                         "rmx" = MSE4.1, "rmx1" = MSE5.1)
+    rownames(empMSE) <- "n x empMSE (loc)"
+
+    ## ML-estimator
+    MSE1.2 <- n*mean((Sd-1)^2)
+    ## Median + MAD
+    MSE2.2 <- n*mean((Mad-1)^2)
+    ## Tukey
+    MSE3.2 <- MSE2.2
+    ## Radius-minimax
+    MSE4.2 <- n*mean((RadMinmax1[,2]-1)^2)
+    MSE5.2 <- n*mean((RadMinmax2[,2]-1)^2)
+    empMSE <- rbind(empMSE, c(MSE1.2, MSE2.2, MSE3.2, MSE4.2, MSE5.2))
+    rownames(empMSE)[2] <- "n x empMSE (scale)"
+    empMSE <- rbind(empMSE, c(MSE1.1 + MSE1.2, MSE2.1 + MSE2.2, MSE3.1 + MSE3.2, 
+                              MSE4.1 + MSE4.2, MSE5.1 + MSE5.2))
+    rownames(empMSE)[3] <- "n x empMSE (loc + scale)"
+
+    empMSE
+}
+
+

Modified: pkg/RobLoxBioC/inst/scripts/AffymetrixExample.R
===================================================================
--- pkg/RobLoxBioC/inst/scripts/AffymetrixExample.R	2009-04-18 09:09:51 UTC (rev 284)
+++ pkg/RobLoxBioC/inst/scripts/AffymetrixExample.R	2009-04-18 15:03:14 UTC (rev 285)
@@ -93,7 +93,7 @@
 ###########################################################
 
 ## takes more than 90 min on Intel P9500 (64bit Linux, 4 GByte RAM)
-ns <- 5:20
+ns <- c(5:20, 69)
 M <- length(ns)
 minKD.norm <- matrix(NA, nrow = 50000, ncol = M)
 for(i in seq_len(M)){
@@ -129,7 +129,43 @@
 lines(1:length(ns), 1/(2*ns), col = "orange", lwd = 2)
 legend("topright", legend = "minimal possible distance", fill = "orange")
 
+#######################################
+## Figure 1 in Kohl and Deigner (2009)
+#######################################
+res1 <- split(as.vector(minKD.hgu95a$dist), as.vector(minKD.hgu95a$n))
+res2 <- split(as.vector(minKD.hgu133a$dist), as.vector(minKD.hgu133a$n))
+res3 <- lapply(as.data.frame(minKD.norm[,c(2:12,16,17)]), function(x) x)
+uni.n <- c(as.integer(names(res1)), as.integer(names(res2)), as.integer(names(res3)))
 
+postscript(file = "minKDAffy.eps", height = 6, width = 9, paper = "special", 
+           horizontal = TRUE)
+par(mar = c(4, 4, 3, 1))
+plot(0, 0, type = "n", ylim = c(0, 0.49), xlim = c(0.5, 37.5), 
+     panel.first = abline(h = seq(0, 0.45, by = 0.05), lty = 2, col = "grey"), 
+     main = "Minimum Kolmogorov distance", 
+     ylab = "minimum Kolmogorov distance", 
+     xlab = "sample size", axes = FALSE)
+axis(1, c(1:13, 15:23, 25:37), labels = uni.n, cex.axis = 0.6)
+axis(2, seq(0, 0.45, by = 0.05), labels = seq(0, 0.45, by = 0.05), las = 2,
+     cex.axis = 0.8)
+box()
+boxplot(c(res1, res2, res3), at = c(1:13, 15:23, 25:37), add = TRUE, pch = 20, 
+        names = FALSE, axes = FALSE)
+abline(v = c(14, 24), lwd = 1.5)
+text(c(7, 19, 31), rep(0.48, 3), labels = c("HGU95A", "HGU133A", "Normal Samples"),
+     font = 2)
+lines(1:13, 1/(2*uni.n[1:13]), lwd = 2)
+lines(15:23, 1/(2*uni.n[14:22]), lwd = 2)
+lines(25:37, 1/(2*uni.n[23:35]), lwd = 2)
+legend("bottomleft", legend = "minimal possible distance", lty = 1, 
+       bg = "white", cex = 0.8)
+dev.off()
+
+## Comparison of median distances
+abs(sapply(res1, median) - sapply(res3, median))
+abs(sapply(res2, median) - sapply(res3, median)[-c(1,2,4,7)])
+
+
 ###########################################################
 ## assessments for MAS 5.0 and RMA including dilution data from package affycomp
 ###########################################################



More information about the Robast-commits mailing list