[Robast-commits] r449 - in pkg/RobLox: . R inst man

Thu Jan 6 11:31:52 CET 2011

Author: stamats
Date: 2011-01-06 11:31:51 +0100 (Thu, 06 Jan 2011)
New Revision: 449

Added:
   pkg/RobLox/R/showdown.R
   pkg/RobLox/man/showdown.Rd
Modified:
   pkg/RobLox/DESCRIPTION
   pkg/RobLox/NAMESPACE
   pkg/RobLox/inst/NEWS
Log:
added new function 'showdown' to compare some estimator with our rmx estimator by some Monte-Carlo study. Had to add 'importFrom(distr, q)' to NAMESPACE to avoid error with function 'q' inside of 'showdown' - why?!

Modified: pkg/RobLox/DESCRIPTION
===================================================================

--- pkg/RobLox/DESCRIPTION	2011-01-06 09:41:34 UTC (rev 448)
+++ pkg/RobLox/DESCRIPTION	2011-01-06 10:31:51 UTC (rev 449)
@@ -1,11 +1,11 @@
 Package: RobLox
 Version: 0.8
-Date: 2010-12-03
+Date: 2011-01-06
 Title: Optimally robust influence curves and estimators for location and scale
 Description: Functions for the determination of optimally robust influence curves and
         estimators in case of normal location and/or scale
-Depends: R(>= 2.7.0), stats, distrMod(>= 2.0.1), RobAStBase(>= 0.1.1)
-Suggests: Biobase
+Depends: R(>= 2.7.0), stats, lattice, RColorBrewer, Biobase, distr, distrMod, RobAStBase
+Suggests: MASS
 Author: Matthias Kohl <Matthias.Kohl at stamats.de>
 Maintainer: Matthias Kohl <Matthias.Kohl at stamats.de>
 LazyLoad: yes

Modified: pkg/RobLox/NAMESPACE
===================================================================
--- pkg/RobLox/NAMESPACE	2011-01-06 09:41:34 UTC (rev 448)
+++ pkg/RobLox/NAMESPACE	2011-01-06 10:31:51 UTC (rev 449)
@@ -1,3 +1,5 @@
+importFrom(distr, q)
+
 export(finiteSampleCorrection,
        rlsOptIC.AL, 
        rlsOptIC.M, 
@@ -21,4 +23,5 @@
        rsOptIC,
        roblox,
        rowRoblox,
-       colRoblox)
+       colRoblox,
+       showdown)

Added: pkg/RobLox/R/showdown.R
===================================================================
--- pkg/RobLox/R/showdown.R	                        (rev 0)
+++ pkg/RobLox/R/showdown.R	2011-01-06 10:31:51 UTC (rev 449)
@@ -0,0 +1,122 @@
+###############################################################################
+## Function to perform simulation study comparing some estimator with
+## rmx estimators
+###############################################################################
+showdown <- function(n, M, eps, contD, seed = 123, estfun, estMean, estSd,
+                     eps.lower = 0, eps.upper = 0.05, steps = 3L, fsCor = TRUE, 
+                     plot1 = FALSE, plot2 = FALSE, plot3 = FALSE){
+    stopifnot(n >= 3)
+    stopifnot(eps >= 0, eps <= 0.5)
+    if(plot1){
+        from <- min(-6, q(contD)(1e-15))
+        to <- max(6, q(contD)(1-1e-15))
+        curve(pnorm, from = from, to = to, lwd = 2, n = 201, 
+              main = "Comparison: ideal vs. real", ylab = "cdf")
+        fun <- function(x) (1-eps)*pnorm(x) + eps*p(contD)(x)
+        curve(fun, from = from, to = to, add = TRUE, col = "orange", 
+              lwd = 2, n = 201, ylab = "cdf")
+        legend("topleft", legend = c("ideal", "real"), 
+              fill = c("black", "orange"))
+    }
+
+    set.seed(seed)
+    r <- rbinom(n*M, prob = eps, size = 1)
+    Mid <- rnorm(n*M)
+    Mcont <- r(contD)(n*M)
+    Mre <- matrix((1-r)*Mid + r*Mcont, ncol = n)
+    ind <- rowSums(matrix(r, ncol = n)) >= n/2
+    while(any(ind)){
+        M1 <- sum(ind)
+        cat("Samples to re-simulate:\t", M1, "\n")
+        r <- rbinom(n*M1, prob = eps, size = 1)
+        Mid <- rnorm(n*M1)
+        Mcont <- r(contD)(n*M1)
+        Mre[ind,] <- (1-r)*Mid + r*Mcont
+        ind[ind] <- rowSums(matrix(r, ncol = n)) >= n/2
+    }
+    rm(Mid, Mcont, r, ind)
+
+
+    if(plot2){
+        ind <- if(M <= 20) 1:M else sample(1:M, 20)
+        if(plot1) dev.new()
+        M1 <- min(M, 20)
+        print(
+          stripplot(rep(1:M1, each = n) ~ as.vector(Mre[ind,]), 
+                    ylab = "samples", xlab = "x", pch = 20,
+                    main = ifelse(M <= 20, "Samples", "20 randomly chosen samples"))
+        )
+    }
+
+    ## ML-estimator: mean and sd
+    Mean <- rowMeans(Mre)
+    Sd <- sqrt(rowMeans((Mre-Mean)^2))
+    ## Median and MAD
+    Median <- rowMedians(Mre)
+    Mad <- rowMedians(abs(Mre - Median))/qnorm(0.75)
+    ## Competitor
+    if(missing(estfun)){
+        Comp <- apply(Mre, 1, estMean)
+        Comp <- cbind(Comp, apply(Mre, 1, estSd))
+    }else
+        Comp <- t(apply(Mre, 1, estfun))
+
+    ## Radius-minimax estimator
+    RadMinmax <- estimate(rowRoblox(Mre, eps.lower = eps.lower, 
+                                    eps.upper = eps.upper, k = steps,
+                                    fsCor = fsCor))
+
+    if(plot3){
+        Ergebnis1 <- list(Mean, Median, Comp[,1], RadMinmax[,1])
+        Ergebnis2 <- list(Sd, Mad, Comp[,2], RadMinmax[,2])
+        myCol <- brewer.pal(4, "Dark2")
+        if(plot1 || plot2) dev.new()
+        layout(matrix(c(1, 1, 1, 1, 3, 2, 2, 2, 2, 3), ncol = 2))
+        boxplot(Ergebnis1, col = myCol, pch = 20, main = "Location")
+        abline(h = 0)
+        boxplot(Ergebnis2, col = myCol, pch = 20, main = "Scale")
+        abline(h = 1)
+        op <- par(mar = rep(2, 4))
+        plot(c(0,1), c(1, 0), type = "n", axes = FALSE)
+        legend("center", c("ML", "Med/MAD", "competitor", "rmx"),
+               fill = myCol, ncol = 4, cex = 1.5)
+        on.exit(par(op))
+    }
+
+    ## ML-estimator
+    MSE1.1 <- n*mean(Mean^2)
+    ## Median + MAD
+    MSE2.1 <- n*mean(Median^2)
+    ## Tukey
+    MSE3.1 <- n*mean(Comp[,1]^2)
+    ## Radius-minimax
+    MSE4.1 <- n*mean(RadMinmax[,1]^2)
+    empMSE <- data.frame(ML = MSE1.1, Med = MSE2.1, Competitor = MSE3.1, "rmx" = MSE4.1)
+    rownames(empMSE) <- "n x empMSE (loc)"
+    relMSE <- empMSE[1,]/empMSE[1,4]
+    empMSE <- rbind(empMSE, relMSE)
+    rownames(empMSE)[2] <- "relMSE (loc)"
+
+    ## ML-estimator
+    MSE1.2 <- n*mean((Sd-1)^2)
+    ## Median + MAD
+    MSE2.2 <- n*mean((Mad-1)^2)
+    ## Tukey
+    MSE3.2 <- n*mean((Comp[,2]-1)^2)
+    ## Radius-minimax
+    MSE4.2 <- n*mean((RadMinmax[,2]-1)^2)
+    empMSE <- rbind(empMSE, c(MSE1.2, MSE2.2, MSE3.2, MSE4.2))
+    rownames(empMSE)[3] <- "n x empMSE (scale)"
+    relMSE <- empMSE[3,]/empMSE[3,4]
+    empMSE <- rbind(empMSE, relMSE)
+    rownames(empMSE)[4] <- "relMSE (scale)"
+    empMSE <- rbind(empMSE, c(MSE1.1 + MSE1.2, MSE2.1 + MSE2.2, MSE3.1 + MSE3.2, 
+                              MSE4.1 + MSE4.2))
+    rownames(empMSE)[5] <- "n x empMSE (loc + scale)"
+    relMSE <- empMSE[5,]/empMSE[5,4]
+    empMSE <- rbind(empMSE, relMSE)
+    rownames(empMSE)[6] <- "relMSE (loc + scale)"
+
+    empMSE
+}
+

Modified: pkg/RobLox/inst/NEWS
===================================================================
--- pkg/RobLox/inst/NEWS	2011-01-06 09:41:34 UTC (rev 448)
+++ pkg/RobLox/inst/NEWS	2011-01-06 10:31:51 UTC (rev 449)
@@ -11,6 +11,10 @@
 version 0.8
 #######################################
 
++ added function showdown to compare some estimator for normal location and
+  scale with our rmx estimators. The comparison is based on Monte-Carlo
+  simulation study.
+
 BUG FIX:
 + roblox(): if only sd or mean is to be estimated, starting 
   value mean.sd was not; similarly, in case only mean is of 

Added: pkg/RobLox/man/showdown.Rd
===================================================================
--- pkg/RobLox/man/showdown.Rd	                        (rev 0)
+++ pkg/RobLox/man/showdown.Rd	2011-01-06 10:31:51 UTC (rev 449)
@@ -0,0 +1,78 @@
+\name{showdown}
+\Rdversion{1.1}
+\alias{showdown}
+\title{Estimator Showdown by Monte-Carlo Study.}
+\description{
+  The function \code{showdown} can be used to perform Monte-Carlo studies 
+  comparing a competitor with rmx estimators in case of normal location and scale. 
+  In addition, maximum likelihood (ML) estimators (mean and sd) and median and 
+  MAD are computed. The comparison is based on the empirical MSE.
+}
+\usage{
+showdown(n, M, eps, contD, seed = 123, estfun, estMean, estSd,
+         eps.lower = 0, eps.upper = 0.05, steps = 3L, fsCor = TRUE, 
+         plot1 = FALSE, plot2 = FALSE, plot3 = FALSE)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{n}{integer; sample size, should be at least 3.}
+  \item{M}{integer; Monte-Carlo replications.}
+  \item{eps}{amount of contamination in [0, 0.5].}
+  \item{contD}{object of class \code{"UnivariateDistribution"}; contaminating distribution.}
+  \item{seed}{random seed.}
+  \item{estfun}{function to compute location and scale estimator; see details below.}
+  \item{estMean}{function to compute location estimator; see details below.}
+  \item{estSd}{function to compute scale estimator; see details below.}
+  \item{eps.lower}{used by rmx estimator.}
+  \item{eps.upper}{used by rmx estimator.}
+  \item{steps}{integer; steps used for estimator construction.}
+  \item{fsCor}{logical; use finite-sample correction.}
+  \item{plot1}{logical; plot cdf of ideal and real distribution.}
+  \item{plot2}{logical; plot 20 (or M if M < 20) randomly selected samples.}
+  \item{plot3}{logical; generate boxplots of the results.}
+}
+\details{
+Normal location and scale with mean = 0 and sd = 1 is used as ideal model (without
+restriction due to equivariance).
+
+Since there is no estimator which yields reliable results if 50 percent or more of the
+observations are contaminated, we use a modification where we re-simulate all samples
+including at least 50 percent contaminated data.
+
+If \code{estfun} is specified it has to compute and return a location and scale estimate
+(vector of length 2). One can also specify the location and scale estimator separately 
+by using \code{estMean} and \code{estSd} where \code{estMean} computes and returns
+the location estimate and \code{estSd} the scale estimate.
+
+We use funtion \code{\link{rowRoblox}} for the computation of the rmx estimator.
+}
+\value{Data.frame including empirical MSE (standardized by sample size n) and
+relMSE with respect to the rmx estimator.
+}
+\references{
+  Kohl, M. (2005) \emph{Numerical Contributions to the Asymptotic Theory of Robustness}. 
+  Bayreuth: Dissertation.
+
+  Rieder, H. (1994) \emph{Robust Asymptotic Statistics}. New York: Springer.
+
+  Rieder, H., Kohl, M. and Ruckdeschel, P. (2008) The Costs of not Knowing
+  the Radius. Statistical Methods and Applications \emph{17}(1) 13-40.
+  Extended version: \url{http://www.stamats.de/RRlong.pdf}
+}
+\author{Matthias Kohl \email{Matthias.Kohl at stamats.de}}
+%\note{}
+\seealso{\code{\link{rowRoblox}}}
+\examples{
+library(MASS)
+## compare with Huber's Proposal 2
+showdown(n = 20, M = 100, eps = 0.02, contD = Norm(mean = 3, sd = 3), 
+         estfun = function(x){ unlist(hubers(x)) },
+         plot1 = TRUE, plot2 = TRUE, plot3 = TRUE)
+
+## compare with Huber M estimator with MAD scale
+showdown(n = 20, M = 100, eps = 0.02, contD = Norm(mean = 3, sd = 3), 
+         estfun = function(x){ unlist(huber(x)) },
+         plot1 = TRUE, plot2 = TRUE, plot3 = TRUE)
+}
+\concept{Monte-Carlo study}
+\keyword{robust}