[Georob-commits] r5 - in pkg: . R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu May 23 17:41:57 CEST 2013


Author: papritz
Date: 2013-05-23 17:41:57 +0200 (Thu, 23 May 2013)
New Revision: 5

Added:
   pkg/R/georob.cv.R
Removed:
   pkg/R/georob.xvalid.R
Modified:
   pkg/ChangeLog
   pkg/R/georob.S3methods.R
   pkg/R/georob.exported.functions.R
   pkg/R/georob.predict.R
   pkg/R/variogram.R
   pkg/man/S3methods.georob.Rd
   pkg/man/georob.Rd
Log:
korrekturen fuer korrektes handling von fehlwerten


Modified: pkg/ChangeLog
===================================================================
--- pkg/ChangeLog	2013-05-14 15:58:21 UTC (rev 4)
+++ pkg/ChangeLog	2013-05-23 15:41:57 UTC (rev 5)
@@ -1,6 +1,6 @@
 2012-12-18  Andreas Papritz  <papritz at env.ethz.ch>
 
-* georob.xvalid.R (print.cv.georob, print.summary.cv.georob): return invisible(x)
+* georob.cv.R (print.cv.georob, print.summary.cv.georob): return invisible(x)
 * georob.S3methods.R (print.georob, print.summary.georob): return invisible(x)
 * variogram.R (print.summary.sample.variogram, print.fitted.variogram)
 (print.summary.fitted.variogram): return invisible(x)
@@ -42,7 +42,7 @@
 * georob.predict.R (predict.georob): new names for robustness weights
 * georob.private.functions.R (compute.covariances, update.betahat.bhat, estimate.betahat.bhat, compute.estimating equations, georob.fit): new names for robustness weights
 * georob.S3methods.R (ranef.georob, rstandard.georob, summary.georob, print.summary.georob): new names for robustness weights
-* georob.xvalid.R (cv.georob): changes for parallelization on windows os
+* georob.cv.R (cv.georob): changes for parallelization on windows os
 
 
 2013-04-29  Andreas Papritz  <papritz at env.ethz.ch>
@@ -62,3 +62,14 @@
 
 
 
+2013-05-23  Andreas Papritz  <papritz at env.ethz.ch>
+
+* georob.cv.R (cv.georob): correct handling of missing observations
+* georob.exported.functions.R (georob): improved way to handle missing observations and to construct model.frame
+* georob.predict.R (predict.georob): correct handling of missing observations
+* georob.S3methods.R (georob.residuals): new argument "terms"
+* georob.S3methods.R (ranef.georob, rstandard.georob,deviance.georob): correct handling of missing observations
+* variogram.R (plot.georob): correct handling of missing observations
+
+
+

Modified: pkg/R/georob.S3methods.R
===================================================================
--- pkg/R/georob.S3methods.R	2013-05-14 15:58:21 UTC (rev 4)
+++ pkg/R/georob.S3methods.R	2013-05-23 15:41:57 UTC (rev 5)
@@ -169,6 +169,7 @@
   ## 2012-10-18 AP changes for new definition of eta
   ## 2012-11-26 AP method for random.effects
   ## 2013-04-23 AP new names for robustness weights
+  ## 2013-05-23 AP correct handling of missing observations
   
   object$Valpha.objects <- expand( object$Valpha.objects )
   object$cov       <- expand( object$cov )
@@ -247,7 +248,8 @@
     
   }
   
-  return( bhat )        
+  bhat <- naresid( object$na.action, bhat )
+  return( bhat )
   
 }
 
@@ -282,6 +284,7 @@
   function( 
     object,
     type = c("working", "response", "deviance", "pearson", "partial" ), 
+    terms = NULL,
     level = 1,
     ...
   )
@@ -300,6 +303,7 @@
   
   ## 2011-10-13 A. Papritz    
   ## 2011-12-14 AP modified for replicated observations
+  ## 2013-05-23 AP modified for computing partial residuals for single terms
   
   type <- match.arg( type )
   
@@ -315,14 +319,15 @@
     partial = r
   )
   
+  res <- naresid(object$na.action, res)
+    
   if( level == 0 && any( type %in% c( "working", "response", "partial" ) ) ){
     res <- res + ranef( object, standard = FALSE )[object$Tmat]
   }
   
-  res <- naresid(object$na.action, res)
   if( type == "partial" ) 
-    res <- res + predict( object, type = "terms" )$fit
-  res
+    res <- res + predict( object, type = "terms", terms = terms )$fit
+  drop( res )
 }
 
 
@@ -347,6 +352,7 @@
   ## 2012-01-05 AP modified for compress storage of matrices
   ## 2012-10-18 AP changes for new definition of eta
   ## 2013-04-23 AP new names for robustness weights
+  ## 2013-05-23 AP correct handling of missing observations
   
   object <- model
   object$Valpha.objects <- expand( object$Valpha.objects )
@@ -426,6 +432,8 @@
   
   ## compute standardized residuals
   
+  se <- naresid( model$na.action, se )
+  
   residuals( model, level = level ) / se
   
 }
@@ -867,7 +875,14 @@
   ## deviance method for class georob
   
   ## 2012-12-22 A. Papritz
+  ## 2013-05-23 AP correct handling of missing observations
   
+  ## redefine na.action component of object
+  
+  if( identical( class( object$na.action ), "exclude" ) ){
+    class( object$na.action ) <- "omit"
+  }
+  
   if( object[["tuning.psi"]] < georob.control()[["tuning.psi.nr"]] ){
     result <- NA_real_
   } else {

Added: pkg/R/georob.cv.R
===================================================================
--- pkg/R/georob.cv.R	                        (rev 0)
+++ pkg/R/georob.cv.R	2013-05-23 15:41:57 UTC (rev 5)
@@ -0,0 +1,929 @@
+##  ############################################################################
+
+cv <- function( object, ... ) UseMethod( "cv" )
+
+##  ############################################################################
+
+cv.georob <-
+  function(
+    object,
+    formula = NULL, subset = NULL,
+    nset = 10, seed = NULL, sets = NULL,
+    duplicates.in.same.set = TRUE,
+    re.estimate = TRUE, param = object$param, 
+    fit.param = object$initial.objects$fit.param,
+    return.fit = FALSE, reduced.output = TRUE,
+    lgn = FALSE,
+    ncores = min( nset, detectCores() ),
+    verbose = 0,
+    ...
+  )
+{
+  
+  ## Function computes nset-fold cross-validation predictions from a
+  ## fitted georob object
+  
+  ## Arguments:
+  
+  ## object        fitted georob object
+  ## formula       a formula passed by update to georob
+  ## nset          integer scalar for the number of cross-validation subsets
+  ## seed integer scalar passed to set.seed before selecting the 
+  ##               cross-valdation subsets by a call to runif()
+  ## sets          an integer vector with length nrow(data) defining the 
+  ##               cross-validation sets and over-riding the values provided 
+  ##               for nset and seed
+  ## duplicates.in.same.set   logical flag controlling whether replicated observations
+  ##               at a given location are assigned to the same cross-validation set
+  ## re.estimate   logical flag controlling whether the variogram parameters should 
+  ##               be re-estimated for each cross-validation subset
+  ## param         initial values of variogram parameters when the variogram is 
+  ##               re-estimated for each cross-validation subset
+  ## return.fit    logical flag to control whether the information about the fit are
+  ##               should be returned for each cross-valdiation subset when re-estimating the
+  ##               model
+  ## reduced.output    logical flag controlling whether for each cross-valdiation subset the
+  ##               the full fitted object or just a selection (information about convergence,
+  ##               variogram and fixed-effects parameter estimates) should be returned when 
+  ##               re-estimating the model
+  ## lgn   logical flag controlling whether lognormal kriging predictions should be computed 
+  ## ncores        integer scalar with the number of cores to used in parallel processing
+  ## verbose       integer scalar, controlling verbosity of the information sent to standard output
+  ## ...           further arguments passed by update to georob or to 
+  ##               mclapply on non-windows platforms
+  
+  ## ToDos:
+  
+  ## - Klasse und Methoden definieren fuer cv (kompatibel mit geoR)
+  
+  ## History:
+  
+  ## 2011-10-24 Korrektur Ausschluss von nichtbenoetigten Variablen fuer lognormal kriging
+  ## 2011-12-23 AP modified for replicated observations and for parallel computing
+  ## 2012-03-02 AP eliminated possibility for logging to file in parallel processing
+  ## 2012-03-19 AP correction of error in parallel processing on Windows
+  ## 2012-05-01 AP correct handling of NAs
+  ## 2012-05-04 AP modifications for lognormal block kriging
+  ## 2012-05-09 AP correction of error if a new formula is passed via update to georob
+  ## 2012-05-22 AP correction of error in passing param and fit.param to georob
+  ## 2012-06-05 AP correction of error in handling optional subset argument
+  ## 2012-11-04 AP handling compressed cov.betahat
+  ## 2012-12-04 AP modifiction for changes in predict.georob
+  ## 2013-04-24 AP changes for parallelization on windows os
+  ## 2013-05-23 AP correct handling of missing observations
+    
+  ## auxiliary function that fits the model and computes the predictions of
+  ## a cross-validation set
+  
+  f.aux <- function( 
+    ..i.., object, formula, data, sets, re.estimate, param, fit.param, lgn, verbose, ...
+  ){  ## cv function
+    
+    if (verbose) cat( "\n\n  processing cross-validation set", ..i.., "\n" ) 
+    
+    ## fit model to complement of current set
+    
+    if( !re.estimate ){
+      fit.param <- c( 
+        variance = FALSE, snugget = FALSE, nugget = FALSE, scale = FALSE, 
+        a = FALSE, alpha = FALSE, beta = FALSE, delta = FALSE, 
+        gamma = FALSE, lambda = FALSE, n = FALSE, nu = FALSE,
+        f1 = FALSE, f2  =FALSE, omega = FALSE, phi = FALSE, zeta = FALSE      
+      )[names( param )]
+    }
+    
+    ## change environment of terms and formula so that subset selection works for update            
+    
+    environment( formula ) <- environment()
+    environment( object$terms ) <- environment()
+
+    t.georob <- update( 
+      object, 
+      formula = formula,
+      data = data,
+      subset = -sets[[..i..]] ,
+      param = param,
+      fit.param = fit.param,
+      verbose = verbose,
+      ...
+    )
+    
+    if( verbose > 0 ){
+      cat( "\n\n" )
+      print( summary( t.georob ) )
+    }
+    
+    ## compute predictions for current set
+    
+    t.predict <- predict( 
+      t.georob, newdata = data[sets[[..i..]], ], type = "response",
+      mmax = length( sets[[..i..]] ),
+      extended.output = lgn,
+      ncores = 1
+    )
+    
+    ## backtransformation for log-normal kriging
+    
+    if( lgn ){
+      t.predict <- lgnpp( t.predict )
+      t.predict <- t.predict[, -match( 
+        c( "trend", "var.pred", "cov.pred.target", "var.target" ), names( t.predict ) 
+      )]
+    }
+    
+    t.predict <- data.frame( i = sets[[..i..]], t.predict )
+    
+    t.ex <- c( 
+      grep( "lower", colnames( t.predict ), fixed = TRUE ),
+      grep( "upper", colnames( t.predict ), fixed = TRUE )
+    )
+    
+    t.predict <- t.predict[, -t.ex]
+    
+    if( reduced.output ){
+      
+      if( !is.null( t.georob$cov$cov.betahat ) ){
+        t.se.coef <- sqrt( diag( expand( t.georob$cov$cov.betahat ) ) )
+      } else {
+        t.se.coef <- NULL
+      }
+      
+      t.georob <- t.georob[c(  
+        "tuning.psi", "converged", "convergence.code",
+        "gradient", "param", "aniso",
+        "coefficients"
+      )]
+      
+      t.georob$aniso <- t.georob$aniso$aniso
+      
+      if( !is.null( t.se.coef ) ) t.georob$se.coefficients <- t.se.coef
+      
+    }
+    
+    return( list( pred = t.predict, fit = t.georob ) )
+    ## end cv function
+  }
+  
+  ## redefine na.action component of object
+  
+  if( identical( class( object$na.action ), "exclude" ) ){
+    class( object$na.action ) <- "omit"
+  }
+  
+  ## update terms of object is formula is provided
+  
+  if( !is.null( formula ) ){
+    formula <- update( formula( object ), formula )
+    object$terms <- terms( formula )
+  } else {
+    formula <- formula( object )
+  }
+    
+  ## get data.frame with required variables (note that the data.frame passed
+  ## as data argument to georob must exist in GlobalEnv)
+  
+  data <- cbind(
+    get_all_vars( formula( object ), eval( getCall(object)$data ) ),
+    get_all_vars( object$locations.objects$locations, eval( getCall(object)$data ) )
+  )
+  
+  ## select subset if appropriate
+  
+  if( !is.null( subset ) ){
+    data <- data[subset, ]
+    object$Tmat <- object$Tmat[subset]
+  } else if( !is.null( getCall(object)$subset ) ){
+   data <- data[eval( getCall(object)$subset ), ]
+  }
+  
+#   if( !is.null( getCall(object)$subset ) )
+   
+  ## define cross-validation sets
+  
+  if( is.null( sets ) ){
+    
+    if( !is.null( seed ) ) set.seed( seed )
+    sets <- runif( NROW( data ) )
+    sets <- cut( 
+      sets, 
+      breaks = c( -0.1, quantile( sets, probs = ( 1:(nset-1)/nset ) ), 1.1 )
+    )
+    sets <- factor( as.integer( sets ) )
+
+  } else {
+    
+    if( length( sets ) != NROW( data ) ) stop(
+      "sets must be an integer vector with length equal to the number of observations"      
+    )
+    
+  }
+  
+  if( duplicates.in.same.set ){
+    dups <- duplicated( object$Tmat )
+    idups <- match( object$Tmat[dups], object$Tmat[!dups] )
+    sets[dups] <- (sets[!dups])[idups]
+  }
+  
+  sets <- tapply(
+    1:NROW( data ),
+    sets,
+    function( x ) x
+  )
+  
+  ## loop over all cross-validation sets
+  
+  if( .Platform$OS.type == "windows" ){
+    
+    ## create a SNOW cluster on windows OS
+    
+    cl <- makePSOCKcluster( ncores, outfile = "")
+    
+    ## export required items to workers
+    
+    junk <- clusterEvalQ( cl, require( georob, quietly = TRUE ) )
+    
+    t.result <- parLapply(
+      cl, 
+      1:length( sets ),
+      f.aux, 
+      object = object,
+      formula = formula,
+      data = data,
+      sets = sets,
+      re.estimate = re.estimate,
+      param = param,
+      fit.param = fit.param,
+      lgn = lgn,
+      verbose = verbose,
+      ...
+    )
+    
+    stopCluster(cl)
+    
+  } else {
+        
+    ## fork child processes on non-windows OS
+    
+    t.result <- mclapply(
+      1:length( sets ),
+      f.aux, 
+      object = object,
+      formula = formula,
+      data = data,
+      sets = sets,
+      re.estimate = re.estimate,
+      param = param,
+      fit.param = fit.param,
+      lgn = lgn,
+      verbose = verbose,
+      mc.cores = ncores,
+      mc.allow.recursive = FALSE,
+      ...
+    )
+    
+  }
+    
+  ## create single data frame with cross-validation results 
+  
+  result <- t.result[[1]]$pred
+  result$subset <- rep( 1, nrow( t.result[[1]]$pred ) )
+  
+  for( t.i in 2:length( t.result ) ) {
+    result <- rbind( 
+      result, 
+      data.frame( 
+        t.result[[t.i]]$pred,
+        subset = rep( t.i, nrow( t.result[[t.i]]$pred ) )
+      )
+    )
+  }
+  t.ix <- sort( result$i, index.return = T )$ix
+  result <- result[t.ix, ]
+  result$data <- model.response( 
+    model.frame( formula( object), data, na.action = na.pass ) 
+  )
+  
+  if( lgn ) result$lgn.data <- exp( result$data )
+  
+  result <- result[, -match("i", colnames( result) )]
+  
+  isubset <- match( "subset", colnames( result ) )
+  idata <- grep( "data", colnames( result ), fixed = TRUE )
+  ipred<-  grep( "pred", colnames( result ), fixed = TRUE )
+  ise <-   grep( "se", colnames( result ), fixed = TRUE )
+  ise <- ise[ise != isubset]
+  
+  result <- cbind(
+    result[, -c(isubset, idata, ipred, ise)],
+    result[,  c(isubset, idata, ipred, ise)]
+  )
+  
+  t.fit <- lapply( t.result, function( x ) return( x$fit ) )
+  
+  if( re.estimate && !all( sapply( t.fit, function(x) x$converged ) ) )
+  warning(
+    "lack of covergence when fitting model to cross-validation sets"
+  )
+  
+  result <- list( 
+    pred = result, 
+    fit = if( return.fit ) t.fit else NULL
+  )
+  
+  class( result ) <- "cv.georob"
+  
+  invisible( result )
+  
+}
+
+##  ###########################################################################
+
+plot.cv.georob <-
+  function( 
+    x, type = c( "sc", "lgn.sc", "ta", "qq", "pit", "mc", "bs" ), 
+    ncutoff = NULL, 
+    add = FALSE, 
+    col, pch, lty,
+    main, xlab, ylab, 
+    ... 
+  )
+{
+  
+  ## plot method for class "cv.georob"  
+  
+  ## 2011-12-21 A. Papritz
+  
+  x <- x$pred
+  
+  type = match.arg( type )
+  
+  if( type == "sc.lgn" && !"lgn.pred" %in% names( x ) ) stop(
+    "lognormal kriging results missing, use 'lgn = TRUE' for cross-validation"
+  )
+  
+  if( type %in% c( "pit", "mc", "bs" ) ){
+    
+    result <- validate.predictions( 
+      data = x$data,
+      pred = x$pred,
+      se.pred = x$se,
+      statistic = type, ncutoff = ncutoff
+    )
+    
+  }
+  
+  if( missing( col ) ) col <- 1
+  if( missing( pch ) ) pch <- 1
+  if( missing( lty ) ) lty <- 1
+
+  
+    
+  switch(
+    type,
+    sc = {
+      
+      ##  scatterplot of (transformed) measurements vs. predictions
+      
+      if( missing( main ) ) main <- "data vs. predictions"
+      if( missing( xlab ) ) xlab <- "predictions"
+      if( missing( ylab ) ) ylab <- "data"
+      
+      if( add ){
+        points( data ~ pred, x, col = col, pch = pch, ... )
+      } else {        
+        plot( 
+          data ~ pred, x, col = col, pch = pch, 
+          main = main, xlab = xlab, ylab = ylab, ... 
+        )
+      }
+      
+      
+    },        
+    lgn.sc = {
+      
+      ##  scatterplot of original measurements vs.  back-transformded
+      ##  lognormal predictions
+      
+      if( missing( main ) ) main <- "data vs. back-transformed predictions"
+      if( missing( xlab ) ) xlab <- "back-transformed predictions"
+      if( missing( ylab ) ) ylab <- "data"
+      
+      if( add ){
+        points( lgn.data ~ lgn.pred, x, col = col, pch = pch, ... )
+      } else {        
+        plot( 
+          lgn.data ~ lgn.pred, x, col = col, pch = pch, 
+          main = main, xlab = xlab, ylab = ylab, ... 
+        )
+      }
+      
+      
+      
+    }, 
+    ta = {
+      
+      ##  Tukey-Anscombe plot
+      
+      if( missing( main ) ) main <- "Tukey-Anscombe plot"
+      if( missing( xlab ) ) xlab <- "predictions"
+      if( missing( ylab ) ) ylab <- "standardized prediction errors"
+      
+      if( add ){
+        points( I((data-pred)/se) ~ pred, x, col = col, pch = pch, ... )
+      } else {        
+        plot( 
+          I((data-pred)/se) ~ pred, x, col = col, pch = pch, 
+          main = main, xlab = xlab, ylab = ylab, ... 
+        )
+      }
+      
+    },
+    qq = {
+      
+      ##  normal QQ-Plot of standardized prediction errors
+      
+      if( missing( main ) ) main <- "normal-QQ-plot of standardized prediction errors"
+      if( missing( xlab ) ) xlab <- "quantile N(0,1)"
+      if( missing( ylab ) ) ylab <- "quantiles of standardized prediction errors"
+      
+      r.qq <- with( x, qqnorm( ( data - pred ) / se, plot.it = FALSE ) )
+      
+      if( add ){
+        points( r.qq, col = col, pch = pch, ... )
+      } else {
+        plot( r.qq, col = col, pch = pch, main = main, xlab = xlab, ylab = ylab, ... )
+      }
+    },
+    pit = {
+      
+      ##  histogramm of probability-integral-transformation
+      
+      if( missing( main ) ) main <- "histogramm PIT-values"
+      if( missing( xlab ) ) xlab <- "PIT"
+      if( missing( ylab ) ) ylab <- "density"
+      
+      r.hist <- hist( 
+        result, 
+        col = col, lty = lty, 
+        main = main, xlab = xlab, ylab = ylab, freq = FALSE, ... )
+    },
+    mc = {
+      
+      ##  narginal calibration plots: ecdf of measurements and mean
+      ##  predictive cdf
+      
+      if( missing( main ) ) main <- "empirical cdf of data and mean predictive cdfs"
+      if( missing( xlab ) ) xlab <- "data or predicitons"
+      if( missing( ylab ) ) ylab <- "probability"
+      
+      matplot( 
+        result$y, 
+        result[, c( "ghat", "fbar" )], type = "l",
+        col = c( "black", "red" ),
+        lty = c( "solid", "dashed" ),
+        main = main, xlab = xlab, ylab = ylab,
+        ...
+      )
+      
+      t.usr <- par( "usr" )
+      t.usr[3:4] <- with( result, range( fbar - ghat ) ) *1.04
+      par( usr = t.usr )
+      with( result, lines( y, fbar-ghat, col= "blue", lty = "dotted" ) )
+      axis(2, pos = t.usr[2], col.axis = "blue", col.ticks = "blue" )
+      legend( 
+        "topleft",
+        lty = c("solid", "dashed", "dotted" ), 
+        col = c( "black", "red", "blue" ), bty = "n", cex = 1,
+        legend = c(
+          expression( paste( "empirical cdf ", hat(G) ) ),
+          expression( paste( "mean predictive cdf ", bar(F) ) ),
+          expression( bar(F)-hat(G) )
+        )
+      )
+    },
+    bs  ={
+      
+      # plot of brier score vs. cutoff
+      
+      if( missing( main ) ) main <- "Brier score vs. cutoff"
+      if( missing( xlab ) ) xlab <- "cutoff"
+      if( missing( ylab ) ) ylab <- "Brier score"
+      
+      if( add ){
+        lines( result$y, result$bs, col = col, lty = lty, ... )
+      } else {
+        plot( result$y, result$bs, type = "l", col = col, lty = lty, 
+          main = main, xlab = xlab, ylab = ylab, ...
+        )
+      }
+    }
+  )
+  invisible( NULL )
+}
+
+  
+
+##  ###########################################################################
+
+print.cv.georob <-
+  function( 
+    x, digits = max(3, getOption("digits") - 3), ...
+  )
+{   ## print method for class "cv.georob"  
+  
+  ## 2011-10-13 A. Papritz
+  ## 2012-12-18 AP invisible(x)
+  
+  x <- x$pred
+  
+  st <- validate.predictions( 
+    data = x$data,
+    pred = x$pred,
+    se.pred = x$se,
+    statistic = "st",
+    ...
+  )
+  
+  print(
+    format( st, digits = digits ), print.gap = 2, 
+    quote = FALSE
+  )
+  
+  invisible( x )
+}
+
+
+##  ###########################################################################
+
+summary.cv.georob <-
+  function( object, se = FALSE, ... )
+{
+  
+  ## summary method for class "cv.georob" 
+  
+  ## function computes statistics of the cross-validation errors
+  
+  ## 2011-10-13 A. Papritz
+  ## 2012-05-21 ap
+  
+  
+  object <- object$pred
+  
+  bs <- validate.predictions( 
+    data = object$data,
+    pred = object$pred,
+    se.pred = object$se,
+    ncutoff = length( object$data ),
+    statistic = "bs"
+  )
+  
+  t.d <- diff( bs$y )
+  crps <- sum( bs$bs * 0.5 * ( c( 0., t.d ) + c( t.d, 0. ) ) )
+  
+  st <- validate.predictions( 
+    data = object$data,
+    pred = object$pred,
+    se.pred = object$se,
+    statistic = "st"
+  )
+  
+  if( !is.null( object$lgn.pred ) ){
+    st.lgn <- validate.predictions( 
+      data = object$lgn.data,
+      pred = object$lgn.pred,
+      se.pred = object$lgn.se,
+      statistic = "st"
+    )
+  } else {
+    st.lgn <- NULL
+  }
+  
+  ## collect results
+  
+  result <- list( st = st, crps = crps, st.lgn = st.lgn )
+
+  ## compute standard errors of criteria across cross-validation sets
+  
+  if( se && !is.null( object$subset ) ){
+    
+    criteria <- t( sapply(
+        tapply(
+          1:nrow( object ),
+          factor( object$subset ),
+          function( i, data, pred, se.pred, lgn.data, lgn.pred, lgn.se.pred ){
+            
+            bs <- validate.predictions( 
+              data = data[i],
+              pred = pred[i],
+              se.pred = se.pred[i],
+              ncutoff = length( data[i] ),
+              statistic = "bs"
+            )
+            
+            t.d <- diff( bs$y )
+            crps <- c( crps = sum( bs$bs * 0.5 * ( c( 0., t.d ) + c( t.d, 0. ) ) ) )
+            
+            st <- validate.predictions( 
+              data = data[i],
+              pred = pred[i],
+              se.pred = se.pred[i],
+              statistic = "st"
+            )
+            
+            if( !is.null( lgn.pred ) ){
+              st.lgn <- validate.predictions( 
+                data = lgn.data[i],
+                pred = lgn.pred[i],
+                se.pred = lgn.se.pred[i],
+                statistic = "st"
+              )
+              names( st.lgn ) <- paste( names( st.lgn ), "lgn", sep = "." )
+            } else {
+              st.lgn <- NULL
+            }
+            
+            
+            return( c( st, st.lgn, crps ) )
+            
+          },
+          data = object$data,
+          pred = object$pred,
+          se.pred = object$se,
+          lgn.data = object$lgn.data,
+          lgn.pred = object$lgn.pred,
+          lgn.se.pred = object$lgn.se
+        ),
+        function( x ) x
+      ))
+    
+    se.criteria <- apply(
+      criteria, 2,
+      function( x ) sd( x ) / sqrt( length( x ) )
+    )
+    
+    result$se.st <- se.criteria[c( "me", "mede", "rmse", "made", "qne", "msse", "medsse")]
+    result$se.crps <- se.criteria["crps"]
+    if( !is.null( st.lgn ) ){
+      result$se.st.lgn <- se.criteria[
+        c( "me.lgn", "mede.lgn", "rmse.lgn", "made.lgn", "qne.lgn", "msse.lgn", "medsse.lgn")
+      ]
+      names( result$se.st.lgn ) <- gsub( ".lgn", "", names( result$se.st.lgn ) )
+    }
+    
+  }
+  
+  class( result ) <- "summary.cv.georob"
+ 
+  
+  return( result )
+  
+}
+
+##  ###########################################################################
+
+print.summary.cv.georob <-
+  function( 
+    x, digits = max(3, getOption("digits") - 3), ...
+  )
+{
+  
+  ## print method for class "summary.cv.georob"  
+  
+  ## 2011-12-20 A. Papritz
+  ## 2012-05-21 ap
+  ## 2012-12-18 AP invisible(x)
+  
+
+  result <- c( x$st, crps = x$crps )
+  if( !is.null( x$se.st ) ){
+    result <- rbind( result, c( x$se.st, crps = x$se.crps ) )
+    rownames( result ) <- c( "", "se" )
+  }
+  
+  cat( "\nStatistics of cross-validation prediction errors\n" )
+  print(
+    format( result, digits = digits ), print.gap = 2, 
+    quote = FALSE
+  )
+  
+  if( !is.null( x$st.lgn ) ){
+    result <- x$st.lgn
+    if( !is.null( x$se.st.lgn ) ){
+      result <- rbind( x$st.lgn, x$se.st.lgn )
+      rownames( result ) <- c( "", "se" )
+    }
+    
+    cat( "\nStatistics of back-transformed cross-validation prediction errors\n" )
+    print(
+      format( result, digits = digits ), print.gap = 2, 
+      quote = FALSE
+    )
+  }
+  
+  invisible( x )
+  
+}
+
+##  ###########################################################################
+
+rstudent.cv.georob <-
+  function( model, ... )
+{
+  
+  ## Function extracts studentized residuals from cv.georob object
+  
+  ## Arguments:
+  
+  ## model     cv.georob object
+  ## ...       further arguments (currently not used)
+  
+  ## 2011-10-13 A. Papritz
+  
+  if( !identical( class( model )[1], "cv.georob" ) ) stop(
+    "model is not of class 'cv.georob'" 
+  )
+  
+  model <- model$pred
+  
+  ( model$data - model$pred ) / model$se
+  
+}
+
+# ##  ###########################################################################
+# 
+# cv.variomodel <-
+#   function( object, geodata, ... )
+# {
+#   
+#   ## Wrapper function for cross-validation of object of class variomodel{geoR}
+#   ## by function xvalid{geoR}
+#   
+#   ## Arguments:
+#   
+#   ## model     an object of class "variomodel{geoR}
+#   ## ...       further arguements passed to xvalid{geoR), cf. respective help page
+#   
+#   ## 2012-11-22 A. Papritz
+#   
+#   call.fc <- match.call()
+# 
+#   res <- geoR::xvalid( model = object, ... )
+#   
+#   if( !is.null( attr( res, "geodata.xvalid" ) ) ){
+#     attr( res, "geodata.xvalid" ) <- call.fc$geodata
+#   }
+#   if( !is.null( attr( res, "locations.xvalid" ) ) ){
+#     attr( res, "locations.xvalid" ) <- call.fc$locations.xvalid
+#   }
+#   
+#   return(res)
+#   
+# }
+# 
+# cv.likGRF <-
+#   function( object, geodata, ... )
+# {
+#   
+#   ## Wrapper function for cross-validation of object of class variomodel{geoR}
+#   ## by function xvalid{geoR}
+#   
+#   ## Arguments:
+#   
+#   ## model     an object of class "likGRF{geoR}
+#   ## ...       further arguements passed to xvalid{geoR), cf. respective help page
+#   
+#   ## 2012-11-22 A. Papritz
+#   
+#   call.fc <- match.call()
+# 
+#   res <- geoR::xvalid( model = object, geodata = geodata, ... )
+#   
+#   if( !is.null( attr( res, "geodata.xvalid" ) ) ){
+#     attr( res, "geodata.xvalid" ) <- call.fc$geodata
+#   }
+#   if( !is.null( attr( res, "locations.xvalid" ) ) ){
+#     attr( res, "locations.xvalid" ) <- call.fc$locations.xvalid
+#   }
+#   
+#   return(res)
+#   
+# }
+
+## ======================================================================
+validate.predictions <- 
+  function( 
+    data,
+    pred,
+    se.pred,
+    statistic = c( "pit", "mc", "bs", "st" ),	
+    ncutoff = NULL
+  )
+{
+  
+  ## function computes several statistics to validate probabilistic
+  ## predictions, cf.  Gneiting et al., 2007, JRSSB
+  
+  ## Arguments:
+  
+  ## data					numeric vector with data
+  ## pred					numeric vector with predictions
+  ## se.pred			numeric vector with prediction standard errors
+  ## statistic					character scalar specifying  the statistic to compute
+  ##							possible values are:
+  ##							"pit"			probability integral transformation
+  ##							"mc"			empirical cdf of data and mean predictive 
+  ##												normal distribution
+  ##							"bs"			Brier score
+  ##							"st"			basic statistics such as mean error, mean
+  ##												mean squared error, mean squared standardized error
+  ##												and robust equivalents
+  ## ncutoff			number of quantiles (statistic == "mc") or number of cutoffs (statistic = "bs")
+  
+  # 2011-20-21 A. Papritz
+  # 2012-05-04 AP coping with NAs
+  
+  statistic = match.arg( statistic )
+  
+  ## exclude item with NAs
+  
+  t.sel <- complete.cases( data, pred, se.pred )
+  
+  if( sum( t.sel ) < length( t.sel ) ) warnings(
+    "missing values encountered when validating predictions"
+  )
+  
+  data    <- data[t.sel]
+  pred    <- pred[t.sel]
+  se.pred <- se.pred[t.sel]
+  
+  if( missing( ncutoff ) || is.null( ncutoff ) ) ncutoff <- min( 500, length( data ) )
+  
+  result <- switch(
+    statistic,
+    pit = {
+      
+      ## probability integral transformation
+      
+      pnorm( data, mean = pred, sd = se.pred )
+    },
+    mc = ,
+    bs = {
+      
+      ## marginal calibration and brier score
+      
+      margin.calib <- data.frame(
+        y = t.x <- unique( t.y <- sort( c( data ) ) ),
+        ghat = cumsum( tabulate( match( t.y, t.x ) ) ) / length(t.y)
+      )
+      t.sel <- trunc( 
+        seq( 
+          from = as.integer(1), 
+          to = nrow( margin.calib ), 
+          length.out = min( nrow( margin.calib ), ncutoff ) 
+        ) 
+      )
+      margin.calib <- margin.calib[t.sel,]
+      
+      t.bla <- t(
+        sapply(
+          margin.calib$y,
+          function( q, m, s, y ){
+            t.p <- pnorm( q, mean = m, sd = s )
+            c( 
+              fbar = mean( t.p ), 
+              bs = mean( ( t.p - as.numeric( y <= q ) )^2 )
+            )
+          },
+          m = pred,
+          s = se.pred,
+          y = data
+        )
+      )
+      cbind(
+        margin.calib, as.data.frame( t.bla ) 
+      )
+    },
+    st = {
+      
+      ## statistics of (standardized) prediction errors
+      
+      error <- data - pred
+      std.error <- error / se.pred
+      
+      statistics <- c( 
+        me = mean( error ),
+        mede = median( error ),
+        rmse = sqrt( mean( error^2 ) ),
+        made = mad( error, center = 0 ),
+        qne = Qn( error, finite.corr = TRUE ),
+        msse = mean( std.error^2 ),
+        medsse = median( std.error^2 )
+      )
+      
+    }
+  )
+  
+  return( result )   
+  
+}
+


Property changes on: pkg/R/georob.cv.R
___________________________________________________________________
Added: svn:executable
   + *

Modified: pkg/R/georob.exported.functions.R
===================================================================
--- pkg/R/georob.exported.functions.R	2013-05-14 15:58:21 UTC (rev 4)
+++ pkg/R/georob.exported.functions.R	2013-05-23 15:41:57 UTC (rev 5)
@@ -75,6 +75,7 @@
   ## 2012-05-07 AP correction of error for constant trend
   ## 2012-05-28 AP handle missing names of coefficients after calling update
   ## 2013-04-23 AP new names for robustness weights
+  ## 2013-05-23 AP correct handling of missing observations and to construct model.frame
   
   ## check whether input is complete
   
@@ -87,13 +88,23 @@
   ret.x <- x
   ret.y <- y
   
-  ## vector with row number of included observations
+#   ## vector with row number of included observations
+#   
+#   in.subset <- 1:NROW( data )
+#   if( !missing( subset ) ) in.subset <- in.subset[subset]
   
-  in.subset <- 1:NROW( data )
-  if( !missing( subset ) ) in.subset <- in.subset[subset]
+  ## build combined formula for fixed effects and locations
   
-  ## start setting-up model frames
+  extended.formula <- update( 
+    formula,
+    paste( 
+      paste( as.character( formula )[c(2, 1, 3)], collapse = " " ),
+      as.character( locations )[2], sep = " + "
+    )
+  )
   
+  ## setting-up model frame
+  
   cl <- match.call()
   mf <- match.call( expand.dots = FALSE )
   m <- match( 
@@ -101,48 +112,25 @@
     names(mf), 0L 
   )
   mf <- mf[c(1L, m)]
+  mf$formula <- extended.formula
   mf$drop.unused.levels <- TRUE
   mf[[1L]] <- as.name( "model.frame" )
   
-  ## spot missing observations for fixed effects
+  mf <- eval( mf, parent.frame() )
   
-  mf.try <- eval( mf, parent.frame() )
-  if( !is.null( attr( mf.try, "na.action" ) ) ){
-    ex.mf <- unname( unclass( attr( mf.try, "na.action" ) ) )
-  } else ex.mf <- integer(0)
-  ##     if( verbose > 3 ) cat( "ex.mf    :", ex.mf, "\n" )
+  ## eliminate intercept from locations
   
-  ## spot missing observations for locations
+  locations <- as.formula( paste( deparse( locations ), "-1" ), env = parent.frame() )
   
-  locations <-
-    as.formula( paste( deparse( locations ), "-1" ), env = parent.frame() )
-  mf.loc <- mf
-  mf.loc$formula <- locations
-  mf.loc.try <- eval( mf.loc, parent.frame() )
-  if( !is.null( attr( mf.loc.try, "na.action" ) ) ){
-    ex.mf.loc <- unname( unclass( attr( mf.loc.try, "na.action" ) ) )
-  } else ex.mf.loc <- integer( 0 )
-  ##     if( verbose > 3 ) cat( "ex.mf.loc:", ex.mf.loc, "\n" )
+  ## setting-up terms objects
   
-  ## missing observations either for fixed effects or locations
+  mt     <- terms( formula )
+  mt.loc <- terms( locations )
+    
+  ## ... and assign fixed effects terms object as attribute to model.frame
   
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/georob -r 5


More information about the Georob-commits mailing list