[Georob-commits] r5 - in pkg: . R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu May 23 17:41:57 CEST 2013
Author: papritz
Date: 2013-05-23 17:41:57 +0200 (Thu, 23 May 2013)
New Revision: 5
Added:
pkg/R/georob.cv.R
Removed:
pkg/R/georob.xvalid.R
Modified:
pkg/ChangeLog
pkg/R/georob.S3methods.R
pkg/R/georob.exported.functions.R
pkg/R/georob.predict.R
pkg/R/variogram.R
pkg/man/S3methods.georob.Rd
pkg/man/georob.Rd
Log:
korrekturen fuer korrektes handling von fehlwerten
Modified: pkg/ChangeLog
===================================================================
--- pkg/ChangeLog 2013-05-14 15:58:21 UTC (rev 4)
+++ pkg/ChangeLog 2013-05-23 15:41:57 UTC (rev 5)
@@ -1,6 +1,6 @@
2012-12-18 Andreas Papritz <papritz at env.ethz.ch>
-* georob.xvalid.R (print.cv.georob, print.summary.cv.georob): return invisible(x)
+* georob.cv.R (print.cv.georob, print.summary.cv.georob): return invisible(x)
* georob.S3methods.R (print.georob, print.summary.georob): return invisible(x)
* variogram.R (print.summary.sample.variogram, print.fitted.variogram)
(print.summary.fitted.variogram): return invisible(x)
@@ -42,7 +42,7 @@
* georob.predict.R (predict.georob): new names for robustness weights
* georob.private.functions.R (compute.covariances, update.betahat.bhat, estimate.betahat.bhat, compute.estimating equations, georob.fit): new names for robustness weights
* georob.S3methods.R (ranef.georob, rstandard.georob, summary.georob, print.summary.georob): new names for robustness weights
-* georob.xvalid.R (cv.georob): changes for parallelization on windows os
+* georob.cv.R (cv.georob): changes for parallelization on windows os
2013-04-29 Andreas Papritz <papritz at env.ethz.ch>
@@ -62,3 +62,14 @@
+2013-05-23 Andreas Papritz <papritz at env.ethz.ch>
+
+* georob.cv.R (cv.georob): correct handling of missing observations
+* georob.exported.functions.R (georob): improved way to handle missing observations and to construct model.frame
+* georob.predict.R (predict.georob): correct handling of missing observations
+* georob.S3methods.R (georob.residuals): new argument "terms"
+* georob.S3methods.R (ranef.georob, rstandard.georob,deviance.georob): correct handling of missing observations
+* variogram.R (plot.georob): correct handling of missing observations
+
+
+
Modified: pkg/R/georob.S3methods.R
===================================================================
--- pkg/R/georob.S3methods.R 2013-05-14 15:58:21 UTC (rev 4)
+++ pkg/R/georob.S3methods.R 2013-05-23 15:41:57 UTC (rev 5)
@@ -169,6 +169,7 @@
## 2012-10-18 AP changes for new definition of eta
## 2012-11-26 AP method for random.effects
## 2013-04-23 AP new names for robustness weights
+ ## 2013-05-23 AP correct handling of missing observations
object$Valpha.objects <- expand( object$Valpha.objects )
object$cov <- expand( object$cov )
@@ -247,7 +248,8 @@
}
- return( bhat )
+ bhat <- naresid( object$na.action, bhat )
+ return( bhat )
}
@@ -282,6 +284,7 @@
function(
object,
type = c("working", "response", "deviance", "pearson", "partial" ),
+ terms = NULL,
level = 1,
...
)
@@ -300,6 +303,7 @@
## 2011-10-13 A. Papritz
## 2011-12-14 AP modified for replicated observations
+ ## 2013-05-23 AP modified for computing partial residuals for single terms
type <- match.arg( type )
@@ -315,14 +319,15 @@
partial = r
)
+ res <- naresid(object$na.action, res)
+
if( level == 0 && any( type %in% c( "working", "response", "partial" ) ) ){
res <- res + ranef( object, standard = FALSE )[object$Tmat]
}
- res <- naresid(object$na.action, res)
if( type == "partial" )
- res <- res + predict( object, type = "terms" )$fit
- res
+ res <- res + predict( object, type = "terms", terms = terms )$fit
+ drop( res )
}
@@ -347,6 +352,7 @@
## 2012-01-05 AP modified for compress storage of matrices
## 2012-10-18 AP changes for new definition of eta
## 2013-04-23 AP new names for robustness weights
+ ## 2013-05-23 AP correct handling of missing observations
object <- model
object$Valpha.objects <- expand( object$Valpha.objects )
@@ -426,6 +432,8 @@
## compute standardized residuals
+ se <- naresid( model$na.action, se )
+
residuals( model, level = level ) / se
}
@@ -867,7 +875,14 @@
## deviance method for class georob
## 2012-12-22 A. Papritz
+ ## 2013-05-23 AP correct handling of missing observations
+ ## redefine na.action component of object
+
+ if( identical( class( object$na.action ), "exclude" ) ){
+ class( object$na.action ) <- "omit"
+ }
+
if( object[["tuning.psi"]] < georob.control()[["tuning.psi.nr"]] ){
result <- NA_real_
} else {
Added: pkg/R/georob.cv.R
===================================================================
--- pkg/R/georob.cv.R (rev 0)
+++ pkg/R/georob.cv.R 2013-05-23 15:41:57 UTC (rev 5)
@@ -0,0 +1,929 @@
+## ############################################################################
+
+cv <- function( object, ... ) UseMethod( "cv" )
+
+## ############################################################################
+
+cv.georob <-
+ function(
+ object,
+ formula = NULL, subset = NULL,
+ nset = 10, seed = NULL, sets = NULL,
+ duplicates.in.same.set = TRUE,
+ re.estimate = TRUE, param = object$param,
+ fit.param = object$initial.objects$fit.param,
+ return.fit = FALSE, reduced.output = TRUE,
+ lgn = FALSE,
+ ncores = min( nset, detectCores() ),
+ verbose = 0,
+ ...
+ )
+{
+
+ ## Function computes nset-fold cross-validation predictions from a
+ ## fitted georob object
+
+ ## Arguments:
+
+ ## object fitted georob object
+ ## formula a formula passed by update to georob
+ ## nset integer scalar for the number of cross-validation subsets
+ ## seed integer scalar passed to set.seed before selecting the
+ ## cross-validation subsets by a call to runif()
+ ## sets an integer vector with length nrow(data) defining the
+ ## cross-validation sets and over-riding the values provided
+ ## for nset and seed
+ ## duplicates.in.same.set logical flag controlling whether replicated observations
+ ## at a given location are assigned to the same cross-validation set
+ ## re.estimate logical flag controlling whether the variogram parameters should
+ ## be re-estimated for each cross-validation subset
+ ## param initial values of variogram parameters when the variogram is
+ ## re-estimated for each cross-validation subset
+ ## return.fit logical flag to control whether the information about the fit
+ ## should be returned for each cross-validation subset when re-estimating the
+ ## model
+ ## reduced.output logical flag controlling whether for each cross-validation subset
+ ## the full fitted object or just a selection (information about convergence,
+ ## variogram and fixed-effects parameter estimates) should be returned when
+ ## re-estimating the model
+ ## lgn logical flag controlling whether lognormal kriging predictions should be computed
+ ## ncores integer scalar with the number of cores to be used in parallel processing
+ ## verbose integer scalar, controlling verbosity of the information sent to standard output
+ ## ... further arguments passed by update to georob or to
+ ## mclapply on non-windows platforms
+
+ ## ToDos:
+
+ ## - Klasse und Methoden definieren fuer cv (kompatibel mit geoR)
+
+ ## History:
+
+ ## 2011-10-24 Korrektur Ausschluss von nichtbenoetigten Variablen fuer lognormal kriging
+ ## 2011-12-23 AP modified for replicated observations and for parallel computing
+ ## 2012-03-02 AP eliminated possibility for logging to file in parallel processing
+ ## 2012-03-19 AP correction of error in parallel processing on Windows
+ ## 2012-05-01 AP correct handling of NAs
+ ## 2012-05-04 AP modifications for lognormal block kriging
+ ## 2012-05-09 AP correction of error if a new formula is passed via update to georob
+ ## 2012-05-22 AP correction of error in passing param and fit.param to georob
+ ## 2012-06-05 AP correction of error in handling optional subset argument
+ ## 2012-11-04 AP handling compressed cov.betahat
+ ## 2012-12-04 AP modifiction for changes in predict.georob
+ ## 2013-04-24 AP changes for parallelization on windows os
+ ## 2013-05-23 AP correct handling of missing observations
+
+ ## auxiliary function that fits the model and computes the predictions of
+ ## a cross-validation set
+
+ f.aux <- function(
+ ..i.., object, formula, data, sets, re.estimate, param, fit.param, lgn, verbose, ...
+ ){ ## cv function
+
+ if (verbose) cat( "\n\n processing cross-validation set", ..i.., "\n" )
+
+ ## fit model to complement of current set
+
+ if( !re.estimate ){
+ fit.param <- c(
+ variance = FALSE, snugget = FALSE, nugget = FALSE, scale = FALSE,
+ a = FALSE, alpha = FALSE, beta = FALSE, delta = FALSE,
+ gamma = FALSE, lambda = FALSE, n = FALSE, nu = FALSE,
+ f1 = FALSE, f2 =FALSE, omega = FALSE, phi = FALSE, zeta = FALSE
+ )[names( param )]
+ }
+
+ ## change environment of terms and formula so that subset selection works for update
+
+ environment( formula ) <- environment()
+ environment( object$terms ) <- environment()
+
+ t.georob <- update(
+ object,
+ formula = formula,
+ data = data,
+ subset = -sets[[..i..]] ,
+ param = param,
+ fit.param = fit.param,
+ verbose = verbose,
+ ...
+ )
+
+ if( verbose > 0 ){
+ cat( "\n\n" )
+ print( summary( t.georob ) )
+ }
+
+ ## compute predictions for current set
+
+ t.predict <- predict(
+ t.georob, newdata = data[sets[[..i..]], ], type = "response",
+ mmax = length( sets[[..i..]] ),
+ extended.output = lgn,
+ ncores = 1
+ )
+
+ ## backtransformation for log-normal kriging
+
+ if( lgn ){
+ t.predict <- lgnpp( t.predict )
+ t.predict <- t.predict[, -match(
+ c( "trend", "var.pred", "cov.pred.target", "var.target" ), names( t.predict )
+ )]
+ }
+
+ t.predict <- data.frame( i = sets[[..i..]], t.predict )
+
+ t.ex <- c(
+ grep( "lower", colnames( t.predict ), fixed = TRUE ),
+ grep( "upper", colnames( t.predict ), fixed = TRUE )
+ )
+
+ t.predict <- t.predict[, -t.ex]
+
+ if( reduced.output ){
+
+ if( !is.null( t.georob$cov$cov.betahat ) ){
+ t.se.coef <- sqrt( diag( expand( t.georob$cov$cov.betahat ) ) )
+ } else {
+ t.se.coef <- NULL
+ }
+
+ t.georob <- t.georob[c(
+ "tuning.psi", "converged", "convergence.code",
+ "gradient", "param", "aniso",
+ "coefficients"
+ )]
+
+ t.georob$aniso <- t.georob$aniso$aniso
+
+ if( !is.null( t.se.coef ) ) t.georob$se.coefficients <- t.se.coef
+
+ }
+
+ return( list( pred = t.predict, fit = t.georob ) )
+ ## end cv function
+ }
+
+ ## redefine na.action component of object
+
+ if( identical( class( object$na.action ), "exclude" ) ){
+ class( object$na.action ) <- "omit"
+ }
+
+ ## update terms of object is formula is provided
+
+ if( !is.null( formula ) ){
+ formula <- update( formula( object ), formula )
+ object$terms <- terms( formula )
+ } else {
+ formula <- formula( object )
+ }
+
+ ## get data.frame with required variables (note that the data.frame passed
+ ## as data argument to georob must exist in GlobalEnv)
+
+ data <- cbind(
+ get_all_vars( formula( object ), eval( getCall(object)$data ) ),
+ get_all_vars( object$locations.objects$locations, eval( getCall(object)$data ) )
+ )
+
+ ## select subset if appropriate
+
+ if( !is.null( subset ) ){
+ data <- data[subset, ]
+ object$Tmat <- object$Tmat[subset]
+ } else if( !is.null( getCall(object)$subset ) ){
+ data <- data[eval( getCall(object)$subset ), ]
+ }
+
+# if( !is.null( getCall(object)$subset ) )
+
+ ## define cross-validation sets
+
+ if( is.null( sets ) ){
+
+ if( !is.null( seed ) ) set.seed( seed )
+ sets <- runif( NROW( data ) )
+ sets <- cut(
+ sets,
+ breaks = c( -0.1, quantile( sets, probs = ( 1:(nset-1)/nset ) ), 1.1 )
+ )
+ sets <- factor( as.integer( sets ) )
+
+ } else {
+
+ if( length( sets ) != NROW( data ) ) stop(
+ "sets must be an integer vector with length equal to the number of observations"
+ )
+
+ }
+
+ if( duplicates.in.same.set ){
+ dups <- duplicated( object$Tmat )
+ idups <- match( object$Tmat[dups], object$Tmat[!dups] )
+ sets[dups] <- (sets[!dups])[idups]
+ }
+
+ sets <- tapply(
+ 1:NROW( data ),
+ sets,
+ function( x ) x
+ )
+
+ ## loop over all cross-validation sets
+
+ if( .Platform$OS.type == "windows" ){
+
+ ## create a SNOW cluster on windows OS
+
+ cl <- makePSOCKcluster( ncores, outfile = "")
+
+ ## export required items to workers
+
+ junk <- clusterEvalQ( cl, require( georob, quietly = TRUE ) )
+
+ t.result <- parLapply(
+ cl,
+ 1:length( sets ),
+ f.aux,
+ object = object,
+ formula = formula,
+ data = data,
+ sets = sets,
+ re.estimate = re.estimate,
+ param = param,
+ fit.param = fit.param,
+ lgn = lgn,
+ verbose = verbose,
+ ...
+ )
+
+ stopCluster(cl)
+
+ } else {
+
+ ## fork child processes on non-windows OS
+
+ t.result <- mclapply(
+ 1:length( sets ),
+ f.aux,
+ object = object,
+ formula = formula,
+ data = data,
+ sets = sets,
+ re.estimate = re.estimate,
+ param = param,
+ fit.param = fit.param,
+ lgn = lgn,
+ verbose = verbose,
+ mc.cores = ncores,
+ mc.allow.recursive = FALSE,
+ ...
+ )
+
+ }
+
+ ## create single data frame with cross-validation results
+
+ result <- t.result[[1]]$pred
+ result$subset <- rep( 1, nrow( t.result[[1]]$pred ) )
+
+ for( t.i in 2:length( t.result ) ) {
+ result <- rbind(
+ result,
+ data.frame(
+ t.result[[t.i]]$pred,
+ subset = rep( t.i, nrow( t.result[[t.i]]$pred ) )
+ )
+ )
+ }
+ t.ix <- sort( result$i, index.return = T )$ix
+ result <- result[t.ix, ]
+ result$data <- model.response(
+ model.frame( formula( object), data, na.action = na.pass )
+ )
+
+ if( lgn ) result$lgn.data <- exp( result$data )
+
+ result <- result[, -match("i", colnames( result) )]
+
+ isubset <- match( "subset", colnames( result ) )
+ idata <- grep( "data", colnames( result ), fixed = TRUE )
+ ipred<- grep( "pred", colnames( result ), fixed = TRUE )
+ ise <- grep( "se", colnames( result ), fixed = TRUE )
+ ise <- ise[ise != isubset]
+
+ result <- cbind(
+ result[, -c(isubset, idata, ipred, ise)],
+ result[, c(isubset, idata, ipred, ise)]
+ )
+
+ t.fit <- lapply( t.result, function( x ) return( x$fit ) )
+
+ if( re.estimate && !all( sapply( t.fit, function(x) x$converged ) ) )
+ warning(
+ "lack of convergence when fitting model to cross-validation sets"
+ )
+
+ result <- list(
+ pred = result,
+ fit = if( return.fit ) t.fit else NULL
+ )
+
+ class( result ) <- "cv.georob"
+
+ invisible( result )
+
+}
+
+## ###########################################################################
+
+plot.cv.georob <-
+ function(
+ x, type = c( "sc", "lgn.sc", "ta", "qq", "pit", "mc", "bs" ),
+ ncutoff = NULL,
+ add = FALSE,
+ col, pch, lty,
+ main, xlab, ylab,
+ ...
+ )
+{
+
+ ## plot method for class "cv.georob"
+
+ ## 2011-12-21 A. Papritz
+
+ x <- x$pred
+
+ type <- match.arg( type )
+
+ if( type == "lgn.sc" && !"lgn.pred" %in% names( x ) ) stop(
+ "lognormal kriging results missing, use 'lgn = TRUE' for cross-validation"
+ )
+
+ if( type %in% c( "pit", "mc", "bs" ) ){
+
+ result <- validate.predictions(
+ data = x$data,
+ pred = x$pred,
+ se.pred = x$se,
+ statistic = type, ncutoff = ncutoff
+ )
+
+ }
+
+ if( missing( col ) ) col <- 1
+ if( missing( pch ) ) pch <- 1
+ if( missing( lty ) ) lty <- 1
+
+
+
+ switch(
+ type,
+ sc = {
+
+ ## scatterplot of (transformed) measurements vs. predictions
+
+ if( missing( main ) ) main <- "data vs. predictions"
+ if( missing( xlab ) ) xlab <- "predictions"
+ if( missing( ylab ) ) ylab <- "data"
+
+ if( add ){
+ points( data ~ pred, x, col = col, pch = pch, ... )
+ } else {
+ plot(
+ data ~ pred, x, col = col, pch = pch,
+ main = main, xlab = xlab, ylab = ylab, ...
+ )
+ }
+
+
+ },
+ lgn.sc = {
+
+ ## scatterplot of original measurements vs. back-transformed
+ ## lognormal predictions
+
+ if( missing( main ) ) main <- "data vs. back-transformed predictions"
+ if( missing( xlab ) ) xlab <- "back-transformed predictions"
+ if( missing( ylab ) ) ylab <- "data"
+
+ if( add ){
+ points( lgn.data ~ lgn.pred, x, col = col, pch = pch, ... )
+ } else {
+ plot(
+ lgn.data ~ lgn.pred, x, col = col, pch = pch,
+ main = main, xlab = xlab, ylab = ylab, ...
+ )
+ }
+
+
+
+ },
+ ta = {
+
+ ## Tukey-Anscombe plot
+
+ if( missing( main ) ) main <- "Tukey-Anscombe plot"
+ if( missing( xlab ) ) xlab <- "predictions"
+ if( missing( ylab ) ) ylab <- "standardized prediction errors"
+
+ if( add ){
+ points( I((data-pred)/se) ~ pred, x, col = col, pch = pch, ... )
+ } else {
+ plot(
+ I((data-pred)/se) ~ pred, x, col = col, pch = pch,
+ main = main, xlab = xlab, ylab = ylab, ...
+ )
+ }
+
+ },
+ qq = {
+
+ ## normal QQ-Plot of standardized prediction errors
+
+ if( missing( main ) ) main <- "normal-QQ-plot of standardized prediction errors"
+ if( missing( xlab ) ) xlab <- "quantile N(0,1)"
+ if( missing( ylab ) ) ylab <- "quantiles of standardized prediction errors"
+
+ r.qq <- with( x, qqnorm( ( data - pred ) / se, plot.it = FALSE ) )
+
+ if( add ){
+ points( r.qq, col = col, pch = pch, ... )
+ } else {
+ plot( r.qq, col = col, pch = pch, main = main, xlab = xlab, ylab = ylab, ... )
+ }
+ },
+ pit = {
+
+ ## histogram of probability-integral-transformation values
+
+ if( missing( main ) ) main <- "histogram of PIT-values"
+ if( missing( xlab ) ) xlab <- "PIT"
+ if( missing( ylab ) ) ylab <- "density"
+
+ r.hist <- hist(
+ result,
+ col = col, lty = lty,
+ main = main, xlab = xlab, ylab = ylab, freq = FALSE, ... )
+ },
+ mc = {
+
+ ## marginal calibration plots: ecdf of measurements and mean
+ ## predictive cdf
+
+ if( missing( main ) ) main <- "empirical cdf of data and mean predictive cdfs"
+ if( missing( xlab ) ) xlab <- "data or predictions"
+ if( missing( ylab ) ) ylab <- "probability"
+
+ matplot(
+ result$y,
+ result[, c( "ghat", "fbar" )], type = "l",
+ col = c( "black", "red" ),
+ lty = c( "solid", "dashed" ),
+ main = main, xlab = xlab, ylab = ylab,
+ ...
+ )
+
+ t.usr <- par( "usr" )
+ t.usr[3:4] <- with( result, range( fbar - ghat ) ) *1.04
+ par( usr = t.usr )
+ with( result, lines( y, fbar-ghat, col= "blue", lty = "dotted" ) )
+ axis(2, pos = t.usr[2], col.axis = "blue", col.ticks = "blue" )
+ legend(
+ "topleft",
+ lty = c("solid", "dashed", "dotted" ),
+ col = c( "black", "red", "blue" ), bty = "n", cex = 1,
+ legend = c(
+ expression( paste( "empirical cdf ", hat(G) ) ),
+ expression( paste( "mean predictive cdf ", bar(F) ) ),
+ expression( bar(F)-hat(G) )
+ )
+ )
+ },
+ bs ={
+
+ # plot of brier score vs. cutoff
+
+ if( missing( main ) ) main <- "Brier score vs. cutoff"
+ if( missing( xlab ) ) xlab <- "cutoff"
+ if( missing( ylab ) ) ylab <- "Brier score"
+
+ if( add ){
+ lines( result$y, result$bs, col = col, lty = lty, ... )
+ } else {
+ plot( result$y, result$bs, type = "l", col = col, lty = lty,
+ main = main, xlab = xlab, ylab = ylab, ...
+ )
+ }
+ }
+ )
+ invisible( NULL )
+}
+
+
+
+## ###########################################################################
+
+print.cv.georob <-
+ function(
+ x, digits = max(3, getOption("digits") - 3), ...
+ )
+{ ## print method for class "cv.georob"; returns its argument invisibly
+
+ ## 2011-10-13 A. Papritz
+ ## 2012-12-18 AP invisible(x)
+
+ pred <- x$pred
+
+ st <- validate.predictions(
+ data = pred$data,
+ pred = pred$pred,
+ se.pred = pred$se,
+ statistic = "st",
+ ...
+ )
+
+ print(
+ format( st, digits = digits ), print.gap = 2,
+ quote = FALSE
+ )
+
+ invisible( x )
+}
+
+
+## ###########################################################################
+
+summary.cv.georob <-
+ function( object, se = FALSE, ... )
+{
+
+ ## summary method for class "cv.georob"
+
+ ## function computes statistics of the cross-validation errors
+
+ ## 2011-10-13 A. Papritz
+ ## 2012-05-21 ap
+
+
+ object <- object$pred
+
+ bs <- validate.predictions(
+ data = object$data,
+ pred = object$pred,
+ se.pred = object$se,
+ ncutoff = length( object$data ),
+ statistic = "bs"
+ )
+
+ t.d <- diff( bs$y )
+ crps <- sum( bs$bs * 0.5 * ( c( 0., t.d ) + c( t.d, 0. ) ) )
+
+ st <- validate.predictions(
+ data = object$data,
+ pred = object$pred,
+ se.pred = object$se,
+ statistic = "st"
+ )
+
+ if( !is.null( object$lgn.pred ) ){
+ st.lgn <- validate.predictions(
+ data = object$lgn.data,
+ pred = object$lgn.pred,
+ se.pred = object$lgn.se,
+ statistic = "st"
+ )
+ } else {
+ st.lgn <- NULL
+ }
+
+ ## collect results
+
+ result <- list( st = st, crps = crps, st.lgn = st.lgn )
+
+ ## compute standard errors of criteria across cross-validation sets
+
+ if( se && !is.null( object$subset ) ){
+
+ criteria <- t( sapply(
+ tapply(
+ 1:nrow( object ),
+ factor( object$subset ),
+ function( i, data, pred, se.pred, lgn.data, lgn.pred, lgn.se.pred ){
+
+ bs <- validate.predictions(
+ data = data[i],
+ pred = pred[i],
+ se.pred = se.pred[i],
+ ncutoff = length( data[i] ),
+ statistic = "bs"
+ )
+
+ t.d <- diff( bs$y )
+ crps <- c( crps = sum( bs$bs * 0.5 * ( c( 0., t.d ) + c( t.d, 0. ) ) ) )
+
+ st <- validate.predictions(
+ data = data[i],
+ pred = pred[i],
+ se.pred = se.pred[i],
+ statistic = "st"
+ )
+
+ if( !is.null( lgn.pred ) ){
+ st.lgn <- validate.predictions(
+ data = lgn.data[i],
+ pred = lgn.pred[i],
+ se.pred = lgn.se.pred[i],
+ statistic = "st"
+ )
+ names( st.lgn ) <- paste( names( st.lgn ), "lgn", sep = "." )
+ } else {
+ st.lgn <- NULL
+ }
+
+
+ return( c( st, st.lgn, crps ) )
+
+ },
+ data = object$data,
+ pred = object$pred,
+ se.pred = object$se,
+ lgn.data = object$lgn.data,
+ lgn.pred = object$lgn.pred,
+ lgn.se.pred = object$lgn.se
+ ),
+ function( x ) x
+ ))
+
+ se.criteria <- apply(
+ criteria, 2,
+ function( x ) sd( x ) / sqrt( length( x ) )
+ )
+
+ result$se.st <- se.criteria[c( "me", "mede", "rmse", "made", "qne", "msse", "medsse")]
+ result$se.crps <- se.criteria["crps"]
+ if( !is.null( st.lgn ) ){
+ result$se.st.lgn <- se.criteria[
+ c( "me.lgn", "mede.lgn", "rmse.lgn", "made.lgn", "qne.lgn", "msse.lgn", "medsse.lgn")
+ ]
+ names( result$se.st.lgn ) <- gsub( ".lgn", "", names( result$se.st.lgn ) )
+ }
+
+ }
+
+ class( result ) <- "summary.cv.georob"
+
+
+ return( result )
+
+}
+
+## ###########################################################################
+
+print.summary.cv.georob <-
+ function(
+ x, digits = max(3, getOption("digits") - 3), ...
+ )
+{
+
+ ## print method for class "summary.cv.georob"
+
+ ## 2011-12-20 A. Papritz
+ ## 2012-05-21 ap
+ ## 2012-12-18 AP invisible(x)
+
+
+ result <- c( x$st, crps = x$crps )
+ if( !is.null( x$se.st ) ){
+ result <- rbind( result, c( x$se.st, crps = x$se.crps ) )
+ rownames( result ) <- c( "", "se" )
+ }
+
+ cat( "\nStatistics of cross-validation prediction errors\n" )
+ print(
+ format( result, digits = digits ), print.gap = 2,
+ quote = FALSE
+ )
+
+ if( !is.null( x$st.lgn ) ){
+ result <- x$st.lgn
+ if( !is.null( x$se.st.lgn ) ){
+ result <- rbind( x$st.lgn, x$se.st.lgn )
+ rownames( result ) <- c( "", "se" )
+ }
+
+ cat( "\nStatistics of back-transformed cross-validation prediction errors\n" )
+ print(
+ format( result, digits = digits ), print.gap = 2,
+ quote = FALSE
+ )
+ }
+
+ invisible( x )
+
+}
+
+## ###########################################################################
+
+rstudent.cv.georob <-
+ function( model, ... )
+{
+
+ ## Function extracts studentized residuals from cv.georob object
+
+ ## Arguments:
+
+ ## model cv.georob object
+ ## ... further arguments (currently not used)
+
+ ## 2011-10-13 A. Papritz
+
+ if( !identical( class( model )[1], "cv.georob" ) ) stop(
+ "model is not of class 'cv.georob'"
+ )
+
+ model <- model$pred
+
+ ( model$data - model$pred ) / model$se
+
+}
+
+# ## ###########################################################################
+#
+# cv.variomodel <-
+# function( object, geodata, ... )
+# {
+#
+# ## Wrapper function for cross-validation of object of class variomodel{geoR}
+# ## by function xvalid{geoR}
+#
+# ## Arguments:
+#
+# ## model an object of class "variomodel{geoR}
+# ## ... further arguements passed to xvalid{geoR), cf. respective help page
+#
+# ## 2012-11-22 A. Papritz
+#
+# call.fc <- match.call()
+#
+# res <- geoR::xvalid( model = object, ... )
+#
+# if( !is.null( attr( res, "geodata.xvalid" ) ) ){
+# attr( res, "geodata.xvalid" ) <- call.fc$geodata
+# }
+# if( !is.null( attr( res, "locations.xvalid" ) ) ){
+# attr( res, "locations.xvalid" ) <- call.fc$locations.xvalid
+# }
+#
+# return(res)
+#
+# }
+#
+# cv.likGRF <-
+# function( object, geodata, ... )
+# {
+#
+# ## Wrapper function for cross-validation of object of class variomodel{geoR}
+# ## by function xvalid{geoR}
+#
+# ## Arguments:
+#
+# ## model an object of class "likGRF{geoR}
+# ## ... further arguements passed to xvalid{geoR), cf. respective help page
+#
+# ## 2012-11-22 A. Papritz
+#
+# call.fc <- match.call()
+#
+# res <- geoR::xvalid( model = object, geodata = geodata, ... )
+#
+# if( !is.null( attr( res, "geodata.xvalid" ) ) ){
+# attr( res, "geodata.xvalid" ) <- call.fc$geodata
+# }
+# if( !is.null( attr( res, "locations.xvalid" ) ) ){
+# attr( res, "locations.xvalid" ) <- call.fc$locations.xvalid
+# }
+#
+# return(res)
+#
+# }
+
+## ======================================================================
+validate.predictions <-
+ function(
+ data,
+ pred,
+ se.pred,
+ statistic = c( "pit", "mc", "bs", "st" ),
+ ncutoff = NULL
+ )
+{
+
+ ## function computes several statistics to validate probabilistic
+ ## predictions, cf. Gneiting et al., 2007, JRSSB
+
+ ## Arguments:
+
+ ## data numeric vector with data
+ ## pred numeric vector with predictions
+ ## se.pred numeric vector with prediction standard errors
+ ## statistic character scalar specifying the statistic to compute
+ ## possible values are:
+ ## "pit" probability integral transformation
+ ## "mc" empirical cdf of data and mean predictive
+ ## normal distribution
+ ## "bs" Brier score
+ ## "st" basic statistics such as mean error,
+ ## mean squared error, mean squared standardized error
+ ## and robust equivalents
+ ## ncutoff number of quantiles (statistic == "mc") or number of cutoffs (statistic = "bs")
+
+ # 2011-20-21 A. Papritz
+ # 2012-05-04 AP coping with NAs
+
+ statistic <- match.arg( statistic )
+
+ ## exclude items with NAs
+
+ t.sel <- complete.cases( data, pred, se.pred )
+
+ if( sum( t.sel ) < length( t.sel ) ) warning(
+ "missing values encountered when validating predictions"
+ )
+
+ data <- data[t.sel]
+ pred <- pred[t.sel]
+ se.pred <- se.pred[t.sel]
+
+ if( missing( ncutoff ) || is.null( ncutoff ) ) ncutoff <- min( 500, length( data ) )
+
+ result <- switch(
+ statistic,
+ pit = {
+
+ ## probability integral transformation
+
+ pnorm( data, mean = pred, sd = se.pred )
+ },
+ mc = ,
+ bs = {
+
+ ## marginal calibration and brier score
+
+ margin.calib <- data.frame(
+ y = t.x <- unique( t.y <- sort( c( data ) ) ),
+ ghat = cumsum( tabulate( match( t.y, t.x ) ) ) / length(t.y)
+ )
+ t.sel <- trunc(
+ seq(
+ from = as.integer(1),
+ to = nrow( margin.calib ),
+ length.out = min( nrow( margin.calib ), ncutoff )
+ )
+ )
+ margin.calib <- margin.calib[t.sel,]
+
+ t.bla <- t(
+ sapply(
+ margin.calib$y,
+ function( q, m, s, y ){
+ t.p <- pnorm( q, mean = m, sd = s )
+ c(
+ fbar = mean( t.p ),
+ bs = mean( ( t.p - as.numeric( y <= q ) )^2 )
+ )
+ },
+ m = pred,
+ s = se.pred,
+ y = data
+ )
+ )
+ cbind(
+ margin.calib, as.data.frame( t.bla )
+ )
+ },
+ st = {
+
+ ## statistics of (standardized) prediction errors
+
+ error <- data - pred
+ std.error <- error / se.pred
+
+ statistics <- c(
+ me = mean( error ),
+ mede = median( error ),
+ rmse = sqrt( mean( error^2 ) ),
+ made = mad( error, center = 0 ),
+ qne = Qn( error, finite.corr = TRUE ),
+ msse = mean( std.error^2 ),
+ medsse = median( std.error^2 )
+ )
+
+ }
+ )
+
+ return( result )
+
+}
+
Property changes on: pkg/R/georob.cv.R
___________________________________________________________________
Added: svn:executable
+ *
Modified: pkg/R/georob.exported.functions.R
===================================================================
--- pkg/R/georob.exported.functions.R 2013-05-14 15:58:21 UTC (rev 4)
+++ pkg/R/georob.exported.functions.R 2013-05-23 15:41:57 UTC (rev 5)
@@ -75,6 +75,7 @@
## 2012-05-07 AP correction of error for constant trend
## 2012-05-28 AP handle missing names of coefficients after calling update
## 2013-04-23 AP new names for robustness weights
+ ## 2013-05-23 AP correct handling of missing observations and to construct model.frame
## check whether input is complete
@@ -87,13 +88,23 @@
ret.x <- x
ret.y <- y
- ## vector with row number of included observations
+# ## vector with row number of included observations
+#
+# in.subset <- 1:NROW( data )
+# if( !missing( subset ) ) in.subset <- in.subset[subset]
- in.subset <- 1:NROW( data )
- if( !missing( subset ) ) in.subset <- in.subset[subset]
+ ## build combined formula for fixed effects and locations
- ## start setting-up model frames
+ extended.formula <- update(
+ formula,
+ paste(
+ paste( as.character( formula )[c(2, 1, 3)], collapse = " " ),
+ as.character( locations )[2], sep = " + "
+ )
+ )
+ ## setting-up model frame
+
cl <- match.call()
mf <- match.call( expand.dots = FALSE )
m <- match(
@@ -101,48 +112,25 @@
names(mf), 0L
)
mf <- mf[c(1L, m)]
+ mf$formula <- extended.formula
mf$drop.unused.levels <- TRUE
mf[[1L]] <- as.name( "model.frame" )
- ## spot missing observations for fixed effects
+ mf <- eval( mf, parent.frame() )
- mf.try <- eval( mf, parent.frame() )
- if( !is.null( attr( mf.try, "na.action" ) ) ){
- ex.mf <- unname( unclass( attr( mf.try, "na.action" ) ) )
- } else ex.mf <- integer(0)
- ## if( verbose > 3 ) cat( "ex.mf :", ex.mf, "\n" )
+ ## eliminate intercept from locations
- ## spot missing observations for locations
+ locations <- as.formula( paste( deparse( locations ), "-1" ), env = parent.frame() )
- locations <-
- as.formula( paste( deparse( locations ), "-1" ), env = parent.frame() )
- mf.loc <- mf
- mf.loc$formula <- locations
- mf.loc.try <- eval( mf.loc, parent.frame() )
- if( !is.null( attr( mf.loc.try, "na.action" ) ) ){
- ex.mf.loc <- unname( unclass( attr( mf.loc.try, "na.action" ) ) )
- } else ex.mf.loc <- integer( 0 )
- ## if( verbose > 3 ) cat( "ex.mf.loc:", ex.mf.loc, "\n" )
+ ## setting-up terms objects
- ## missing observations either for fixed effects or locations
+ mt <- terms( formula )
+ mt.loc <- terms( locations )
+
+ ## ... and assign fixed effects terms object as attribute to model.frame
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/georob -r 5
More information about the Georob-commits
mailing list