From noreply at r-forge.r-project.org Tue May 14 17:58:21 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 14 May 2013 17:58:21 +0200 (CEST) Subject: [Georob-commits] r4 - in pkg: . R Message-ID: <20130514155821.C81D0184F7B@r-forge.r-project.org> Author: papritz Date: 2013-05-14 17:58:21 +0200 (Tue, 14 May 2013) New Revision: 4 Modified: pkg/ChangeLog pkg/DESCRIPTION pkg/NAMESPACE pkg/R/variogram.R Log: attaching instead of importing functions of package parallel correction error in handling optional arguments in function plot.sample.variogram M pkg/R/variogram.R M pkg/DESCRIPTION M pkg/ChangeLog M pkg/NAMESPACE Modified: pkg/ChangeLog =================================================================== --- pkg/ChangeLog 2013-04-29 15:27:19 UTC (rev 3) +++ pkg/ChangeLog 2013-05-14 15:58:21 UTC (rev 4) @@ -49,3 +49,16 @@ * georob_example.R: correction of argument names in examples + +2013-05-12 Andreas Papritz + +* variogram.R (plot.sample.variogram): correction of handling optional arguments + + +2013-05-14 Andreas Papritz + +* DESCRIPTION: attaching instead of importing functionS of package parallel +* NAMESPACE: attaching instead of importing functionS of package parallel + + + Modified: pkg/DESCRIPTION =================================================================== --- pkg/DESCRIPTION 2013-04-29 15:27:19 UTC (rev 3) +++ pkg/DESCRIPTION 2013-05-14 15:58:21 UTC (rev 4) @@ -8,8 +8,8 @@ email = "andreas.papritz at env.ethz.ch" ), person( "Cornelia", "Schwierz", role = "ctb" )) Depends: R(>= 2.14.0), lmtest, nlme, - robustbase, sp(>= 0.9-60) -Imports: constrainedKriging(>= 0.1-9), nleqslv, parallel, quantreg, + robustbase, sp(>= 0.9-60), parallel +Imports: constrainedKriging(>= 0.1-9), nleqslv, quantreg, RandomFields(>= 2.0.55), spatialCovariance(>= 0.6-4) Suggests: geoR Description: The georob package provides functions for fitting linear models Modified: pkg/NAMESPACE =================================================================== --- pkg/NAMESPACE 2013-04-29 15:27:19 UTC (rev 3) +++ pkg/NAMESPACE 2013-05-14 15:58:21 UTC (rev 4) @@ -1,4 +1,4 @@ -import( parallel, stats ) +import( stats ) importFrom( constrainedKriging, covmodel, f.point.block.cov, K, preCKrige ) importFrom( lmtest, waldtest, waldtest.default ) Modified: pkg/R/variogram.R =================================================================== --- pkg/R/variogram.R 2013-04-29 15:27:19 UTC (rev 3) +++ pkg/R/variogram.R 2013-05-14 15:58:21 UTC (rev 4) @@ -367,10 +367,11 @@ ## 2012-12-12 A. Papritz ## 2012-12-21 AP correction for using col and pch + ## 2913-05-12 AP correction for using ... if( !add ) plot( gamma ~ lag.dist, x, type = "n", - xlim = xlim, ylim = ylim, xlab = xlab, ylab = ylab + xlim = xlim, ylim = ylim, xlab = xlab, ylab = ylab, ... ) if( missing( col ) ){ From noreply at r-forge.r-project.org Thu May 23 17:41:57 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Thu, 23 May 2013 17:41:57 +0200 (CEST) Subject: [Georob-commits] r5 - in pkg: . R man Message-ID: <20130523154157.DD106184893@r-forge.r-project.org> Author: papritz Date: 2013-05-23 17:41:57 +0200 (Thu, 23 May 2013) New Revision: 5 Added: pkg/R/georob.cv.R Removed: pkg/R/georob.xvalid.R Modified: pkg/ChangeLog pkg/R/georob.S3methods.R pkg/R/georob.exported.functions.R pkg/R/georob.predict.R pkg/R/variogram.R pkg/man/S3methods.georob.Rd pkg/man/georob.Rd Log: korrekturen fuer korrektes handling von fehlwerten Modified: pkg/ChangeLog =================================================================== --- pkg/ChangeLog 2013-05-14 15:58:21 UTC (rev 4) +++ pkg/ChangeLog 2013-05-23 15:41:57 UTC (rev 5) @@ -1,6 +1,6 @@ 2012-12-18 Andreas Papritz -* georob.xvalid.R (print.cv.georob, print.summary.cv.georob): return invisible(x) +* georob.cv.R (print.cv.georob, print.summary.cv.georob): return invisible(x) * georob.S3methods.R (print.georob, print.summary.georob): return invisible(x) * variogram.R (print.summary.sample.variogram, print.fitted.variogram) (print.summary.fitted.variogram): return invisible(x) @@ -42,7 +42,7 @@ * georob.predict.R (predict.georob): new names for robustness weights * georob.private.functions.R (compute.covariances, update.betahat.bhat, estimate.betahat.bhat, compute.estimating equations, georob.fit): new names for robustness weights * georob.S3methods.R (ranef.georob, rstandard.georob, summary.georob, print.summary.georob): new names for robustness weights -* georob.xvalid.R (cv.georob): changes for parallelization on windows os +* georob.cv.R (cv.georob): changes for parallelization on windows os 2013-04-29 Andreas Papritz @@ -62,3 +62,14 @@ +2013-05-23 Andreas Papritz + +* georob.cv.R (cv.georob): correct handling of missing observations +* georob.exported.functions.R (georob): improved way to handle missing observations and to construct model.frame +* georob.predict.R (predict.georob): correct handling of missing observations +* georob.S3methods.R (georob.residuals): new argument "terms" +* georob.S3methods.R (ranef.georob, rstandard.georob,deviance.georob): correct handling of missing observations +* variogram.R (plot.georob): correct handling of missing observations + + + Modified: pkg/R/georob.S3methods.R =================================================================== --- pkg/R/georob.S3methods.R 2013-05-14 15:58:21 UTC (rev 4) +++ pkg/R/georob.S3methods.R 2013-05-23 15:41:57 UTC (rev 5) @@ -169,6 +169,7 @@ ## 2012-10-18 AP changes for new definition of eta ## 2012-11-26 AP method for random.effects ## 2013-04-23 AP new names for robustness weights + ## 2013-05-23 AP correct handling of missing observations object$Valpha.objects <- expand( object$Valpha.objects ) object$cov <- expand( object$cov ) @@ -247,7 +248,8 @@ } - return( bhat ) + bhat <- naresid( object$na.action, bhat ) + return( bhat ) } @@ -282,6 +284,7 @@ function( object, type = c("working", "response", "deviance", "pearson", "partial" ), + terms = NULL, level = 1, ... ) @@ -300,6 +303,7 @@ ## 2011-10-13 A. Papritz ## 2011-12-14 AP modified for replicated observations + ## 2013-05-23 AP modified for computing partial residuals for single terms type <- match.arg( type ) @@ -315,14 +319,15 @@ partial = r ) + res <- naresid(object$na.action, res) + if( level == 0 && any( type %in% c( "working", "response", "partial" ) ) ){ res <- res + ranef( object, standard = FALSE )[object$Tmat] } - res <- naresid(object$na.action, res) if( type == "partial" ) - res <- res + predict( object, type = "terms" )$fit - res + res <- res + predict( object, type = "terms", terms = terms )$fit + drop( res ) } @@ -347,6 +352,7 @@ ## 2012-01-05 AP modified for compress storage of matrices ## 2012-10-18 AP changes for new definition of eta ## 2013-04-23 AP new names for robustness weights + ## 2013-05-23 AP correct handling of missing observations object <- model object$Valpha.objects <- expand( object$Valpha.objects ) @@ -426,6 +432,8 @@ ## compute standardized residuals + se <- naresid( model$na.action, se ) + residuals( model, level = level ) / se } @@ -867,7 +875,14 @@ ## deviance method for class georob ## 2012-12-22 A. Papritz + ## 2013-05-23 AP correct handling of missing observations + ## redefine na.action component of object + + if( identical( class( object$na.action ), "exclude" ) ){ + class( object$na.action ) <- "omit" + } + if( object[["tuning.psi"]] < georob.control()[["tuning.psi.nr"]] ){ result <- NA_real_ } else { Added: pkg/R/georob.cv.R =================================================================== --- pkg/R/georob.cv.R (rev 0) +++ pkg/R/georob.cv.R 2013-05-23 15:41:57 UTC (rev 5) @@ -0,0 +1,929 @@ +## ############################################################################ + +cv <- function( object, ... ) UseMethod( "cv" ) + +## ############################################################################ + +cv.georob <- + function( + object, + formula = NULL, subset = NULL, + nset = 10, seed = NULL, sets = NULL, + duplicates.in.same.set = TRUE, + re.estimate = TRUE, param = object$param, + fit.param = object$initial.objects$fit.param, + return.fit = FALSE, reduced.output = TRUE, + lgn = FALSE, + ncores = min( nset, detectCores() ), + verbose = 0, + ... + ) +{ + + ## Function computes nset-fold cross-validation predictions from a + ## fitted georob object + + ## Arguments: + + ## object fitted georob object + ## formula a formula passed by update to georob + ## nset integer scalar for the number of cross-validation subsets + ## seed integer scalar passed to set.seed before selecting the + ## cross-valdation subsets by a call to runif() + ## sets an integer vector with length nrow(data) defining the + ## cross-validation sets and over-riding the values provided + ## for nset and seed + ## duplicates.in.same.set logical flag controlling whether replicated observations + ## at a given location are assigned to the same cross-validation set + ## re.estimate logical flag controlling whether the variogram parameters should + ## be re-estimated for each cross-validation subset + ## param initial values of variogram parameters when the variogram is + ## re-estimated for each cross-validation subset + ## return.fit logical flag to control whether the information about the fit are + ## should be returned for each cross-valdiation subset when re-estimating the + ## model + ## reduced.output logical flag controlling whether for each cross-valdiation subset the + ## the full fitted object or just a selection (information about convergence, + ## variogram and fixed-effects parameter estimates) should be returned when + ## re-estimating the model + ## lgn logical flag controlling whether lognormal kriging predictions should be computed + ## ncores integer scalar with the number of cores to used in parallel processing + ## verbose integer scalar, controlling verbosity of the information sent to standard output + ## ... further arguments passed by update to georob or to + ## mclapply on non-windows platforms + + ## ToDos: + + ## - Klasse und Methoden definieren fuer cv (kompatibel mit geoR) + + ## History: + + ## 2011-10-24 Korrektur Ausschluss von nichtbenoetigten Variablen fuer lognormal kriging + ## 2011-12-23 AP modified for replicated observations and for parallel computing + ## 2012-03-02 AP eliminated possibility for logging to file in parallel processing + ## 2012-03-19 AP correction of error in parallel processing on Windows + ## 2012-05-01 AP correct handling of NAs + ## 2012-05-04 AP modifications for lognormal block kriging + ## 2012-05-09 AP correction of error if a new formula is passed via update to georob + ## 2012-05-22 AP correction of error in passing param and fit.param to georob + ## 2012-06-05 AP correction of error in handling optional subset argument + ## 2012-11-04 AP handling compressed cov.betahat + ## 2012-12-04 AP modifiction for changes in predict.georob + ## 2013-04-24 AP changes for parallelization on windows os + ## 2013-05-23 AP correct handling of missing observations + + ## auxiliary function that fits the model and computes the predictions of + ## a cross-validation set + + f.aux <- function( + ..i.., object, formula, data, sets, re.estimate, param, fit.param, lgn, verbose, ... + ){ ## cv function + + if (verbose) cat( "\n\n processing cross-validation set", ..i.., "\n" ) + + ## fit model to complement of current set + + if( !re.estimate ){ + fit.param <- c( + variance = FALSE, snugget = FALSE, nugget = FALSE, scale = FALSE, + a = FALSE, alpha = FALSE, beta = FALSE, delta = FALSE, + gamma = FALSE, lambda = FALSE, n = FALSE, nu = FALSE, + f1 = FALSE, f2 =FALSE, omega = FALSE, phi = FALSE, zeta = FALSE + )[names( param )] + } + + ## change environment of terms and formula so that subset selection works for update + + environment( formula ) <- environment() + environment( object$terms ) <- environment() + + t.georob <- update( + object, + formula = formula, + data = data, + subset = -sets[[..i..]] , + param = param, + fit.param = fit.param, + verbose = verbose, + ... + ) + + if( verbose > 0 ){ + cat( "\n\n" ) + print( summary( t.georob ) ) + } + + ## compute predictions for current set + + t.predict <- predict( + t.georob, newdata = data[sets[[..i..]], ], type = "response", + mmax = length( sets[[..i..]] ), + extended.output = lgn, + ncores = 1 + ) + + ## backtransformation for log-normal kriging + + if( lgn ){ + t.predict <- lgnpp( t.predict ) + t.predict <- t.predict[, -match( + c( "trend", "var.pred", "cov.pred.target", "var.target" ), names( t.predict ) + )] + } + + t.predict <- data.frame( i = sets[[..i..]], t.predict ) + + t.ex <- c( + grep( "lower", colnames( t.predict ), fixed = TRUE ), + grep( "upper", colnames( t.predict ), fixed = TRUE ) + ) + + t.predict <- t.predict[, -t.ex] + + if( reduced.output ){ + + if( !is.null( t.georob$cov$cov.betahat ) ){ + t.se.coef <- sqrt( diag( expand( t.georob$cov$cov.betahat ) ) ) + } else { + t.se.coef <- NULL + } + + t.georob <- t.georob[c( + "tuning.psi", "converged", "convergence.code", + "gradient", "param", "aniso", + "coefficients" + )] + + t.georob$aniso <- t.georob$aniso$aniso + + if( !is.null( t.se.coef ) ) t.georob$se.coefficients <- t.se.coef + + } + + return( list( pred = t.predict, fit = t.georob ) ) + ## end cv function + } + + ## redefine na.action component of object + + if( identical( class( object$na.action ), "exclude" ) ){ + class( object$na.action ) <- "omit" + } + + ## update terms of object is formula is provided + + if( !is.null( formula ) ){ + formula <- update( formula( object ), formula ) + object$terms <- terms( formula ) + } else { + formula <- formula( object ) + } + + ## get data.frame with required variables (note that the data.frame passed + ## as data argument to georob must exist in GlobalEnv) + + data <- cbind( + get_all_vars( formula( object ), eval( getCall(object)$data ) ), + get_all_vars( object$locations.objects$locations, eval( getCall(object)$data ) ) + ) + + ## select subset if appropriate + + if( !is.null( subset ) ){ + data <- data[subset, ] + object$Tmat <- object$Tmat[subset] + } else if( !is.null( getCall(object)$subset ) ){ + data <- data[eval( getCall(object)$subset ), ] + } + +# if( !is.null( getCall(object)$subset ) ) + + ## define cross-validation sets + + if( is.null( sets ) ){ + + if( !is.null( seed ) ) set.seed( seed ) + sets <- runif( NROW( data ) ) + sets <- cut( + sets, + breaks = c( -0.1, quantile( sets, probs = ( 1:(nset-1)/nset ) ), 1.1 ) + ) + sets <- factor( as.integer( sets ) ) + + } else { + + if( length( sets ) != NROW( data ) ) stop( + "sets must be an integer vector with length equal to the number of observations" + ) + + } + + if( duplicates.in.same.set ){ + dups <- duplicated( object$Tmat ) + idups <- match( object$Tmat[dups], object$Tmat[!dups] ) + sets[dups] <- (sets[!dups])[idups] + } + + sets <- tapply( + 1:NROW( data ), + sets, + function( x ) x + ) + + ## loop over all cross-validation sets + + if( .Platform$OS.type == "windows" ){ + + ## create a SNOW cluster on windows OS + + cl <- makePSOCKcluster( ncores, outfile = "") + + ## export required items to workers + + junk <- clusterEvalQ( cl, require( georob, quietly = TRUE ) ) + + t.result <- parLapply( + cl, + 1:length( sets ), + f.aux, + object = object, + formula = formula, + data = data, + sets = sets, + re.estimate = re.estimate, + param = param, + fit.param = fit.param, + lgn = lgn, + verbose = verbose, + ... + ) + + stopCluster(cl) + + } else { + + ## fork child processes on non-windows OS + + t.result <- mclapply( + 1:length( sets ), + f.aux, + object = object, + formula = formula, + data = data, + sets = sets, + re.estimate = re.estimate, + param = param, + fit.param = fit.param, + lgn = lgn, + verbose = verbose, + mc.cores = ncores, + mc.allow.recursive = FALSE, + ... + ) + + } + + ## create single data frame with cross-validation results + + result <- t.result[[1]]$pred + result$subset <- rep( 1, nrow( t.result[[1]]$pred ) ) + + for( t.i in 2:length( t.result ) ) { + result <- rbind( + result, + data.frame( + t.result[[t.i]]$pred, + subset = rep( t.i, nrow( t.result[[t.i]]$pred ) ) + ) + ) + } + t.ix <- sort( result$i, index.return = T )$ix + result <- result[t.ix, ] + result$data <- model.response( + model.frame( formula( object), data, na.action = na.pass ) + ) + + if( lgn ) result$lgn.data <- exp( result$data ) + + result <- result[, -match("i", colnames( result) )] + + isubset <- match( "subset", colnames( result ) ) + idata <- grep( "data", colnames( result ), fixed = TRUE ) + ipred<- grep( "pred", colnames( result ), fixed = TRUE ) + ise <- grep( "se", colnames( result ), fixed = TRUE ) + ise <- ise[ise != isubset] + + result <- cbind( + result[, -c(isubset, idata, ipred, ise)], + result[, c(isubset, idata, ipred, ise)] + ) + + t.fit <- lapply( t.result, function( x ) return( x$fit ) ) + + if( re.estimate && !all( sapply( t.fit, function(x) x$converged ) ) ) + warning( + "lack of covergence when fitting model to cross-validation sets" + ) + + result <- list( + pred = result, + fit = if( return.fit ) t.fit else NULL + ) + + class( result ) <- "cv.georob" + + invisible( result ) + +} + +## ########################################################################### + +plot.cv.georob <- + function( + x, type = c( "sc", "lgn.sc", "ta", "qq", "pit", "mc", "bs" ), + ncutoff = NULL, + add = FALSE, + col, pch, lty, + main, xlab, ylab, + ... + ) +{ + + ## plot method for class "cv.georob" + + ## 2011-12-21 A. Papritz + + x <- x$pred + + type = match.arg( type ) + + if( type == "sc.lgn" && !"lgn.pred" %in% names( x ) ) stop( + "lognormal kriging results missing, use 'lgn = TRUE' for cross-validation" + ) + + if( type %in% c( "pit", "mc", "bs" ) ){ + + result <- validate.predictions( + data = x$data, + pred = x$pred, + se.pred = x$se, + statistic = type, ncutoff = ncutoff + ) + + } + + if( missing( col ) ) col <- 1 + if( missing( pch ) ) pch <- 1 + if( missing( lty ) ) lty <- 1 + + + + switch( + type, + sc = { + + ## scatterplot of (transformed) measurements vs. predictions + + if( missing( main ) ) main <- "data vs. predictions" + if( missing( xlab ) ) xlab <- "predictions" + if( missing( ylab ) ) ylab <- "data" + + if( add ){ + points( data ~ pred, x, col = col, pch = pch, ... ) + } else { + plot( + data ~ pred, x, col = col, pch = pch, + main = main, xlab = xlab, ylab = ylab, ... + ) + } + + + }, + lgn.sc = { + + ## scatterplot of original measurements vs. back-transformded + ## lognormal predictions + + if( missing( main ) ) main <- "data vs. back-transformed predictions" + if( missing( xlab ) ) xlab <- "back-transformed predictions" + if( missing( ylab ) ) ylab <- "data" + + if( add ){ + points( lgn.data ~ lgn.pred, x, col = col, pch = pch, ... ) + } else { + plot( + lgn.data ~ lgn.pred, x, col = col, pch = pch, + main = main, xlab = xlab, ylab = ylab, ... + ) + } + + + + }, + ta = { + + ## Tukey-Anscombe plot + + if( missing( main ) ) main <- "Tukey-Anscombe plot" + if( missing( xlab ) ) xlab <- "predictions" + if( missing( ylab ) ) ylab <- "standardized prediction errors" + + if( add ){ + points( I((data-pred)/se) ~ pred, x, col = col, pch = pch, ... ) + } else { + plot( + I((data-pred)/se) ~ pred, x, col = col, pch = pch, + main = main, xlab = xlab, ylab = ylab, ... + ) + } + + }, + qq = { + + ## normal QQ-Plot of standardized prediction errors + + if( missing( main ) ) main <- "normal-QQ-plot of standardized prediction errors" + if( missing( xlab ) ) xlab <- "quantile N(0,1)" + if( missing( ylab ) ) ylab <- "quantiles of standardized prediction errors" + + r.qq <- with( x, qqnorm( ( data - pred ) / se, plot.it = FALSE ) ) + + if( add ){ + points( r.qq, col = col, pch = pch, ... ) + } else { + plot( r.qq, col = col, pch = pch, main = main, xlab = xlab, ylab = ylab, ... ) + } + }, + pit = { + + ## histogramm of probability-integral-transformation + + if( missing( main ) ) main <- "histogramm PIT-values" + if( missing( xlab ) ) xlab <- "PIT" + if( missing( ylab ) ) ylab <- "density" + + r.hist <- hist( + result, + col = col, lty = lty, + main = main, xlab = xlab, ylab = ylab, freq = FALSE, ... ) + }, + mc = { + + ## narginal calibration plots: ecdf of measurements and mean + ## predictive cdf + + if( missing( main ) ) main <- "empirical cdf of data and mean predictive cdfs" + if( missing( xlab ) ) xlab <- "data or predicitons" + if( missing( ylab ) ) ylab <- "probability" + + matplot( + result$y, + result[, c( "ghat", "fbar" )], type = "l", + col = c( "black", "red" ), + lty = c( "solid", "dashed" ), + main = main, xlab = xlab, ylab = ylab, + ... + ) + + t.usr <- par( "usr" ) + t.usr[3:4] <- with( result, range( fbar - ghat ) ) *1.04 + par( usr = t.usr ) + with( result, lines( y, fbar-ghat, col= "blue", lty = "dotted" ) ) + axis(2, pos = t.usr[2], col.axis = "blue", col.ticks = "blue" ) + legend( + "topleft", + lty = c("solid", "dashed", "dotted" ), + col = c( "black", "red", "blue" ), bty = "n", cex = 1, + legend = c( + expression( paste( "empirical cdf ", hat(G) ) ), + expression( paste( "mean predictive cdf ", bar(F) ) ), + expression( bar(F)-hat(G) ) + ) + ) + }, + bs ={ + + # plot of brier score vs. cutoff + + if( missing( main ) ) main <- "Brier score vs. cutoff" + if( missing( xlab ) ) xlab <- "cutoff" + if( missing( ylab ) ) ylab <- "Brier score" + + if( add ){ + lines( result$y, result$bs, col = col, lty = lty, ... ) + } else { + plot( result$y, result$bs, type = "l", col = col, lty = lty, + main = main, xlab = xlab, ylab = ylab, ... + ) + } + } + ) + invisible( NULL ) +} + + + +## ########################################################################### + +print.cv.georob <- + function( + x, digits = max(3, getOption("digits") - 3), ... + ) +{ ## print method for class "cv.georob" + + ## 2011-10-13 A. Papritz + ## 2012-12-18 AP invisible(x) + + x <- x$pred + + st <- validate.predictions( + data = x$data, + pred = x$pred, + se.pred = x$se, + statistic = "st", + ... + ) + + print( + format( st, digits = digits ), print.gap = 2, + quote = FALSE + ) + + invisible( x ) +} + + +## ########################################################################### + +summary.cv.georob <- + function( object, se = FALSE, ... ) +{ + + ## summary method for class "cv.georob" + + ## function computes statistics of the cross-validation errors + + ## 2011-10-13 A. Papritz + ## 2012-05-21 ap + + + object <- object$pred + + bs <- validate.predictions( + data = object$data, + pred = object$pred, + se.pred = object$se, + ncutoff = length( object$data ), + statistic = "bs" + ) + + t.d <- diff( bs$y ) + crps <- sum( bs$bs * 0.5 * ( c( 0., t.d ) + c( t.d, 0. ) ) ) + + st <- validate.predictions( + data = object$data, + pred = object$pred, + se.pred = object$se, + statistic = "st" + ) + + if( !is.null( object$lgn.pred ) ){ + st.lgn <- validate.predictions( + data = object$lgn.data, + pred = object$lgn.pred, + se.pred = object$lgn.se, + statistic = "st" + ) + } else { + st.lgn <- NULL + } + + ## collect results + + result <- list( st = st, crps = crps, st.lgn = st.lgn ) + + ## compute standard errors of criteria across cross-validation sets + + if( se && !is.null( object$subset ) ){ + + criteria <- t( sapply( + tapply( + 1:nrow( object ), + factor( object$subset ), + function( i, data, pred, se.pred, lgn.data, lgn.pred, lgn.se.pred ){ + + bs <- validate.predictions( + data = data[i], + pred = pred[i], + se.pred = se.pred[i], + ncutoff = length( data[i] ), + statistic = "bs" + ) + + t.d <- diff( bs$y ) + crps <- c( crps = sum( bs$bs * 0.5 * ( c( 0., t.d ) + c( t.d, 0. ) ) ) ) + + st <- validate.predictions( + data = data[i], + pred = pred[i], + se.pred = se.pred[i], + statistic = "st" + ) + + if( !is.null( lgn.pred ) ){ + st.lgn <- validate.predictions( + data = lgn.data[i], + pred = lgn.pred[i], + se.pred = lgn.se.pred[i], + statistic = "st" + ) + names( st.lgn ) <- paste( names( st.lgn ), "lgn", sep = "." ) + } else { + st.lgn <- NULL + } + + + return( c( st, st.lgn, crps ) ) + + }, + data = object$data, + pred = object$pred, + se.pred = object$se, + lgn.data = object$lgn.data, + lgn.pred = object$lgn.pred, + lgn.se.pred = object$lgn.se + ), + function( x ) x + )) + + se.criteria <- apply( + criteria, 2, + function( x ) sd( x ) / sqrt( length( x ) ) + ) + + result$se.st <- se.criteria[c( "me", "mede", "rmse", "made", "qne", "msse", "medsse")] + result$se.crps <- se.criteria["crps"] + if( !is.null( st.lgn ) ){ + result$se.st.lgn <- se.criteria[ + c( "me.lgn", "mede.lgn", "rmse.lgn", "made.lgn", "qne.lgn", "msse.lgn", "medsse.lgn") + ] + names( result$se.st.lgn ) <- gsub( ".lgn", "", names( result$se.st.lgn ) ) + } + + } + + class( result ) <- "summary.cv.georob" + + + return( result ) + +} + +## ########################################################################### + +print.summary.cv.georob <- + function( + x, digits = max(3, getOption("digits") - 3), ... + ) +{ + + ## print method for class "summary.cv.georob" + + ## 2011-12-20 A. Papritz + ## 2012-05-21 ap + ## 2012-12-18 AP invisible(x) + + + result <- c( x$st, crps = x$crps ) + if( !is.null( x$se.st ) ){ + result <- rbind( result, c( x$se.st, crps = x$se.crps ) ) + rownames( result ) <- c( "", "se" ) + } + + cat( "\nStatistics of cross-validation prediction errors\n" ) + print( + format( result, digits = digits ), print.gap = 2, + quote = FALSE + ) + + if( !is.null( x$st.lgn ) ){ + result <- x$st.lgn + if( !is.null( x$se.st.lgn ) ){ + result <- rbind( x$st.lgn, x$se.st.lgn ) + rownames( result ) <- c( "", "se" ) + } + + cat( "\nStatistics of back-transformed cross-validation prediction errors\n" ) + print( + format( result, digits = digits ), print.gap = 2, + quote = FALSE + ) + } + + invisible( x ) + +} + +## ########################################################################### + +rstudent.cv.georob <- + function( model, ... ) +{ + + ## Function extracts studentized residuals from cv.georob object + + ## Arguments: + + ## model cv.georob object + ## ... further arguments (currently not used) + + ## 2011-10-13 A. Papritz + + if( !identical( class( model )[1], "cv.georob" ) ) stop( + "model is not of class 'cv.georob'" + ) + + model <- model$pred + + ( model$data - model$pred ) / model$se + +} + +# ## ########################################################################### +# +# cv.variomodel <- +# function( object, geodata, ... ) +# { +# +# ## Wrapper function for cross-validation of object of class variomodel{geoR} +# ## by function xvalid{geoR} +# +# ## Arguments: +# +# ## model an object of class "variomodel{geoR} +# ## ... further arguements passed to xvalid{geoR), cf. respective help page +# +# ## 2012-11-22 A. Papritz +# +# call.fc <- match.call() +# +# res <- geoR::xvalid( model = object, ... ) +# +# if( !is.null( attr( res, "geodata.xvalid" ) ) ){ +# attr( res, "geodata.xvalid" ) <- call.fc$geodata +# } +# if( !is.null( attr( res, "locations.xvalid" ) ) ){ +# attr( res, "locations.xvalid" ) <- call.fc$locations.xvalid +# } +# +# return(res) +# +# } +# +# cv.likGRF <- +# function( object, geodata, ... ) +# { +# +# ## Wrapper function for cross-validation of object of class variomodel{geoR} +# ## by function xvalid{geoR} +# +# ## Arguments: +# +# ## model an object of class "likGRF{geoR} +# ## ... further arguements passed to xvalid{geoR), cf. respective help page +# +# ## 2012-11-22 A. Papritz +# +# call.fc <- match.call() +# +# res <- geoR::xvalid( model = object, geodata = geodata, ... ) +# +# if( !is.null( attr( res, "geodata.xvalid" ) ) ){ +# attr( res, "geodata.xvalid" ) <- call.fc$geodata +# } +# if( !is.null( attr( res, "locations.xvalid" ) ) ){ +# attr( res, "locations.xvalid" ) <- call.fc$locations.xvalid +# } +# +# return(res) +# +# } + +## ====================================================================== +validate.predictions <- + function( + data, + pred, + se.pred, + statistic = c( "pit", "mc", "bs", "st" ), + ncutoff = NULL + ) +{ + + ## function computes several statistics to validate probabilistic + ## predictions, cf. Gneiting et al., 2007, JRSSB + + ## Arguments: + + ## data numeric vector with data + ## pred numeric vector with predictions + ## se.pred numeric vector with prediction standard errors + ## statistic character scalar specifying the statistic to compute + ## possible values are: + ## "pit" probability integral transformation + ## "mc" empirical cdf of data and mean predictive + ## normal distribution + ## "bs" Brier score + ## "st" basic statistics such as mean error, mean + ## mean squared error, mean squared standardized error + ## and robust equivalents + ## ncutoff number of quantiles (statistic == "mc") or number of cutoffs (statistic = "bs") + + # 2011-20-21 A. Papritz + # 2012-05-04 AP coping with NAs + + statistic = match.arg( statistic ) + + ## exclude item with NAs + + t.sel <- complete.cases( data, pred, se.pred ) + + if( sum( t.sel ) < length( t.sel ) ) warnings( + "missing values encountered when validating predictions" + ) + + data <- data[t.sel] + pred <- pred[t.sel] + se.pred <- se.pred[t.sel] + + if( missing( ncutoff ) || is.null( ncutoff ) ) ncutoff <- min( 500, length( data ) ) + + result <- switch( + statistic, + pit = { + + ## probability integral transformation + + pnorm( data, mean = pred, sd = se.pred ) + }, + mc = , + bs = { + + ## marginal calibration and brier score + + margin.calib <- data.frame( + y = t.x <- unique( t.y <- sort( c( data ) ) ), + ghat = cumsum( tabulate( match( t.y, t.x ) ) ) / length(t.y) + ) + t.sel <- trunc( + seq( + from = as.integer(1), + to = nrow( margin.calib ), + length.out = min( nrow( margin.calib ), ncutoff ) + ) + ) + margin.calib <- margin.calib[t.sel,] + + t.bla <- t( + sapply( + margin.calib$y, + function( q, m, s, y ){ + t.p <- pnorm( q, mean = m, sd = s ) + c( + fbar = mean( t.p ), + bs = mean( ( t.p - as.numeric( y <= q ) )^2 ) + ) + }, + m = pred, + s = se.pred, + y = data + ) + ) + cbind( + margin.calib, as.data.frame( t.bla ) + ) + }, + st = { + + ## statistics of (standardized) prediction errors + + error <- data - pred + std.error <- error / se.pred + + statistics <- c( + me = mean( error ), + mede = median( error ), + rmse = sqrt( mean( error^2 ) ), + made = mad( error, center = 0 ), + qne = Qn( error, finite.corr = TRUE ), + msse = mean( std.error^2 ), + medsse = median( std.error^2 ) + ) + + } + ) + + return( result ) + +} + Property changes on: pkg/R/georob.cv.R ___________________________________________________________________ Added: svn:executable + * Modified: pkg/R/georob.exported.functions.R =================================================================== --- pkg/R/georob.exported.functions.R 2013-05-14 15:58:21 UTC (rev 4) +++ pkg/R/georob.exported.functions.R 2013-05-23 15:41:57 UTC (rev 5) @@ -75,6 +75,7 @@ ## 2012-05-07 AP correction of error for constant trend ## 2012-05-28 AP handle missing names of coefficients after calling update ## 2013-04-23 AP new names for robustness weights + ## 2013-05-23 AP correct handling of missing observations and to construct model.frame ## check whether input is complete @@ -87,13 +88,23 @@ ret.x <- x ret.y <- y - ## vector with row number of included observations +# ## vector with row number of included observations +# +# in.subset <- 1:NROW( data ) +# if( !missing( subset ) ) in.subset <- in.subset[subset] - in.subset <- 1:NROW( data ) - if( !missing( subset ) ) in.subset <- in.subset[subset] + ## build combined formula for fixed effects and locations - ## start setting-up model frames + extended.formula <- update( + formula, + paste( + paste( as.character( formula )[c(2, 1, 3)], collapse = " " ), + as.character( locations )[2], sep = " + " + ) + ) + ## setting-up model frame + cl <- match.call() mf <- match.call( expand.dots = FALSE ) m <- match( @@ -101,48 +112,25 @@ names(mf), 0L ) mf <- mf[c(1L, m)] + mf$formula <- extended.formula mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name( "model.frame" ) - ## spot missing observations for fixed effects + mf <- eval( mf, parent.frame() ) - mf.try <- eval( mf, parent.frame() ) - if( !is.null( attr( mf.try, "na.action" ) ) ){ - ex.mf <- unname( unclass( attr( mf.try, "na.action" ) ) ) - } else ex.mf <- integer(0) - ## if( verbose > 3 ) cat( "ex.mf :", ex.mf, "\n" ) + ## eliminate intercept from locations - ## spot missing observations for locations + locations <- as.formula( paste( deparse( locations ), "-1" ), env = parent.frame() ) - locations <- - as.formula( paste( deparse( locations ), "-1" ), env = parent.frame() ) - mf.loc <- mf - mf.loc$formula <- locations - mf.loc.try <- eval( mf.loc, parent.frame() ) - if( !is.null( attr( mf.loc.try, "na.action" ) ) ){ - ex.mf.loc <- unname( unclass( attr( mf.loc.try, "na.action" ) ) ) - } else ex.mf.loc <- integer( 0 ) - ## if( verbose > 3 ) cat( "ex.mf.loc:", ex.mf.loc, "\n" ) + ## setting-up terms objects - ## missing observations either for fixed effects or locations + mt <- terms( formula ) + mt.loc <- terms( locations ) + + ## ... and assign fixed effects terms object as attribute to model.frame [TRUNCATED] To get the complete diff run: svnlook diff /svnroot/georob -r 5