[Analogue-commits] r219 - in pkg: . R inst man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri May 27 11:22:39 CEST 2011
Author: gsimpson
Date: 2011-05-27 11:22:39 +0200 (Fri, 27 May 2011)
New Revision: 219
Added:
pkg/R/crossval.R
pkg/man/crossval.Rd
Modified:
pkg/DESCRIPTION
pkg/inst/ChangeLog
Log:
adds crossval()
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2011-05-24 09:03:53 UTC (rev 218)
+++ pkg/DESCRIPTION 2011-05-27 09:22:39 UTC (rev 219)
@@ -1,7 +1,7 @@
Package: analogue
Type: Package
Title: Analogue and weighted averaging methods for palaeoecology
-Version: 0.7-0
+Version: 0.7-1
Date: $Date$
Depends: R (>= 2.10.0), stats, graphics, vegan, lattice, grid, MASS, princurve
Author: Gavin L. Simpson, Jari Oksanen
Added: pkg/R/crossval.R
===================================================================
--- pkg/R/crossval.R (rev 0)
+++ pkg/R/crossval.R 2011-05-27 09:22:39 UTC (rev 219)
@@ -0,0 +1,151 @@
+## crossval() - crossvalidation methods for transfer functions
+
+`crossval` <- function(obj, ...) {
+ UseMethod("crossval")
+}
+
+## crossval method for wa()
+`crossval.wa` <- function(obj, method = c("LOO","kfold","bootstrap"),
+ nboot = 100, nfold = 10, folds = 5,
+ verbose = getOption("verbose"), ...) {
+ method <- match.arg(method)
+ X <- obj$orig.x
+ ENV <- obj$orig.env
+ N <- NROW(X)
+ M <- NCOL(X)
+ tolOpts <- obj$options.tol
+ Dtype <- obj$deshrink
+ if(identical(method, "LOO")) {
+ pred <- numeric(N)
+ nr <- N-1 ## number of rows - 1 for LOO
+ if(verbose) {
+ writeLines("\n LOO Cross-validation:")
+ pb <- txtProgressBar(min = 0, max = nr, style = 3)
+ on.exit(close(pb))
+ on.exit(cat("\n"), add = TRUE)
+ }
+ for(i in seq_along(pred)) {
+ if(verbose)
+ setTxtProgressBar(pb, i)
+ opt <- w.avg(X[-i, ], ENV[-i])
+ if(obj$tol.dw)
+ pred[i] <- predWAT(X, ENV, i, opt, tolOpts, nr, M,
+ Dtype)
+ else
+ pred[i] <- predWA(X, ENV, i, opt, Dtype)
+ }
+ }
+ if(identical(method, "kfold")) {
+ oob.pred <- matrix(NA, ncol = folds, nrow = N)
+ if(verbose) {
+ writeLines("\n n k-fold Cross-validation:")
+ pb <- txtProgressBar(min = 0, max = folds, style = 3)
+ on.exit(close(pb))
+ on.exit(cat("\n"), add = TRUE)
+ }
+ ind <- rep(seq_len(nfold), length = N) ## k-fold group indicator
+ ## n k-fold s
+ for(i in seq_len(folds)) {
+ if(verbose)
+ setTxtProgressBar(pb, i)
+ ## do a k-fold CV
+ pind <- ind[.Internal(sample(N, N, TRUE, NULL))]
+ for(k in seq_len(nfold)) {
+ sel <- pind == k ## sel is samples in leave out group
+ N.oob <- sum(sel) ## N in leave out group
+ N.mod <- sum(!sel) ## N in the model
+ sel <- which(sel) ## convert to indices
+ opt <- w.avg(X[-sel, , drop = FALSE], ENV[-sel])
+ if(obj$tol.dw) {
+ oob.pred[sel, i] <- predWAT(X, ENV, sel, opt, tolOpts,
+ N.mod, M, Dtype)
+ } else {
+ oob.pred[sel, i] <- predWA(X, ENV, sel, opt, Dtype)
+ }
+ }
+ }
+ pred <- rowMeans(oob.pred, na.rm = TRUE)
+ }
+ if(identical(method, "bootstrap")) {
+ oob.pred <- matrix(NA, ncol = nboot, nrow = N)
+ if(verbose) {
+ writeLines("\n Bootstrap Cross-validation:")
+ pb <- txtProgressBar(min = 0, max = nboot, style = 3)
+ on.exit(close(pb))
+ on.exit(cat("\n"), add = TRUE)
+ }
+ ind <- seq_len(N) ## indicator for samples
+ for(i in seq_len(nboot)) {
+ if(verbose)
+ setTxtProgressBar(pb, i)
+ bSamp <- .Internal(sample(N, N, TRUE, NULL))
+ sel <- which(!ind %in% bSamp) ## need indices!!!
+ N.oob <- NROW(X[sel, , drop = FALSE])
+ N.mod <- N - N.oob
+ opt <- w.avg(X[-sel, , drop = FALSE], ENV[-sel])
+ if(obj$tol.dw)
+ oob.pred[sel, i] <- predWAT(X, ENV, sel, opt, tolOpts,
+ N.mod, M, Dtype)
+ else
+ oob.pred[sel, i] <- predWA(X, ENV, sel, opt, Dtype)
+ }
+ pred <- rowMeans(oob.pred, na.rm = TRUE)
+ }
+ resid <- ENV - pred
+ out <- list(fitted.values = pred, residuals = resid)
+ performance <- data.frame(R2 = cor(pred, ENV)^2,
+ avgBias = mean(resid),
+ maxBias = unname(maxBias(resid, ENV)),
+ RMSEP = sqrt(mean(resid^2)),
+ RMSEP2 = NA,
+ s1 = NA,
+ s2 = NA)
+ if(identical(method, "bootstrap") ||
+ (identical(method, "kfold") && folds > 1)) {
+ performance$s1 <- sqrt(mean(apply(oob.pred, 1, sd, na.rm = TRUE)^2))
+ performance$s2 <- sqrt(mean(resid^2))
+ performance$RMSEP2 <- sqrt(performance$s1^2 + performance$s2^2)
+ }
+ out$performance <- performance
+ .call <- match.call()
+ .call[[1]] <- as.name("crossval")
+ out$call <- .call
+ out$CVparams <- list(method = method, nboot = nboot, nfold = nfold,
+ folds = folds)
+ class(out) <- "crossval"
+ out
+}
+
+`predWAT` <- function(X, ENV, i, optima, tolOpts, nr, nc,
+ deSh) {
+ tol <- w.tol(X[-i, ], ENV[-i], optima, tolOpts$useN2)
+ tol <- fixUpTol(tol, tolOpts$na.tol, tolOpts$small.tol,
+ tolOpts$min.tol, tolOpts$f, ENV[-i])
+ wa.env <- WATpred(X[-i, ], optima, tol, nr, nc)
+ p <- WATpred(X[i, , drop = FALSE], optima, tol, 1, nc)
+ deMod <- deshrink(ENV[-i], wa.env, deSh)
+ deshrinkPred(p, coef(deMod), deSh)
+}
+
+`predWA` <- function(X, ENV, i, optima, deSh) {
+ wa.env <- WApred(X[-i, ], optima)
+ p <- WApred(X[i, , drop = FALSE], optima)
+ deMod <- deshrink(ENV[-i], wa.env, deSh)
+ deshrinkPred(p, coef(deMod), deSh)
+}
+
+`print.crossval` <- function(x, digits = min(getOption("digits"), 5), ...) {
+ writeLines(strwrap("Model Cross-validation:", prefix = "\t"), sep = "\n\n")
+ print(x$call)
+ cat("\n")
+ method <- x$CVparams$method
+ writeLines(strwrap(paste(" Method:", method)))
+ if(identical(method, "bootstrap"))
+ writeLines(strwrap(paste(" No. Bootstraps:", x$CVparams$nboot)))
+ if(identical(method, "kfold")) {
+ writeLines(strwrap(paste("k:", x$CVparams$nfold)))
+ writeLines(strwrap(paste("No. of folds:", x$CVparams$folds)))
+ }
+ cat("\n")
+ print(x$performance, digits = digits, ...)
+}
Modified: pkg/inst/ChangeLog
===================================================================
--- pkg/inst/ChangeLog 2011-05-24 09:03:53 UTC (rev 218)
+++ pkg/inst/ChangeLog 2011-05-27 09:22:39 UTC (rev 219)
@@ -1,5 +1,11 @@
analogue Change Log
+Version 0.7-1
+
+ * crossval: new function to perform leave-one-out, k-fold,
+ n k-fold, and bootstrap cross-validation on transfer function
+ models. A method for wa() models is provided.
+
Version 0.7-0
* timetrack: new function to passively project sediment core
Added: pkg/man/crossval.Rd
===================================================================
--- pkg/man/crossval.Rd (rev 0)
+++ pkg/man/crossval.Rd 2011-05-27 09:22:39 UTC (rev 219)
@@ -0,0 +1,85 @@
+\name{crossval}
+\alias{crossval}
+\alias{crossval.wa}
+\alias{print.crossval}
+\alias{predWA}
+\alias{predWAT}
+\title{Cross-validation of palaeoecological transfer function models}
+\description{
+ Performs leave-one-out, \emph{k}-fold, \emph{n} \emph{k}-fold and
+ bootstrap cross-validation of palaeoecological transfer function models.
+}
+\usage{
+crossval(obj, ...)
+
+\method{crossval}{wa}(obj, method = c("LOO","kfold","bootstrap"),
+ nboot = 100, nfold = 10, folds = 5,
+ verbose = getOption("verbose"), ...)
+
+}
+\arguments{
+ \item{obj}{A fitted transfer function model. Currently, only objects
+ of class \code{"wa"} are supported.}
+ \item{method}{character; type of cross-validation.}
+ \item{nboot}{numeric; number of bootstrap samples.}
+ \item{nfold}{numeric; number of chunks into which the training data
+ are split. The \emph{k} in \emph{k}-fold.}
+ \item{folds}{numeric; the number of times \emph{k}-fold CV is
+ performed.}
+ \item{verbose}{logical; should progress of the CV be displayed?}
+ \item{\dots}{Arguments passed to other methods.}
+}
+%\details{
+%
+%}
+\value{
+ Returns an object of class \code{"crossval"}, a list with the
+ following components:
+
+ \item{fitted.values}{numeric vector; the cross-validated estimates of
+ the response.}
+ \item{residuals}{numeric vector; residuals computed from the
+ cross-validated estimates of the response.}
+ \item{performance}{data frame; cross-validation performance statistics
+ for the model.}
+ \item{CVparams}{list; parameters holding details of the
+ cross-validation process.}
+ \item{call}{the matched call.}
+}
+%\references{TO DO}
+\author{Gavin L. Simpson}
+%\note{
+%}
+\seealso{\code{\link{wa}}}
+\examples{
+## Load the Imbrie & Kipp data and
+## summer sea-surface temperatures
+data(ImbrieKipp)
+data(SumSST)
+
+## fit the WA model
+mod <- wa(SumSST ~., data = ImbrieKipp)
+mod
+
+## Leave one out CV
+cv.loo <- crossval(mod)
+cv.loo
+
+## k-fold CV (k == 10)
+cv.kfold <- crossval(mod, kfold = 10, folds = 1, method = "kfold")
+cv.kfold
+
+## n k-fold CV (k == 10, n = 10)
+cv.nkfold <- crossval(mod, kfold = 10, folds = 10, method = "kfold")
+cv.nkfold
+
+## bootstrap with 250 bootstrap samples
+cv.boot <- crossval(mod, method = "bootstrap", nboot = 250)
+cv.boot
+
+## extract fitted values and residuals
+fitted(cv.boot)
+resid(cv.boot)
+
+}
+\keyword{methods}
More information about the Analogue-commits
mailing list