[Analogue-commits] r223 - in pkg: . R inst man tests/Examples
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon May 30 23:26:07 CEST 2011
Author: gsimpson
Date: 2011-05-30 23:26:07 +0200 (Mon, 30 May 2011)
New Revision: 223
Added:
pkg/R/pcr.R
pkg/man/pcr.Rd
Modified:
pkg/DESCRIPTION
pkg/inst/ChangeLog
pkg/tests/Examples/analogue-Ex.Rout.save
Log:
add pcr() for principal components regression, bump to 0.7-2
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2011-05-27 09:44:50 UTC (rev 222)
+++ pkg/DESCRIPTION 2011-05-30 21:26:07 UTC (rev 223)
@@ -1,7 +1,7 @@
Package: analogue
Type: Package
Title: Analogue and weighted averaging methods for palaeoecology
-Version: 0.7-1
+Version: 0.7-2
Date: $Date$
Depends: R (>= 2.10.0), stats, graphics, vegan, lattice, grid, MASS, princurve
Author: Gavin L. Simpson, Jari Oksanen
Added: pkg/R/pcr.R
===================================================================
--- pkg/R/pcr.R (rev 0)
+++ pkg/R/pcr.R 2011-05-30 21:26:07 UTC (rev 223)
@@ -0,0 +1,173 @@
+## Principal Components Regression Models
+
+## generic
+`pcr` <- function(x, ...) {
+ UseMethod("pcr")
+}
+
+## default
+`pcr.default` <- function(x, y, ncomp, tranFun, ...) {
+ ## convert to matrices for speed
+ x <- data.matrix(x)
+
+ ## dimensions
+ Nx <- NROW(x)
+ Ny <- length(y)
+ Mx <- NCOL(x) ## number of predictor vars
+
+ ## Apply a transformation if specified
+ if(!missing(tranFun)) {
+ FUN <- match.fun(tranFun)
+ x <- tranFun(x)
+ tranParms <- attr(x, "parms")
+ } else {
+ FUN <- NA
+ }
+
+ ## centre x and y
+ xMeans <- colMeans(x)
+ yMean <- mean(y)
+ x <- sweep(x, 2, xMeans, "-")
+ y <- y - yMean
+
+ ## How many components?
+ ncomp <- if(missing(ncomp)) {
+ min(Nx - 1, Mx)
+ } else {
+ if(ncomp < 1 || ncomp > (newcomp <- min(Nx - 1, Mx)))
+ warning("Invalid 'ncomp'. Resetting to max possible.")
+ newcomp
+ }
+ S <- seq_len(ncomp)
+
+ ## Storage objects:
+ ## B model coefficients
+ ## fitted.values
+ B <- matrix(0, nrow = Mx, ncol = ncomp)
+ fitted.values <- matrix(0, nrow = Nx, ncol = ncomp)
+
+ ## SVD
+ SVD <- La.svd(x)
+ D <- SVD$d[S]
+ TT <- SVD$u[, S, drop = FALSE] %*% diag(D, nrow = ncomp)
+ P <- t(SVD$vt[S, , drop = FALSE])
+ tQ <- crossprod(TT, y) / (varExpl <- D^2)
+
+ ## compute coefficients
+ for(b in S) {
+ bS <- seq_len(b)
+ B[, b] <- P[, bS, drop = FALSE] %*% tQ[bS, ]
+ fitted.values[, b] <- TT[, bS, drop = FALSE] %*% tQ[bS, ]
+ }
+
+ ## other model output
+ residuals <- y - fitted.values
+ fitted.values <- sweep(fitted.values, 2, yMean, "+")
+
+ ## model performance
+ Y <- y + yMean
+ performance <- data.frame(R2 = drop(cor(fitted.values, Y)),
+ avgBias = colMeans(residuals),
+ maxBias = apply(residuals, 2, maxBias, Y),
+ RMSEP = sqrt(colMeans(residuals^2)))
+ rownames(performance) <- paste("PC", S, sep = "")
+
+ ## get and fix up the call
+ .call <- match.call()
+ .call[[1]] <- as.name("pcr")
+ ## fix-up the name of the transformation function used,
+ ## needed when formula method called...
+ .call[[which(names(.call) == "tranFun")]] <- as.name(deparse(substitute(tranFun)))
+
+ ## return object
+ Obj <- list(fitted.values = fitted.values,
+ coefficients = coefficients,
+ residuals = residuals,
+ scores = TT,
+ loadings = P,
+ Yloadings = t(tQ),
+ xMeans = xMeans,
+ yMean = yMean,
+ varExpl = varExpl,
+ totvar = sum(x * x),
+ call = .call,
+ tranFun = FUN,
+ tranParms = tranParms,
+ performance = performance,
+ ncomp = ncomp)
+ class(Obj) <- "pcr"
+ Obj
+}
+
+`pcr.formula` <- function(formula, data, subset, na.action,
+ ..., model = FALSE) {
+ ## the function call
+ .call <- match.call()
+ ## need to reset due to method dispatch
+ .call[[1]] <- as.name("pcr")
+
+ ## keep only the arguments which should go into the model frame
+ mf <- match.call(expand.dots = FALSE)
+ m <- match(c("formula", "data", "subset", "na.action"), names(mf), 0)
+ mf <- mf[c(1, m)]
+ mf$drop.unused.levels <- TRUE
+ mf[[1]] <- as.name("model.frame")
+ mf <- eval.parent(mf)
+ attr(attr(mf, "terms"), "intercept") <- 0
+
+ ## terms objects
+ mt <- attr(mf, "terms")
+ ## model matrices
+ y <- model.response(mf, "numeric")
+ x <- model.matrix(mt, mf)
+
+ ## fit & the PCR
+ Obj <- pcr.default(x = x, y = y, ...)
+ Obj$na.action <- attr(mf, "na.action")
+ Obj$terms <- mt
+ if(model)
+ Obj$model <- mf
+ Obj
+}
+
+`Hellinger` <- function(x, apply = FALSE) {
+ tran(x, method = "hellinger")
+ ## ignore 'apply' as no meta-parameters
+}
+
+`ChiSquare` <- function(x, apply = FALSE) {
+ if(apply) {
+ ## apply pre-computed meta-parameters to transform
+ ## test samples to match training samples
+ parms <- attr(x, "parms")
+ x <- with(parms, sqrt(gsum) * x/outer(rsum, sqrt(csum)))
+ } else {
+ ## perform transformation and preserve the meta-parameters
+ if (any(x < 0, na.rm = TRUE)) {
+ k <- min(x, na.rm = TRUE)
+ warning("'x'contains negative entries: result may be nonsense\n")
+ } else {
+ k <- .Machine$double.eps
+ }
+ gsum <- sum(x, na.rm = TRUE)
+ rsum <- pmax(k, rowSums(x, na.rm = TRUE))
+ csum <- colSums(x, na.rm = TRUE)
+ x <- sqrt(gsum) * x/outer(rsum, sqrt(csum))
+ attr(x, "parms") <- list(gsum = gsum, rsum = rsum, csum = csum)
+ }
+ x
+}
+
+`print.pcr` <- function(x, digits = min(getOption("digits"), 4), ...) {
+ cat("\n")
+ writeLines(strwrap("Principal Component Regression Model", prefix = "\t"),
+ sep = "\n\n")
+ writeLines(strwrap("Call:"))
+ print(x$call)
+ cat("\n")
+ writeLines(strwrap(paste("No. of Components:", x$ncomp)), sep = "\n\n")
+ writeLines("RMSEP (Apparent):")
+ perf <- x$performance[, "RMSEP"]
+ names(perf) <- rownames(x$performance)
+ print(perf)
+}
Modified: pkg/inst/ChangeLog
===================================================================
--- pkg/inst/ChangeLog 2011-05-27 09:44:50 UTC (rev 222)
+++ pkg/inst/ChangeLog 2011-05-30 21:26:07 UTC (rev 223)
@@ -1,5 +1,12 @@
analogue Change Log
+Version 0.7-2
+
+ * pcr: new function pcr() performs principal components
+ regression. Designed to allow transformations in the spirit of
+ Legendre & Gallagher (2001) that allow PCA to be usefully
+ applied to species data.
+
Version 0.7-1
* crossval: new function to perform leave-one-out, k-fold,
Added: pkg/man/pcr.Rd
===================================================================
--- pkg/man/pcr.Rd (rev 0)
+++ pkg/man/pcr.Rd 2011-05-30 21:26:07 UTC (rev 223)
@@ -0,0 +1,135 @@
+\name{pcr}
+\alias{pcr}
+\alias{pcr.default}
+\alias{pcr.formula}
+\alias{print.pcr}
+\alias{Hellinger}
+\alias{ChiSquare}
+\title{Prinicpal component regression transfer function models}
+\description{
+ Fits a palaeoecological transfer function model using principal
+ component regression, using an optional transformation of the matrix
+ of predictor variables when these are species abundance data.
+}
+\usage{
+
+\method{pcr}{default}(x, y, ncomp, tranFun, ...)
+
+\method{pcr}{formula}(formula, data, subset, na.action, ..., model = FALSE)
+
+Hellinger(x, apply = FALSE)
+
+ChiSquare(x, apply = FALSE)
+}
+\arguments{
+ \item{x}{Matrix or data frame of predictor variables. Usually species
+ composition or abundance data for transfer function models.}
+ \item{y}{Numeric vector; the response variable to be modelled.}
+ \item{ncomp}{numeric; number of principal components to build models
+ for. If not supplied the largest possible number of components is
+ determined.}
+ \item{tranFun}{function; a function or name of a function that
+ performs a transformation of the predictor variables \code{x}. The
+ function must be self-contained as no arguments are passed to the
+ function when it is applied. See Details for more information.}
+ \item{formula}{a model formula.}
+ \item{data}{an optional data frame, list or environment (or object
+ coercible by \code{\link{as.data.frame}} to a data frame) containing
+ the variables specified on the RHS of the model formula. If not found in
+ \code{data}, the variables are taken from
+ \code{environment(formula)}, typically the environment from which
+ \code{pcr} is called.}
+ \item{subset}{an optional vector specifying a subset of observations
+ to be used in the fitting process.}
+ \item{na.action}{a function which indicates what should happen when
+ the data contain \code{NA}s. The default is set by the
+ \code{na.action} setting of \code{options}, and is \code{na.fail} if
+ that is unset. The 'factory-fresh' default is \code{na.omit}.
+ Another possible value is \code{NULL}, no action. Value
+ \code{na.exclude} can be useful.}
+ \item{model}{logical; If \code{TRUE} the model frame is returned?}
+ \item{apply}{logical; should an existing tranformation, using
+ pre-computed meta-parameters, be applied?}
+ \item{\dots}{Arguments passed to other methods.}
+}
+\details{
+ When applying cross-validation (CV) to transfer function models, any
+ transformation of the predictors must be applied separately during
+ each iteration of the CV procedure to the part of the data used in
+ fitting the model. In the same way, any samples to be predicted from
+ the model must use any meta-parameters derived from the training data
+ only. For examle, centring is appled to the training data only and the
+ variables means used to centre the training data are used to centre
+ the test samples. The variable means should not be computed on a
+ combination of the training and test samples.
+
+ When using PCR, we might wish to apply a transformation to the species
+ data predictor variables such that the PCA of those data preserves a
+ dissimilarity coefficient other than the Euclidean distance. This
+ transformation is applied to allow PCA to better describe patterns in
+ the species data (Legendre & Gallagher 2001).
+
+ How this is handled in \code{pcr} is to take a user-supplied function
+ that takes a single argument, the matrix of predictor variables. The
+ function should return a matrix of the same dimension as the input. If
+ any meta-parameters are required for subsequent use in prediction,
+ these should be returned as attribute \code{"parms"}, attached to the
+ matrix.
+
+ Two example transformation functions are provided implementing the
+ Hellinger and Chi Square transformations of Legendre & Gallagher
+ (2001). Users can base their transformation functions on
+ these. \code{ChiSquare()} illustrates how meta-parameters should be
+ returned as the attribute \code{"parms"}.
+}
+\value{
+ Returns an object of class \code{"pcr"}, a list with the
+ following components:
+
+ \item{fitted.values}{matrix; the PCR estimates of the response. The
+ columns contain fitted values using C components, where C is the Cth
+ column of the matrix.}
+ \item{coefficients}{matrix; regression coefficients for the
+ PCR. Columns as per \code{fitted} above.}
+ \item{residuals}{matrix; residuals, where the Cth column represents a
+ PCR model using C components.}
+ \item{scores}{}
+ \item{loadings}{}
+ \item{Yloadings}{}
+ \item{xMeans}{numeric; means of the predictor variables in the
+ training data.}
+ \item{yMean}{numeric; mean of the response variable in the training
+ data.}
+ \item{varExpl}{numeric; variance explained by the PCR model. These are
+ the squares of the singular values.}
+ \item{totvar}{numeric; total variance in the training data}
+ \item{call}{the matched call.}
+ \item{tranFun}{transformation function used. \code{NA} if none
+ supplied/used.}
+ \item{tranParms}{list; meta parameters used to computed the
+ transformed training data.}
+ \item{performance}{data frame; cross-validation performance statistics
+ for the model.}
+ \item{ncomp}{numeric; number of principal components computed}
+}
+%\references{TO DO}
+\author{Gavin L. Simpson}
+%\note{
+%}
+\seealso{\code{\link{wa}}}
+\examples{
+## Load the Imbrie & Kipp data and
+## summer sea-surface temperatures
+data(ImbrieKipp)
+data(SumSST)
+
+## normal interface and apply Hellinger transformation
+mod <- pcr(ImbrieKipp, SumSST, tranFun = Hellinger)
+mod
+
+## formula interface, but as above
+mod2 <- pcr(SumSST ~ ., data = ImbrieKipp, tranFun = Hellinger)
+mod2
+
+}
+\keyword{methods}
Modified: pkg/tests/Examples/analogue-Ex.Rout.save
===================================================================
--- pkg/tests/Examples/analogue-Ex.Rout.save 2011-05-27 09:44:50 UTC (rev 222)
+++ pkg/tests/Examples/analogue-Ex.Rout.save 2011-05-30 21:26:07 UTC (rev 223)
@@ -28,7 +28,7 @@
Loading required package: grid
Loading required package: MASS
Loading required package: princurve
-This is analogue 0.7-0
+This is analogue 0.7-2
>
> assign(".oldSearch", search(), pos = 'CheckExEnv')
> cleanEx()
@@ -5536,6 +5536,69 @@
>
>
> cleanEx()
+> nameEx("pcr")
+> ### * pcr
+>
+> flush(stderr()); flush(stdout())
+>
+> ### Name: pcr
+> ### Title: Prinicpal component regression transfer function models
+> ### Aliases: pcr pcr.default pcr.formula print.pcr Hellinger ChiSquare
+> ### Keywords: methods
+>
+> ### ** Examples
+>
+> ## Load the Imbrie & Kipp data and
+> ## summer sea-surface temperatures
+> data(ImbrieKipp)
+> data(SumSST)
+>
+> ## normal interface and apply Hellinger transformation
+> mod <- pcr(ImbrieKipp, SumSST, tranFun = Hellinger)
+> mod
+
+ Principal Component Regression Model
+
+Call:
+pcr(x = ImbrieKipp, y = SumSST, tranFun = Hellinger)
+
+No. of Components: 27
+
+RMSEP (Apparent):
+ PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
+2.381215 1.707588 1.680896 1.679774 1.608903 1.535145 1.507995 1.496939
+ PC9 PC10 PC11 PC12 PC13 PC14 PC15 PC16
+1.496908 1.432142 1.426444 1.405155 1.391348 1.349172 1.349172 1.315284
+ PC17 PC18 PC19 PC20 PC21 PC22 PC23 PC24
+1.313187 1.311801 1.291201 1.206484 1.188438 1.187503 1.171215 1.170947
+ PC25 PC26 PC27
+1.170380 1.162497 1.162355
+>
+> ## formula interface, but as above
+> mod2 <- pcr(SumSST ~ ., data = ImbrieKipp, tranFun = Hellinger)
+> mod2
+
+ Principal Component Regression Model
+
+Call:
+pcr(x = x, y = y, tranFun = Hellinger)
+
+No. of Components: 27
+
+RMSEP (Apparent):
+ PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
+2.381215 1.707588 1.680896 1.679774 1.608903 1.535145 1.507995 1.496939
+ PC9 PC10 PC11 PC12 PC13 PC14 PC15 PC16
+1.496908 1.432142 1.426444 1.405155 1.391348 1.349172 1.349172 1.315284
+ PC17 PC18 PC19 PC20 PC21 PC22 PC23 PC24
+1.313187 1.311801 1.291201 1.206484 1.188438 1.187503 1.171215 1.170947
+ PC25 PC26 PC27
+1.170380 1.162497 1.162355
+>
+>
+>
+>
+> cleanEx()
> nameEx("performance")
> ### * performance
>
@@ -7729,7 +7792,7 @@
> ### * <FOOTER>
> ###
> cat("Time elapsed: ", proc.time() - get("ptime", pos = 'CheckExEnv'),"\n")
-Time elapsed: 19.584 0.559 20.318 0 0
+Time elapsed: 19.381 0.603 20.167 0 0
> grDevices::dev.off()
null device
1
More information about the Analogue-commits
mailing list