[Analogue-commits] r136 - in pkg: . R inst man

Tue Jun 9 23:17:02 CEST 2009

Author: gsimpson
Date: 2009-06-09 23:17:01 +0200 (Tue, 09 Jun 2009)
New Revision: 136

Added:
   pkg/R/deshrink.R
   pkg/R/deshrinkPred.R
   pkg/man/deshrink.Rd
Modified:
   pkg/DESCRIPTION
   pkg/R/internal.R
   pkg/R/predict.wa.R
   pkg/R/wa.R
   pkg/inst/ChangeLog
   pkg/man/analogue-internal.Rd
Log:
Updated and improved deshrinking mechanism for wa() and predict.wa()

Modified: pkg/DESCRIPTION
===================================================================

--- pkg/DESCRIPTION	2009-06-08 22:11:35 UTC (rev 135)
+++ pkg/DESCRIPTION	2009-06-09 21:17:01 UTC (rev 136)
@@ -1,7 +1,7 @@
 Package: analogue
 Type: Package
 Title: Analogue and weighted averaging methods for palaeoecology
-Version: 0.6-11
+Version: 0.6-12
 Date: $Date$
 Depends: R (>= 2.5.0), stats, graphics, vegan, lattice, MASS
 Author: Gavin L. Simpson, Jari Oksanen

Added: pkg/R/deshrink.R
===================================================================
--- pkg/R/deshrink.R	                        (rev 0)
+++ pkg/R/deshrink.R	2009-06-09 21:17:01 UTC (rev 136)
@@ -0,0 +1,52 @@
+`deshrink` <- function(env, wa.env,
+                       type = c("inverse", "classical",
+                       "expanded", "none")) {
+### Inline Functions ############################################
+    ## inverse deshrinking
+    `inverse` <- function(env, wa.env) {
+        X <- cbind(rep(1, length(wa.env)), wa.env)
+        QR <- qr(X)
+        coef <- qr.coef(QR, env)
+        pred <- qr.fitted(QR, env)
+        return(list(coefficients = coef, env = pred))
+    }
+    ## classical deshrinking
+    `classical` <- function(env, wa.env) {
+        X <- cbind(rep(1, length(env)), env)
+        QR <- qr(X)
+        coef <- drop(qr.coef(QR, wa.env))
+        ##coef <- c(-coef[1], 1)/coef[2]
+        ##pred <- deshrink.pred(wa.env, coef)
+        pred <- (wa.env - coef[1]) / coef[2]
+        return(list(coefficients = coef, env = pred))
+    }
+    ## deshrink to equal SD
+    ## A bit like in vegan:::wascores, but wascores uses
+    ## weighted sd which would need row and column sums in the
+    ## function call, and this would make the function API
+    ## incompatible with other *.deshrink functions.
+    `expanded` <- function(env, wa.env) {
+        b1 <- sd(env)/sd(wa.env)
+        b0 <- mean(env) - b1 * mean(wa.env)
+        pred <- b0 + b1 * wa.env
+        return(list(coefficients = c(b0, b1), env = pred))
+    }
+    ## No deshrinking
+    ## Do not deshrink: for those who think they know  what they
+    ## are doing
+    `none` <- function(env, wa.env) {
+        return(list(coefficients = c(0, 1), env = wa.env))
+    }
+### End Inline Functions #########################################
+    if(missing(type))
+        type <- "inverse"
+    type <- match.arg(type)
+    res <- switch(type,
+                  inverse = inverse(env, wa.env),
+                  classical = classical(env, wa.env),
+                  expanded = expanded(env, wa.env),
+                  none = none(env, wa.env))
+    class(res) <- c("deshrink","list")
+    attr(res, "type") <- type
+    return(res)
+}

Added: pkg/R/deshrinkPred.R
===================================================================
--- pkg/R/deshrinkPred.R	                        (rev 0)
+++ pkg/R/deshrinkPred.R	2009-06-09 21:17:01 UTC (rev 136)
@@ -0,0 +1,13 @@
+`deshrinkPred` <- function(x, coef,
+                           type = c("inverse", "classical",
+                           "expanded", "none")) {
+    if(missing(type))
+        type <- "inverse"
+    type <- match.arg(type)
+    res <- switch(type,
+                  inverse = coef[1] + (coef[2] * x),
+                  classical = (x - coef[1]) / coef[2],
+                  expanded = coef[1] + (coef[2] * x),
+                  none = coef[1] + (coef[2] * x))
+    return(res)
+}

Modified: pkg/R/internal.R
===================================================================
--- pkg/R/internal.R	2009-06-08 22:11:35 UTC (rev 135)
+++ pkg/R/internal.R	2009-06-09 21:17:01 UTC (rev 136)
@@ -167,39 +167,39 @@
 }
 
 ## inverse deshrinking function
-`inv.deshrink` <- function(env, wa.env) {
-    X <- cbind(rep(1, length(wa.env)), wa.env)
-    QR <- qr(X)
-    coef <- qr.coef(QR, env)
-    pred <- qr.fitted(QR, env)
-    return(list(coefficients = coef, env = pred))
-}
+## `inv.deshrink` <- function(env, wa.env) {
+##     X <- cbind(rep(1, length(wa.env)), wa.env)
+##     QR <- qr(X)
+##     coef <- qr.coef(QR, env)
+##     pred <- qr.fitted(QR, env)
+##     return(list(coefficients = coef, env = pred))
+## }
 
 ## classical deshrinking
-`class.deshrink` <- function(env, wa.env) {
-    X <- cbind(rep(1, length(env)), env)
-    QR <- qr(X)
-    coef <- drop(qr.coef(QR, wa.env))
-    coef <- c(-coef[1], 1)/coef[2]
-    pred <- deshrink.pred(wa.env, coef)
-    return(list(coefficients = coef, env = pred))
-}
+## `class.deshrink` <- function(env, wa.env) {
+##     X <- cbind(rep(1, length(env)), env)
+##     QR <- qr(X)
+##     coef <- drop(qr.coef(QR, wa.env))
+##     coef <- c(-coef[1], 1)/coef[2]
+##     pred <- deshrink.pred(wa.env, coef)
+##     return(list(coefficients = coef, env = pred))
+## }
 
 ## deshrinking to equal sd
 ## A bit like in vegan:::wascores, but wascores uses weighted sd which
 ## would need row and column sums in the function call, and this would
 ## make the function API incompatible with other *.deshrink functions.
-`expand.deshrink` <- function(env, wa.env) {
-    b1 <- sd(env)/sd(wa.env)
-    b0 <- mean(env) - b1 * mean(wa.env)
-    pred <- b0 + b1 * wa.env
-    return(list(coefficients = c(b0, b1), env = pred))
-}
+## `expand.deshrink` <- function(env, wa.env) {
+##     b1 <- sd(env)/sd(wa.env)
+##     b0 <- mean(env) - b1 * mean(wa.env)
+##     pred <- b0 + b1 * wa.env
+##     return(list(coefficients = c(b0, b1), env = pred))
+## }
 
 # Do not deshrink: for those who think they know what they do
-`no.deshrink` <- function(env, wa.env) {
-    return(list(coefficients = c(0, 1), env = wa.env))
-}
+## `no.deshrink` <- function(env, wa.env) {
+##     return(list(coefficients = c(0, 1), env = wa.env))
+## }
 
 ## fast rowSums and colSums functiosn without the checking
 `RowSums` <- function(x, na.rm = FALSE) {
@@ -217,9 +217,9 @@
 }
 
 ## deshrinking function given deshrinking coefs and a method
-`deshrink.pred` <- function(x, coef) {
-    coef[1] + x * coef[2]
-}
+##`deshrink.pred` <- function(x, coef) {
+##    coef[1] + x * coef[2]
+##}
 
 
 ## w.tol --- computes weighted standard deviations AKA tolerances

Modified: pkg/R/predict.wa.R
===================================================================
--- pkg/R/predict.wa.R	2009-06-08 22:11:35 UTC (rev 135)
+++ pkg/R/predict.wa.R	2009-06-09 21:17:01 UTC (rev 136)
@@ -15,12 +15,7 @@
     n.fossil <- nrow(newdata)
     n.in.train <- sum(colnames(newdata) %in%
                       names(object$wa.optima))
-    deshrink <- object$deshrink
-    deshrink.fun <- switch(deshrink,
-                           inverse = inv.deshrink,
-                           classical = class.deshrink,
-                           expanded = expand.deshrink,
-                           none = no.deshrink)
+    Dtype <- object$deshrink
     X <- object$orig.x
     ENV <- object$orig.env
     ## tolerance options from model
@@ -38,7 +33,7 @@
         } else {
             pred <- WApred(newdata[,want], object$wa.optima[want])
         }
-        pred <- deshrink.pred(pred, coef(object))
+        pred <- deshrinkPred(pred, coef(object))
     } else {
         ## CV wanted
         if(identical(CV, "LOO")) {
@@ -75,11 +70,11 @@
                     mod.pred[i] <- WApred(X[i,,drop=FALSE],
                                           wa.optima)
                 }
-                deshrink.mod <- deshrink.fun(ENV[-i], wa.env)
+                deshrink.mod <- deshrink(ENV[-i], wa.env, Dtype)
                 wa.env <- deshrink.mod$env
                 coefs <- coef(deshrink.mod)
                 ## LOO model predictions
-                mod.pred[i] <- deshrink.pred(mod.pred[i], coefs)
+                mod.pred[i] <- deshrinkPred(mod.pred[i], coefs)
                 ## newdata predictions
                 pred <- if(object$tol.dw) {
                     WATpred(newdata[,want], wa.optima[want],
@@ -87,7 +82,7 @@
                 } else {
                     WApred(newdata[,want], wa.optima[want])
                 }
-                loo.pred[,i] <- deshrink.pred(pred, coefs)
+                loo.pred[,i] <- deshrinkPred(pred, coefs)
             }
             ## average the LOO predictions
             pred <- rowMeans(loo.pred)
@@ -110,14 +105,14 @@
                 wa.optima[miss] <- 0
                 rowsum <- X[sel,] %*% ones
                 wa.env <- (X[sel,] %*% wa.optima) / rowsum
-                deshrink.mod <- deshrink.fun(ENV[sel], wa.env)
+                deshrink.mod <- deshrink(ENV[sel], wa.env, Dtype)
                 wa.env <- deshrink.mod$env
                 coefs <- coef(deshrink.mod) #$coef
                 ## if we want sample specific errors or
                 ## model performance stats
                 rowsum <- X[-sel,] %*% ones
                 pred <- (X[-sel,] %*% wa.optima) / rowsum
-                oob.pred[-sel,i] <- deshrink.pred(pred, coefs)
+                oob.pred[-sel,i] <- deshrinkPred(pred, coefs)
                 ## do the prediction step
                 want <- names(wa.optima) %in% colnames(newdata)
                 want <- names(wa.optima)[want]
@@ -127,7 +122,7 @@
                 rowsum <- newdata[,want] %*% ones
                 pred <- (newdata[,want] %*% wa.optima[want]) /
                     rowsum
-                boot.pred[,i] <- deshrink.pred(pred, coefs)
+                boot.pred[,i] <- deshrinkPred(pred, coefs)
             }
             pred <- rowMeans(boot.pred)
         } else if (identical(CV, "nfold")) {
@@ -151,14 +146,15 @@
                     wa.optima[miss] <- 0
                     rowsum <- X[sel,] %*% ones
                     wa.env <- (X[sel,] %*% wa.optima) / rowsum
-                    deshrink.mod <- deshrink.fun(ENV[sel], wa.env)
+                    deshrink.mod <- deshrink(ENV[sel], wa.env,
+                                             Dtype)
                     wa.env <- deshrink.mod$env
                     coefs <- coef(deshrink.mod) #$coef
                     ## if we want sample specific errors or
                     ## model performance stats
                     rowsum <- X[!sel,] %*% ones
                     pred <- (X[!sel,] %*% wa.optima) / rowsum
-                    oob.pred[!sel,i] <- deshrink.pred(pred, coefs)
+                    oob.pred[!sel,i] <- deshrinkPred(pred, coefs)
                     ## do the prediction step
                     want <- names(wa.optima) %in% colnames(newdata)
                     want <- names(wa.optima)[want]
@@ -168,7 +164,7 @@
                     rowsum <- newdata[,want] %*% ones
                     pred <- (newdata[,want] %*% wa.optima[want]) /
                         rowsum
-                    boot.pred[,i] <- deshrink.pred(pred, coefs)
+                    boot.pred[,i] <- deshrinkPred(pred, coefs)
                 }
             }
             pred <- rowMeans(boot.pred)
@@ -246,21 +242,10 @@
 }
 
 WATpred <- function(X, optima, tol, nr, nc) {
-    #ones <- rep.int(1, length(optima))
     miss <- is.na(optima)
-    #ones[miss] <- 0
     optima[miss] <- 0
     tol[miss] <- 1
     tol2 <- tol^2
-    #tmp <- sweep(X, 2, optima, "*", check.margin = FALSE)
-    ##tmp <- RowSums(t(t(X) * optima / tol2))
-    #tmp <- RowSums(sweep(tmp, 2, tol2, "/",
-    #                     check.margin = FALSE))
-    #tmp / RowSums(sweep(X, 2, tol2, "/",
-    #                    check.margin = FALSE)[,!miss, drop = FALSE])
-    #tmp / (sweep(X, 2, tol2, "/",check.margin = FALSE) %*% ones)
-    ##tmp / (t(t(X) / tol2) %*% ones)
-    #RowSums(t(t(X) * optima / tol2)) / (t(t(X) / tol2) %*% ones)
     res <- .C("WATpred", spp = as.double(X), opt = as.double(optima),
               tol2 = as.double(tol2), nr = as.integer(nr),
               nc = as.integer(nc), want = as.integer(miss),

Modified: pkg/R/wa.R
===================================================================
--- pkg/R/wa.R	2009-06-08 22:11:35 UTC (rev 135)
+++ pkg/R/wa.R	2009-06-09 21:17:01 UTC (rev 136)
@@ -46,11 +46,7 @@
         wa.env <- WApred(x, wa.optima)
     }
     ## taken averages twice so deshrink
-    expanded <- switch(deshrink,
-                       inverse = inv.deshrink(env, wa.env),
-                       classical = class.deshrink(env, wa.env),
-                       expanded = expand.deshrink(env, wa.env),
-                       none = no.deshrink(env, wa.env))
+    expanded <- deshrink(env, wa.env, type = deshrink)
     wa.env <- expanded$env
     coefficients <- coef(expanded)
     ## site/sample names need to be reapplied

Modified: pkg/inst/ChangeLog
===================================================================
--- pkg/inst/ChangeLog	2009-06-08 22:11:35 UTC (rev 135)
+++ pkg/inst/ChangeLog	2009-06-09 21:17:01 UTC (rev 136)
@@ -1,5 +1,12 @@
 analogue Change Log
 
+Version 0.6-12
+
+	* deshrink, deshrinkPred; New utility functions for deshrinking WA
+	estimates. These replace the '*.deshrink' and 'deshrink.pred' internal
+	functions used to this end to date. This provides a more extensible
+	solution.
+
 Version 0.6-11
 
 	* tran: now converts input data to matrix using 'data.matrix' which

Modified: pkg/man/analogue-internal.Rd
===================================================================
--- pkg/man/analogue-internal.Rd	2009-06-08 22:11:35 UTC (rev 135)
+++ pkg/man/analogue-internal.Rd	2009-06-09 21:17:01 UTC (rev 136)
@@ -6,10 +6,10 @@
 \alias{.simpleCap}
 \alias{wmean}
 \alias{w.avg}
-\alias{inv.deshrink}
-\alias{class.deshrink}
-\alias{expand.deshrink}
-\alias{no.deshrink}
+%\alias{inv.deshrink}
+%\alias{class.deshrink}
+%\alias{expand.deshrink}
+%\alias{no.deshrink}
 \alias{RowSums}
 \alias{ColSums}
 \alias{deshrink.pred}

Added: pkg/man/deshrink.Rd
===================================================================
--- pkg/man/deshrink.Rd	                        (rev 0)
+++ pkg/man/deshrink.Rd	2009-06-09 21:17:01 UTC (rev 136)
@@ -0,0 +1,70 @@
+\name{deshrink}
+\Rdversion{1.1}
+\alias{deshrink}
+\alias{deshrinkPred}
+\title{
+Deshrinking techniques for WA transfer functions
+}
+\description{
+In Weighted Averaging models averages are taken twice and thus WA
+estimates shrink towards the training set mean and need to be
+deshrunk.\code{deshrink} performs this deshrinking using several
+techniques, whilst \code{deshrinkPred} will deshrink WA estimates for
+new samples given a set of deshrinking coefficients.
+}
+\usage{
+deshrink(env, wa.env, type = c("inverse", "classical",
+                               "expanded", "none"))
+
+deshrinkPred(x, coef, type = c("inverse", "classical",
+                               "expanded", "none"))
+}
+\arguments{
+  \item{env}{numeric; original environmental values.}
+  \item{wa.env}{numeric; initial weighted average estimates.}
+  \item{type}{character; the type of deshrinking. One of
+    \code{"inverse"}, \code{"classical"}, \code{"expand"},
+    \code{"none"}.}
+  \item{x}{numeric; estimates to be deshrunk.}
+  \item{coef}{numeric; deshrinking coefficients to use. Currently needs
+    to be a vector of length 2. These should be supplied in the order
+    \eqn{\beta_0,\beta_1}{beta[0], beta[1]}.}
+}
+%%\details{
+%% FIX ME - give details on the approach used
+%%}
+\value{
+  For \code{deshrinkPred} a numeric vector of deshrunk estimates.
+  
+  For an object of class \code{"deshrink"}, inheriting from class
+  \code{"list"}, with two components. The type of deshrinking performed
+  is stroed within attribute \code{"type"}. The componets of the
+  returned object are:
+  \item{coefficients}{The deshrinking coefficients used.}
+  \item{env}{The deshrunk WA estimates.}
+}
+\references{
+  Birks, H.J.B. (1995) Quantitative environmental reconstructions. In
+  \emph{Statistical modelling of Quaternary science data} (eds.~D.Maddy
+  \& J.S. Brew). Quaternary Research Association technical guide
+  5. Quaternary Research Association, Cambridge.
+}
+\author{
+  Gavin L. Simpson \& Jari Oksanen
+}
+%%\note{
+%%  ~~further notes~~
+%}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+\section{Warning }{\code{deshrinkPred}, does not currently check that
+  the correct coefficients have been supplied in the correct order.}
+
+\seealso{
+\code{\link{wa}}
+}
+%\examples{
+%}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{utilities}