[Analogue-commits] r405 - in pkg: . R inst man tests/Examples

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Feb 13 23:26:44 CET 2014


Author: gsimpson
Date: 2014-02-13 23:26:44 +0100 (Thu, 13 Feb 2014)
New Revision: 405

Modified:
   pkg/DESCRIPTION
   pkg/R/crossval.pcr.R
   pkg/R/pcr.R
   pkg/inst/ChangeLog
   pkg/man/crossval.Rd
   pkg/tests/Examples/analogue-Ex.Rout.save
Log:
fix several bugs in crossval method for pcr() fits

Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION	2014-02-11 20:04:31 UTC (rev 404)
+++ pkg/DESCRIPTION	2014-02-13 22:26:44 UTC (rev 405)
@@ -1,7 +1,7 @@
 Package: analogue
 Type: Package
 Title: Analogue and weighted averaging methods for palaeoecology
-Version: 0.13-3
+Version: 0.13-4
 Date: $Date$
 Depends: R (>= 2.15.0), vegan (>= 1.17-12), lattice, rgl
 Imports: mgcv, MASS, stats, graphics, grid, brglm, princurve

Modified: pkg/R/crossval.pcr.R
===================================================================
--- pkg/R/crossval.pcr.R	2014-02-11 20:04:31 UTC (rev 404)
+++ pkg/R/crossval.pcr.R	2014-02-13 22:26:44 UTC (rev 405)
@@ -42,7 +42,8 @@
             TRAN <- obj$tranFun(x[-i, , drop = FALSE])
             X <- TRAN$data
             ## apply transformation to X[i, ], using parms from above
-            Xi <- obj$tranFun(x[i, , drop = FALSE], apply = TRUE, parms = TRAN$parms)$data
+            Xi <- obj$tranFun(x[i, , drop = FALSE], apply = TRUE,
+                              parms = TRAN$parms)$data
             ## centre the training data
             Xbar <- colMeans(X)
             ybar <- mean(y[-i])
@@ -166,36 +167,45 @@
     ## fitted values and derived stats
     if(identical(method, "none")) {
         fitted <- pred
-    } else if(method %in% c("","")) {
-        rowMeans(pred, na.rm = TRUE, dims = 2)
+    } else if(method %in% c("kfold","bootstrap")) {
+        fitted <- rowMeans(pred, na.rm = TRUE, dims = 2)
     } else {
         fitted <- rowMeans(pred)
     }
     residuals <- y - fitted                               ## residuals
     maxBias <- apply(residuals, 2, maxBias, y, n = 10)    ## maxBias
     avgBias <- colMeans(residuals)                        ## avgBias
-    r2 <- apply(fitted, cor, y)
+    r2 <- apply(fitted, 2, cor, y)
     ## s1 & s2 components for model and training set
     ns <- rowSums(!is.na(pred), dims = 2)
-    s1.train <- t(sqrt(rowSums((pred - as.vector(fitted))^2,
-                               na.rm = TRUE, dims = 2) / as.vector(ns - 1)))
+    s1.train <- sqrt(rowSums((pred - as.vector(fitted))^2,
+                             na.rm = TRUE, dims = 2) / as.vector(ns - 1))
     s1 <- sqrt(colMeans(s1.train^2))
     s2 <- sqrt(colMeans(residuals^2, na.rm = TRUE))
-    s2.train <- sweep(pred, c(2,1), y, "-")
-    s2.train <- sqrt(t(rowMeans(s2.train^2, na.rm = TRUE, dims = 2)))
+    ## s2.train <- sweep(pred, 1, y, "-")
+    ## s2.train <- sqrt(rowMeans(s2.train^2, na.rm = TRUE, dims = 2))
 
     ## RMSEP
-    rmsep.train <- sqrt(s1.train^2 + s2.train^2)
-    rmsep <- sqrt(s1^2 + s2^2)
-    rmsep2 <- sqrt(mean(residuals^2))
-    performance <- data.frame(comp = ncomp,
+    ## rmsep.train <- sqrt(s1.train^2 + s2.train^2)
+    rmsep2 <- sqrt(s1^2 + s2^2)
+    rmsep <- sqrt(colMeans(residuals^2, na.rm = TRUE))
+    fill <- rep(NA, ncomp)
+    performance <- data.frame(comp = seq_len(ncomp),
                               R2 = r2,
                               avgBias = avgBias,
                               maxBias = maxBias,
                               RMSEP = rmsep,
-                              RMSEP2 = rmsep2,
-                              s1 = s1,
-                              s2 = s2)
+                              RMSEP2 = fill,
+                              s1 = fill,
+                              s2 = fill)
+    ## add in the bits we can only do if bootstrapping or multiple fold
+    ## k-fold CV
+    if(identical(method, "bootstrap") ||
+       (identical(method, "kfold") && folds > 1)) {
+        performance$s1 <- s1
+        performance$s2 <- s2
+        performance$RMSEP2 <- rmsep2
+    }
 
     ## more additions to the call
     .call <- match.call()
@@ -204,9 +214,9 @@
     ## return object
     out <- list(fitted.values = fitted,
                 residuals = residuals,
-                rmsep = rmsep.train,
-                s1 = s1.train,
-                s2 = s2.train,
+                ##rmsep = rmsep.train, ## technically not in crossval.wa
+                ##s1 = s1.train,       ## so shoould they be in here?
+                ##s2 = s2.train,       ## need to formalise the crossval class
                 performance = performance,
                 call = .call,
                 CVparams = list(method = method, nboot = nboot,

Modified: pkg/R/pcr.R
===================================================================
--- pkg/R/pcr.R	2014-02-11 20:04:31 UTC (rev 404)
+++ pkg/R/pcr.R	2014-02-13 22:26:44 UTC (rev 405)
@@ -41,12 +41,12 @@
     y <- y - yMean
 
     ## How many components?
-    ncomp <- if(missing(ncomp)) {
-        min(Nx - 1, Mx)
+    if(missing(ncomp)) {
+        ncomp <- min(Nx - 1, Mx)
     } else {
         if(ncomp < 1 || ncomp > (newcomp <- min(Nx - 1, Mx))) {
             warning("Invalid 'ncomp'. Resetting to max possible.")
-            newcomp
+            ncomp <- newcomp
         }
     }
 

Modified: pkg/inst/ChangeLog
===================================================================
--- pkg/inst/ChangeLog	2014-02-11 20:04:31 UTC (rev 404)
+++ pkg/inst/ChangeLog	2014-02-13 22:26:44 UTC (rev 405)
@@ -1,5 +1,10 @@
 analogue Change Log
 
+Version 0.13-4 13 Feb 2014
+
+	* crossval.pcr: Fixed a number of bugs in the method for PCR
+	related to k-fold cross validation, which were causing errors.
+
 Version 0.13-3 Opened 11 Feb 2014
 
 	* timetrack: A number of additions added and improvements made:

Modified: pkg/man/crossval.Rd
===================================================================
--- pkg/man/crossval.Rd	2014-02-11 20:04:31 UTC (rev 404)
+++ pkg/man/crossval.Rd	2014-02-13 22:26:44 UTC (rev 405)
@@ -1,9 +1,10 @@
 \name{crossval}
 \alias{crossval}
 \alias{crossval.wa}
+\alias{crossval.pcr}
 \alias{print.crossval}
-\alias{predWA}
-\alias{predWAT}
+%\alias{predWA}
+%\alias{predWAT}
 \title{Cross-validation of palaeoecological transfer function models}
 \description{
   Performs leave-one-out, \emph{k}-fold, \emph{n} \emph{k}-fold and
@@ -16,11 +17,17 @@
          nboot = 100, nfold = 10, folds = 5,
          verbose = getOption("verbose"), ...)
 
+\method{crossval}{pcr}(obj, method = c("LOO","kfold","bootstrap"),
+         ncomp, nboot = 100, nfold = 10, folds = 5,
+         verbose = getOption("verbose"), ...)
+
 }
 \arguments{
   \item{obj}{A fitted transfer function model. Currently, only objects
-    of class \code{"wa"} are supported.}
+    of class \code{\link{wa}} and \code{\link{pcr}} are supported.}
   \item{method}{character; type of cross-validation.}
+  \item{ncomp}{numeric; number of components to fit, as in models with
+    \code{1:ncomp} components.}
   \item{nboot}{numeric; number of bootstrap samples.}
   \item{nfold}{numeric; number of chunks into which the training data
     are split. The \emph{k} in \emph{k}-fold.}
@@ -66,11 +73,11 @@
 cv.loo
 
 ## k-fold CV (k == 10)
-cv.kfold <- crossval(mod, kfold = 10, folds = 1, method = "kfold")
+cv.kfold <- crossval(mod, method = "kfold", kfold = 10, folds = 1)
 cv.kfold
 
 ## n k-fold CV (k == 10, n = 10)
-cv.nkfold <- crossval(mod, kfold = 10, folds = 10, method = "kfold")
+cv.nkfold <- crossval(mod, method = "kfold", kfold = 10, folds = 10)
 cv.nkfold
 
 ## bootstrap with 250 bootstrap samples
@@ -81,5 +88,10 @@
 fitted(cv.boot)
 resid(cv.boot)
 
+## Principal Components Regression
+mpcr <- pcr(SumSST ~., data = ImbrieKipp, ncomp = 10)
+crossval(mpcr, method = "kfold", kfold = 10, folds = 2, ncomp = 10)
+
+crossval(mpcr, method = "bootstrap", nboot = 250, ncomp = 10)
 }
 \keyword{methods}

Modified: pkg/tests/Examples/analogue-Ex.Rout.save
===================================================================
--- pkg/tests/Examples/analogue-Ex.Rout.save	2014-02-11 20:04:31 UTC (rev 404)
+++ pkg/tests/Examples/analogue-Ex.Rout.save	2014-02-13 22:26:44 UTC (rev 405)
@@ -1758,7 +1758,7 @@
 > 
 > ### Name: crossval
 > ### Title: Cross-validation of palaeoecological transfer function models
-> ### Aliases: crossval crossval.wa print.crossval predWA predWAT
+> ### Aliases: crossval crossval.wa crossval.pcr print.crossval
 > ### Keywords: methods
 > 
 > ### ** Examples
@@ -1800,7 +1800,7 @@
 1 0.90028 -0.013652 -4.5985 2.2179     NA NA NA
 > 
 > ## k-fold CV (k == 10)
-> cv.kfold <- crossval(mod, kfold = 10, folds = 1, method = "kfold")
+> cv.kfold <- crossval(mod, method = "kfold", kfold = 10, folds = 1)
 > cv.kfold
 	Model Cross-validation:
 
@@ -1814,7 +1814,7 @@
 1 0.90107 0.0070881 -4.4826 2.2102     NA NA NA
 > 
 > ## n k-fold CV (k == 10, n = 10)
-> cv.nkfold <- crossval(mod, kfold = 10, folds = 10, method = "kfold")
+> cv.nkfold <- crossval(mod, method = "kfold", kfold = 10, folds = 10)
 > cv.nkfold
 	Model Cross-validation:
 
@@ -1864,6 +1864,29 @@
 [55]  2.06405926  1.47563233  0.88025973  0.30226739  0.22736763  0.07008432
 [61]  1.18109032
 > 
+> ## Principal Components Regression
+> mpcr <- pcr(SumSST ~., data = ImbrieKipp, ncomp = 10)
+> crossval(mpcr, method = "kfold", kfold = 10, folds = 2, ncomp = 10)
+	Model Cross-validation:
+
+crossval(obj = mpcr, method = "kfold", ncomp = 10, folds = 2, 
+    kfold = 10)
+
+Method: kfold
+k: 10
+No. of folds: 2
+
+   comp      R2   avgBias maxBias  RMSEP RMSEP2      s1     s2
+1     1 0.93102 -0.082069 -6.0751 2.5707 2.6306 0.55799 2.5707
+2     2 0.95154 -0.034190 -5.3449 2.1593 2.1665 0.17708 2.1593
+3     3 0.95452 -0.071033 -4.7328 2.0950 2.1092 0.24471 2.0950
+4     4 0.95506 -0.054893 -4.7586 2.0822 2.0957 0.23782 2.0822
+5     5 0.95565 -0.054278 -5.1491 2.0684 2.0891 0.29338 2.0684
+6     6 0.95568 -0.088115 -5.1732 2.0689 2.0916 0.30691 2.0689
+7     7 0.95473 -0.045325 -4.8739 2.0887 2.1143 0.32838 2.0887
+8     8 0.95818 -0.039048 -5.0150 2.0097 2.0350 0.31945 2.0097
+9     9 0.95742 -0.083771 -5.1489 2.0288 2.0585 0.34841 2.0288
+10   10 0.95784 -0.055376 -4.9774 2.0176 2.0499 0.36230 2.0176
 > 
 > 
 > 
@@ -7807,7 +7830,7 @@
 > ###
 > options(digits = 7L)
 > base::cat("Time elapsed: ", proc.time() - base::get("ptime", pos = 'CheckExEnv'),"\n")
-Time elapsed:  17.35 0.299 17.92 0.001 0.002 
+Time elapsed:  17.431 0.285 17.846 0 0.003 
 > grDevices::dev.off()
 null device 
           1 



More information about the Analogue-commits mailing list