[Analogue-commits] r342 - in pkg: . R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sat Jul 20 22:30:29 CEST 2013


Author: gsimpson
Date: 2013-07-20 22:30:28 +0200 (Sat, 20 Jul 2013)
New Revision: 342

Added:
   pkg/R/smoothGAM.R
   pkg/man/smoothFuns.Rd
Modified:
   pkg/NAMESPACE
   pkg/R/prcurve.R
   pkg/man/prcurve.Rd
Log:
add smoothGAM as a plugin for prcurve; rearrange documentation for plugins separate from main prcurve function

Modified: pkg/NAMESPACE
===================================================================
--- pkg/NAMESPACE	2013-07-20 20:28:38 UTC (rev 341)
+++ pkg/NAMESPACE	2013-07-20 20:30:28 UTC (rev 342)
@@ -78,6 +78,7 @@
        rankDC,
        roc,
        smoothSpline,
+       smoothGAM,
        sppResponse,
        Stratiplot,
        stdError,

Modified: pkg/R/prcurve.R
===================================================================
--- pkg/R/prcurve.R	2013-07-20 20:28:38 UTC (rev 341)
+++ pkg/R/prcurve.R	2013-07-20 20:30:28 UTC (rev 342)
@@ -49,11 +49,24 @@
     ## Vary degrees of freedom per variable?
     if(missing(complexity)) {
         complexity <- numeric(length = m)
+        if (trace){ ## set up progress bar
+            writeLines("\n   Determining initial DFs for each variable...")
+            pb <- txtProgressBar(max = m, style = 3)
+        }
         for(j in seq_along(complexity)) {
-            complexity[j] <- smoother(config$lambda, X[, j],
-                                      choose = TRUE, ...)$complexity
+            if(trace) { ## update progress
+                setTxtProgressBar(pb, j)
+            }
+            ## fit the mode & grab DF
+            complexity[j] <-
+                smoother(config$lambda, X[, j], choose = TRUE, ...)$complexity
         }
-        if(!vary) {
+        if (trace) { ## finalise the progress bar
+            close(pb)
+            writeLines("\n")
+        }
+
+        if(!vary) { ## median complexity for all vars
             complexity <- rep(median(complexity), m)
         }
     } else {
@@ -70,10 +83,21 @@
     ##
     iter <- 0L
     if(trace) {
-        writeLines(strwrap(tmp <- paste(rep("-", options("width")[[1]]),
-                                        collapse = "")))
-        writeLines(sprintf("Initial curve: d.sq: %.4f", config$dist))
+        ##writeLines(strwrap(tmp <- paste(rep("-", options("width")[[1]]),
+        ##                                collapse = "")))
+        writeLines("Fitting Principal Curve:\n")
+        writeLines(sprintf("Initial curve: d.sq: %.3f", config$dist))
     }
+
+    ## vary == FALSE needs to set some things for smoothers like GAM
+    ## which will select smoothness even if complexity stated
+    smooths <- c("smoothGAM")
+    if(!vary && (deparse(substitute(smoother)) %in% smooths)) {
+        CHOOSE <- TRUE
+    } else {
+        CHOOSE <- FALSE
+    }
+
     ##dist.raw <- sum(diag(var(X))) * (NROW(X) - 1)
     dist.old <- sum(diag(var(X)))
     s <- matrix(NA, nrow = n, ncol = m)
@@ -87,7 +111,7 @@
         for(j in seq_len(m)) {
             smooths[[j]] <- smoother(config$lambda, X[, j],
                                      complexity = complexity[j],
-                                     choose = FALSE, ...)
+                                     choose = CHOOSE, ...)
             s[, j] <- fitted(smooths[[j]])
         }
         ##
@@ -108,7 +132,7 @@
         if (trace)
             writeLines(sprintf(paste("Iteration %",
                                      max(3, nchar(maxit)),
-                                     "i: d.sq: %.4f", sep = ""),
+                                     "i: d.sq: %.3f", sep = ""),
                                iter, config$dist))
     }
     ## End iterations ------------------------------------------------
@@ -149,14 +173,14 @@
                                "CV", config$dist))
     }
     if(trace){
-        writeLines(strwrap(tmp))
+        cat("\n")
         if(converged) {
             writeLines(strwrap(paste("PC Converged in", iter, "iterations.")))
         } else {
             writeLines(strwrap(paste("PC did not converge after", iter,
                                      "iterations.")))
         }
-        writeLines(strwrap(tmp))
+        cat("\n")
     }
     names(config$tag) <- names(config$lambda) <-
         rownames(config$s) <- rownames(X)

Added: pkg/R/smoothGAM.R
===================================================================
--- pkg/R/smoothGAM.R	                        (rev 0)
+++ pkg/R/smoothGAM.R	2013-07-20 20:30:28 UTC (rev 342)
@@ -0,0 +1,30 @@
+## smoothGAM: smoother function supplied to prcurve
+## wrapper to mcgv
+
+`smoothGAM` <- function(lambda, x, choose = TRUE, complexity,
+                        bs = "tp", ...,
+                        family = gaussian(),
+                        method = "REML",
+                        select = FALSE,
+                        control = list()) {
+    ## complexity is the 'k' argument -
+    ## choose selects whether to use fixed complexity or allow
+    ## underlying fitting function to return complexity
+    ord <- order(lambda)
+    lambda <- lambda[ord]
+    x <- x[ord]
+    if(!missing(complexity)) {
+        complexity <- round(complexity) ## move this out of smoothGAM
+    } else {
+        complexity <- -1
+    }
+    f <- gam(x ~ s(lambda, k = complexity, fx = choose, bs = bs),
+             family = family, method = method, select = select,
+             control = control, ...)
+    p <- predict(f, x = lambda, type = "response")
+    edf <- sum(f$edf[f$smooth[[1]]$first.para:f$smooth[[1]]$last.para]) + 1
+    res <- list(lambda = lambda, x = x, fitted.values = p,
+                complexity = edf, model = f)
+    class(res) <- "prcurveSmoother"
+    res
+}

Modified: pkg/man/prcurve.Rd
===================================================================
--- pkg/man/prcurve.Rd	2013-07-20 20:28:38 UTC (rev 341)
+++ pkg/man/prcurve.Rd	2013-07-20 20:30:28 UTC (rev 342)
@@ -1,7 +1,6 @@
 \name{prcurve}
 \alias{prcurve}
 \alias{initCurve}
-\alias{smoothSpline}
 \alias{print.prcurve}
 
 \title{
@@ -21,10 +20,6 @@
 
 initCurve(X, method = c("ca", "pca", "random", "user"), rank = FALSE,
           axis = 1, start)
-
-smoothSpline(lambda, x, choose = TRUE, complexity, ...,
-             penalty = 1, cv = FALSE, keep.data = FALSE,
-             control.spar = list(low = 0))
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
@@ -39,11 +34,13 @@
   \item{start}{numeric vector specifying the initial curve when
     \code{method = "user"}. Must be of length \code{nrow(X)}.}
   \item{smoother}{function; the choice of smoother used to fit the
-    principal curve. Currently, the only option is \code{smoothSpline}
-    which is a wrapper to \code{\link{smooth.spline}}.}
+    principal curve. Currently, the only options are
+    \code{smoothSpline}, which is a wrapper to
+    \code{\link{smooth.spline}}, and \code{smoothGAM}, which is a
+    wrapper to  \code{\link[mgcv]{gam}}.}
   \item{complexity}{numeric; the complexity of the fitted smooth
     functions.
-
+    
     The function passed as argument \code{smoother} should arrange for
     this argument to be passed on to relevant aspect of the underlying
     smoother. In the case of \code{smoothSpline}, complexity is the
@@ -73,26 +70,8 @@
   \item{plotit}{logical; should the fitting process be plotted? If
     \code{TRUE}, then the fitted principal curve and observations in
     \code{X} are plotted in principal component space.}
-  \item{\dots}{arguments passed on to lower functions. In the case of
-    \code{prcurve}, these additional arguments are passed solely on to
-    the function \code{smoother}.
-
-    In \code{smoothSpline}, \dots is passed on the the underlying
-    function \code{\link{smooth.spline}} and users should read that
-    function's help page for further details.
-  }
-  \item{lambda}{the current projection function; the position that each
-    sample projects to on the current principal curve. This is the
-    predictor variable or covariate in the smooth function.}
-  \item{x}{numeric vector; a column from \code{X} used as the response
-    variable in the smooth function. The principal curve algorithm fits
-    a separate scatterplot smoother (or similar smoother) to each
-    variable in \code{X} in turn as the response.}
-  \item{choose}{logical; should the underlying smoother function be
-    allowed to choose the degree of smooth complexity for each variable
-    in \code{X}?}
-  \item{penalty, cv, keep.data, control.spar}{arguments to
-    \code{\link{smooth.spline}}.}
+  \item{\dots}{additional arguments are passed solely on to the function
+    \code{smoother}.}
 }
 %\details{
 %TODO
@@ -128,24 +107,17 @@
 
 %% ~Make other sections like Warning with \section{Warning }{....} ~
 
-%\seealso{
-%% ~~objects to See Also as \code{\link{help}}, ~~~
-%}
+\seealso{
+  \code{\link{smoothGAM}} and \code{\link{smoothSpline}} for the
+  wrappers fitting smooth functions to each variable.
+}
 \examples{
+## Load Abernethy Forest data set
 data(abernethy)
 
-## Plot the most common taxa
-Stratiplot(Age ~ . - Depth, data =
-           chooseTaxa(abernethy, max.abun = 15, n.occ = 10),
-           type = c("g","poly"), sort = "wa")
-
 ## Remove the Depth and Age variables
 abernethy2 <- abernethy[, -(37:38)]
 
-## Fit PCA and CA
-aber.pca <- rda(abernethy2)
-aber.ca <- cca(abernethy2)
-
 ## Fit the principal curve using the median complexity over
 ## all species
 aber.pc <- prcurve(abernethy2, method = "ca", trace = TRUE,
@@ -155,6 +127,10 @@
 ## for each species
 aber.pc2 <- prcurve(abernethy2, method = "ca", trace = TRUE,
                     vary = TRUE, penalty = 1.4)
+
+## Fit principal curve using a GAM - currently slow ~10secs
+aber.pc3 <- prcurve(abernethy2, method = "ca", trace = TRUE,
+                    vary = TRUE, smoother = smoothGAM, bs = "cr")
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.

Added: pkg/man/smoothFuns.Rd
===================================================================
--- pkg/man/smoothFuns.Rd	                        (rev 0)
+++ pkg/man/smoothFuns.Rd	2013-07-20 20:30:28 UTC (rev 342)
@@ -0,0 +1,74 @@
+\name{smoothers}
+\alias{smoothSpline}
+\alias{smoothGAM}
+
+\title{
+  Smoother plugin function for use in fitting a principal curve
+}
+
+\description{
+  Functions to be used as plugins to \code{\link{prcurve}} that fit
+  smooth functions to each variable that, when combined, give the
+  principal curve. The functions act as wrappers to the main fitting
+  functions, which currently include \code{\link{smooth.spline}} and
+  \code{\link[mgcv]{gam}}.
+}
+
+\usage{
+smoothSpline(lambda, x, choose = TRUE, complexity, ...,
+             penalty = 1, cv = FALSE, keep.data = FALSE,
+             control.spar = list(low = 0))
+
+smoothGAM(lambda, x, choose = TRUE, complexity, bs = "tp", ...,
+          family = gaussian(), method = "REML", select = FALSE,
+          control = list())
+}
+\arguments{
+  \item{lambda}{the current projection function; the position that each
+    sample projects to on the current principal curve. This is the
+    predictor variable or covariate in the smooth function.}
+  \item{x}{numeric vector; used as the response variable in the smooth
+    function. The principal curve algorithm fits a separate scatterplot
+    smoother (or similar smoother) to each  variable in \code{X}
+    in turn as the response.}
+  \item{choose}{logical; should the underlying smoother function be
+    allowed to choose the degree of smooth complexity for each
+    variable?}
+  \item{complexity}{numeric; the complexity of the fitted smooth
+    functions.}
+  \item{penalty, cv, keep.data, control.spar}{arguments to
+    \code{\link{smooth.spline}}.}
+  \item{bs, family}{arguments to \code{\link[mgcv]{s}}.}
+  \item{method, select, control}{arguments to \code{\link{gam}}.}
+  \item{\dots}{arguments passed on the the underlying function
+    \code{\link{smooth.spline}} and users should read that function's
+    help page for further details.}
+}
+
+\value{
+  An object of class \code{"prcurveSmoother"} with the following
+  components:
+
+  \item{lambda}{for each observations, its arc-length from the beginning
+    of the curve.}
+  \item{x}{numeric vector of response values.}
+  \item{fitted.values}{numeric vector of fitted values for the
+    observations generated from the fitted smooth function.}
+  \item{complexity}{numeric; the degrees of freedom used for the smooth
+    function. The exact details of what these pertain to are in the help
+    for the respective fitting functions \code{\link{smooth.spline}} and
+    \code{\link[mgcv]{gam}}.}
+  \item{model}{the object fitted by the wrapped fitting function.}
+}
+
+\author{
+  Gavin L. Simpson
+}
+
+\seealso{
+  \code{\link{prcurve}} for how these functions are used.
+}
+
+\keyword{multivariate}
+\keyword{nonparametric}
+\keyword{smooth}
\ No newline at end of file



More information about the Analogue-commits mailing list