[Analogue-commits] r217 - in pkg: R inst man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue May 24 10:47:38 CEST 2011


Author: gsimpson
Date: 2011-05-24 10:47:38 +0200 (Tue, 24 May 2011)
New Revision: 217

Modified:
   pkg/R/initCurve.R
   pkg/R/prcurve.R
   pkg/R/smoothSpline.R
   pkg/inst/ChangeLog
   pkg/man/prcurve.Rd
Log:
add final version 1 of principal curve fitting code and documentation

Modified: pkg/R/initCurve.R
===================================================================
--- pkg/R/initCurve.R	2011-03-28 19:37:50 UTC (rev 216)
+++ pkg/R/initCurve.R	2011-05-24 08:47:38 UTC (rev 217)
@@ -1,7 +1,7 @@
 ## initCurve: initialise the PC from one of several starting
 ## configurations
-initCurve <- function(X, method = c("ca","pca","random"),
-                      rank = FALSE, axis = 1) {
+initCurve <- function(X, method = c("ca","pca","random","user"),
+                      rank = FALSE, axis = 1, start) {
     ## X must be a matrix, attempt to coerce
     if(!isTRUE(all.equal(class(X), "matrix")))
         X <- data.matrix(X)
@@ -24,7 +24,9 @@
                                              scaling = 0))
               },
            random = {lambda <- sample.int(NROW(X))
-                 }
+                 },
+           user = {lambda <- start
+               }
            )
     dist <- sum(diag(var(X))) * (NROW(X) - 1)
     ## Ordering of obs. along PCur

Modified: pkg/R/prcurve.R
===================================================================
--- pkg/R/prcurve.R	2011-03-28 19:37:50 UTC (rev 216)
+++ pkg/R/prcurve.R	2011-05-24 08:47:38 UTC (rev 217)
@@ -4,13 +4,24 @@
 ## in package pcurve is too complex for our needs
 
 ## prcurve (named after prcomp): fits a principal curve to matrix X
-prcurve <- function(X, method = c("ca","pca","random"),
+prcurve <- function(X,
+                    method = c("ca","pca","random","user"),
+                    start = NULL,
                     smoother = smoothSpline,
-                    complexity, vary = FALSE, maxComp,
+                    complexity,
+                    vary = FALSE,
+                    maxComp,
                     finalCV = FALSE,
-                    axis = 1, rank = FALSE, stretch = 2,
-                    maxit = 10, trace = FALSE, thresh = 0.001,
-                    plotit = FALSE, ...) {
+                    axis = 1,
+                    rank = FALSE,
+                    stretch = 2,
+                    maxit = 10,
+                    trace = FALSE,
+                    thresh = 0.001,
+                    plotit = FALSE,
+                    ## fitFUN = c("princurve","pcurve"),
+                    ## latent = FALSE,
+                    ...) {
     ## X should be a matrix, attempt to coerce
     if(!isTRUE(all.equal(class(X), "matrix")))
         X <- data.matrix(X)
@@ -19,13 +30,21 @@
         method <- "ca"
     else
         method <- match.arg(method)
+    ## ## set/select default fitting function
+    ## if(missing(fitFUN))
+    ##     fitFUN <- "princurve"
+    ## else
+    ##     fitFUN <- match.arg(fitFUN)
+    ## if(latent && fitFUN == "princurve")
+    ##     warning("Scaling PC to a latent variable not availble with fitFUN = \"princurve\".")
     ## data stats
     n <- NROW(X) ## number of observations
     m <- NCOL(X) ## number of variables
     ## starting configuration
     config <- startConfig <- initCurve(X, method = method,
                                        rank = rank,
-                                       axis = axis)
+                                       axis = axis,
+                                       start = start)
     ## Need to sort out auto DF choices after pcurve::pcurve
     ## Vary degrees of freedom per variable?
     if(missing(complexity)) {
@@ -46,12 +65,15 @@
     }
     if(missing(maxComp))
         maxComp <- 5 * log10(n)
-    ## fix-upreset complexity > maxComp to maxComp
+    ## fix-up/reset complexity > maxComp to maxComp
     complexity[complexity > maxComp] <- maxComp
     ##
     iter <- 0L
-    if(trace)
+    if(trace) {
+        writeLines(strwrap(tmp <- paste(rep("-", options("width")[[1]]),
+                                        collapse = "")))
         writeLines(sprintf("Initial curve: d.sq: %.4f", config$dist))
+    }
     ##dist.raw <- sum(diag(var(X))) * (NROW(X) - 1)
     dist.old <- sum(diag(var(X)))
     s <- matrix(NA, nrow = n, ncol = m)
@@ -65,8 +87,15 @@
                                       complexity = complexity[j],
                                       choose = FALSE, ...))
         }
+        ##
         dist.old <- config$dist
-        config <- get.lam(X, s = s, stretch = stretch)
+        ## if(fitFUN == "princurve") {
+            config <- get.lam(X, s = s, stretch = stretch)
+        ## } else {
+        ##     uni.lam <- sort(unique(config$lambda))
+        ##     config <- pcget.lam(X, s = s, latent = latent, stretch = stretch,
+        ##                         uni.lam = uni.lam)
+        ## }
         class(config) <- "prcurve"
         ## Converged?
         converged <- (abs((dist.old - config$dist)/dist.old) <=
@@ -103,11 +132,20 @@
             plot(config, X)
         }
         if (trace)
-            writeLines(sprintf(paste("Iteration %",
-                                     max(3, nchar(maxit)),
+            writeLines(sprintf(paste("Iteration %", max(3, nchar(maxit)),
                                      "s: d.sq: %.4f", sep = ""),
                                "CV", config$dist))
     }
+    if(trace){
+        writeLines(strwrap(tmp))
+        if(converged) {
+            writeLines(strwrap(paste("PC Converged in", iter, "iterations.")))
+        } else {
+            writeLines(strwrap(paste("PC did not converge after", iter,
+                                     "iterations.")))
+        }
+        writeLines(strwrap(tmp))
+    }
     names(config$tag) <- names(config$lambda) <-
         rownames(config$s) <- rownames(X)
     colnames(config$s) <- names(complexity) <- colnames(X)
@@ -115,6 +153,7 @@
     config$iter <- iter
     config$totalDist <- startConfig$dist
     config$complexity <- complexity
+    ## config$fitFUN <- fitFUN
     config$call <- match.call()
     class(config) <- c("prcurve")
     return(config)

Modified: pkg/R/smoothSpline.R
===================================================================
--- pkg/R/smoothSpline.R	2011-03-28 19:37:50 UTC (rev 216)
+++ pkg/R/smoothSpline.R	2011-05-24 08:47:38 UTC (rev 217)
@@ -18,7 +18,7 @@
                            control.spar = control.spar)
     } else { ## use specified complexity
         f <- smooth.spline(lambda, x, ..., df = complexity,
-                           penalty = penalty, cv = cv,
+                           penalty = penalty, ## no cv as specifying df
                            keep.data = keep.data,
                            control.spar = control.spar)
     }

Modified: pkg/inst/ChangeLog
===================================================================
--- pkg/inst/ChangeLog	2011-03-28 19:37:50 UTC (rev 216)
+++ pkg/inst/ChangeLog	2011-05-24 08:47:38 UTC (rev 217)
@@ -9,7 +9,9 @@
 	are available.
 
 	* prcurve: new function to fit principal curves to sediment
-	core samples. A 'plot' method is also provided.
+	core samples. A 'plot' method is also provided. The function uses
+	functionality from the princurve package, which is now a
+	dependency.
 
 	Several support functions are also provided; 'smoothSpline' is
 	a wrapper to 'smooth.spline' for fitting splines to individual
@@ -36,7 +38,7 @@
 
 	* tran: improvements to the underlying code.
 
-	* distance: reslience to NA in "gower", "alt.gower", "mixed".
+	* distance: resilience to NA in "gower", "alt.gower", "mixed".
 
 	* cma: added methods for 'mat' and 'predict.mat' objects. These
 	allow you to retrieve the k-closest analogues for training set

Modified: pkg/man/prcurve.Rd
===================================================================
--- pkg/man/prcurve.Rd	2011-03-28 19:37:50 UTC (rev 216)
+++ pkg/man/prcurve.Rd	2011-05-24 08:47:38 UTC (rev 217)
@@ -12,7 +12,7 @@
   cloud of data points for a certain definition of `middle'.
 }
 \usage{
-prcurve(X, method = c("ca", "pca", "random"),
+prcurve(X, method = c("ca", "pca", "random", "user"), start = NULL,
         smoother = smoothSpline, complexity, vary = FALSE,
         maxComp, finalCV = FALSE, axis = 1, rank = FALSE,
         stretch = 2, maxit = 10, trace = FALSE, thresh = 0.001,
@@ -30,11 +30,13 @@
   \item{X}{a matrix-like object containing the variables to which the
     principal curve is to be fitted.}
   \item{method}{character; method to use when initialising the principal
-  curve. \code{"ca"} fits a correspondence analysis to \code{X} and uses
-  the \code{axis}-th axis scores as the initial curve. \code{"pca"} does
-  the same but fits a principal components analysis to
-  \code{X}. \code{"random"} produces a random ordering as the initial
-  curve.}
+    curve. \code{"ca"} fits a correspondence analysis to \code{X} and uses
+    the \code{axis}-th axis scores as the initial curve. \code{"pca"} does
+    the same but fits a principal components analysis to
+    \code{X}. \code{"random"} produces a random ordering as the initial
+    curve.}
+  \item{start}{numeric vector specifying the initial curve when
+    \code{method = "user"}. Must be of length \code{nrow(X)}.}
   \item{smoother}{function; the choice of smoother used to fit the
     principal curve. Currently, the only option is \code{smoothSpline}
     which is a wrapper to \code{\link{smooth.spline}}.}



More information about the Analogue-commits mailing list