[Analogue-commits] r217 - in pkg: R inst man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue May 24 10:47:38 CEST 2011
Author: gsimpson
Date: 2011-05-24 10:47:38 +0200 (Tue, 24 May 2011)
New Revision: 217
Modified:
pkg/R/initCurve.R
pkg/R/prcurve.R
pkg/R/smoothSpline.R
pkg/inst/ChangeLog
pkg/man/prcurve.Rd
Log:
add final version 1 of principal curve fitting code and documentation
Modified: pkg/R/initCurve.R
===================================================================
--- pkg/R/initCurve.R 2011-03-28 19:37:50 UTC (rev 216)
+++ pkg/R/initCurve.R 2011-05-24 08:47:38 UTC (rev 217)
@@ -1,7 +1,7 @@
## initCurve: initialise the PC from one of several starting
## configurations
-initCurve <- function(X, method = c("ca","pca","random"),
- rank = FALSE, axis = 1) {
+initCurve <- function(X, method = c("ca","pca","random","user"),
+ rank = FALSE, axis = 1, start) {
## X must be a matrix, attempt to coerce
if(!isTRUE(all.equal(class(X), "matrix")))
X <- data.matrix(X)
@@ -24,7 +24,9 @@
scaling = 0))
},
random = {lambda <- sample.int(NROW(X))
- }
+ },
+ user = {lambda <- start
+ }
)
dist <- sum(diag(var(X))) * (NROW(X) - 1)
## Ordering of obs. along PCur
Modified: pkg/R/prcurve.R
===================================================================
--- pkg/R/prcurve.R 2011-03-28 19:37:50 UTC (rev 216)
+++ pkg/R/prcurve.R 2011-05-24 08:47:38 UTC (rev 217)
@@ -4,13 +4,24 @@
## in package pcurve is too complex for our needs
## prcurve (named after prcomp): fits a principal curve to matrix X
-prcurve <- function(X, method = c("ca","pca","random"),
+prcurve <- function(X,
+ method = c("ca","pca","random","user"),
+ start = NULL,
smoother = smoothSpline,
- complexity, vary = FALSE, maxComp,
+ complexity,
+ vary = FALSE,
+ maxComp,
finalCV = FALSE,
- axis = 1, rank = FALSE, stretch = 2,
- maxit = 10, trace = FALSE, thresh = 0.001,
- plotit = FALSE, ...) {
+ axis = 1,
+ rank = FALSE,
+ stretch = 2,
+ maxit = 10,
+ trace = FALSE,
+ thresh = 0.001,
+ plotit = FALSE,
+ ## fitFUN = c("princurve","pcurve"),
+ ## latent = FALSE,
+ ...) {
## X should be a matrix, attempt to coerce
if(!isTRUE(all.equal(class(X), "matrix")))
X <- data.matrix(X)
@@ -19,13 +30,21 @@
method <- "ca"
else
method <- match.arg(method)
+ ## ## set/select default fitting function
+ ## if(missing(fitFUN))
+ ## fitFUN <- "princurve"
+ ## else
+ ## fitFUN <- match.arg(fitFUN)
+ ## if(latent && fitFUN == "princurve")
+ ## warning("Scaling PC to a latent variable not availble with fitFUN = \"princurve\".")
## data stats
n <- NROW(X) ## number of observations
m <- NCOL(X) ## number of variables
## starting configuration
config <- startConfig <- initCurve(X, method = method,
rank = rank,
- axis = axis)
+ axis = axis,
+ start = start)
## Need to sort out auto DF choices after pcurve::pcurve
## Vary degrees of freedom per variable?
if(missing(complexity)) {
@@ -46,12 +65,15 @@
}
if(missing(maxComp))
maxComp <- 5 * log10(n)
- ## fix-upreset complexity > maxComp to maxComp
+ ## fix-up/reset complexity > maxComp to maxComp
complexity[complexity > maxComp] <- maxComp
##
iter <- 0L
- if(trace)
+ if(trace) {
+ writeLines(strwrap(tmp <- paste(rep("-", options("width")[[1]]),
+ collapse = "")))
writeLines(sprintf("Initial curve: d.sq: %.4f", config$dist))
+ }
##dist.raw <- sum(diag(var(X))) * (NROW(X) - 1)
dist.old <- sum(diag(var(X)))
s <- matrix(NA, nrow = n, ncol = m)
@@ -65,8 +87,15 @@
complexity = complexity[j],
choose = FALSE, ...))
}
+ ##
dist.old <- config$dist
- config <- get.lam(X, s = s, stretch = stretch)
+ ## if(fitFUN == "princurve") {
+ config <- get.lam(X, s = s, stretch = stretch)
+ ## } else {
+ ## uni.lam <- sort(unique(config$lambda))
+ ## config <- pcget.lam(X, s = s, latent = latent, stretch = stretch,
+ ## uni.lam = uni.lam)
+ ## }
class(config) <- "prcurve"
## Converged?
converged <- (abs((dist.old - config$dist)/dist.old) <=
@@ -103,11 +132,20 @@
plot(config, X)
}
if (trace)
- writeLines(sprintf(paste("Iteration %",
- max(3, nchar(maxit)),
+ writeLines(sprintf(paste("Iteration %", max(3, nchar(maxit)),
"s: d.sq: %.4f", sep = ""),
"CV", config$dist))
}
+ if(trace){
+ writeLines(strwrap(tmp))
+ if(converged) {
+ writeLines(strwrap(paste("PC Converged in", iter, "iterations.")))
+ } else {
+ writeLines(strwrap(paste("PC did not converge after", iter,
+ "iterations.")))
+ }
+ writeLines(strwrap(tmp))
+ }
names(config$tag) <- names(config$lambda) <-
rownames(config$s) <- rownames(X)
colnames(config$s) <- names(complexity) <- colnames(X)
@@ -115,6 +153,7 @@
config$iter <- iter
config$totalDist <- startConfig$dist
config$complexity <- complexity
+ ## config$fitFUN <- fitFUN
config$call <- match.call()
class(config) <- c("prcurve")
return(config)
Modified: pkg/R/smoothSpline.R
===================================================================
--- pkg/R/smoothSpline.R 2011-03-28 19:37:50 UTC (rev 216)
+++ pkg/R/smoothSpline.R 2011-05-24 08:47:38 UTC (rev 217)
@@ -18,7 +18,7 @@
control.spar = control.spar)
} else { ## use specified complexity
f <- smooth.spline(lambda, x, ..., df = complexity,
- penalty = penalty, cv = cv,
+ penalty = penalty, ## no cv as specifying df
keep.data = keep.data,
control.spar = control.spar)
}
Modified: pkg/inst/ChangeLog
===================================================================
--- pkg/inst/ChangeLog 2011-03-28 19:37:50 UTC (rev 216)
+++ pkg/inst/ChangeLog 2011-05-24 08:47:38 UTC (rev 217)
@@ -9,7 +9,9 @@
are available.
* prcurve: new function to fit principal curves to sediment
- core samples. A 'plot' method is also provided.
+ core samples. A 'plot' method is also provided. The function uses
+ functionality from the princurve package, which is now a
+ dependency.
Several support functions are also provided; 'smoothSpline' is
a wrapper to 'smooth.spline' for fitting splines to individual
@@ -36,7 +38,7 @@
* tran: improvements to the underlying code.
- * distance: reslience to NA in "gower", "alt.gower", "mixed".
+ * distance: resilience to NA in "gower", "alt.gower", "mixed".
* cma: added methods for 'mat' and 'predict.mat' objects. These
allow you to retrieve the k-closest analogues for training set
Modified: pkg/man/prcurve.Rd
===================================================================
--- pkg/man/prcurve.Rd 2011-03-28 19:37:50 UTC (rev 216)
+++ pkg/man/prcurve.Rd 2011-05-24 08:47:38 UTC (rev 217)
@@ -12,7 +12,7 @@
cloud of data points for a certain definition of `middle'.
}
\usage{
-prcurve(X, method = c("ca", "pca", "random"),
+prcurve(X, method = c("ca", "pca", "random", "user"), start = NULL,
smoother = smoothSpline, complexity, vary = FALSE,
maxComp, finalCV = FALSE, axis = 1, rank = FALSE,
stretch = 2, maxit = 10, trace = FALSE, thresh = 0.001,
@@ -30,11 +30,13 @@
\item{X}{a matrix-like object containing the variables to which the
principal curve is to be fitted.}
\item{method}{character; method to use when initialising the principal
- curve. \code{"ca"} fits a correspondence analysis to \code{X} and uses
- the \code{axis}-th axis scores as the initial curve. \code{"pca"} does
- the same but fits a principal components analysis to
- \code{X}. \code{"random"} produces a random ordering as the initial
- curve.}
+ curve. \code{"ca"} fits a correspondence analysis to \code{X} and uses
+ the \code{axis}-th axis scores as the initial curve. \code{"pca"} does
+ the same but fits a principal components analysis to
+ \code{X}. \code{"random"} produces a random ordering as the initial
+ curve.}
+ \item{start}{numeric vector specifying the initial curve when
+ \code{method = "user"}. Must be of length \code{nrow(X)}.}
\item{smoother}{function; the choice of smoother used to fit the
principal curve. Currently, the only option is \code{smoothSpline}
which is a wrapper to \code{\link{smooth.spline}}.}
More information about the Analogue-commits
mailing list