[Chemospec-commits] r55 - in pkg: . chemospec chemospec/R chemospec/data chemospec/inst chemospec/man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Sep 24 02:40:34 CEST 2009


Author: bryanhanson
Date: 2009-09-24 02:40:34 +0200 (Thu, 24 Sep 2009)
New Revision: 55

Added:
   pkg/chemospec/
   pkg/chemospec/DESCRIPTION
   pkg/chemospec/NEWS
   pkg/chemospec/R/
   pkg/chemospec/R/HCA.R
   pkg/chemospec/R/ScoreHCA.R
   pkg/chemospec/R/chkSpectra.R
   pkg/chemospec/R/classPCA.R
   pkg/chemospec/R/colLeaf.R
   pkg/chemospec/R/getManyCsv.R
   pkg/chemospec/R/groupNcolor.R
   pkg/chemospec/R/labelExtremes.R
   pkg/chemospec/R/pcaBoot.R
   pkg/chemospec/R/pcaDiag.R
   pkg/chemospec/R/plot2Loadings.R
   pkg/chemospec/R/plotLoadings.R
   pkg/chemospec/R/plotScores.R
   pkg/chemospec/R/plotScoresCor.R
   pkg/chemospec/R/plotScoresDecoration.R
   pkg/chemospec/R/plotScree.R
   pkg/chemospec/R/plotSpectra.R
   pkg/chemospec/R/q2rPCA.R
   pkg/chemospec/R/r2qPCA.R
   pkg/chemospec/R/removeSample.R
   pkg/chemospec/R/robPCA.R
   pkg/chemospec/R/specSurvey.R
   pkg/chemospec/R/sumSpectra.R
   pkg/chemospec/R/trimNbin.R
   pkg/chemospec/data/
   pkg/chemospec/data/CuticleIR.RData
   pkg/chemospec/inst/
   pkg/chemospec/inst/CITATION
   pkg/chemospec/man/
   pkg/chemospec/man/.Rhistory
   pkg/chemospec/man/ChemoSpec-package.Rd
   pkg/chemospec/man/ChemoSpec-package.html
   pkg/chemospec/man/CuticleIR.Rd
   pkg/chemospec/man/CuticleIR.html
   pkg/chemospec/man/HCA.Rd
   pkg/chemospec/man/HCA.html
   pkg/chemospec/man/ScoreHCA.Rd
   pkg/chemospec/man/Spectra.Rd
   pkg/chemospec/man/Spectra.html
   pkg/chemospec/man/chkSpectra.Rd
   pkg/chemospec/man/chkSpectra.html
   pkg/chemospec/man/classPCA.Rd
   pkg/chemospec/man/classPCA.html
   pkg/chemospec/man/colLeaf.Rd
   pkg/chemospec/man/colLeaf.html
   pkg/chemospec/man/getManyCsv.Rd
   pkg/chemospec/man/getManyCsv.html
   pkg/chemospec/man/groupNcolor.Rd
   pkg/chemospec/man/groupNcolor.html
   pkg/chemospec/man/labelExtremes.Rd
   pkg/chemospec/man/labelExtremes.html
   pkg/chemospec/man/pcaBoot.Rd
   pkg/chemospec/man/pcaBoot.html
   pkg/chemospec/man/pcaDiag.Rd
   pkg/chemospec/man/pcaDiag.html
   pkg/chemospec/man/plot2Loadings.Rd
   pkg/chemospec/man/plot2loadings.html
   pkg/chemospec/man/plotLoadings.Rd
   pkg/chemospec/man/plotLoadings.html
   pkg/chemospec/man/plotScores.Rd
   pkg/chemospec/man/plotScores.html
   pkg/chemospec/man/plotScoresCor.Rd
   pkg/chemospec/man/plotScoresCor.html
   pkg/chemospec/man/plotScoresDecoration.Rd
   pkg/chemospec/man/plotScoresDecoration.html
   pkg/chemospec/man/plotScree.Rd
   pkg/chemospec/man/plotScree.html
   pkg/chemospec/man/plotSpectra.Rd
   pkg/chemospec/man/plotSpectra.html
   pkg/chemospec/man/q2rPCA.Rd
   pkg/chemospec/man/q2rPCA.html
   pkg/chemospec/man/removeSample.Rd
   pkg/chemospec/man/removeSample.html
   pkg/chemospec/man/robPCA.Rd
   pkg/chemospec/man/robPCA.html
   pkg/chemospec/man/scoreHCA.html
   pkg/chemospec/man/specSurvey.Rd
   pkg/chemospec/man/specSurvey.html
   pkg/chemospec/man/sumSpectra.Rd
   pkg/chemospec/man/sumSpectra.html
   pkg/chemospec/man/trimNbin.Rd
   pkg/chemospec/man/trimNbin.html
Log:
v 1.1 upload

Added: pkg/chemospec/DESCRIPTION
===================================================================
--- pkg/chemospec/DESCRIPTION	                        (rev 0)
+++ pkg/chemospec/DESCRIPTION	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,12 @@
+Package: ChemoSpec
+Type: Package
+Title: Exploratory Chemometrics for Spectroscopy
+Version: 1.1
+Date: 2009-09-23
+Author: Bryan A. Hanson DePauw University, Greencastle Indiana USA
+Maintainer: Bryan A. Hanson <hanson at depauw.edu>
+Description: A collection of functions for plotting spectra (NMR, IR etc) and carrying out various forms of exploratory data analysis, such as HCA and PCA.  The design allows comparison of data from samples which fall into groups such as treatment vs. control.  Robust methods appropriate for this type of high-dimensional data are available.  ChemoSpec is designed to be very user friendly and suitable for people with limited background in R.
+License: GPL-3
+LazyLoad: yes
+Depends: chemometrics, robustbase, RColorBrewer, plyr, pcaPP, mvtnorm, mvoutlier, pls, lattice
+URL: http://academic.depauw.edu/~hanson/ChemoSpec/ChemoSpec.html

Added: pkg/chemospec/NEWS
===================================================================
--- pkg/chemospec/NEWS	                        (rev 0)
+++ pkg/chemospec/NEWS	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,18 @@
+
+
+NEWS file for package ChemoSpec
+Exploratory Chemometrics for Spectroscopy
+Bryan A. Hanson DePauw University, Greencastle Indiana USA
+URL: http://academic.depauw.edu/~hanson/ChemoSpec/ChemoSpec.html
+
+##### Version: 1.1
+Date: 2009-09-23
+News:
+Modified colLeaf so that the leaf label size would vary with the number of leaves to be plotted.
+Added ... to plot arguments in HCA so that user can pass additional plotting parameters.
+Added function plotSpectra which was overlooked in the first build!
+Added a description of Spectra objects to the man pages.
+
+##### Version: 1.0
+Date: 2009-09-08
+News: First Release

Added: pkg/chemospec/R/HCA.R
===================================================================
--- pkg/chemospec/R/HCA.R	                        (rev 0)
+++ pkg/chemospec/R/HCA.R	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,28 @@
+HCA <-
+function(spectra, title = "no title provided", method = "complete", ...) {
+
+# Function to carry out HCA, basically a wrapper to existing methods
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, June 2008
+
+	if (missing(spectra)) stop("No spectral data provided")
+	chkSpectra(spectra)	
+	
+	distance <- dist(as.data.frame(spectra$data, row.names = spectra$names))
+	cluster <- as.dendrogram(hclust(distance), method = method)
+	cluster <- dendrapply(cluster, colLeaf, spectra)
+	title <- paste(title, ": HCA Analysis", sep = "")
+	sub.title <- paste("Clustering method: ", method, sep = "")
+	plot(cluster, main = title, sub = sub.title, horiz = FALSE, ...)
+	
+	leg.txt <- levels(spectra$group)
+	leg.col <- NULL
+	for (z in 1:length(leg.txt)) {
+		i <- match(leg.txt[z], spectra$group)
+		leg.col[z] <- spectra$colors[i]
+		}
+	leg.txt <- c("Key", leg.txt)
+	leg.col <- c("black", leg.col)
+	legend("topright", leg.txt, text.col = leg.col, bty = "n")
+	}
+

Added: pkg/chemospec/R/ScoreHCA.R
===================================================================
--- pkg/chemospec/R/ScoreHCA.R	                        (rev 0)
+++ pkg/chemospec/R/ScoreHCA.R	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,34 @@
+ScoreHCA <-
+function(spectra, pca, title = "no title provided", scores = c(1:5), method = "complete", ...) {
+
+# Function to carry out HCA on PCA scores
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, Aug 2009
+
+	
+	if (missing(spectra)) stop("No spectral data set provided")
+	if (missing(pca)) stop("No pca results provided")
+	if (!("princomp" %in% class(pca) || "prcomp" %in% class(pca))) stop("Your pca results look corrupt!")
+	chkSpectra(spectra)
+
+### need to add conversion of pca modes here
+
+	distance <- dist(as.data.frame(pca$x[,scores], row.names = spectra$names))
+	cluster <- as.dendrogram(hclust(distance), method = method)
+	cluster <- dendrapply(cluster, colLeaf, spectra)
+	title <- paste(title, ": HCA Analysis of PC Scores", sep = "")
+
+	sub.title <- paste("Clustering method:", method, "  PCA method:", pca$method, sep = " ")
+	plot(cluster, main = title, sub = sub.title, horiz = FALSE)
+	
+	leg.txt <- levels(spectra$groups)
+	leg.col <- NULL
+	for (z in 1:length(leg.txt)) {
+		i <- match(leg.txt[z], spectra$groups)
+		leg.col[z] <- spectra$colors[i]
+		}
+	leg.txt <- c("Key", leg.txt)
+	leg.col <- c("black", leg.col)
+	legend("topright", leg.txt, text.col = leg.col, bty = "n")
+	}
+

Added: pkg/chemospec/R/chkSpectra.R
===================================================================
--- pkg/chemospec/R/chkSpectra.R	                        (rev 0)
+++ pkg/chemospec/R/chkSpectra.R	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,43 @@
+chkSpectra <-
+function(spectra, confirm = FALSE) {
+
+# Function to Check the Integrity of Spectra Objects
+# Related to the concept of validObject for S4 classes
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, Aug 2009
+# Many kinds of errors may be missed due to coercion:
+# e.g. spectra$colors[22] <- 1.2 creates "1.2" as the entry
+# Only complete replacement of $colors with all integers throws an error.
+	
+	if (missing(spectra)) stop("No object of supposed class Spectra provided")
+	w <- FALSE
+	if (!class(spectra) == "Spectra") { warning("The object provided was not of class Spectra"); w <- TRUE }
+	if (!class(spectra$freq) == "numeric") { warning("The frequency data appear to be corrupt"); w <- TRUE }
+	if (!class(spectra$data) == "matrix") { warning("The spectral intensities appear to be corrupt"); w <- TRUE }
+	if (!class(spectra$names) == "character") { warning("The sample names appear to be corrupt"); w <- TRUE }
+	if (!class(spectra$color) == "character") { warning("The assigned colors appear to be corrupt"); w <- TRUE }
+	if (!class(spectra$unit) == "character") { warning("The units appear to be corrupt"); w <- TRUE }
+	if (!class(spectra$desc) == "character") { warning("The description appears to be corrupt"); w <- TRUE }
+	if (!class(spectra$groups) == "factor") { warning("The assigned groups appear to be corrupt"); w <- TRUE }
+	
+	f <- length(spectra$freq)
+	d2 <- dim(spectra$data)[2]
+	n <- length(spectra$names)
+	g <- length(spectra$groups)
+	co <- length(spectra$colors)
+	d1 <- dim(spectra$data)[1]
+	
+	if (!identical(f, d2)) { warning("The dimensions don't make sense (freq, data)"); w <- TRUE }
+	if (!identical(n, g)) { warning("The dimensions don't make sense (names, group)"); w <- TRUE }
+	if (!identical(g, co)) { warning("The dimensions don't make sense (group, colors)"); w <- TRUE }
+	if (!identical(co, d1)) { warning("The dimensions don't make sense (colors, data)"); w <- TRUE }
+	
+	if ((!w) && (confirm)) cat("You must be awesome: These spectra look just dandy!")
+	if (w) {
+		cat("*** There seem to be one or more problems with these spectra!\n")
+		cat("*** Is the spectral data set loaded different from the one you tried to plot?\n")
+		stop("Sorry, we can't continue this way: It's not me, it's you!")
+		}
+	
+	}
+

Added: pkg/chemospec/R/classPCA.R
===================================================================
--- pkg/chemospec/R/classPCA.R	                        (rev 0)
+++ pkg/chemospec/R/classPCA.R	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,41 @@
+classPCA <-
+function(spectra, choice = "noscale") {
+
+# Function to carry out non-robust Principal Components Analysis
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, Sept 2009
+	
+	if (missing(spectra)) stop("No spectral data set passed to PCA")
+	if (!class(spectra) == "Spectra") stop("Your spectral data set looks corrupt!")
+	
+	choices <- c("noscale", "autoscale", "Pareto") # trap for invalid scaling method
+	check <- choice %in% choices
+	if (!check) stop("The choice of scaling parameter was invalid")
+	
+	# First, row scale (compensates for different dilutions/handling of samples)
+
+	t.data <- t(spectra$data)
+	sums <- colSums(t.data)
+	row.scaled <- t(scale(t.data, center = FALSE, scale = sums))
+
+	# Center & scale the data using the desired method.
+
+	if (identical(choice, "noscale")) {centscaled <- scale(row.scaled, center = TRUE, scale = FALSE)}
+	
+	if (identical(choice, "autoscale")) {
+		col.sd <- sd(row.scaled)
+		centscaled <- scale(row.scaled, center = TRUE, scale = col.sd)}
+
+	if (identical(choice, "Pareto")) {
+		col.sd <- sd(row.scaled)
+		centscaled <- scale(row.scaled, center = TRUE, scale = col.sd^0.5)}
+	
+	# Now the PCA!
+	
+	pca <- prcomp(centscaled, retx = TRUE, center = FALSE, scale. = FALSE)
+	pca$method <- paste("centered/", choice, "/", "classical", sep = "")
+	
+	pca
+				
+	}
+

Added: pkg/chemospec/R/colLeaf.R
===================================================================
--- pkg/chemospec/R/colLeaf.R	                        (rev 0)
+++ pkg/chemospec/R/colLeaf.R	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,20 @@
+colLeaf <-
+function(n, spectra) { # this is called iteratively by dendrapply
+
+# A little trick to color leaves properly, derived from the archives
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, June 2008
+
+	lab.size = 1.0
+	if(length(spectra$names) > 20) lab.size = 0.75
+	if(length(spectra$names) > 50) lab.size = 0.5
+	
+	if(is.leaf(n)) {
+		a <- attributes(n)
+		i <- match(a$label, spectra$names)
+		
+		attr(n, "nodePar") <- c(a$nodePar, list(lab.col = spectra$colors[i], pch = NA, lab.cex = lab.size))
+		}
+		n
+	}
+

Added: pkg/chemospec/R/getManyCsv.R
===================================================================
--- pkg/chemospec/R/getManyCsv.R	                        (rev 0)
+++ pkg/chemospec/R/getManyCsv.R	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,52 @@
+getManyCsv <-
+function(gr.crit = NULL, gr.cols = c("auto"),
+	freq.unit = "no frequency unit provided",
+	NMR = FALSE,
+	int.unit = "no intensity unit provided",
+	descrip = "no description provided",
+	out.file = "mydata") {
+		
+# Function to Read & Prep Spectroscopic Data
+# from raw data files into an RData file/object
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, Aug 2009
+# This script reads a series of csv files that share a common frequency axis.
+# Files should have freq in column 1, absorbance/intensity in column 2.
+# There should be no column labels.
+# Groups & colors assigned using file names by a separate function.
+
+	if (is.null(gr.crit)) stop("No group criteria provided to encode data")
+
+	files <- list.files(pattern = "\\.(csv|CSV)$")
+		
+	# get the first file, initialize stuff, and pull out the frequency data
+
+	temp <- read.csv(files[1])
+	spectra <- list() # OK to initialize a list this way
+	spectra$freq <- temp[,1]
+	spectra$data <- matrix(data = 0.0, nrow = length(files), ncol = length(spectra$freq))
+	
+	# loop over all files (you have to read the whole file then grab
+	# just the part you want, i.e. the 2nd column)
+
+	files.noext <- substr(basename(files), 1, nchar(basename(files)) - 4)
+
+	for (i in 1:length(files)) {
+		temp <- read.csv(files[i])
+		spectra$data[i,] <- temp[,2]
+		spectra$names[i] <- files.noext[i] # need to remove the .csv
+		}
+
+	# go get group assignments & colors, to complete assembly of spectra
+
+	spectra <- groupNcolor(spectra, gr.crit, gr.cols)
+	
+	spectra$unit[1] <- freq.unit
+	spectra$unit[2] <- int.unit
+	spectra$unit[3] <- NMR
+	spectra$desc <- descrip
+	
+	datafile <- paste(out.file, ".RData", sep = "")
+	save(spectra, file = datafile)
+	}
+

Added: pkg/chemospec/R/groupNcolor.R
===================================================================
--- pkg/chemospec/R/groupNcolor.R	                        (rev 0)
+++ pkg/chemospec/R/groupNcolor.R	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,42 @@
+groupNcolor <-
+function(spectra, gr.crit = NULL, gr.cols = c("auto")) {
+
+# Function to Process Group Assignments & Assign Colors
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, Aug 2009
+# Acts on all csv files in a directory.
+
+	# use the group criteria (gr.crit) to classify the samples
+
+	spectra$groups <- rep(NA, length(spectra$names)) # intialize
+	
+	files <- list.files(pattern = "\\.(csv|CSV)")
+	files.noext <- substr(basename(files), 1, nchar(basename(files)) - 4)
+	
+	for (i in 1:length(gr.crit)) {
+		which <- grep(gr.crit[i], files.noext)
+		spectra$groups[which] <- gr.crit[i]
+		}
+	
+	spectra$groups <- as.factor(spectra$groups)
+	
+	# assign each group a color for plotting later
+
+	spectra$colors <- rep(NA, length(spectra$names)) # initialize
+	
+	if (identical(gr.cols[1], "auto")) {
+		gr.cols <- brewer.pal(length(gr.crit), "Set1")
+		for (i in 1:length(gr.crit)) {
+			which <- grep(gr.crit[i], files)
+			spectra$colors[which] <- gr.cols[i]
+			}
+	} else # match gr.cols with gr.crit & assign spectra$colors	
+		for (i in 1:length(gr.crit)) {
+			which <- grep(gr.crit[i], spectra$groups)
+			spectra$colors[which] <- gr.cols[i]
+			}
+
+	class(spectra) <- "Spectra"
+	return(spectra) # spectra is now complete!
+	}
+

Added: pkg/chemospec/R/labelExtremes.R
===================================================================
--- pkg/chemospec/R/labelExtremes.R	                        (rev 0)
+++ pkg/chemospec/R/labelExtremes.R	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,34 @@
+labelExtremes <-
+function(data, names, tol) {
+
+# Function to label extreme data points
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw Univ, Aug 2009
+	
+	px <- data[,1]
+	py <- data[,2]
+	pl <- names
+	if (is.numeric(pl)) pl <- format(pl, digits = 4)
+		
+	q.x <- quantile(px, probs = c(1.0-tol, tol))
+	sel.x <- (px <= q.x[2]) | (px >= q.x[1])
+	keep.x <- subset(px, sel.x)
+	keep.x <- match(keep.x, px) # need to keep this & corresponding y
+		
+	q.y <- quantile(py, probs = c(1.0-tol, tol))
+	sel.y <- (py <= q.y[2]) | (py >= q.y[1])
+	keep.y <- subset(py, sel.y)
+	keep.y <- match(keep.y, py) # need to keep this & corresponding x
+		
+	keep <- unique(c(keep.x, keep.y))
+
+	x <- px[keep]
+	y <- py[keep]
+	l <- pl[keep]
+
+	for(n in c(1:length(x))) {
+		text(x[n], y[n], l[n], pos = 4, offset = 0.2, cex = 0.5)
+		}
+			
+	}
+

Added: pkg/chemospec/R/pcaBoot.R
===================================================================
--- pkg/chemospec/R/pcaBoot.R	                        (rev 0)
+++ pkg/chemospec/R/pcaBoot.R	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,108 @@
+pcaBoot <-
+function (spectra, pcs, choice = "noscale", repl = 50, segments = 4, 
+    segment.type = c("random", "consecutive", "interleaved"), 
+    length.seg, trace = FALSE, ...) {
+
+# Function for CV of PCA scores (modified from Filzmosers version in {chemometrics})
+# Part of the ChemoSpec package.  Bryan Hanson, DePauw Univ, Sept 2009
+# Conducts classical, not robust, PCA
+
+    require(pls)
+	
+	if (missing(spectra)) stop("No spectral data set passed to PCA")
+	if (!class(spectra) == "Spectra") stop("Your spectral data set looks corrupt!")
+	
+	choices <- c("noscale", "autoscale", "Pareto") # trap for invalid scaling method
+	check <- choice %in% choices
+	if (!check) stop("The choice of scaling parameter was invalid")
+	
+	# First, row scale (compensates for different dilutions/handling of samples)
+
+	t.data <- t(spectra$data)
+	sums <- colSums(t.data)
+	row.scaled <- t(scale(t.data, center = FALSE, scale = sums))
+
+	# Center & scale the data using the desired method.
+
+	if (identical(choice, "noscale")) {centscaled <- scale(row.scaled, center = TRUE, scale = FALSE)}
+	
+	if (identical(choice, "autoscale")) {
+		col.sd <- sd(row.scaled)
+		centscaled <- scale(row.scaled, center = TRUE, scale = col.sd)}
+
+	if (identical(choice, "Pareto")) {
+		col.sd <- sd(row.scaled)
+		centscaled <- scale(row.scaled, center = TRUE, scale = col.sd^0.5)}
+
+	# Filzmoser's stuff follows... (modified slightly)
+
+	X <- centscaled
+	amax <- pcs
+    if (missing(amax)) {
+        amax <- 5
+    }
+    else {
+        if (amax < 1 || amax > min(nrow(X) - 1, ncol(X))) 
+            stop("Invalid number of components, amax")
+    }
+    amax <- min(amax, nrow(X) - max(sapply(segments, length)) - 
+        1)
+    optcomp <- matrix(NA, nrow = segments, ncol = repl)
+    MSEP <- matrix(NA, nrow = repl, ncol = amax)
+    dimnames(MSEP) <- list(paste("rep", 1:repl), paste("PC", 
+        1:amax))
+    Fit <- matrix(NA, nrow = repl, ncol = amax)
+    dimnames(Fit) <- list(paste("rep", 1:repl), 1:amax)
+    for (i in 1:repl) {
+        if (missing(length.seg)) {
+            segment <- cvsegments(nrow(X), k = segments, type = segment.type)
+        }
+        else {
+            segment <- cvsegments(nrow(X), length.seg = length.seg, 
+                type = segment.type)
+        }
+        if (trace) 
+            cat(paste("Replication: ", i))
+        MSEPj <- matrix(NA, nrow = segments, ncol = amax)
+        Fitj <- matrix(NA, nrow = segments, ncol = amax)
+        for (n.seg in 1:length(segment)) {
+            if (trace) 
+                cat(n.seg, "")
+            seg <- segment[[n.seg]]
+            obsuse <- as.numeric(unlist(segment[-n.seg]))
+            Xtrain <- X[obsuse, ]
+            obstest <- as.numeric(unlist(segment[n.seg]))
+            Xtest <- X[obstest, ]
+            if (ncol(Xtrain) > nrow(Xtrain)) {
+                e <- eigen(Xtrain %*% t(Xtrain))
+                Ttrain <- e$vectors %*% diag(sqrt(e$values))
+                Ptrain <- t(Xtrain) %*% Ttrain %*% diag(1/e$values)
+            }
+            else {
+                Xtrain_svd <- svd(Xtrain)
+                Ptrain <- Xtrain_svd$v
+            }
+            Ttest <- Xtest %*% Ptrain
+            for (j in 1:amax) {
+                MSEPj[n.seg, j] <- sum((Xtest - Ttest[, 1:j] %*% 
+                  t(Ptrain[, 1:j]))^2)
+            }
+            Fitj[n.seg, ] <- MSEPj[n.seg, ]/sum(Xtest^2)
+        }
+        MSEP[i, ] <- apply(MSEPj, 2, mean)
+        Fit[i, ] <- 1 - apply(Fitj, 2, mean)
+    }
+    
+    # The plotting details have been modified quite a bit
+    
+    boxplot(as.data.frame(Fit), ylab = "Explained variance", 
+            xlab = "Number of components",
+            main = "Bootstrap Analysis of Number of PCs", ...)
+    # construct a legend based upon values of center & scale
+	note <- paste("centered/", choice, "/", "classical", sep = "")
+    leg.txt <- paste(spectra$desc, note, sep = " ")
+	legend("bottomright", leg.txt, bty = "n", cex = 0.75)
+    
+    list(ExplVar = Fit, MSEP = MSEP)
+}
+

Added: pkg/chemospec/R/pcaDiag.R
===================================================================
--- pkg/chemospec/R/pcaDiag.R	                        (rev 0)
+++ pkg/chemospec/R/pcaDiag.R	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,45 @@
+pcaDiag <-
+function (spectra, pca, pcs = 3, quantile = 0.975, plot = c("OD", "SD"), ...) {
+
+# Function for PCA Diagnostic Plots (modified from Filzmosers version in {chemometrics})
+# Part of the ChemoSpec package.  Bryan Hanson, DePauw Univ, Sept 2009
+
+	if ("prcomp" %in% class(pca)) pca <- q2rPCA(pca)
+	X <- spectra$data
+	X.pca <- pca
+	a <- pcs
+    if (is.null(a)) a <- 3
+    
+	SDist <- sqrt(apply(t(t(X.pca$sco[, 1:a]^2)/X.pca$sdev[1:a]^2), 1, sum))
+    ODist <- sqrt(apply((X - X.pca$sco[, 1:a] %*% t(X.pca$loa[, 1:a]))^2, 1, sum))
+    critSD <- sqrt(qchisq(quantile, a))
+    critOD <- (median(ODist^(2/3)) + mad(ODist^(2/3)) * qnorm(quantile))^(3/2)
+    
+    sub <- paste(pca$method, a, "PCs", sep = " ")
+    if ("SD" %in% plot) {
+		plot(SDist, ylim = c(0, max(SDist)), ylab = "score distance", 
+			xlab = spectra$desc, sub = sub, main = "Possible PCA Outliers based on Score Distance",
+			col = spectra$colors, pch = 20, ...)
+		abline(h = critSD, lty = 2)
+	
+		y.data <- subset(SDist, SDist > critSD)
+		x.data <- which(SDist %in% y.data, arr.ind = TRUE)
+		data <- cbind(x.data, y.data)
+		if (!length(x.data) == 0) labelExtremes(data, names = spectra$names[x.data], tol = 1.0)
+        }
+        
+    if ("OD" %in% plot) {
+		plot(ODist, ylim = c(0, max(ODist)), ylab = "orthogonal distance", 
+			xlab = spectra$desc, sub = sub, main = "Possible PCA Outliers based on Orthogonal Distance",
+			col = spectra$colors, pch = 20, ...)
+		abline(h = critOD, lty = 2)
+
+		y.data <- subset(ODist, ODist > critOD)
+		x.data <- which(ODist %in% y.data, arr.ind = TRUE)
+		data <- cbind(x.data, y.data)
+		if (!length(x.data) == 0) labelExtremes(data, names = spectra$names[x.data], tol = 1.0)
+		}
+		
+    list(SDist = SDist, ODist = ODist, critSD = critSD, critOD = critOD)
+	}
+

Added: pkg/chemospec/R/plot2Loadings.R
===================================================================
--- pkg/chemospec/R/plot2Loadings.R	                        (rev 0)
+++ pkg/chemospec/R/plot2Loadings.R	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,38 @@
+plot2Loadings <-
+function(spectra, pca, title = "no title provided", loads = c(1,2), tol = 0.05, ...) {
+	
+# Function to plot loadings against each other
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, June 2008
+
+	if (length(loads) != 2) stop("You must choose exactly 2 loadings to plot.")
+	if (missing(spectra)) stop("No spectral data set provided")
+	if (missing(pca)) stop("No PCA results provided")
+	if (!("princomp" %in% class(pca) || "prcomp" %in% class(pca))) stop("Your pca results look corrupt!")
+	chkSpectra(spectra)
+	
+	# pull the requested loadings
+
+	loadings1 = pca$rotation[,loads[1]]
+	loadings2 = pca$rotation[,loads[2]]
+	
+	eigensum <- sum(pca$sdev*pca$sdev) # prepare axis labels
+	variance <- 100*(pca$sdev*pca$sdev/eigensum)
+	txt1 <- paste("PC", loads[1], " (", format(variance[loads[1]], digits=2), "%", ") loadings", sep = "")
+	txt2 <- paste("PC", loads[2], " (", format(variance[loads[2]], digits=2), "%", ") loadings", sep = "")
+
+	title <- paste(title, ": Covariance of Loadings", sep = "")
+	
+	xrange <- range(loadings1)*c(1.0, 1.05) # makes room for labels
+	yrange <- range(loadings2)*c(1.0, 1.05)
+
+	plot(loadings1, loadings2, main = title, xlab = txt1, ylab = txt2, pch = 20, xlim = xrange, ylim = yrange)
+	abline(v = 0.0, col = "lightgray")
+	abline(h = 0.0, col = "lightgray")
+	legend("bottomleft", y = NULL, pca$method, bty = "n", cex = 0.75)
+
+	# Next, if requested, we will label the extreme points on both dimensions
+	
+	if (is.numeric(tol)) labelExtremes(pca$rotation[,loads], spectra$freq, tol)	
+	}
+

Added: pkg/chemospec/R/plotLoadings.R
===================================================================
--- pkg/chemospec/R/plotLoadings.R	                        (rev 0)
+++ pkg/chemospec/R/plotLoadings.R	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,55 @@
+plotLoadings <-
+function(spectra, pca, title = "no title provided",
+	loads = c(1), ref = 1, ...) {
+	
+# Function to plot loadings vs. frequencies
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, June 2008
+# This is the lattice version
+
+	if (missing(spectra)) stop("No spectral data set provided")
+	if (missing(pca)) stop("No PCA results provided")
+	if (!class(spectra) == "Spectra") stop("Your spectral data set looks corrupt!")
+	if (!("princomp" %in% class(pca) || "prcomp" %in% class(pca))) stop("Your pca results look corrupt!")
+	
+	title <- paste(title, ": Loadings Plot", sep = "")
+
+	# Stack the requested data into a data frame for plotting
+	
+	names <- paste("PC", loads, "Loadings", sep = " ")
+	names <- c("Reference Spectrum", names)
+	x <- rep(spectra$freq, length(loads) + 1)
+	
+	z <- rep(names[1], length(spectra$freq))
+	y <- spectra$data[ref,] # load in the reference spectrum
+	
+	for(n in 1:length(loads)) {
+		y <- c(y, pca$rotation[,loads[n]]) # add in each loading
+		z <- c(z, rep(names[n + 1], length(spectra$freq)))
+		}
+
+	z <- as.factor(z)
+	z <- relevel(z, "Reference Spectrum")
+	df <- data.frame(y, x, z) 
+	
+	# Do the plot
+	# Note: no way exists to plot the x axis reversed for multiple panels
+		
+	p <- xyplot(y ~ x | z, data = df,
+		main = title,
+		xlab = spectra$unit[1], ylab = "",
+		layout = c(1, length(loads) + 1),
+		strip.left = TRUE, strip = FALSE, col = "black",
+		scales = list(x = "same", y = "free"),
+		page = function(n) panel.text(lab = pca$method, x = 0.5, y = 0.95),
+		panel = function(..., type = "h") {
+			if (panel.number() == 1) {
+				panel.xyplot(..., type = "l")
+				} else {
+					panel.xyplot(..., type = type)
+					}
+			}, ...)
+		
+	plot(p)
+	}
+

Added: pkg/chemospec/R/plotScores.R
===================================================================
--- pkg/chemospec/R/plotScores.R	                        (rev 0)
+++ pkg/chemospec/R/plotScores.R	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,186 @@
+plotScores <-
+function(spectra, pca, title = "no title provided",
+	pcs = c(1,2), ellipse = "none", tol = "none", ...) {
+
+# Function to plot PCA scores
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, Aug 2009
+# Generates 2D plots of scores specified by argument pcs
+# Part of this depends on a modified cor.plot {mvoutlier}
+# which is called plotScoresCor
+
+	if (length(pcs) != 2) stop("You must choose exactly two PC's to plot")
+	if (missing(spectra)) stop("No spectral data set provided")
+	if (missing(pca)) stop("No pca results provided")
+	if (!("princomp" %in% class(pca) || "prcomp" %in% class(pca))) stop("Your pca results look corrupt!")
+	chkSpectra(spectra)
+	
+	# Break the data into groups, clean up & give some warnings.
+	# There must be at least 3 data points per level to make a classic ellipse.
+	# More to make a robust ellipse, as at least one point may be dropped.
+
+	lvls = levels(spectra$groups)
+	count = length(lvls)
+	for (n in 1:count) {
+		ndp <- length(which(spectra$groups == lvls[n])) # How many members of each group are there?
+		if (identical(ndp, 1)) warning("Group ", which(spectra$groups == lvls[n]), "has only 1 member.")
+		if (identical(ndp, 2)) warning("Group ", which(spectra$groups == lvls[n]), "has only 2 members.")
+		if (identical(ndp, 3)) warning("Group ", which(spectra$groups == lvls[n]), "has only 3 members.")
+		}
+	
+	df <- data.frame(pca$x[,pcs], group = spectra$groups)
+	groups <- dlply(df, "group", subset, select = c(1,2))
+
+	col.lvls <- c() # set up the color scheme for ellipses, later
+	for (n in 1:count) {
+		chk <- match(lvls[n], spectra$groups) # get 1st instance
+		col.lvls[n] <- spectra$colors[chk]
+		}
+		
+### First case: plot everything by group (most general, other cases are subsets of this)
+
+	if (ellipse == "both") {
+	
+	# First, get overall plot limits.
+	# Keep in mind the ellipses may be quite flattened and hence large.
+	# At the same time, the ellipses might be quite round and
+	# the scores well outside them, if there is an outlier.
+	# Must check all cases!
+	
+	ell <- llply(groups, plotScoresCor) # these are the ellipses we'll need later
+
+	x.scores <- range(llply(groups, subset, select = 1))
+	y.scores <- range(llply(groups, subset, select = 2)) 
+	x.ell <- range(llply(ell, function(x) {range(x[1])}))
+	y.ell <- range(llply(ell, function(x) {range(x[2])}))
+	x.ell.r <- range(llply(ell, function(x) {range(x[4])}))
+	y.ell.r <- range(llply(ell, function(x) {range(x[5])}))
+	x.all <- range(x.scores, x.ell, x.ell.r)*c(1.0, 1.05) # expand slightly for labels
+	y.all <- range(y.scores, y.ell, y.ell.r)
+	y.all[2] <- y.all[2]*1.15 # leave room for annotations at top of plot
+
+	# Plot the data points
+
+	new.title <- paste(title, ": PCA Score Plot", sep = "") # prepare plot title
+
+	plot(pca$x[,pcs], main = new.title, xlab = "", ylab = "",
+	col = spectra$colors, xlim = x.all, ylim = y.all, pch = 20, ...)
+	
+	# Now plot both classic and robust ellipses, classic first
+
+	cls.coords <- llply(ell, function(x) {x[1:2]})
+	cls.coords <- llply(cls.coords, function(x) {do.call(cbind, x)})
+	m_ply(cbind(x = cls.coords, col = col.lvls, lty = 3), lines, ...)
+
+	# Now the robust ellipses
+	
+	rob.coords <- llply(ell, function(x) {x[4:5]})
+	rob.coords <- llply(rob.coords, function(x) {do.call(cbind, x)})
+	m_ply(cbind(x = rob.coords, col = col.lvls), lines, ...)
+
+	# finish with the usual annotations
+			
+	legend("top", y = NULL, "classic ellipses by group", lty = 3, bty = "n", col = "gray", cex = 0.75)
+	legend("top", y = NULL, "robust ellipses by group", lty = 1, bty = "n", col = "black", inset = c(0.0, 0.03), cex = 0.75)
+
+	}
+
+### Second case: plot only the points by group
+
+	if (ellipse == "none") {
+	
+	x.scores <- range(llply(groups, subset, select = 1))
+	y.scores <- range(llply(groups, subset, select = 2)) 
+	x.all <- range(x.scores)*c(1.0, 1.05) # expand slightly for labels
+	y.all <- range(y.scores)
+	y.all[2] <- y.all[2]*1.15 # leave room for annotations at top of plot
+
+	# Plot the data points
+
+	new.title <- paste(title, ": PCA Score Plot", sep = "") # prepare plot title
+
+	plot(pca$x[,pcs], main = new.title, xlab = "", ylab = "",
+	col = spectra$colors, xlim = x.all, ylim = y.all, pch = 20, ...)
+	
+	}
+
+### Third case: plot classical ellipses (only) by group
+
+	if (ellipse == "cls") {
+	
+	ell <- llply(groups, plotScoresCor) # these are the ellipses we'll need later
+
+	x.scores <- range(llply(groups, subset, select = 1))
+	y.scores <- range(llply(groups, subset, select = 2)) 
+	x.ell <- range(llply(ell, function(x) {range(x[1])}))
+	y.ell <- range(llply(ell, function(x) {range(x[2])}))
+	x.all <- range(x.scores, x.ell)*c(1.0, 1.05) # expand slightly for labels
+	y.all <- range(y.scores, y.ell)
+	y.all[2] <- y.all[2]*1.15 # leave room for annotations at top of plot
+
+	# Plot the data points
+
+	new.title <- paste(title, ": PCA Score Plot", sep = "") # prepare plot title
+
+	plot(pca$x[,pcs], main = new.title, xlab = "", ylab = "",
+	col = spectra$colors, xlim = x.all, ylim = y.all, pch = 20, ...)
+	
+	# Now plot classic ellipses
+
+	cls.coords <- llply(ell, function(x) {x[1:2]})
+	cls.coords <- llply(cls.coords, function(x) {do.call(cbind, x)})
+	m_ply(cbind(x = cls.coords, col = col.lvls, lty = 3), lines, ...)
+
+	# finish with the usual annotations
+			
+	legend("top", y = NULL, "classic ellipses by group", lty = 3, bty = "n", col = "gray", cex = 0.75)
+
+	}
+
+### Fourth case: plot robust ellipses (only) by group
+
+	if (ellipse == "rob") {
+	
+	ell <- llply(groups, plotScoresCor) # these are the ellipses we'll need later
+
+	x.scores <- range(llply(groups, subset, select = 1))
+	y.scores <- range(llply(groups, subset, select = 2)) 
+	x.ell.r <- range(llply(ell, function(x) {range(x[4])}))
+	y.ell.r <- range(llply(ell, function(x) {range(x[5])}))
+	x.all <- range(x.scores, x.ell.r)*c(1.0, 1.05) # expand slightly for labels
+	y.all <- range(y.scores, y.ell.r)
+	y.all[2] <- y.all[2]*1.15 # leave room for annotations at top of plot
+
+	# Plot the data points
+
+	new.title <- paste(title, ": PCA Score Plot", sep = "") # prepare plot title
+
+	plot(pca$x[,pcs], main = new.title, xlab = "", ylab = "",
+	col = spectra$colors, xlim = x.all, ylim = y.all, pch = 20, ...)
+	
+	# Now plot robust ellipses
+	
+	rob.coords <- llply(ell, function(x) {x[4:5]})
+	rob.coords <- llply(rob.coords, function(x) {do.call(cbind, x)})
+	m_ply(cbind(x = rob.coords, col = col.lvls), lines, ...)
+
+	legend("top", y = NULL, "robust ellipses by group", lty = 1, bty = "n", col = "black", cex = 0.75)
+
+	}
+
+	# Decorations that apply to all cases
+	
+	plotScoresDecoration(spectra, pca, pcs, tol)
+	
+	leg.col <- c()
+	for (z in 1:count) {
+		i <- match(lvls[z], spectra$groups)
+		leg.col[z] <- spectra$colors[i]
+		}
+
+	leg.txt <- c("Key", lvls)
+	leg.col <- c("black", leg.col)
+	legend("topright", leg.txt, text.col = leg.col, bty = "o", cex = 0.75)
+
+}
+

Added: pkg/chemospec/R/plotScoresCor.R
===================================================================
--- pkg/chemospec/R/plotScoresCor.R	                        (rev 0)
+++ pkg/chemospec/R/plotScoresCor.R	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,31 @@
+plotScoresCor <-
+function (x, quan = 1/2, alpha = 0.025) {
+
+# Function to plot robust correlation ellipsoids for x, y data
+# Modifed slightly from cor.plot {mvoutlier}
+# Part of the ChemoSpec Package
+# Bryan Hanson, DePauw University, Aug 2009
+
+	x <- as.matrix(x)
+    covr <- covMcd(x, cor = TRUE, alpha = quan)
+    cov.svd <- svd(cov(x), nv = 0)
+    covr.svd <- svd(covr$cov, nv = 0)
+    r <- cov.svd[["u"]] %*% diag(sqrt(cov.svd[["d"]]))
+    rr <- covr.svd[["u"]] %*% diag(sqrt(covr.svd[["d"]]))
+    e <- cbind(cos(c(0:100)/100 * 2 * pi) * sqrt(qchisq(1 - alpha, 
+        2)), sin(c(0:100)/100 * 2 * pi) * sqrt(qchisq(1 - alpha, 
+        2)))
+    tt <- t(r %*% t(e)) + rep(1, 101) %o% apply(x, 2, mean)
+    ttr <- t(rr %*% t(e)) + rep(1, 101) %o% covr$center
+    
+   	ellipse <- list()
+   	ellipse$x.cls <- tt[,1]
+   	ellipse$y.cls <- tt[,2]
+   	ellipse$c <- round(cor(x)[1, 2], 2)
+   	ellipse$x.rob <- ttr[,1]
+   	ellipse$y.rob <- ttr[,2]
+   	ellipse$r <- round(covr$cor[1, 2], 2)
+   	ellipse
+
+ }
+

Added: pkg/chemospec/R/plotScoresDecoration.R
===================================================================
--- pkg/chemospec/R/plotScoresDecoration.R	                        (rev 0)
+++ pkg/chemospec/R/plotScoresDecoration.R	2009-09-24 00:40:34 UTC (rev 55)
@@ -0,0 +1,34 @@
+plotScoresDecoration <-
+function(spectra, pca, pcs = c(1,2), tol = "none") {
+	
+# Function to do some plot annotations on the PCA score plot
+# These tasks are done in all 4 cases in plot.scores, so keeping it here makes
+# maintaining & changing code easier.
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, Aug 2009
+
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/chemospec -r 55


More information about the Chemospec-commits mailing list