[Chemospec-commits] r11 - / R data inst man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Sep 16 20:09:01 CEST 2009
Author: bryanhanson
Date: 2009-09-16 20:09:00 +0200 (Wed, 16 Sep 2009)
New Revision: 11
Added:
.Rhistory
DESCRIPTION
R/
R/HCA.R
R/ScoreHCA.R
R/chkSpectra.R
R/classPCA.R
R/colLeaf.R
R/getManyCsv.R
R/groupNcolor.R
R/labelExtremes.R
R/pcaBoot.R
R/pcaDiag.R
R/plot2Loadings.R
R/plotLoadings.R
R/plotScores.R
R/plotScoresCor.R
R/plotScoresDecoration.R
R/plotScree.R
R/q2rPCA.R
R/r2qPCA.R
R/removeSample.R
R/robPCA.R
R/specSurvey.R
R/sumSpectra.R
R/trimNbin.R
data/
data/CuticleIR.RData
inst/
inst/CITATION
man/
man/ChemoSpec-package.Rd
man/ChemoSpec-package.html
man/CuticleIR.Rd
man/CuticleIR.html
man/HCA.Rd
man/HCA.html
man/ScoreHCA.Rd
man/chkSpectra.Rd
man/chkSpectra.html
man/classPCA.Rd
man/classPCA.html
man/colLeaf.Rd
man/colLeaf.html
man/getManyCsv.Rd
man/getManyCsv.html
man/groupNcolor.Rd
man/groupNcolor.html
man/labelExtremes.Rd
man/labelExtremes.html
man/pcaBoot.Rd
man/pcaBoot.html
man/pcaDiag.Rd
man/pcaDiag.html
man/plot2Loadings.Rd
man/plot2loadings.html
man/plotLoadings.Rd
man/plotLoadings.html
man/plotScores.Rd
man/plotScores.html
man/plotScoresCor.Rd
man/plotScoresCor.html
man/plotScoresDecoration.Rd
man/plotScoresDecoration.html
man/plotScree.Rd
man/plotScree.html
man/q2rPCA.Rd
man/q2rPCA.html
man/removeSample.Rd
man/removeSample.html
man/robPCA.Rd
man/robPCA.html
man/scoreHCA.html
man/specSurvey.Rd
man/specSurvey.html
man/sumSpectra.Rd
man/sumSpectra.html
man/trimNbin.Rd
man/trimNbin.html
Log:
initial upload
Added: .Rhistory
===================================================================
--- .Rhistory (rev 0)
+++ .Rhistory 2009-09-16 18:09:00 UTC (rev 11)
@@ -0,0 +1,466 @@
+#
+# Function to plot means and se's or something similar#
+# Bryan Hanson, DePauw Univ, July 2009#
+#
+#require(plotrix) # need Ben Bolker's pkg#
+# prefer gplots version; doesn't have problem with bar colors and handles y lim better#
+#
+seX <- function(x) res <- sd(x)/sqrt(length(x)) # ecologist's std error of x = seX!#
+#
+compare.means <- function(data = NULL, what = c(), fac = c(),#
+ order = list(), mycolors = c(),#
+ method = c("se", "iqr", "mad"),#
+ title = "Comparison of Means",#
+ subtitle = "optional explanatory caption", pad = 0.75) {#
+#
+# what = the parameter to have the means etc computed#
+# fac = the factors for subsetting (only 2 allowed)#
+# fac must be a number giving the "column" in data list#
+# order of 'fac' determines how they are grouped#
+# pad is the number of blank slots on either end of the plot#
+# and the number of blank slots between groups#
+#
+# check the data against the arguments#
+# need to limit total levels due to size of device#
+#
+ if (length(what) > 1) stop("Only 1 response may be specified")#
+ if (length(fac) > 2) stop("Only 2 factors may be specified")#
+ for (n in 1:length(fac)) {#
+ if (!is.factor(data[[fac[n]]])) stop(">fac< must be a factor")#
+ }#
+#
+# Case 1: one main group, with various levels#
+#
+ if (length(fac) == 1) {#
+ #
+ a <- data[[fac[1]]]#
+ a <- factor(a, levels = order[[1]])#
+ n <- length(levels(a))#
+ l <- levels(a)#
+ #
+ if (!method == "mad") {#
+ m <- aggregate(data[[what[1]]], list(a), mean)#
+ }#
+ else { #
+ m <- aggregate(data[[what[1]]], list(a), median)#
+ }#
+ #
+ #
+ if (method == "se") {#
+ sl <- su <- aggregate(data[[what[1]]], list(a), seX)#
+ ylab <- "mean +/- se"#
+ }#
+ if (method == "iqr") {#
+ sl <- aggregate(data[[what[1]]], list(a), quantile, probs = 0.25)#
+ su <- aggregate(data[[what[1]]], list(a), quantile, probs = 0.75)#
+ ylab <- "mean +/- iqr"#
+ }#
+ if (method == "mad") {#
+ sl <- su <- aggregate(data[[what[1]]], list(a), mad)#
+ ylab <- "median +/- mad"#
+ }#
+#
+ #
+ x <- 1:n#
+ y <- unlist(m$x)#
+ su <- unlist(su$x)#
+ sl <- unlist(sl$x)#
+ #
+ # push the data together for nicer appearance#
+ p <- c(-0.5, n)#
+ #
+ # width of plot at pad" per slot, including "empties"#
+ par(fin = c(diff(p)*pad, 7))#
+ #
+# plotCI(x, y, uiw = su, liw = sl, gap = TRUE,#
+# main = title, sub = subtitle, xlab = "", ylab = ylab,#
+# col = "red", bty = "n", xaxt = "n",#
+# xlim = p, ylim = c(min(y - sl), max(y + su)))#
+ plotCI(x, y, uiw = su, liw = sl,#
+ main = title, sub = subtitle, xlab = "", ylab = ylab,#
+ col = mycolors, bty = "n", xaxt = "n",#
+ xlim = p, ylim = c(min(y - sl), max(y + su)))#
+ axis(1, at = 1:n, labels = l, cex.axis = pad)#
+ }#
+#
+# Case 2: no. of main groups is the levels of fac[1]#
+#
+ if (length(fac) == 2) {#
+ #
+ a <- data[[fac[1]]]#
+ a <- factor(a, levels = order[[1]])#
+ n.a <- length(levels(a))#
+ l.a <- levels(a)#
+ #
+ #
+ b <- data[[fac[2]]]#
+ b <- factor(b, levels = order[[2]])#
+ n.b <- length(levels(b))#
+ l.b <- levels(b)#
+ #
+ m <- aggregate(data[[what[1]]], list(a, b), mean)#
+ if (method == "se") {#
+ sl <- su <- aggregate(data[[what[1]]], list(a, b), seX)#
+ ylab <- "mean +/- se"#
+ }#
+ if (method == "iqr") {#
+ sl <- aggregate(data[[what[1]]], list(a, b), quantile, probs = 0.25)#
+ su <- aggregate(data[[what[1]]], list(a, b), quantile, probs = 0.75)#
+ ylab <- "mean +/- iqr"#
+ }#
+ #
+ t <- n.a * n.b#
+ x <- 1:t#
+ y <- unlist(m$x)#
+ su <- unlist(su$x)#
+ sl <- unlist(sl$x)#
+ #
+ # push the data together for nicer appearance#
+ p <- c(-0.5, t)#
+ #
+ # construct the labels#
+ l <- paste(m$Group.1, m$Group.2, sep = "")#
+ #
+ # no. cols required = n.a#
+# col.list <- c("red", "green", "blue", "black")#
+# mycols <- c(rep(col.list[1:n.a], n.b))#
+ mycols <- c(rep(mycolors, n.b))#
+ #
+ # width of plot at pad" per slot, including "empties"#
+ par(fin = c(diff(p)*pad, 7))#
+ #
+# plotCI(x, y, uiw = su, liw = sl, gap = TRUE,#
+# main = title, sub = subtitle, xlab = "", ylab = ylab,#
+# col = mycols, bty = "n", xaxt = "n", scol <- mycols,#
+# xlim = p, ylim = c(min(y - sl), max(y + su)))#
+ #
+ plotCI(x, y, uiw = su, liw = sl,#
+ main = title, sub = subtitle, xlab = "", ylab = ylab,#
+ col = mycols, bty = "n", xaxt = "n", barcol = mycols,#
+ xlim = p, ylim = c(min(y - sl), max(y + su)))#
+ #
+ # add x axes in n.b segments showing grouping#
+ for (i in 1:n.b) {#
+ i <- i - 1#
+ seg <- c((1 + (i * n.a)):((1 + i) * n.a))#
+ axis(1, at = x[seg], labels = l[seg], cex.axis = pad)#
+ }#
+ }#
+ }
+package.skeleton(name = "ChemoSpec", list = cs.list)
+cs.list
+package.skeleton(name = "ChemoSpec", list = cs.list, force = TRUE)
+load("/Users/bryanhanson/Documents/Research/MetabolomicsProjects/Portulaca oleracea Project/Portulaca Summer 2009 Final Results/Kelly's Data/IR spectra/noNoise.RData")
+str(noNoise)
+CuticleIR <- noNoise
+save(CuticleIR, file = "CuticleIR.RData")
+prompt(CuticleIR)
+?princomp
+ls()
+getwd()
+?RDconv
+cs
+?prcomp
+cs.dir
+my.pkgs
+?tools
+library(help = "tools")
+R_PDFLATEXCMD
+PDFLATEX
+csdir
+load("/Users/bryanhanson/Documents/Research/MetabolomicsProjects/ChemometricsStuff/BAHpackages/ChemoSpec.Rcheck/ChemoSpec/data/CuticleIR.RData")
+results <- classPCA(CuticleIR, choice = "autoscale")
+pcaDiag <-#
+function (spectra, pca, pcs = 3, quantile = 0.975, plot = c("OD", "SD"), ...) {#
+#
+# Function for PCA Diagnostic Plots (modified from Filzmosers version in {chemometrics})#
+# Part of the ChemoSpec package. Bryan Hanson, DePauw Univ, Sept 2009#
+#
+ if ("prcomp" %in% class(pca)) pca <- q2rPCA(pca)#
+ X <- spectra$data#
+ X.pca <- pca#
+ a <- pcs#
+ if (is.null(a)) a <- 3#
+ #
+ SDist <- sqrt(apply(t(t(X.pca$sco[, 1:a]^2)/X.pca$sdev[1:a]^2), 1, sum))#
+ ODist <- sqrt(apply((X - X.pca$sco[, 1:a] %*% t(X.pca$loa[, 1:a]))^2, 1, sum))#
+ critSD <- sqrt(qchisq(quantile, a))#
+ critOD <- (median(ODist^(2/3)) + mad(ODist^(2/3)) * qnorm(quantile))^(3/2)#
+ #
+ sub <- paste(pca$method, a, "PCs", sep = " ")#
+ if ("SD" %in% plot) {#
+ plot(SDist, ylim = c(0, max(SDist)), ylab = "score distance", #
+ xlab = spectra$desc, sub = sub, main = "Possible PCA Outliers based on Score Distance",#
+ col = spectra$colors, pch = 20, ...)#
+ abline(h = critSD, lty = 2)#
+ #
+ y.data <- subset(SDist, SDist > critSD)#
+ x.data <- which(SDist %in% y.data, arr.ind = TRUE)#
+ data <- cbind(x.data, y.data)#
+ labelExtremes(data, names = spectra$names[x.data], tol = 1.0)#
+ }#
+ #
+ if ("OD" %in% plot) {#
+ plot(ODist, ylim = c(0, max(ODist)), ylab = "orthogonal distance", #
+ xlab = spectra$desc, sub = sub, main = "Possible PCA Outliers based on Orthogonal Distance",#
+ col = spectra$colors, pch = 20, ...)#
+ abline(h = critOD, lty = 2)#
+#
+ y.data <- subset(ODist, ODist > critOD)#
+ x.data <- which(ODist %in% y.data, arr.ind = TRUE)#
+ data <- cbind(x.data, y.data)#
+ labelExtremes(data, names = spectra$names[x.data], tol = 1.0)#
+ }#
+ #
+ list(SDist = SDist, ODist = ODist, critSD = critSD, critOD = critOD)#
+ }
+results <- classPCA(CuticleIR, choice = "noscale")
+?if
+else
+pcaDiag <-#
+function (spectra, pca, pcs = 3, quantile = 0.975, plot = c("OD", "SD"), ...) {#
+#
+# Function for PCA Diagnostic Plots (modified from Filzmosers version in {chemometrics})#
+# Part of the ChemoSpec package. Bryan Hanson, DePauw Univ, Sept 2009#
+#
+ if ("prcomp" %in% class(pca)) pca <- q2rPCA(pca)#
+ X <- spectra$data#
+ X.pca <- pca#
+ a <- pcs#
+ if (is.null(a)) a <- 3#
+ #
+ SDist <- sqrt(apply(t(t(X.pca$sco[, 1:a]^2)/X.pca$sdev[1:a]^2), 1, sum))#
+ ODist <- sqrt(apply((X - X.pca$sco[, 1:a] %*% t(X.pca$loa[, 1:a]))^2, 1, sum))#
+ critSD <- sqrt(qchisq(quantile, a))#
+ critOD <- (median(ODist^(2/3)) + mad(ODist^(2/3)) * qnorm(quantile))^(3/2)#
+ #
+ sub <- paste(pca$method, a, "PCs", sep = " ")#
+ if ("SD" %in% plot) {#
+ plot(SDist, ylim = c(0, max(SDist)), ylab = "score distance", #
+ xlab = spectra$desc, sub = sub, main = "Possible PCA Outliers based on Score Distance",#
+ col = spectra$colors, pch = 20, ...)#
+ abline(h = critSD, lty = 2)#
+ #
+ y.data <- subset(SDist, SDist > critSD)#
+ x.data <<- which(SDist %in% y.data, arr.ind = TRUE)#
+ data <- cbind(x.data, y.data)#
+ if (!x.data == NULL) labelExtremes(data, names = spectra$names[x.data], tol = 1.0)#
+ }#
+ #
+ if ("OD" %in% plot) {#
+ plot(ODist, ylim = c(0, max(ODist)), ylab = "orthogonal distance", #
+ xlab = spectra$desc, sub = sub, main = "Possible PCA Outliers based on Orthogonal Distance",#
+ col = spectra$colors, pch = 20, ...)#
+ abline(h = critOD, lty = 2)#
+#
+ y.data <- subset(ODist, ODist > critOD)#
+ x.data <<- which(ODist %in% y.data, arr.ind = TRUE)#
+ data <- cbind(x.data, y.data)#
+ if (!x.data == NULL) labelExtremes(data, names = spectra$names[x.data], tol = 1.0)#
+ }#
+ #
+ list(SDist = SDist, ODist = ODist, critSD = critSD, critOD = critOD)#
+ }
+str(x.data)
+pcaDiag <-#
+function (spectra, pca, pcs = 3, quantile = 0.975, plot = c("OD", "SD"), ...) {#
+#
+# Function for PCA Diagnostic Plots (modified from Filzmosers version in {chemometrics})#
+# Part of the ChemoSpec package. Bryan Hanson, DePauw Univ, Sept 2009#
+#
+ if ("prcomp" %in% class(pca)) pca <- q2rPCA(pca)#
+ X <- spectra$data#
+ X.pca <- pca#
+ a <- pcs#
+ if (is.null(a)) a <- 3#
+ #
+ SDist <- sqrt(apply(t(t(X.pca$sco[, 1:a]^2)/X.pca$sdev[1:a]^2), 1, sum))#
+ ODist <- sqrt(apply((X - X.pca$sco[, 1:a] %*% t(X.pca$loa[, 1:a]))^2, 1, sum))#
+ critSD <- sqrt(qchisq(quantile, a))#
+ critOD <- (median(ODist^(2/3)) + mad(ODist^(2/3)) * qnorm(quantile))^(3/2)#
+ #
+ sub <- paste(pca$method, a, "PCs", sep = " ")#
+ if ("SD" %in% plot) {#
+ plot(SDist, ylim = c(0, max(SDist)), ylab = "score distance", #
+ xlab = spectra$desc, sub = sub, main = "Possible PCA Outliers based on Score Distance",#
+ col = spectra$colors, pch = 20, ...)#
+ abline(h = critSD, lty = 2)#
+ #
+ y.data <- subset(SDist, SDist > critSD)#
+ x.data <<- which(SDist %in% y.data, arr.ind = TRUE)#
+ data <- cbind(x.data, y.data)#
+ if (!x.data == 0) labelExtremes(data, names = spectra$names[x.data], tol = 1.0)#
+ }#
+ #
+ if ("OD" %in% plot) {#
+ plot(ODist, ylim = c(0, max(ODist)), ylab = "orthogonal distance", #
+ xlab = spectra$desc, sub = sub, main = "Possible PCA Outliers based on Orthogonal Distance",#
+ col = spectra$colors, pch = 20, ...)#
+ abline(h = critOD, lty = 2)#
+#
+ y.data <- subset(ODist, ODist > critOD)#
+ x.data <<- which(ODist %in% y.data, arr.ind = TRUE)#
+ data <- cbind(x.data, y.data)#
+ if (!x.data == 0) labelExtremes(data, names = spectra$names[x.data], tol = 1.0)#
+ }#
+ #
+ list(SDist = SDist, ODist = ODist, critSD = critSD, critOD = critOD)#
+ }
+x.data
+x.data == 0
+is.na(x.data)
+is.empty(x.data)
+length(x.data)
+pcaDiag <-#
+function (spectra, pca, pcs = 3, quantile = 0.975, plot = c("OD", "SD"), ...) {#
+#
+# Function for PCA Diagnostic Plots (modified from Filzmosers version in {chemometrics})#
+# Part of the ChemoSpec package. Bryan Hanson, DePauw Univ, Sept 2009#
+#
+ if ("prcomp" %in% class(pca)) pca <- q2rPCA(pca)#
+ X <- spectra$data#
+ X.pca <- pca#
+ a <- pcs#
+ if (is.null(a)) a <- 3#
+ #
+ SDist <- sqrt(apply(t(t(X.pca$sco[, 1:a]^2)/X.pca$sdev[1:a]^2), 1, sum))#
+ ODist <- sqrt(apply((X - X.pca$sco[, 1:a] %*% t(X.pca$loa[, 1:a]))^2, 1, sum))#
+ critSD <- sqrt(qchisq(quantile, a))#
+ critOD <- (median(ODist^(2/3)) + mad(ODist^(2/3)) * qnorm(quantile))^(3/2)#
+ #
+ sub <- paste(pca$method, a, "PCs", sep = " ")#
+ if ("SD" %in% plot) {#
+ plot(SDist, ylim = c(0, max(SDist)), ylab = "score distance", #
+ xlab = spectra$desc, sub = sub, main = "Possible PCA Outliers based on Score Distance",#
+ col = spectra$colors, pch = 20, ...)#
+ abline(h = critSD, lty = 2)#
+ #
+ y.data <- subset(SDist, SDist > critSD)#
+ x.data <<- which(SDist %in% y.data, arr.ind = TRUE)#
+ data <- cbind(x.data, y.data)#
+ if (!length(x.data) == 0) labelExtremes(data, names = spectra$names[x.data], tol = 1.0)#
+ }#
+ #
+ if ("OD" %in% plot) {#
+ plot(ODist, ylim = c(0, max(ODist)), ylab = "orthogonal distance", #
+ xlab = spectra$desc, sub = sub, main = "Possible PCA Outliers based on Orthogonal Distance",#
+ col = spectra$colors, pch = 20, ...)#
+ abline(h = critOD, lty = 2)#
+#
+ y.data <- subset(ODist, ODist > critOD)#
+ x.data <<- which(ODist %in% y.data, arr.ind = TRUE)#
+ data <- cbind(x.data, y.data)#
+ if (!length(x.data) == 0) labelExtremes(data, names = spectra$names[x.data], tol = 1.0)#
+ }#
+ #
+ list(SDist = SDist, ODist = ODist, critSD = critSD, critOD = critOD)#
+ }
+temp <- pcaDiag(CuticleIR, results, pcs = 2, plot = "OD")
+temp <- pcaDiag(CuticleIR, results, pcs = 2, plot = "SD")
+plotLoadings <-#
+function(spectra, pca, title = "no title provided",#
+ loads = c(1), ref = 1, ...) {#
+ #
+# Function to plot loadings vs. frequencies#
+# Part of the ChemoSpec package#
+# Bryan Hanson, DePauw University, June 2008#
+# This is the lattice version#
+#
+ if (missing(spectra)) stop("No spectral data set provided")#
+ if (missing(pca)) stop("No PCA results provided")#
+ if (!class(spectra) == "Spectra") stop("Your spectral data set looks corrupt!")#
+ if (!("princomp" %in% class(pca) || "prcomp" %in% class(pca))) stop("Your pca results look corrupt!")#
+ #
+ title <- paste(title, ": Loadings Plot", sep = "")#
+#
+ # Stack the requested data into a data frame for plotting#
+ #
+ names <- paste("PC", loads, "Loadings", sep = " ")#
+ names <- c("Reference Spectrum", names)#
+ x <- rep(spectra$freq, length(loads) + 1)#
+ #
+ z <- rep(names[1], length(spectra$freq))#
+ y <- spectra$data[ref,] # load in the reference spectrum#
+ #
+ for(n in 1:length(loads)) {#
+ y <- c(y, pca$rotation[,loads[n]]) # add in each loading#
+ z <- c(z, rep(names[n + 1], length(spectra$freq)))#
+ }#
+#
+ z <- as.factor(z)#
+ z <- relevel(z, "Reference Spectrum")#
+ df <- data.frame(y, x, z) #
+ #
+ # Do the plot#
+ # Note: no way exists to plot the x axis reversed for multiple panels#
+ #
+ p <- xyplot(y ~ x | z, data = df,#
+ main = title,#
+ xlab = spectra$unit[1], ylab = "",#
+ layout = c(1, length(loads) + 1),#
+ strip.left = TRUE, strip = FALSE, col = "black",#
+ scales = list(x = "same", y = "free"),#
+ page = function(n) panel.text(lab = pca$method, x = 0.5, y = 0.95),#
+ panel = function(..., type = "h") {#
+ if (panel.number() == 1) {#
+ panel.xyplot(..., type = "l")#
+ } else {#
+ panel.xyplot(..., type = type)#
+ }#
+ }, ...)#
+ #
+ plot(p)#
+ }
+sumSpectra(CuticleIR)
+test <- trimNbin(CuticleIR, rem.freq = c(500:1000))
+test <- trimNbin(CuticleIR, rem.freq = c(501:1000))
+test <- trimNbin(CuticleIR, rem.freq = c("min", 1000))
+sumSpectra(test)
+library(survival)
+?coxph
+coxph
+pcaDiag <-#
+function (spectra, pca, pcs = 3, quantile = 0.975, plot = c("OD", "SD"), ...) {#
+#
+# Function for PCA Diagnostic Plots (modified from Filzmosers version in {chemometrics})#
+# Part of the ChemoSpec package. Bryan Hanson, DePauw Univ, Sept 2009#
+#
+ if ("prcomp" %in% class(pca)) pca <- q2rPCA(pca)#
+ X <- spectra$data#
+ X.pca <- pca#
+ a <- pcs#
+ if (is.null(a)) a <- 3#
+ #
+ SDist <- sqrt(apply(t(t(X.pca$sco[, 1:a]^2)/X.pca$sdev[1:a]^2), 1, sum))#
+ ODist <- sqrt(apply((X - X.pca$sco[, 1:a] %*% t(X.pca$loa[, 1:a]))^2, 1, sum))#
+ critSD <- sqrt(qchisq(quantile, a))#
+ critOD <- (median(ODist^(2/3)) + mad(ODist^(2/3)) * qnorm(quantile))^(3/2)#
+ #
+ sub <- paste(pca$method, a, "PCs", sep = " ")#
+ if ("SD" %in% plot) {#
+ plot(SDist, ylim = c(0, max(SDist)), ylab = "score distance", #
+ xlab = spectra$desc, sub = sub, main = "Possible PCA Outliers based on Score Distance",#
+ col = spectra$colors, pch = 20, ...)#
+ abline(h = critSD, lty = 2)#
+ #
+ y.data <- subset(SDist, SDist > critSD)#
+ x.data <- which(SDist %in% y.data, arr.ind = TRUE)#
+ data <- cbind(x.data, y.data)#
+ if (!length(x.data) == 0) labelExtremes(data, names = spectra$names[x.data], tol = 1.0)#
+ }#
+ #
+ if ("OD" %in% plot) {#
+ plot(ODist, ylim = c(0, max(ODist)), ylab = "orthogonal distance", #
+ xlab = spectra$desc, sub = sub, main = "Possible PCA Outliers based on Orthogonal Distance",#
+ col = spectra$colors, pch = 20, ...)#
+ abline(h = critOD, lty = 2)#
+#
+ y.data <- subset(ODist, ODist > critOD)#
+ x.data <- which(ODist %in% y.data, arr.ind = TRUE)#
+ data <- cbind(x.data, y.data)#
+ if (!length(x.data) == 0) labelExtremes(data, names = spectra$names[x.data], tol = 1.0)#
+ }#
+ #
+ list(SDist = SDist, ODist = ODist, critSD = critSD, critOD = critOD)#
+ }
+?install
Added: DESCRIPTION
===================================================================
--- DESCRIPTION (rev 0)
+++ DESCRIPTION 2009-09-16 18:09:00 UTC (rev 11)
@@ -0,0 +1,12 @@
+Package: ChemoSpec
+Type: Package
+Title: Exploratory Chemometrics for Spectroscopy
+Version: 1.0
+Date: 2009-09-08
+Author: Bryan A. Hanson DePauw University, Greencastle Indiana USA
+Maintainer: Bryan A. Hanson <hanson at depauw.edu>
+Description: A collection of functions for plotting spectra (NMR, IR etc) and carrying out various forms of exploratory data analysis, such as HCA and PCA. The design allows comparison of data from samples which fall into groups such as treatment vs. control. Robust methods appropriate for this type of high-dimensional data are available. ChemoSpec is designed to be very user friendly and suitable for people with limited background in R.
+License: GPL-3
+LazyLoad: yes
+Depends: chemometrics, robustbase, RColorBrewer, plyr, pcaPP, mvtnorm, mvoutlier, pls, lattice
+URL: http://academic.depauw.edu/~hanson/ChemoSpec/ChemoSpec.html
Added: R/HCA.R
===================================================================
--- R/HCA.R (rev 0)
+++ R/HCA.R 2009-09-16 18:09:00 UTC (rev 11)
@@ -0,0 +1,28 @@
+HCA <-
+function(spectra, title = "no title provided", method = "complete", ...) {
+
+# Function to carry out HCA, basically a wrapper to existing methods
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, June 2008
+
+ if (missing(spectra)) stop("No spectral data provided")
+ chkSpectra(spectra)
+
+ distance <- dist(as.data.frame(spectra$data, row.names = spectra$names))
+ cluster <- as.dendrogram(hclust(distance), method = method)
+ cluster <- dendrapply(cluster, colLeaf, spectra)
+ title <- paste(title, ": HCA Analysis", sep = "")
+ sub.title <- paste("Clustering method: ", method, sep = "")
+ plot(cluster, main = title, sub = sub.title, horiz = FALSE)
+
+ leg.txt <- levels(spectra$group)
+ leg.col <- NULL
+ for (z in 1:length(leg.txt)) {
+ i <- match(leg.txt[z], spectra$group)
+ leg.col[z] <- spectra$colors[i]
+ }
+ leg.txt <- c("Key", leg.txt)
+ leg.col <- c("black", leg.col)
+ legend("topright", leg.txt, text.col = leg.col, bty = "n")
+ }
+
Added: R/ScoreHCA.R
===================================================================
--- R/ScoreHCA.R (rev 0)
+++ R/ScoreHCA.R 2009-09-16 18:09:00 UTC (rev 11)
@@ -0,0 +1,34 @@
+ScoreHCA <-
+function(spectra, pca, title = "no title provided", scores = c(1:5), method = "complete", ...) {
+
+# Function to carry out HCA on PCA scores
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, Aug 2009
+
+
+ if (missing(spectra)) stop("No spectral data set provided")
+ if (missing(pca)) stop("No pca results provided")
+ if (!("princomp" %in% class(pca) || "prcomp" %in% class(pca))) stop("Your pca results look corrupt!")
+ chkSpectra(spectra)
+
+### need to add conversion of pca modes here
+
+ distance <- dist(as.data.frame(pca$x[,scores], row.names = spectra$names))
+ cluster <- as.dendrogram(hclust(distance), method = method)
+ cluster <- dendrapply(cluster, colLeaf, spectra)
+ title <- paste(title, ": HCA Analysis of PC Scores", sep = "")
+
+ sub.title <- paste("Clustering method:", method, " PCA method:", pca$method, sep = " ")
+ plot(cluster, main = title, sub = sub.title, horiz = FALSE)
+
+ leg.txt <- levels(spectra$groups)
+ leg.col <- NULL
+ for (z in 1:length(leg.txt)) {
+ i <- match(leg.txt[z], spectra$groups)
+ leg.col[z] <- spectra$colors[i]
+ }
+ leg.txt <- c("Key", leg.txt)
+ leg.col <- c("black", leg.col)
+ legend("topright", leg.txt, text.col = leg.col, bty = "n")
+ }
+
Added: R/chkSpectra.R
===================================================================
--- R/chkSpectra.R (rev 0)
+++ R/chkSpectra.R 2009-09-16 18:09:00 UTC (rev 11)
@@ -0,0 +1,43 @@
+chkSpectra <-
+function(spectra, confirm = FALSE) {
+
+# Function to Check the Integrity of Spectra Objects
+# Related to the concept of validObject for S4 classes
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, Aug 2009
+# Many kinds of errors may be missed due to coercion:
+# e.g. spectra$colors[22] <- 1.2 creates "1.2" as the entry
+# Only complete replacement of $colors with all integers throws an error.
+
+ if (missing(spectra)) stop("No object of supposed class Spectra provided")
+ w <- FALSE
+ if (!class(spectra) == "Spectra") { warning("The object provided was not of class Spectra"); w <- TRUE }
+ if (!class(spectra$freq) == "numeric") { warning("The frequency data appear to be corrupt"); w <- TRUE }
+ if (!class(spectra$data) == "matrix") { warning("The spectral intensities appear to be corrupt"); w <- TRUE }
+ if (!class(spectra$names) == "character") { warning("The sample names appear to be corrupt"); w <- TRUE }
+ if (!class(spectra$color) == "character") { warning("The assigned colors appear to be corrupt"); w <- TRUE }
+ if (!class(spectra$unit) == "character") { warning("The units appear to be corrupt"); w <- TRUE }
+ if (!class(spectra$desc) == "character") { warning("The description appears to be corrupt"); w <- TRUE }
+ if (!class(spectra$groups) == "factor") { warning("The assigned groups appear to be corrupt"); w <- TRUE }
+
+ f <- length(spectra$freq)
+ d2 <- dim(spectra$data)[2]
+ n <- length(spectra$names)
+ g <- length(spectra$groups)
+ co <- length(spectra$colors)
+ d1 <- dim(spectra$data)[1]
+
+ if (!identical(f, d2)) { warning("The dimensions don't make sense (freq, data)"); w <- TRUE }
+ if (!identical(n, g)) { warning("The dimensions don't make sense (names, group)"); w <- TRUE }
+ if (!identical(g, co)) { warning("The dimensions don't make sense (group, colors)"); w <- TRUE }
+ if (!identical(co, d1)) { warning("The dimensions don't make sense (colors, data)"); w <- TRUE }
+
+ if ((!w) && (confirm)) cat("You must be awesome: These spectra look just dandy!")
+ if (w) {
+ cat("*** There seem to be one or more problems with these spectra!\n")
+ cat("*** Is the spectral data set loaded different from the one you tried to plot?\n")
+ stop("Sorry, we can't continue this way: It's not me, it's you!")
+ }
+
+ }
+
Added: R/classPCA.R
===================================================================
--- R/classPCA.R (rev 0)
+++ R/classPCA.R 2009-09-16 18:09:00 UTC (rev 11)
@@ -0,0 +1,41 @@
+classPCA <-
+function(spectra, choice = "noscale") {
+
+# Function to carry out non-robust Principal Components Analysis
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, Sept 2009
+
+ if (missing(spectra)) stop("No spectral data set passed to PCA")
+ if (!class(spectra) == "Spectra") stop("Your spectral data set looks corrupt!")
+
+ choices <- c("noscale", "autoscale", "Pareto") # trap for invalid scaling method
+ check <- choice %in% choices
+ if (!check) stop("The choice of scaling parameter was invalid")
+
+ # First, row scale (compensates for different dilutions/handling of samples)
+
+ t.data <- t(spectra$data)
+ sums <- colSums(t.data)
+ row.scaled <- t(scale(t.data, center = FALSE, scale = sums))
+
+ # Center & scale the data using the desired method.
+
+ if (identical(choice, "noscale")) {centscaled <- scale(row.scaled, center = TRUE, scale = FALSE)}
+
+ if (identical(choice, "autoscale")) {
+ col.sd <- sd(row.scaled)
+ centscaled <- scale(row.scaled, center = TRUE, scale = col.sd)}
+
+ if (identical(choice, "Pareto")) {
+ col.sd <- sd(row.scaled)
+ centscaled <- scale(row.scaled, center = TRUE, scale = col.sd^0.5)}
+
+ # Now the PCA!
+
+ pca <- prcomp(centscaled, retx = TRUE, center = FALSE, scale. = FALSE)
+ pca$method <- paste("centered/", choice, "/", "classical", sep = "")
+
+ pca
+
+ }
+
Added: R/colLeaf.R
===================================================================
--- R/colLeaf.R (rev 0)
+++ R/colLeaf.R 2009-09-16 18:09:00 UTC (rev 11)
@@ -0,0 +1,15 @@
+colLeaf <-
+function(n, spectra) { # this is called iteratively by dendrapply
+
+# A little trick to color leaves properly, derived from the archives
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, June 2008
+
+ if(is.leaf(n)) {
+ a <- attributes(n)
+ i <- match(a$label, spectra$names)
+ attr(n, "nodePar") <- c(a$nodePar, list(lab.col = spectra$colors[i], pch = NA, lab.cex = 1.0))
+ }
+ n
+ }
+
Added: R/getManyCsv.R
===================================================================
--- R/getManyCsv.R (rev 0)
+++ R/getManyCsv.R 2009-09-16 18:09:00 UTC (rev 11)
@@ -0,0 +1,52 @@
+getManyCsv <-
+function(gr.crit = NULL, gr.cols = c("auto"),
+ freq.unit = "no frequency unit provided",
+ NMR = FALSE,
+ int.unit = "no intensity unit provided",
+ descrip = "no description provided",
+ out.file = "mydata") {
+
+# Function to Read & Prep Spectroscopic Data
+# from raw data files into an RData file/object
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, Aug 2009
+# This script reads a series of csv files that share a common frequency axis.
+# Files should have freq in column 1, absorbance/intensity in column 2.
+# There should be no column labels.
+# Groups & colors assigned using file names by a separate function.
+
+ if (is.null(gr.crit)) stop("No group criteria provided to encode data")
+
+ files <- list.files(pattern = "\\.(csv|CSV)$")
+
+ # get the first file, initialize stuff, and pull out the frequency data
+
+ temp <- read.csv(files[1])
+ spectra <- list() # OK to initialize a list this way
+ spectra$freq <- temp[,1]
+ spectra$data <- matrix(data = 0.0, nrow = length(files), ncol = length(spectra$freq))
+
+ # loop over all files (you have to read the whole file then grab
+ # just the part you want, i.e. the 2nd column)
+
+ files.noext <- substr(basename(files), 1, nchar(basename(files)) - 4)
+
+ for (i in 1:length(files)) {
+ temp <- read.csv(files[i])
+ spectra$data[i,] <- temp[,2]
+ spectra$names[i] <- files.noext[i] # need to remove the .csv
+ }
+
+ # go get group assignments & colors, to complete assembly of spectra
+
+ spectra <- groupNcolor(spectra, gr.crit, gr.cols)
+
+ spectra$unit[1] <- freq.unit
+ spectra$unit[2] <- int.unit
+ spectra$unit[3] <- NMR
+ spectra$desc <- descrip
+
+ datafile <- paste(out.file, ".RData", sep = "")
+ save(spectra, file = datafile)
+ }
+
Added: R/groupNcolor.R
===================================================================
--- R/groupNcolor.R (rev 0)
+++ R/groupNcolor.R 2009-09-16 18:09:00 UTC (rev 11)
@@ -0,0 +1,42 @@
+groupNcolor <-
+function(spectra, gr.crit = NULL, gr.cols = c("auto")) {
+
+# Function to Process Group Assignments & Assign Colors
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw University, Aug 2009
+# Acts on all csv files in a directory.
+
+ # use the group criteria (gr.crit) to classify the samples
+
+ spectra$groups <- rep(NA, length(spectra$names)) # intialize
+
+ files <- list.files(pattern = "\\.(csv|CSV)")
+ files.noext <- substr(basename(files), 1, nchar(basename(files)) - 4)
+
+ for (i in 1:length(gr.crit)) {
+ which <- grep(gr.crit[i], files.noext)
+ spectra$groups[which] <- gr.crit[i]
+ }
+
+ spectra$groups <- as.factor(spectra$groups)
+
+ # assign each group a color for plotting later
+
+ spectra$colors <- rep(NA, length(spectra$names)) # initialize
+
+ if (identical(gr.cols[1], "auto")) {
+ gr.cols <- brewer.pal(length(gr.crit), "Set1")
+ for (i in 1:length(gr.crit)) {
+ which <- grep(gr.crit[i], files)
+ spectra$colors[which] <- gr.cols[i]
+ }
+ } else # match gr.cols with gr.crit & assign spectra$colors
+ for (i in 1:length(gr.crit)) {
+ which <- grep(gr.crit[i], spectra$groups)
+ spectra$colors[which] <- gr.cols[i]
+ }
+
+ class(spectra) <- "Spectra"
+ return(spectra) # spectra is now complete!
+ }
+
Added: R/labelExtremes.R
===================================================================
--- R/labelExtremes.R (rev 0)
+++ R/labelExtremes.R 2009-09-16 18:09:00 UTC (rev 11)
@@ -0,0 +1,34 @@
+labelExtremes <-
+function(data, names, tol) {
+
+# Function to label extreme data points
+# Part of the ChemoSpec package
+# Bryan Hanson, DePauw Univ, Aug 2009
+
+ px <- data[,1]
+ py <- data[,2]
+ pl <- names
+ if (is.numeric(pl)) pl <- format(pl, digits = 4)
+
+ q.x <- quantile(px, probs = c(1.0-tol, tol))
+ sel.x <- (px <= q.x[2]) | (px >= q.x[1])
+ keep.x <- subset(px, sel.x)
+ keep.x <- match(keep.x, px) # need to keep this & corresponding y
+
+ q.y <- quantile(py, probs = c(1.0-tol, tol))
+ sel.y <- (py <= q.y[2]) | (py >= q.y[1])
+ keep.y <- subset(py, sel.y)
+ keep.y <- match(keep.y, py) # need to keep this & corresponding x
+
+ keep <- unique(c(keep.x, keep.y))
+
+ x <- px[keep]
+ y <- py[keep]
+ l <- pl[keep]
+
+ for(n in c(1:length(x))) {
+ text(x[n], y[n], l[n], pos = 4, offset = 0.2, cex = 0.5)
+ }
+
+ }
+
Added: R/pcaBoot.R
===================================================================
--- R/pcaBoot.R (rev 0)
+++ R/pcaBoot.R 2009-09-16 18:09:00 UTC (rev 11)
@@ -0,0 +1,108 @@
+pcaBoot <-
+function (spectra, pcs, choice = "noscale", repl = 50, segments = 4,
+ segment.type = c("random", "consecutive", "interleaved"),
+ length.seg, trace = FALSE, ...) {
+
+# Function for CV of PCA scores (modified from Filzmosers version in {chemometrics})
+# Part of the ChemoSpec package. Bryan Hanson, DePauw Univ, Sept 2009
+# Conducts classical, not robust, PCA
+
+ require(pls)
+
+ if (missing(spectra)) stop("No spectral data set passed to PCA")
+ if (!class(spectra) == "Spectra") stop("Your spectral data set looks corrupt!")
+
+ choices <- c("noscale", "autoscale", "Pareto") # trap for invalid scaling method
+ check <- choice %in% choices
+ if (!check) stop("The choice of scaling parameter was invalid")
+
+ # First, row scale (compensates for different dilutions/handling of samples)
+
+ t.data <- t(spectra$data)
+ sums <- colSums(t.data)
+ row.scaled <- t(scale(t.data, center = FALSE, scale = sums))
+
+ # Center & scale the data using the desired method.
+
+ if (identical(choice, "noscale")) {centscaled <- scale(row.scaled, center = TRUE, scale = FALSE)}
+
+ if (identical(choice, "autoscale")) {
+ col.sd <- sd(row.scaled)
+ centscaled <- scale(row.scaled, center = TRUE, scale = col.sd)}
+
+ if (identical(choice, "Pareto")) {
+ col.sd <- sd(row.scaled)
+ centscaled <- scale(row.scaled, center = TRUE, scale = col.sd^0.5)}
+
+ # Filzmoser's stuff follows... (modified slightly)
+
+ X <- centscaled
+ amax <- pcs
+ if (missing(amax)) {
+ amax <- 5
+ }
+ else {
+ if (amax < 1 || amax > min(nrow(X) - 1, ncol(X)))
+ stop("Invalid number of components, amax")
+ }
+ amax <- min(amax, nrow(X) - max(sapply(segments, length)) -
+ 1)
+ optcomp <- matrix(NA, nrow = segments, ncol = repl)
+ MSEP <- matrix(NA, nrow = repl, ncol = amax)
+ dimnames(MSEP) <- list(paste("rep", 1:repl), paste("PC",
+ 1:amax))
+ Fit <- matrix(NA, nrow = repl, ncol = amax)
+ dimnames(Fit) <- list(paste("rep", 1:repl), 1:amax)
+ for (i in 1:repl) {
+ if (missing(length.seg)) {
+ segment <- cvsegments(nrow(X), k = segments, type = segment.type)
+ }
+ else {
+ segment <- cvsegments(nrow(X), length.seg = length.seg,
+ type = segment.type)
+ }
+ if (trace)
+ cat(paste("Replication: ", i))
+ MSEPj <- matrix(NA, nrow = segments, ncol = amax)
+ Fitj <- matrix(NA, nrow = segments, ncol = amax)
+ for (n.seg in 1:length(segment)) {
+ if (trace)
+ cat(n.seg, "")
+ seg <- segment[[n.seg]]
+ obsuse <- as.numeric(unlist(segment[-n.seg]))
+ Xtrain <- X[obsuse, ]
+ obstest <- as.numeric(unlist(segment[n.seg]))
+ Xtest <- X[obstest, ]
+ if (ncol(Xtrain) > nrow(Xtrain)) {
+ e <- eigen(Xtrain %*% t(Xtrain))
+ Ttrain <- e$vectors %*% diag(sqrt(e$values))
+ Ptrain <- t(Xtrain) %*% Ttrain %*% diag(1/e$values)
+ }
+ else {
+ Xtrain_svd <- svd(Xtrain)
+ Ptrain <- Xtrain_svd$v
+ }
+ Ttest <- Xtest %*% Ptrain
+ for (j in 1:amax) {
+ MSEPj[n.seg, j] <- sum((Xtest - Ttest[, 1:j] %*%
+ t(Ptrain[, 1:j]))^2)
+ }
+ Fitj[n.seg, ] <- MSEPj[n.seg, ]/sum(Xtest^2)
+ }
+ MSEP[i, ] <- apply(MSEPj, 2, mean)
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/chemospec -r 11
More information about the Chemospec-commits
mailing list