[Zooimage-commits] r228 - in pkg/mlearning: . R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Aug 2 21:23:08 CEST 2012


Author: phgrosjean
Date: 2012-08-02 21:23:08 +0200 (Thu, 02 Aug 2012)
New Revision: 228

Modified:
   pkg/mlearning/DESCRIPTION
   pkg/mlearning/NAMESPACE
   pkg/mlearning/R/confusion.R
   pkg/mlearning/R/mlearning.R
   pkg/mlearning/man/confusion.Rd
   pkg/mlearning/man/mlearning.Rd
Log:
New stars confusion plot + sorted out plot() method for confusion object

Modified: pkg/mlearning/DESCRIPTION
===================================================================
--- pkg/mlearning/DESCRIPTION	2012-08-02 08:49:02 UTC (rev 227)
+++ pkg/mlearning/DESCRIPTION	2012-08-02 19:23:08 UTC (rev 228)
@@ -6,8 +6,8 @@
 Author: Ph. Grosjean & K. Denis
 Maintainer: Ph. Grosjean <Philippe.Grosjean at umons.ac.be>
 Depends: R (>= 2.14.0)
-Imports: dataframe, stats, nnet, class, MASS, e1071, randomForest, RWeka, RColorBrewer, gplots, grDevices 
-Suggests: rJava, RWekajars, mlbench, datasets
+Imports: dataframe, stats, nnet, class, MASS, e1071, randomForest, RWeka, ipred, grDevices 
+Suggests: rJava, RWekajars, mlbench, datasets, RColorBrewer
 Description: This package provides alternate interface to various machine
     learning algorithms in order to offer a unified, formula-based, interface.
     However, given the caveats of the formula interface in R (leading to

Modified: pkg/mlearning/NAMESPACE
===================================================================
--- pkg/mlearning/NAMESPACE	2012-08-02 08:49:02 UTC (rev 227)
+++ pkg/mlearning/NAMESPACE	2012-08-02 19:23:08 UTC (rev 228)
@@ -6,8 +6,9 @@
 import(e1071)
 import(randomForest)
 import(RWeka)
-import(RColorBrewer)
-importFrom(gplots, heatmap.2)
+import(ipred)
+#import(RColorBrewer)
+#importFrom(gplots, heatmap.2)
 import(grDevices)
 
 #importFrom(gdata, combine)
@@ -21,13 +22,13 @@
 export(mlNaiveBayes)
 export(mlNaiveBayesWeka)
 
+export(cvpredict)
 export(summary.lvq)
 
 export(response)
 export(train)
 
 export(confusion)
-export(comparisonPlot)
 
 S3method(confusion, default)
 S3method(confusion, mlearning)
@@ -41,6 +42,7 @@
 S3method(print, summary.mlearning)
 S3method(plot, mlearning)
 S3method(predict, mlearning)
+S3method(cvpredict, mlearning)
 
 S3method(response, default)
 S3method(train, default)

Modified: pkg/mlearning/R/confusion.R
===================================================================
--- pkg/mlearning/R/confusion.R	2012-08-02 08:49:02 UTC (rev 227)
+++ pkg/mlearning/R/confusion.R	2012-08-02 19:23:08 UTC (rev 228)
@@ -128,59 +128,64 @@
 	X <- x
 	class(X) <- "table"
 	if (isTRUE(as.logical(error.col))) {
-		print(cbind(X, `Error (%)` = round((1 - diag(X) / apply(X, 1, sum)) * 100, 1)))
+		print(cbind(X, `Error (FNR)` = round((1 - diag(X) / apply(X, 1, sum)), 3)))
 	} else print(X)
 	
 	## Return the original object invisibly
 	invisible(x)
 }
 
-plot.confusion <- function (x, y,
-type = c("image", "image2", "tree_image", "precision_recall",
-"precision_recall2", "dendrogram"), ...)
+plot.confusion <- function (x, y = NULL,
+type = c("image", "barplot", "stars", "dendrogram"), stat1 = "Recall",
+stat2 = "Precision", ...)
 {
-	type <- match.arg(type)
-	res <- switch(type[1],
-		image = .confusionMap(x, ...),
-		image2 = .confusionMap2(x, ...),
-		tree_image = .confusionTree(x, ...),
-		precision_recall = .confusionBar(x, ...),
-		precision_recall2 = .confusionBar2(x, ...),
-		dendrogram = .confusionDendro(x, ...),
-		stop("'type' must be 'image', 'tree_image', 'precision_recall', 'precision_recall2' or 'dendrogram'"))
+	if (is.null(y)) type <- match.arg(type)[1] else type <- "stars"
+	res <- switch(type,
+		image = .confusionImage(x, y, ...),
+		barplot = .confusionBar(x, y, ...),
+		stars = .confusionStars(x, y, stat1 = stat1, stat2 = stat2, ...),
+		dendrogram = .confusionDendro(x, y, ...),
+		stop("'type' must be 'image', 'barplot', 'stars' or 'dendrogram'"))
 	invisible(res)
 }
 
 ## These functions do the respective graphs for confusion objects
-.confusionMap <- function (x, col = heat.colors(10),
-mar = c(5.1, 12.1, 4.1, 2.1), ...)
-{
-	if (!inherits(x, "confusion"))
-		stop("'x' must be a 'confusion' object")
-	omar  <- par("mar")
-	on.exit(par(omar))
-    par(mar = mar)
-	n <- ncol(x)
-	image(1:n, 1:n, 1 / (t(x[n:1, 1:n])), col = col, xlab = "", ylab = "",
-		xaxt = "n", yaxt = "n", ...)
-    axis(1, at = 1:n, las = 2)
-    axis(2, at = n:1, labels = paste(names(attr(x, "nbr.cols")), 1:n),
-		las = 1)
-    abline(h = (1:(n + 1)) - 0.5, lty = 2, col = "gray")
-    abline(v = (1:(n + 1)) - 0.5, lty = 2, col = "gray")
-	invisible(x)
-}
+## Old (simpler) version
+#.confusionImage <- function (x, y = NULL, col = heat.colors(10),
+#mar = c(5.1, 12.1, 4.1, 2.1), ...)
+#{
+#	if (!inherits(x, "confusion"))
+#		stop("'x' must be a 'confusion' object")
 
-## Alternate graphical representation of the confusion matrix (modif K. Denis)
-.confusionMap2 <- function (x, mar = c(3.1, 10.1, 3.1, 3.1), asp = 1, 
+#	if (!is.null(y))
+#		stop("cannot use a second classifier 'y' for this plot")
+#	omar  <- par("mar")
+#	on.exit(par(omar))
+#    par(mar = mar)
+#	n <- ncol(x)
+#	image(1:n, 1:n, 1 / (t(x[n:1, 1:n])), col = col, xlab = "", ylab = "",
+#		xaxt = "n", yaxt = "n", ...)
+#    axis(1, at = 1:n, las = 2)
+#    axis(2, at = n:1, labels = paste(names(attr(x, "nbr.cols")), 1:n),
+#		las = 1)
+#    abline(h = (1:(n + 1)) - 0.5, lty = 2, col = "gray")
+#    abline(v = (1:(n + 1)) - 0.5, lty = 2, col = "gray")
+#	invisible(x)
+#}
+
+## Representation of the confusion matrix
+.confusionImage <- function (x, y = NULL, mar = c(3.1, 10.1, 3.1, 3.1), asp = 1, 
 label = "Actual \\ Predicted", sort = "complete", cex = 1, colfun = NULL,
 ncols = 41, col0 = FALSE, grid.col = "gray", ...)
 {
 	if (!inherits(x, "confusion"))
         stop("'x' must be a 'confusion' object")
+
+	if (!is.null(y))
+		stop("cannot use a second classifier 'y' for this plot")
 	
 	## Default color function
-	rwb.colors <- function (n, alpha = 1, s = 0.9, v = 0.9) {
+	if (!length(colfun)) colfun <- function (n, alpha = 1, s = 0.9, v = 0.9) {
 		if ((n <- as.integer(n[1L])) <= 0) return(character(0L))
 		## Define the initial (red) and final (blue) colors with white in between
 		cols <- c(hsv(h = 0, s = s, v = v, alpha = alpha),   # Red
@@ -189,7 +194,6 @@
 		## Use a color ramp from red to white to blue
 		return(colorRampPalette(cols)(n))
 	}
-	if (!length(colfun)) colfun <- rwb.colors
 	
     manuLev <- sub("...", "", rownames(x))
     autoLev <- manuLev
@@ -222,8 +226,8 @@
 	## Negative values (in blue) on the diagonal (correct IDs)
 	diag(confuCol) <- -diag(confuCol)	
 	## Make an image of this matrix
-	omar <- par(no.readonly = TRUE)
-	on.exit(par(mar = omar))
+	opar <- par(no.readonly = TRUE)
+	on.exit(par(opar))
 	par(mar = mar, cex = cex)
 	image(1:n, 1:n, -t(confuCol[nrow(confuCol):1, ]), zlim = c(-10, 10),
 		asp = asp, bty = "n", col = colfun(ncols), xaxt = "n", yaxt = "n",
@@ -249,29 +253,38 @@
 	invisible(x)
 }
 
-.confusionTree <- function (x, maxval = 10, margins = c(2, 10),
-row.v = TRUE, col.v = TRUE, ...)
-{
-	if (!inherits(x, "confusion"))
-		stop("'x' must be a 'confusion' object")
-	nX <- nrow(x)
-	nY <- ncol(x)
-	nZ <- nX * nY
-	confmat <- pmin(x, maxval)
-	mypalette <- brewer.pal(maxval - 1, "Spectral")
-	heatmap.2(x, col= c(0, mypalette), symm = TRUE, margins = margins,
-		trace = "both", Rowv = row.v, Colv = col.v, cexRow = 0.2 + 1 / log10(nX),
-		cexCol = 0.2 + 1 / log10(nY), tracecol = "Black", linecol = FALSE, ...)
-}
+## Eliminated to avoid dependency on RColorBrewer and gplots!
+#.confusionTree <- function (x, y = NULL, maxval = 10, margins = c(2, 10),
+#row.v = TRUE, col.v = TRUE, ...)
+#{
+#	if (!inherits(x, "confusion"))
+#		stop("'x' must be a 'confusion' object")
+#	if (!is.null(y))
+#		stop("cannot use a second classifier 'y' for this plot")
+#
+#	nX <- nrow(x)
+#	nY <- ncol(x)
+#	nZ <- nX * nY
+#	confmat <- pmin(x, maxval)
+#	mypalette <- brewer.pal(maxval - 1, "Spectral")
+#	heatmap.2(x, col= c(0, mypalette), symm = TRUE, margins = margins,
+#		trace = "both", Rowv = row.v, Colv = col.v, cexRow = 0.2 + 1 / log10(nX),
+#		cexCol = 0.2 + 1 / log10(nY), tracecol = "Black", linecol = FALSE, ...)
+#}
 
 # Confusion bar with recall and precision in green bar and not outside as before
 # function modified for publication hclust 
-.confusionBar <- function (x, col = c("PeachPuff2", "green3", "lemonChiffon2"),
-mar = c(1.1, 8.1, 4.1, 2.1), cex = 0.7, cex.axis = cex, cex.legend = cex,
-main = "Precision versus Recall", min.width = 17, ...)
+.confusionBar <- function (x, y = NULL,
+col = c("PeachPuff2", "green3", "lemonChiffon2"), mar = c(1.1, 8.1, 4.1, 2.1),
+cex = 0.7, cex.axis = cex, cex.legend = cex, main = "Precision versus Recall",
+min.width = 17, ...)
 {
     if (!inherits(x, "confusion"))
         stop("'x' must be a 'confusion' object")
+		
+	if (!is.null(y))
+		stop("cannot use a second classifier 'y' for this plot")
+	
     TP <- diag(x)
     fn <- rowSums(x) - TP
     fp <- colSums(x) - TP
@@ -346,55 +359,118 @@
 }
 
 ## Precision vs Recall, alternate presentation
-.confusionBar2 <- function (x,
-col = c("PeachPuff2", "green",  "green3", "lemonChiffon2"),
-mar = c(2.1, 8.1, 4.1, 2.1), cex = 0.7, cex.axis = cex, cex.legend = cex,
-main = "Precision versus Recall", ...)
+## Note used, but saved for now
+#.confusionBar <- function (x, y = NULL,
+#col = c("PeachPuff2", "green",  "green3", "lemonChiffon2"),
+#mar = c(2.1, 8.1, 4.1, 2.1), cex = 0.7, cex.axis = cex, cex.legend = cex,
+#main = "Precision versus Recall", ...)
+#{
+#	if (!inherits(x, "confusion"))
+#		stop("'x' must be a 'confusion' object")
+#	if (!is.null(y))
+#		stop("cannot use a second classifier 'y' for this plot")
+#
+#	## Calculation of statistics
+#	Stats <- summary(x)
+#	FDR <- Stats$FDR * 100
+#	Precision <- Stats$Precision * 100
+#	Recall <- Stats$Recall * 100
+#	FNR <- Stats$FNR * 100
+#	## Order statistics according to Precision + recall
+#	pos <- order(Recall + Precision, decreasing = TRUE)
+#	## Results to plot
+#	res <- cbind(FDR, Precision, Recall, FNR)
+#	## Do the plot
+#	omar <- par("mar")
+#	on.exit(par(omar))
+#	par(mar = mar)
+#	barplot(t(res[pos, ]), horiz = TRUE, col = col, xaxt = "n", las = 1,
+#		space = 0, ...)
+#	## Add information
+#	n <- nrow(Stats)
+#	Labels <- names(attr(x, "nbr.cols"))
+#	axis(2, 1:n - 0.5, tick = FALSE, las = 1, cex.axis = cex.axis,
+#		labels = Labels[pos])
+#	title(main = main)
+#	text(rep(1, n), 1:n - 0.5, paste(round(FDR[pos]), "%", sep = ""),
+#		adj = c(0, 0.5), cex = cex)
+#	text(FDR[pos] + Precision[pos]/2, 1:n - 0.5, paste(round(Precision[pos]),
+#		"%", sep = ""), adj = c(0, 0.5), cex = cex)
+#	text(FDR[pos] + Precision[pos] + Recall[pos]/2, 1:n - 0.5,
+#		paste(round(Recall[pos]), "%", sep = ""), adj = c(0, 0.5), cex = cex)
+#	text(rep(191, n), 1:n - 0.5, paste(round(FNR[pos]), "%", sep = ""),
+#		adj = c(0, 0.5), cex = cex)
+#	legend("top", legend = c("False Discovery ", "Precision", "Recall",
+#		"False Negative"), cex = cex.legend, fill = col, bty = "n", horiz = TRUE)
+#	text(96, -0.5, "< higher precision - underestimate <=> overestimate - higher recall >  ",
+#		cex = cex)
+#	invisible(res)
+#}
+
+.confusionStars <- function(x, y = NULL, stat1 = "Recall", stat2 = "Precision",
+main = NULL, col = NULL, ...)
 {
-	if (!inherits(x, "confusion"))
-		stop("'x' must be a 'confusion' object")
-	## Calculation of statistics
-	Stats <- summary(x)
-	FDR <- Stats$FDR * 100
-	Precision <- Stats$Precision * 100
-	Recall <- Stats$Recall * 100
-	FNR <- Stats$FNR * 100
-	## Order statistics according to Precision + recall
-	pos <- order(Recall + Precision, decreasing = TRUE)
-	## Results to plot
-	res <- cbind(FDR, Precision, Recall, FNR)
-	## Do the plot
-	omar <- par("mar")
-	on.exit(par(omar))
-	par(mar = mar)
-	barplot(t(res[pos, ]), horiz = TRUE, col = col, xaxt = "n", las = 1,
-		space = 0, ...)
-	## Add information
-	n <- nrow(Stats)
-	Labels <- names(attr(x, "nbr.cols"))
-	axis(2, 1:n - 0.5, tick = FALSE, las = 1, cex.axis = cex.axis,
-		labels = Labels[pos])
-	title(main = main)
-	text(rep(1, n), 1:n - 0.5, paste(round(FDR[pos]), "%", sep = ""),
-		adj = c(0, 0.5), cex = cex)
-	text(FDR[pos] + Precision[pos]/2, 1:n - 0.5, paste(round(Precision[pos]),
-		"%", sep = ""), adj = c(0, 0.5), cex = cex)
-	text(FDR[pos] + Precision[pos] + Recall[pos]/2, 1:n - 0.5,
-		paste(round(Recall[pos]), "%", sep = ""), adj = c(0, 0.5), cex = cex)
-	text(rep(191, n), 1:n - 0.5, paste(round(FNR[pos]), "%", sep = ""),
-		adj = c(0, 0.5), cex = cex)
-	legend("top", legend = c("False Discovery ", "Precision", "Recall",
-		"False Negative"), cex = cex.legend, fill = col, bty = "n", horiz = TRUE)
-	text(96, -0.5, "< higher precision - underestimate <=> overestimate - higher recall >  ",
-		cex = cex)
-	invisible(res)
+    if (!inherits(x, "confusion"))
+        stop("'x' must be a 'confusion' object")
+    if (!is.null(y) && !inherits(x, "confusion"))
+        stop("'y' must be NULL or a 'confusion' object")
+	
+	SupportedStats <- c("Recall", "Precision", "Specificity",
+        "NPV", "FPR", "FNR", "FDR", "FOR")
+    if (!stat1 %in% SupportedStats)
+        stop("stats1 must be one of followed stats: Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
+    if (!stat2 %in% SupportedStats)
+        stop("stats2 must be one of followed stats: Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
+    Blue <- topo.colors(16)
+    Green <- terrain.colors(16)
+    Stat <- summary(x)
+    if (!is.null(y)) { # Comparison of two confusion matrices
+		Stat2 <- summary(y)
+		Data <- data.frame(Stat2[, stat1], Stat[, stat1], Stat[, stat2],
+			Stat2[, stat2])
+		Data <- rbind(Data, rep(0, 4))
+		colnames(Data) <- paste(rep(c(stat1, stat2), each = 2), c(2, 1, 1, 2))
+		if (!length(main))
+			main <- paste("Groups comparison between classifier 1 and 2\nAccuracy 1 =",
+				round(attr(Stat, "Accuracy") * 100), "%, accuracy 2 =",
+				round(attr(Stat2, "Accuracy") * 100), "%")
+		if (!length(col))
+			col <- c("green", Green[1], Blue[2], Blue[6])
+	} else { # Single confusion matrix
+		Data <- data.frame(Stat[, stat1], Stat[, stat2])
+		Data <- rbind(Data, rep(0, 2))
+		colnames(Data) <- c(stat1, stat2)
+		if (!length(main))
+			main <- paste("Groups comparison\nAccuracy =",
+				round(attr(Stat, "Accuracy") * 100), "%")
+		if (!length(col))
+			col <- c(Green[1], Blue[2])
+	}
+    rownames(Data) <- c(rownames(Stat), " ")
+    
+	## Calculate key location
+		kl <- stars(Data, draw.segments = TRUE, scale = FALSE, # key.loc = c(13, 1.5),
+			len = 0.8, main =  main, col.segments = col, plot = FALSE, ...)
+		kcoords <- c(max(kl[, 1]), min(kl[, 2]))
+		kspan <- apply(kl, 2, min) / 1.95
+	## Draw the plot	
+	res <- stars(Data, draw.segments = TRUE, scale = FALSE, key.loc = kcoords,
+		len = 0.8, main =  main, col.segments = col, ...)
+	## Draw a rectangle around key to differentiate it from the rest
+	rect(kcoords[1] - kspan[1], kcoords[2] - kspan[2], kcoords[1] + kspan[1],
+		kcoords[2] + kspan[2])
+	
+	res
 }
 
-## New graphical representation of the confusion matrix as a dendrogram
-.confusionDendro <- function (x, method = "ward")
+## Representation of the confusion matrix as a dendrogram
+.confusionDendro <- function (x, y = NULL, method = "ward")
 {
     if (!inherits(x, "confusion"))
         stop("'x' must be a 'confusion' object")
+	if (!is.null(y))
+		stop("cannot use a second classifier 'y' for this plot")	
+	
     ## Transform the confusion matrix into a symmetric matrix by adding its
 	## transposed matrix
     ConfuSim <- x + t(x)
@@ -627,88 +703,115 @@
 	return(invisible(x))
 }
 
-## Compare statistics between two classifiers for all groups
-comparisonPlot <- function (x, y, stat1 = "Recall",
-stat2 = "Precision", barplot = TRUE)
-{
-    SupportedStats <- c("Recall", "Precision", "Specificity", "NPV", "FPR",
-		"FNR", "FDR", "FOR")
-    if (!stat1 %in% SupportedStats)
-        stop("stats1 must be one of followed stats: Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
-    if (!stat2 %in% SupportedStats)
-        stop("stats2 must be one of followed stats: Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
-	
-    n <- nrow(x)
-    ## Select columns
-    xstat1 <- x[, stat1]
-    xstat2 <- x[, stat2]
-    ystat1 <- y[, stat1]
-    ystat2 <- y[, stat2]
-     
-    if (!isTRUE(as.logical(barplot))) {
-        plot(xstat1, ylim = c(-1, 1.1), ylab = paste("<==", stat2, "/",
-			stat1, "==>", sep = " "), xlab = "Groups", axes = FALSE,
-			col = "red", main = "Comparison of two statistics for two classifiers",
-			lwd = 2, cex = 1.5, pch = 3)
-        points(ystat1, pch = 4, col = "blue", lwd = 2, cex = 1.5)
-        points(-xstat2, pch = 3, col = "red", lwd = 2, cex = 1.5)
-        points(-ystat2, pch = 4, col = "blue", lwd = 2, cex = 1.5)
-        
-		## Add lines for more comprehensive interpretation
-        for (i in 1:n) abline(v = i, lty = 3, col = "lightgray")
-        abline(h = 0, lty = 1)
-        abline(h = 0.25, lty = 2)
-        abline(h = 0.5, lty = 2)
-        abline(h = 0.75, lty = 2)
-        abline(h = -0.25, lty = 2)
-        abline(h = -0.5, lty = 2)
-        abline(h = -0.75, lty = 2)                    
-        
-		## Add axes
-        axis(1, at = 1:n, labels = 1:n)
-        axis(2, at = c(-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1),
-			labels = c(1, 0.75, 0.5, 0.25, 0, 0.25, 0.5, 0.75, 1))
-        
-		## Add legend
-        legend("topright", legend = c("Classifier 1", "Classifier 2"),
-			pch = c(3, 4), col = c("red", "blue"), horiz = TRUE, bg = "white",
-			cex = 0.75, pt.cex = 1.5, pt.lwd = 2)    
-    
-	} else { # Barplot
-        barplot(xstat1, ylim = c(-1.05, 1.15), axes = FALSE,
-			ylab = paste("<==", stat2, "/", stat1, "==>", sep = " "),
-            xlab = "Groups", main = "Comparison of two statistics for two classifiers")
-        barplot(-xstat2, add = TRUE, axes = FALSE)
-        
-		## Add lines for more comprehensive interpretation
-        for (i in 1:n) abline(v = i + i * 0.2 - 0.5, lty = 3, col = "lightgray")
-        abline(h = 0, lty = 1)
-        abline(h = 0.25, lty = 2)
-        abline(h = 0.5, lty = 2)
-        abline(h = 0.75, lty = 2)
-        abline(h = 1, lty = 3)
-        abline(h = -0.25, lty = 2)
-        abline(h = -0.5, lty = 2)
-        abline(h = -0.75, lty = 2)                    
-        abline(h = -1, lty = 3)
-        X <- 1:n + 1:n * 0.2 - 0.5
-        
-		## Add arrows (suppress wqrnings in case of zero length arrows)
-        suppressWarnings(arrows(x0 = X, y0 = xstat1, x1 = X, y1 = ystat1,
-			length = 0.1))
-        suppressWarnings(arrows(x0 = X, y0 = -xstat2, x1 = X, y1 = -ystat2,
-			length = 0.1))
-        
-		## Add axes
-        axis(1, at = X, labels = 1:n)
-        axis(2, at = c(-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1),
-			labels = c(1, 0.75, 0.5, 0.25, 0, 0.25, 0.5, 0.75, 1))
-        
-		## Add legend
-		legend("topright", legend = c("Classifier 1", "Classifier 2"),
-			pch = c(15, 4), col = c("darkgray", "black"), horiz = TRUE,
-			bg = "white", cex = 0.75, pt.cex = 1.5, pt.lwd = 2)
-    }
-	invisible(list(xstat1 = xstat1, xstat2 = xstat2,
-		ystat1 = ystat1, ystat2 = ystat2))
-}
+#comparisonPlot <-
+#function (x, y, stat1 = "Recall", stat2 = "Precision", type = c("barplot", "p", "stars"), ...)
+#{
+#    type <- match.arg(type)
+#    res <- switch(type[1], barplot = barplot.comparison(x, y, stat1, stat2, ...),
+#        p = plot.comparison(x, y, stat1, stat2, ...),
+#        stars = stars.comparison(x, y, stat1, stat2, ...), stop("'type' must be 'barplot', 'p' or 'stars'"))
+#    invisible(res)
+#}
+#
+#stars.comparison <- function(x, y, stat1 = "Recall", stat2 = "Precision", ...)
+#{
+#    if(!inherits(x, "summary.confusion"))
+#        stop("x must be a summary.confusion object")
+#    if(!inherits(y, "summary.confusion"))
+#        stop("y must be a summary.confusion object")
+#    SupportedStats <- c("Recall", "Precision", "Specificity",
+#        "NPV", "FPR", "FNR", "FDR", "FOR")
+#    if (!stat1 %in% SupportedStats)
+#        stop("stats1 must be one of followed stats: Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
+#    if (!stat2 %in% SupportedStats)
+#        stop("stats2 must be one of followed stats: Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
+#    Blue <- topo.colors(16)
+#    Green <- terrain.colors(16)
+#    Data <- data.frame(y[, stat1], x[, stat1], x[, stat2], y[, stat2])
+#    rownames(Data) <- rownames(x)
+#    colnames(Data) <- c(paste(stat1, "_2", sep = ""), paste(stat1, "_1", sep = ""), paste(stat2, "_1", sep = ""), paste(stat2, "_2", sep = ""))
+#    stars(Data, draw.segments = TRUE, scale = FALSE, key.loc = c(13,1.5), len = 0.8,
+#        main = paste("Groups comparison between classifier 1 and 2", "\n", "Accuracy 1 =", round(attr(Stats, "Accuracy") *100), "%,",  "Accuracy 2 =", round(attr(Stats2, "Accuracy") *100), "%"),
+#        col.segments = c("green", Green[1], Blue[2], Blue[6]), ...)
+#}
+
+#barplot.comparison <- function(x, y, stat1 = "Recall", stat2 = "Precision", ...){
+#    SupportedStats <- c("Recall", "Precision", "Specificity",
+#        "NPV", "FPR", "FNR", "FDR", "FOR")
+#    if (!stat1 %in% SupportedStats)
+#        stop("stats1 must be one of followed stats: Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
+#    if (!stat2 %in% SupportedStats)
+#        stop("stats2 must be one of followed stats: Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
+#    n <- nrow(x)
+#    xstat1 <- x[, stat1]
+#    xstat2 <- x[, stat2]
+#    ystat1 <- y[, stat1]
+#    ystat2 <- y[, stat2]
+#    barplot(xstat1, ylim = c(-1.05, 1.15), axes = FALSE,
+#            ylab = paste("<==", stat2, "/", stat1, "==>", sep = " "),
+#            xlab = "Groups", main = "Comparison of two statistics for two classifiers", ...)
+#    barplot(-xstat2, add = TRUE, axes = FALSE)
+#    for (i in 1:n) abline(v = i + i * 0.2 - 0.5, lty = 3,
+#        col = "lightgray")
+#    abline(h = 0, lty = 1)
+#    abline(h = 0.25, lty = 2)
+#    abline(h = 0.5, lty = 2)
+#    abline(h = 0.75, lty = 2)
+#    abline(h = 1, lty = 3)
+#    abline(h = -0.25, lty = 2)
+#    abline(h = -0.5, lty = 2)
+#    abline(h = -0.75, lty = 2)
+#    abline(h = -1, lty = 3)
+#    X <- 1:n + 1:n * 0.2 - 0.5
+#    suppressWarnings(arrows(x0 = X, y0 = xstat1, x1 = X,
+#        y1 = ystat1, length = 0.1))
+#    suppressWarnings(arrows(x0 = X, y0 = -xstat2, x1 = X,
+#        y1 = -ystat2, length = 0.1))
+#    axis(1, at = X, labels = 1:n)
+#    axis(2, at = c(-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5,
+#        0.75, 1), labels = c(1, 0.75, 0.5, 0.25, 0, 0.25,
+#        0.5, 0.75, 1))
+#    legend("topright", legend = c("Classifier 1", "Classifier 2"),
+#        pch = c(15, 4), col = c("darkgray", "black"), horiz = TRUE,
+#        bg = "white", cex = 0.75, pt.cex = 1.5, pt.lwd = 2)
+#    invisible(list(xstat1 = xstat1, xstat2 = xstat2, ystat1 = ystat1,
+#        ystat2 = ystat2))
+#}
+#
+#plot.comparison <- function(x, y, stat1 = "Recall", stat2 = "Precision", ...){
+#    SupportedStats <- c("Recall", "Precision", "Specificity",
+#        "NPV", "FPR", "FNR", "FDR", "FOR")
+#    if (!stat1 %in% SupportedStats)
+#        stop("stats1 must be one of followed stats: Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
+#    if (!stat2 %in% SupportedStats)
+#        stop("stats2 must be one of followed stats: Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
+#    n <- nrow(x)
+#    xstat1 <- x[, stat1]
+#    xstat2 <- x[, stat2]
+#    ystat1 <- y[, stat1]
+#    ystat2 <- y[, stat2]
+#    plot(xstat1, ylim = c(-1, 1.1), ylab = paste("<==", stat2,
+#        "/", stat1, "==>", sep = " "), xlab = "Groups", axes = FALSE,
+#        col = "red", main = "Comparison of two statistics for two classifiers",
+#        lwd = 2, cex = 1.5, pch = 3, ...)
+#    points(ystat1, pch = 4, col = "blue", lwd = 2, cex = 1.5)
+#    points(-xstat2, pch = 3, col = "red", lwd = 2, cex = 1.5)
+#    points(-ystat2, pch = 4, col = "blue", lwd = 2, cex = 1.5)
+#    for (i in 1:n) abline(v = i, lty = 3, col = "lightgray")
+#    abline(h = 0, lty = 1)
+#    abline(h = 0.25, lty = 2)
+#    abline(h = 0.5, lty = 2)
+#    abline(h = 0.75, lty = 2)
+#    abline(h = -0.25, lty = 2)
+#    abline(h = -0.5, lty = 2)
+#    abline(h = -0.75, lty = 2)
+#    axis(1, at = 1:n, labels = 1:n)
+#    axis(2, at = c(-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5,
+#        0.75, 1), labels = c(1, 0.75, 0.5, 0.25, 0, 0.25,
+#        0.5, 0.75, 1))
+#    legend("topright", legend = c("Classifier 1", "Classifier 2"),
+#        pch = c(3, 4), col = c("red", "blue"), horiz = TRUE,
+#        bg = "white", cex = 0.75, pt.cex = 1.5, pt.lwd = 2)
+#    invisible(list(xstat1 = xstat1, xstat2 = xstat2, ystat1 = ystat1,
+#        ystat2 = ystat2))
+#}

Modified: pkg/mlearning/R/mlearning.R
===================================================================
--- pkg/mlearning/R/mlearning.R	2012-08-02 08:49:02 UTC (rev 227)
+++ pkg/mlearning/R/mlearning.R	2012-08-02 19:23:08 UTC (rev 228)
@@ -252,7 +252,7 @@
 }
 
 predict.mlearning <- function(object, newdata,
-type = c("class", "member", "both"), scale = TRUE, na.action = na.exclude, ...)
+type = c("class", "member", "both"), na.action = na.exclude, ...)
 {
 	## Not usable for unsupervised type
 	if (attr(object, "type") == "unsupervised")
@@ -308,8 +308,7 @@
 		levels <- levels(object)
 		return(list(class = .expandFactor(factor(as.character(classes),
 			levels = levels), n, ndrop),
-			member = .expandMatrix(.membership(members, levels = levels,
-			scale = scale), n, ndrop)))
+			member = .expandMatrix(.membership(members, levels = levels), n, ndrop)))
 	} else {
 		res <- predict(object, newdata = newdata, type = pred.type[type], ...)
 	}
@@ -318,8 +317,8 @@
 	res <- switch(type,
 		class = .expandFactor(factor(as.character(res), levels = levels(object)),
 			n, ndrop),
-		member = .expandMatrix(.membership(res, levels = levels(object),
-			scale = scale), n, ndrop),
+		member = .expandMatrix(.membership(res, levels = levels(object)),
+			n, ndrop),
 		switch(class(res)[1],
 			factor = .expandFactor(res, n, ndrop),
 			matrix = .expandMatrix(res, n, ndrop),
@@ -328,6 +327,91 @@
 	res
 }
 
+cvpredict <- function (object, ...)
+	UseMethod("cvpredict")
+
+cvpredict.mlearning <- function(object, type = c("class", "member", "both"),
+cv.k = 10, cv.strat = TRUE, ...)
+{
+	type <- switch(attr(object, "type"),
+		regression = "class", # Another way to ignore 'type' for regressions
+		classification = as.character(type)[1],
+		stop("works only for classification or regression mlearning objects"))
+	
+	if (type == "class") {
+		predictions <- TRUE
+		getmodels <- FALSE
+	} else if (type == "member") {
+		predictions <- FALSE
+		getmodels <- TRUE
+	} else if (type == "both") {
+		predictions <- TRUE
+		getmodels <- TRUE
+	} else stop("type must be 'class', 'member' or 'both'")
+	
+	## Create data, using numbers are rownames
+	data <- data.frame(.response. = response(object), train(object))
+	rn <- rownames(data)
+	rownames(data) <- 1:NROW(data)
+	
+	## The predict() method with ... arguments added to the call
+	constructPredict <- function (...) {
+		fun <- function (object, newdata) return()
+		body(fun) <- as.call(c(list(substitute(predict),
+			object = substitute(object), newdata = substitute(newdata)), list(...)))
+		fun
+	}
+	Predict <- constructPredict(...)
+	
+	## Perform cross-validation or bootstrap for prediction
+	args <- attr(object, "args")
+	if (!is.list(args)) args <- list()
+	args$formula <- substitute(.response. ~ .)
+	args$data <- substitute(data)
+	args$model <- substitute(mlearning)
+	args$method <- attr(object, "method")
+	args$predict <- substitute(Predict)
+	args$estimator <- "cv"
+	args$est.para <- control.errorest(predictions = predictions,
+		getmodels = getmodels, k = cv.k, strat = cv.strat)
+	est <- do.call(errorest, args)
+	
+	## Only class
+	if (type == "class") {
+		res <- est$predictions
+	} else {
+		## Need to calculate member
+		predMember <- function (x, object, ...)
+			suppressWarnings(predict(x, newdata =
+				train(object)[-as.numeric(rownames(train(x))), ], ...))
+	
+		## Apply predict on all model and collect results together
+		member <- lapply(est$models, predMember, object = object, type = "member",
+			na.action = na.exclude, ...)
+	
+		## Concatenate results
+		member <- do.call(rbind, member)
+	
+		## Sort in correct order and replace initial rownames
+		ord <- as.numeric(rownames(member))
+		rownames(member) <- rn[ord]
+		member <- member[order(ord), ]
+	
+		if (type == "member") res <- member else
+			res <- list(class = est$predictions, member = member)	
+	}
+	
+	## Add est object as "method" attribute, without predictions or models
+	est$name <- "cross-validation"
+	est$predictions <- NULL
+	est$models <- NULL
+	est$call <- match.call()
+	est$strat <- cv.strat
+	attr(res, "method") <- est
+	
+	res
+}
+
 ## Note: ldahist() in MASS (when only one LD) seems to be broken!
 mlLda <- function (...)
 	UseMethod("mlLda")
@@ -343,7 +427,9 @@
 	if (!is.factor(response))
 		stop("only factor response (classification) accepted for mlLda")
 
-	.args. <- list(...)$.args.
+	dots <- list(...)
+	.args. <- dots$.args.
+	dots$.args. <- NULL
 	if (!length(.args.)) .args. <- list(levels = levels(response),
 		n = c(intial = NROW(train), final = NROW(train)),
 		type = "classification", na.action = "na.pass",
@@ -352,7 +438,7 @@
 	## Return a mlearning object
 	structure(MASS:::lda.default(x = sapply(train, as.numeric),
 		grouping = response, ...), formula = .args.$formula, train = train,
-		response = response, levels = .args.$levels, n = .args.$n,
+		response = response, levels = .args.$levels, n = .args.$n, args = dots,
 		optim = .args.$optim, numeric.only = TRUE, type = .args.$type,
 		pred.type = c(class = "class", member = "posterior", projection = "x"),
 		summary = NULL, na.action = .args.$na.action,
@@ -362,9 +448,8 @@
 }
 
 predict.mlLda <- function(object, newdata,
-type = c("class", "member", "both", "projection"), scale = TRUE,
-prior = object$prior, dimension,
-method = c("plug-in", "predictive", "debiased"), ...)
+type = c("class", "member", "both", "projection"), prior = object$prior,
+dimension, method = c("plug-in", "predictive", "debiased"), ...)
 {
 	if (!inherits(object, "mlLda"))
 		stop("'object' must be a 'mlLda' object")
@@ -405,11 +490,10 @@
 	## Rework results according to what we want
 	switch(as.character(type)[1],
 		class = factor(as.character(res$class), levels = levels(object)),
-		member = .membership(res$posterior, levels = levels(object),
-			scale = scale),
+		member = .membership(res$posterior, levels = levels(object)),
 		both = list(class = factor(as.character(res$class),
 			levels = levels(object)), member = .membership(res$posterior,
-			levels = levels(object), scale = scale)),
+			levels = levels(object))),
 		projection = res$x,
 		stop("unrecognized 'type' (must be 'class', 'member', 'both' or 'projection')"))
 }
@@ -428,7 +512,9 @@
 	if (!is.factor(response))
 		stop("only factor response (classification) accepted for mlQda")
 
-	.args. <- list(...)$.args.
+	dots <- list(...)
+	.args. <- dots$.args.
+	dots$.args. <- NULL
 	if (!length(.args.)) .args. <- list(levels = levels(response),
 		n = c(intial = NROW(train), final = NROW(train)),
 		type = "classification", na.action = "na.pass",
@@ -437,7 +523,7 @@
 	## Return a mlearning object
 	structure(MASS:::qda.default(x = sapply(train, as.numeric),
 		grouping = response, ...), formula = .args.$formula, train = train,
-		response = response, levels = .args.$levels, n = .args.$n,
+		response = response, levels = .args.$levels, n = .args.$n, args = dots,
 		optim = .args.$optim, numeric.only = TRUE, type = .args.$type,
 		pred.type = c(class = "class", member = "posterior"),
 		summary = NULL, na.action = .args.$na.action,
@@ -484,11 +570,10 @@
 	## Rework results according to what we want
 	switch(as.character(type)[1],
 		class = factor(as.character(res$class), levels = levels(object)),
-		member = .membership(res$posterior, levels = levels(object),
-			scale = scale),
+		member = .membership(res$posterior, levels = levels(object)),
 		both = list(class = factor(as.character(res$class),
 			levels = levels(object)), member = .membership(res$posterior,
-			levels = levels(object), scale = scale)),
+			levels = levels(object))),
 		stop("unrecognized 'type' (must be 'class', 'member' or 'both')"))
 }
 
@@ -516,7 +601,9 @@
 mlRforest.default <- function (train, response, ntree = 500, mtry,
 replace = TRUE, classwt = NULL, ...)
 {
-	.args. <- list(...)$.args.
+	dots <- list(...)
+	.args. <- dots$.args.
+	dots$.args. <- NULL
 	if (!length(.args.)) {
 		if (!length(response)) {
 			type <- "unsupervised"
@@ -528,6 +615,9 @@
 		type = type, na.action = "na.pass",
 		mlearning.call = match.call(), method = "mlRforest")
 	}
+	dots$ntree <- ntree
+	dots$replace <- replace
+	dots$classwt <- classwt
 	
 	## Return a mlearning object
 	if (missing(mtry) || !length(mtry)) {
@@ -535,13 +625,14 @@
 		y = response, ntree = ntree, replace = replace,
 		classwt = classwt, ...)
 	} else {
+		dots$mtry <- mtry
 		res <- randomForest:::randomForest.default(x = train,
 		y = response, ntree = ntree, mtry = mtry, replace = replace,
 		classwt = classwt, ...)
 	}
 	 
 	structure(res, formula = .args.$formula, train = train,
-		response = response, levels = .args.$levels, n = .args.$n,
+		response = response, levels = .args.$levels, n = .args.$n, args = dots,
 		optim = .args.$optim, numeric.only = FALSE, type = .args.$type,
 		pred.type = c(class = "response", member = "prob", vote ="vote"),
 		summary = NULL, na.action = .args.$na.action,
@@ -551,13 +642,14 @@
 }
 
 predict.mlRforest <- function(object, newdata,
-type = c("class", "member", "both", "vote"),
-scale = TRUE, norm.votes = FALSE, oob = FALSE, ...) {
+type = c("class", "member", "both", "vote"), norm.votes = FALSE,
+method = c("direct", "oob"), ...)
+{
 	type <- as.character(type)[1]
 	
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/zooimage -r 228


More information about the Zooimage-commits mailing list