[Zooimage-commits] r229 - in pkg/mlearning: . R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sun Aug 5 01:42:46 CEST 2012
Author: phgrosjean
Date: 2012-08-05 01:42:45 +0200 (Sun, 05 Aug 2012)
New Revision: 229
Modified:
pkg/mlearning/DESCRIPTION
pkg/mlearning/NAMESPACE
pkg/mlearning/R/confusion.R
pkg/mlearning/R/mlearning.R
pkg/mlearning/man/confusion.Rd
pkg/mlearning/man/mlearning.Rd
pkg/mlearning/man/mlearning.package.Rd
Log:
Further changes, mainly to confusion plots
Modified: pkg/mlearning/DESCRIPTION
===================================================================
--- pkg/mlearning/DESCRIPTION 2012-08-02 19:23:08 UTC (rev 228)
+++ pkg/mlearning/DESCRIPTION 2012-08-04 23:42:45 UTC (rev 229)
@@ -1,18 +1,14 @@
Package: mlearning
Type: Package
-Title: Machine learning algorithms with unified formula interface and confusion matrices
+Title: Machine learning algorithms with unified interface and confusion matrices
Version: 1.0-0
-Date: 2012-07-18
+Date: 2012-08-04
Author: Ph. Grosjean & K. Denis
Maintainer: Ph. Grosjean <Philippe.Grosjean at umons.ac.be>
Depends: R (>= 2.14.0)
-Imports: dataframe, stats, nnet, class, MASS, e1071, randomForest, RWeka, ipred, grDevices
-Suggests: rJava, RWekajars, mlbench, datasets, RColorBrewer
-Description: This package provides alternate interface to various machine
- learning algorithms in order to offer a unified, formula-based, interface.
- However, given the caveats of the formula interface in R (leading to
- multiple copies of the data), if a simplified formula is provided (like
- Class ~ .), a more performant code is used, if possible. Confusion matrices
- can be calculated and viewed as tables or plots, too.
+Imports: stats, grDevices, class, nnet, MASS, e1071, randomForest, ipred
+Suggests: mlbench, datasets, RColorBrewer
+Description: This package provides a unified interface to various machine
+ learning algorithms. Confusion matrices are provided too.
License: GPL (>= 2)
URL: http://www.sciviews.org/zooimage
\ No newline at end of file
Modified: pkg/mlearning/NAMESPACE
===================================================================
--- pkg/mlearning/NAMESPACE 2012-08-02 19:23:08 UTC (rev 228)
+++ pkg/mlearning/NAMESPACE 2012-08-04 23:42:45 UTC (rev 229)
@@ -1,18 +1,15 @@
-import(dataframe)
+#import(dataframe)
import(stats)
+import(grDevices)
import(class)
import(nnet)
import(MASS)
import(e1071)
import(randomForest)
-import(RWeka)
import(ipred)
-#import(RColorBrewer)
-#importFrom(gplots, heatmap.2)
-import(grDevices)
+#import(klaR)
+#import(RWeka)
-#importFrom(gdata, combine)
-
export(mlearning)
export(mlLda)
export(mlQda)
@@ -20,7 +17,7 @@
export(mlLvq)
export(mlNnet)
export(mlNaiveBayes)
-export(mlNaiveBayesWeka)
+#export(mlNaiveBayesWeka)
export(cvpredict)
export(summary.lvq)
@@ -28,7 +25,13 @@
export(response)
export(train)
+export("weights<-")
+
export(confusion)
+export(confusionImage)
+export(confusionBarplot)
+export(confusionStars)
+export(confusionDendrogram)
S3method(confusion, default)
S3method(confusion, mlearning)
@@ -47,6 +50,9 @@
S3method(response, default)
S3method(train, default)
+S3method(weights, confusion)
+S3method("weights<-", confusion)
+
S3method(summary, lvq)
S3method(print, summary.lvq)
@@ -56,7 +62,7 @@
S3method(mlLvq, default)
S3method(mlNnet, default)
S3method(mlNaiveBayes, default)
-S3method(mlNaiveBayesWeka, default)
+#S3method(mlNaiveBayesWeka, default)
S3method(mlLda, formula)
S3method(mlQda, formula)
@@ -64,7 +70,7 @@
S3method(mlLvq, formula)
S3method(mlNnet, formula)
S3method(mlNaiveBayes, formula)
-S3method(mlNaiveBayesWeka, formula)
+#S3method(mlNaiveBayesWeka, formula)
S3method(predict, mlLda)
S3method(predict, mlQda)
Modified: pkg/mlearning/R/confusion.R
===================================================================
--- pkg/mlearning/R/confusion.R 2012-08-02 19:23:08 UTC (rev 228)
+++ pkg/mlearning/R/confusion.R 2012-08-04 23:42:45 UTC (rev 229)
@@ -1,182 +1,302 @@
-## TODO: add the possibility to droplevels() in confusion object!... or in print()/plot()?
confusion <- function (x, ...)
UseMethod("confusion")
-.confusion <- function (classes, labels, ...)
+.confusion <- function (classes, labels, weights, ...)
{
- if (!length(labels)) {
- labels <- c("Predicted", "Actual")
- } else {
- labels <- as.character(labels)
- if (length(labels) != 2)
- stop("You must provide exactly 2 character strings for 'labels'")
- }
- ## Make sure both variables are correctly named
- names(classes) <- labels
- ## How many objects by level?
- NbrPerClass1 <- table(classes[, 1])
- ## How many predicted objects
- NbrPerClass2 <- table(classes[, 2])
- ## Confusion matrix
- Conf <- table(classes)
- ## Further stats: total, true positives, accuracy
- Total <- sum(Conf)
- TruePos <- sum(diag(Conf))
- Stats <- c(total = Total, truepos = TruePos,
- accuracy = TruePos / Total * 100)
-
- ## Change labels to get a more compact presentation
- colnames(Conf) <- formatC(1:ncol(Conf), digits = 1, flag = "0")
- rownames(Conf) <- paste(colnames(Conf), rownames(Conf))
-
+ res <- table(classes, dnn = labels)
+ total <- sum(res)
+ truePos <- sum(diag(res))
+ row.freqs <- rowSums(res)
+
## Additional data as attributes
- attr(Conf, "stats") <- Stats
- attr(Conf, "nbr.rows") <- NbrPerClass1
- attr(Conf, "nbr.cols") <- NbrPerClass2
+ attr(res, "row.freqs") <- row.freqs
+ attr(res, "col.freqs") <- colSums(res)
+ attr(res, "levels") <- levels(classes[1, ]) # These are *initial* levels!
+ ## Final levels may differ if there are empty levels, or NAs!
+ attr(res, "weights") <- row.freqs # Initial weights are row.freqs
+ attr(res, "stats") <- c(total = total, truepos = truePos,
+ error = 1 - (truePos / total))
- ## This is a confusion object
- class(Conf) <- c("confusion", "table")
- Conf
+ ## This is a confusion object, inheriting from table
+ class(res) <- c("confusion", "table")
+
+ ## Do we reweight the confusion matrix?
+ if (!missing(weights)) weights(res) <- weights
+
+ res
}
confusion.default <- function (x, y = NULL, vars = c("Actual", "Predicted"),
-labels = vars, merge.by = "Id", ...)
+labels = vars, merge.by = "Id", weights, ...)
{
## If the object is already a 'confusion' object, return it
if (inherits(x, "confusion")) {
if (!missing(y))
- warning("you cannot provide 'y' when 'x' is already a 'confusion' object")
+ warning("you cannot provide 'y' when 'x' is a 'confusion' object")
+ ## Possibly reweight it
+ if (!missing(weights)) weights(x) <- weights
return(x)
}
## Idem if there is a 'confusion' attribute and no y
conf <- attr(x, "confusion")
- if (!is.null(conf) && missing(y)) return(conf)
-
- ## Check/convert vars and labels
- if (!length(vars)) {
- vars <- c("Class", "Ident")
- } else {
- vars <- as.character(vars)
- if (length(vars) != 2)
- stop("You must provide exactly 2 strings for 'vars'")
- }
+ if (!is.null(conf) && missing(y)) {
+ ## Possibly reweight it
+ if (!missing(weights)) weights(conf) <- weights
+ return(conf)
+ }
+
+ ## Reworks and check arguments
+ vars <- as.character(vars)
+ if (length(vars) != 2)
+ stop("You must provide exactly 2 strings for 'vars'")
merge.by <- as.character(merge.by)
## There are three possibilities:
- ## 1) a single data frame => use vars
+ ## 1) A single data frame => use vars
if (missing(y)) {
## Special case of a data frame or list of two factors: keep as it is
if (is.list(x) && length(x) == 2 && is.null(vars)) {
clCompa <- as.data.frame(x)
- labels <- names(clCompa)
+ if (missing(labels)) labels <- names(clCompa)
} else {
x <- as.data.frame(x)
- ## Check that vars exist and levels of two vars do match
+ ## Check that vars exist
if (is.null(names(x)) || !all(vars %in% names(x)))
stop("'vars' are not among column names of 'x'")
- if (!all(sort(levels(x[[vars[1]]])) == sort(levels(x[[vars[2]]]))))
- stop("the levels of the two variables in 'x' do not match")
+ ## Check that levels of two vars do match
+ lev1 <- levels(x[[vars[1]]])
+ lev2 <- levels(x[[vars[2]]])
+ if (!all(lev1 == lev2)) {
+ ## If difference is only in the order of both levels, reorder #2
+ if (!all(sort(lev1) == sort(lev2))) {
+ stop("levels of the two variables in 'x' do not match")
+ } else x[[vars[2]]] <- factor(as.character(x[[vars[2]]]),
+ levels = lev1)
+ }
clCompa <- data.frame(class1 = x[[vars[1]]], class2 = x[[vars[2]]])
}
} else { # y is provided
- ## 2) two vectors of factors to compare (must have same length/same levels)
+ ## 2) Two vectors of factors (must have same length/same levels)
if (is.factor(x) && is.factor(y)) {
+ ## Check length match
if (length(x) != length(x))
- stop("not the same number of items in 'x' and 'y'")
- if (!all(sort(levels(x)) == sort(levels(y))))
- stop("'x' and 'y' levels do not match")
+ stop("lengths of 'x' and 'y' are not the same")
+ ## Check levels match
+ lev1 <- levels(x)
+ lev2 <- levels(y)
+ if (!all(lev1 == lev2)) {
+ ## If difference is only in the order of both levels, reorder #2
+ if (!all(sort(lev1) == sort(lev1))) {
+ stop("'x' and 'y' levels do not match")
+ } else y <- factor(as.character(y), levels = lev1)
+ }
clCompa <- data.frame(class1 = y, class2 = x)
} else {
- ## 3) two data frames => merge first, then use vars
- ## Check levels match
- ## Note: if one is a subset of the other,
- ## would it be possible to match them???
+ ## 3) Two data frames => merge first, then use vars
+ ## Check vars exist
if (is.null(names(x)) || !(vars[1] %in% names(x)))
stop("first item of 'vars' is not among names of 'x'")
if (is.null(names(y)) || !(vars[2] %in% names(y)))
stop("second item of 'vars' is not among names of 'y'")
- if (!all(sort(levels(x[[vars[1]]])) == sort(levels(y[[vars[2]]]))))
- stop("levels of the two variables in 'x' and 'y' do not match")
+ ## Check that levels of two vars do match
+ lev1 <- levels(x[[vars[1]]])
+ lev2 <- levels(y[[vars[2]]])
+ if (!all(lev1 == lev2)) {
+ ## If difference is only in the order of both levels, reorder #2
+ if (!all(sort(lev1) == sort(lev2))) {
+ stop("levels of the variables in 'x' and 'y' do not match")
+ } else x[[vars[2]]] <- factor(as.character(x[[vars[2]]]),
+ levels = lev1)
+ }
## Merge data according to merge.by
- clCompa <- merge(y[, c(vars[2], merge.by)], x[, c(vars[1], merge.by)],
- by = merge.by)
- clCompa <- clCompa[, c(ncol(clCompa) - 1, ncol(clCompa))]
+ clCompa <- merge(y[, c(vars[2], merge.by)],
+ x[, c(vars[1], merge.by)], by = merge.by)
+ nc <- ncol(clCompa)
+ clCompa <- clCompa[, c(nc - 1, nc)]
## Are there common objects left?
- if (nrow(clCompa) == 0)
- stop("no common objects between 'x' and 'y'")
+ if (!nrow(clCompa)) stop("no common objects between 'x' and 'y'")
}
}
- .confusion(clCompa, labels, ...)
+ ## Construct the confusion object
+ if (missing(weights)) {
+ .confusion(classes = clCompa, labels = labels, ...)
+ } else {
+ .confusion(classes = clCompa, labels = labels, weights = weights, ...)
+ }
}
confusion.mlearning <- function (x, y = response(x),
-labels = c("Actual", "Predicted"), ...)
- .confusion(data.frame(class1 = y, class2 = predict(x, ...)),
- labels = labels, ...)
+labels = c("Actual", "Predicted"), weights, ...) {
+ ## Check labels
+ labels <- as.character(labels)
+ if (length(labels) != 2)
+ stop("You must provide exactly 2 character strings for 'labels'")
+
+ ## Extract class2 by using predict on the mlearning object
+ class2 <- predict(x, ...)
+
+ ## Check that both variables are of same length and same levels
+ if (length(y) != length(class2))
+ stop("lengths of 'x' and 'y' are not the same")
+ lev1 <- levels(y)
+ lev2 <- levels(class2)
+ if (!all(lev1 == lev2)) {
+ ## If difference is only in the order of both levels, reorder #2
+ if (!all(sort(lev1) == sort(lev2))) {
+ stop("levels of 'x' and 'y' do not match")
+ } else class2 <- factor(as.character(class2), levels = lev1)
+ }
+
+ ## Construct the confusion object
+ if (missing(weights)) {
+ .confusion(data.frame(class1 = y, class2 = class2),
+ labels = labels, ...)
+ } else {
+ .confusion(data.frame(class1 = y, class2 = class2),
+ labels = labels, weights = weights, ...)
+ }
+}
-print.confusion <- function (x, error.col = TRUE, ...)
+weights.confusion <- function (object, ...)
+ attr(object, "weights")
+
+`weights<-`<- function (object, ..., value)
+ UseMethod("weights<-")
+
+`weights<-.confusion`<- function (object, ..., value)
{
+ if (!length(value)) { # value is NULL or of zero length
+ ## Reset weights to original frequencies
+ value <- attr(object, "row.freqs")
+ attr(object, "weights") <- value
+ round(object / apply(object, 1, sum) * value)
+
+ } else if (is.numeric(value)) { # value is numeric
+
+ if (length(value) == 1) { # value is a single number
+ if (is.na(value) || !is.finite(value) || value <= 0)
+ stop("value must be a finite positive number")
+ res <- object / apply(object, 1, sum) * as.numeric(value)
+
+ } else { # value is a vector of numerics
+ ## It must be either of the same length as nrow(object) or of
+ ## levels(objects)
+ l <- length(value)
+ n <- names(value)
+ l2 <- levels(object)
+
+ if (l == nrow(object)) {
+ ## If the vector is named, check names and possibly reorder it
+ if (length(n))
+ if (all(n %in% rownames(object))) {
+ value <- value[rownames(object)]
+ } else stop("Names of the values do not match levels in the confusion matrix")
+
+ } else if (l == length(l2)) {
+ ## Assume names as levels(object), if they are not provides
+ if (!length(n)) names(value) <- n <- l2
+
+ ## If the vector is named, check names match levels
+ if (length(n))
+ if (all(n %in% l2)) {
+ ## Extract levels used in the confusion matrix
+ value <- value[rownames(object)]
+ } else stop("Names of the values do not match levels in the confusion matrix")
+
+ } else stop("length of 'value' do not match the number of levels in the confusion matrix")
+
+ res <- object / apply(object, 1, sum) * as.numeric(value)
+ }
+ attr(res, "weights") <- rowSums(res)
+ res
+
+ } else stop("value must be a numeric vector, a single number or NULL")
+}
+
+print.confusion <- function (x, sums = TRUE, error.col = sums, digits = 0,
+sort = "ward", ...)
+{
## General stats on the confusion matrix
Stats <- attr(x, "stats")
+ Error <- round(Stats["error"] * 100, 1)
cat(Stats["total"], " items classified with ", Stats["truepos"],
- " true positives (", round(Stats["accuracy"], 1), "% accuracy)\n",
+ " true positives (error rate = ", Error, "%)\n",
sep = "")
+ row.freqs <- attr(x, "row.freqs")
+ if (!all(attr(x, "weights") == row.freqs)) {
+ cat("with initial row weights (frequencies):\n")
+ print(row.freqs)
+ cat("Reweighted to:\n")
+ }
## Print the confusion matrix itself
X <- x
class(X) <- "table"
+
+ n <- ncol(X)
+
+ ## Do we sort items?
+ if (length(sort) && !is.na(sort) && sort != FALSE && sort != "") {
+ ## Grouping of items
+ confuSim <- X + t(X)
+ confuSim <- 1 - (confuSim / sum(confuSim) * 2)
+ confuDist <- structure(confuSim[lower.tri(confuSim)], Size = n,
+ Diag = FALSE, Upper = FALSE, method = "confusion", call = "",
+ class = "dist")
+ order <- hclust(confuDist, method = sort)$order
+ X <- X[order, order]
+ }
+
+ ## Change row and column names to a more compact representation
+ nbrs <- formatC(1:ncol(X), digits = 1, flag = "0")
+ colnames(X) <- nbrs
+ rownames(X) <- paste(nbrs, rownames(X))
+
+ ## Add sums?
+ if (isTRUE(as.logical(sums))) {
+ ## Calculate error (%)
+ ErrorTot <- (1 - (sum(diag(x)) / sum(x))) * 100
+ Errors <- as.integer(round(c((1 - diag(X) / apply(X, 1, sum)) * 100,
+ ErrorTot), 0))
+ ## ... and add row and column sums
+ X <- addmargins(X, FUN = list(`(sum)` = sum), quiet = TRUE)
+ } else Errors <- as.integer(round((1 - diag(X) / apply(X, 1, sum)) * 100, 0))
+
+ ## Add class errors?
if (isTRUE(as.logical(error.col))) {
- print(cbind(X, `Error (FNR)` = round((1 - diag(X) / apply(X, 1, sum)), 3)))
- } else print(X)
+ X <- as.table(cbind(X, `(FNR%)` = Errors))
+ dn <- dimnames(X)
+ names(dn) <- names(dimnames(x))
+ dimnames(X) <- dn
+ }
+ print(round(X, digits))
## Return the original object invisibly
invisible(x)
}
+## TODO: a precision-recall diagram for all groups with F1-score lines
plot.confusion <- function (x, y = NULL,
type = c("image", "barplot", "stars", "dendrogram"), stat1 = "Recall",
-stat2 = "Precision", ...)
+stat2 = "Precision", names, ...)
{
if (is.null(y)) type <- match.arg(type)[1] else type <- "stars"
+ if (missing(names)) names <- c(substitute(x), substitute(y))
res <- switch(type,
- image = .confusionImage(x, y, ...),
- barplot = .confusionBar(x, y, ...),
- stars = .confusionStars(x, y, stat1 = stat1, stat2 = stat2, ...),
- dendrogram = .confusionDendro(x, y, ...),
+ image = confusionImage(x, y, ...),
+ barplot = confusionBarplot(x, y, ...),
+ stars = confusionStars(x, y, stat1 = stat1, stat2 = stat2, names, ...),
+ dendrogram = confusionDendrogram(x, y, ...),
stop("'type' must be 'image', 'barplot', 'stars' or 'dendrogram'"))
invisible(res)
}
-## These functions do the respective graphs for confusion objects
-## Old (simpler) version
-#.confusionImage <- function (x, y = NULL, col = heat.colors(10),
-#mar = c(5.1, 12.1, 4.1, 2.1), ...)
-#{
-# if (!inherits(x, "confusion"))
-# stop("'x' must be a 'confusion' object")
-
-# if (!is.null(y))
-# stop("cannot use a second classifier 'y' for this plot")
-# omar <- par("mar")
-# on.exit(par(omar))
-# par(mar = mar)
-# n <- ncol(x)
-# image(1:n, 1:n, 1 / (t(x[n:1, 1:n])), col = col, xlab = "", ylab = "",
-# xaxt = "n", yaxt = "n", ...)
-# axis(1, at = 1:n, las = 2)
-# axis(2, at = n:1, labels = paste(names(attr(x, "nbr.cols")), 1:n),
-# las = 1)
-# abline(h = (1:(n + 1)) - 0.5, lty = 2, col = "gray")
-# abline(v = (1:(n + 1)) - 0.5, lty = 2, col = "gray")
-# invisible(x)
-#}
-
## Representation of the confusion matrix
-.confusionImage <- function (x, y = NULL, mar = c(3.1, 10.1, 3.1, 3.1), asp = 1,
-label = "Actual \\ Predicted", sort = "complete", cex = 1, colfun = NULL,
-ncols = 41, col0 = FALSE, grid.col = "gray", ...)
+confusionImage <- function (x, y = NULL, labels = names(dimnames(x)),
+sort = "ward", numbers = TRUE, digits = 0, mar = c(3.1, 10.1, 3.1, 3.1),
+cex = 1, asp = 1, colfun, ncols = 41, col0 = FALSE, grid.col = "gray", ...)
{
if (!inherits(x, "confusion"))
stop("'x' must be a 'confusion' object")
@@ -184,100 +304,95 @@
if (!is.null(y))
stop("cannot use a second classifier 'y' for this plot")
+ ## Default labels in case none provided
+ if (is.null(labels)) labels <- c("Actual", "Predicted")
+
## Default color function
- if (!length(colfun)) colfun <- function (n, alpha = 1, s = 0.9, v = 0.9) {
+ ## (greens for correct values, reds for errors, white for zero)
+ if (missing(colfun)) colfun <- function (n, alpha = 1, s = 0.9, v = 0.9) {
if ((n <- as.integer(n[1L])) <= 0) return(character(0L))
- ## Define the initial (red) and final (blue) colors with white in between
+ ## Initial (red) and final (green) colors with white in between
cols <- c(hsv(h = 0, s = s, v = v, alpha = alpha), # Red
hsv(h = 0, s = 0, v = v, alpha = alpha), # White
- hsv(h = 2/3, s = s, v = v, alpha = alpha)) # Blue
- ## Use a color ramp from red to white to blue
+ hsv(h = 2/6, s = s, v = v, alpha = alpha)) # Green
+ ## Use a color ramp from red to white to green
return(colorRampPalette(cols)(n))
}
- manuLev <- sub("...", "", rownames(x))
- autoLev <- manuLev
n <- ncol(x)
## Do we sort items?
if (length(sort) && !is.na(sort) && sort != FALSE && sort != "") {
## Grouping of items
confuSim <- x + t(x)
- confuSim <- max(confuSim) - confuSim
+ confuSim <- 1 - (confuSim / sum(confuSim) * 2)
confuDist <- structure(confuSim[lower.tri(confuSim)], Size = n,
Diag = FALSE, Upper = FALSE, method = "confusion", call = "",
class = "dist")
order <- hclust(confuDist, method = sort)$order
x <- x[order, order]
- autoLev <- autoLev[order]
- manuLev <- manuLev[order]
}
- ## Recode levels so that a number is used in front of manu labels
- ## and shown in auto
- autoLev <- formatC(1:length(autoLev), width = 2, flag = "0")
- manuLev <- paste(manuLev, autoLev, sep = "-")
- row.names(x) <- manuLev
- colnames(x) <- autoLev
- ## Calculate colors (use a transfo to get 0, 1, 2, 3, 4, 7, 10, 15, 25+)
+
+ ## Recode row and column names for more compact display
+ colnames(x) <- names2 <- formatC(1:n, digits = 1, flag = "0")
+ rownames(x) <- names1 <- paste(names2, rownames(x))
+
+ ## Transform for better colorization
+ ## (use a transfo to get 0, 1, 2, 3, 4, 7, 10, 15, 25+)
confuCol <- x
confuCol <- log(confuCol + .5) * 2.33
confuCol[confuCol < 0] <- if (isTRUE(as.logical(col0))) 0 else NA
confuCol[confuCol > 10] <- 10
- ## Negative values (in blue) on the diagonal (correct IDs)
+
+ ## Negative values (in green) on the diagonal (correct IDs)
diag(confuCol) <- -diag(confuCol)
+
## Make an image of this matrix
opar <- par(no.readonly = TRUE)
on.exit(par(opar))
par(mar = mar, cex = cex)
image(1:n, 1:n, -t(confuCol[nrow(confuCol):1, ]), zlim = c(-10, 10),
asp = asp, bty = "n", col = colfun(ncols), xaxt = "n", yaxt = "n",
- xlab = "", ylab = "", main = "", ...)
- ## Print the actual numbers
- confuTxt <- as.character(x[n:1, ])
- confuTxt[confuTxt == "0"] <- ""
- text(rep(1:n, each = n), 1:n, labels = confuTxt)
- ## The grid
- abline(h = 0:n + 0.5, col = grid.col)
- abline(v = 0:n + 0.5, col = grid.col)
- ## The axis labels
- axis(1, 1:n, labels = autoLev, tick = FALSE, padj = 0)
- axis(2, 1:n, labels = manuLev[n:1], tick = FALSE, las = 1, hadj = 1)
- axis(3, 1:n, labels = autoLev, tick = FALSE) #, cex.lab = cex)
- axis(4, 1:n, labels = autoLev[n:1], tick = FALSE, las = 1, hadj = 0)
- ## Legend at top-left
- mar[2] <- 1.1
- par (mar = mar, new = TRUE)
- plot(0, 0, type = "n", xaxt = "n", yaxt = "n", bty = "n")
- mtext(label, adj = 0, line = 1, cex = cex)
+ xlab = "", ylab = "", ...)
+
+ ## Indicate the actual numbers
+ if (isTRUE(as.logical(numbers))) {
+ confuTxt <- as.character(round(x[n:1, ], digits = digits))
+ confuTxt[confuTxt == "0"] <- ""
+ text(rep(1:n, each = n), 1:n, labels = confuTxt)
+ }
+
+ ## Add the grid
+ if (length(grid.col)) {
+ abline(h = 0:n + 0.5, col = grid.col)
+ abline(v = 0:n + 0.5, col = grid.col)
+ }
+
+ ## Add the axis labels
+ axis(1, 1:n, labels = names2, tick = FALSE, padj = 0)
+ axis(2, 1:n, labels = names1[n:1], tick = FALSE, las = 1, hadj = 1)
+ axis(3, 1:n, labels = names2, tick = FALSE)
+ axis(4, 1:n, labels = names2[n:1], tick = FALSE, las = 1, hadj = 0)
+
+ ## Add labels at top-left
+ if (length(labels)) {
+ if (length(labels) != 2) stop("You must provide two labels")
+ mar[2] <- 1.1
+ par (mar = mar, new = TRUE)
+ plot(0, 0, type = "n", xaxt = "n", yaxt = "n", bty = "n")
+ mtext(paste(labels, collapse = " // "), adj = 0, line = 1, cex = cex)
+ }
+
## Return the confusion matrix, as displayed, in text format
invisible(x)
}
-## Eliminated to avoid dependency on RColorBrewer and gplots!
-#.confusionTree <- function (x, y = NULL, maxval = 10, margins = c(2, 10),
-#row.v = TRUE, col.v = TRUE, ...)
-#{
-# if (!inherits(x, "confusion"))
-# stop("'x' must be a 'confusion' object")
-# if (!is.null(y))
-# stop("cannot use a second classifier 'y' for this plot")
-#
-# nX <- nrow(x)
-# nY <- ncol(x)
-# nZ <- nX * nY
-# confmat <- pmin(x, maxval)
-# mypalette <- brewer.pal(maxval - 1, "Spectral")
-# heatmap.2(x, col= c(0, mypalette), symm = TRUE, margins = margins,
-# trace = "both", Rowv = row.v, Colv = col.v, cexRow = 0.2 + 1 / log10(nX),
-# cexCol = 0.2 + 1 / log10(nY), tracecol = "Black", linecol = FALSE, ...)
-#}
-
-# Confusion bar with recall and precision in green bar and not outside as before
-# function modified for publication hclust
-.confusionBar <- function (x, y = NULL,
+## Confusion barplot with recall and precision in green bars
+## TODO: various bar rescaling possibilities!!!
+confusionBarplot <- function (x, y = NULL,
col = c("PeachPuff2", "green3", "lemonChiffon2"), mar = c(1.1, 8.1, 4.1, 2.1),
-cex = 0.7, cex.axis = cex, cex.legend = cex, main = "Precision versus Recall",
-min.width = 17, ...)
+cex = 1, cex.axis = cex, cex.legend = cex,
+main = "F-score (precision versus recall)", numbers = TRUE, min.width = 17, ...)
{
if (!inherits(x, "confusion"))
stop("'x' must be a 'confusion' object")
@@ -285,143 +400,89 @@
if (!is.null(y))
stop("cannot use a second classifier 'y' for this plot")
- TP <- diag(x)
- fn <- rowSums(x) - TP
- fp <- colSums(x) - TP
- FN <- fn <- fn/(fn + TP)
- FP <- fp <- fp/(TP + fp)
- FP[is.na(FP)] <- 1
- tp <- 1 - fn
- fp <- tp/(1 - fp) * fp
- scale <- fn + tp + fp
- fn <- fn/scale * 100
- tp <- tp/scale * 100
- fp <- fp/scale * 100
- fn[is.na(tp)] <- 50
+ ## F-score is 2 * recall * precision / (recall + precision), ... but also
+ ## F-score = TP / (TP + FP/2 + FN/2). We represent this in a barplot
+ TP <- tp <- diag(x)
+ FP <- fp <- colSums(x) - tp
+ FN <- fn <- rowSums(x) - tp
+ ## In case we have missing data...
+ fn[is.na(tp)] <- 50
fp[is.na(tp)] <- 50
tp[is.na(tp)] <- 0
- res <- matrix(c(fp, tp, fn), ncol = 3)
- colnames(res) <- c("fp", "tp", "fn")
- Labels <- names(attr(x, "nbr.cols"))
- pos <- order(res[, 2], decreasing = TRUE)
+
+ ## We scale these values, so that the sum fp/2 + tp + fn/2 makes 100
+ scale <- fp/2 + tp + fn/2
+ res <- matrix(c(fp/2 / scale * 100, tp / scale * 100, fn/2 / scale * 100),
+ ncol = 3)
+ colnames(res) <- c("FPcontrib", "Fscore", "FNcontrib") # In %
+ Labels <- names(attr(x, "col.freqs"))
+
+ ## The graph is ordered in decreasing F-score values
+ pos <- order(res[, 2], decreasing = TRUE)
res <- res[pos, ]
FN <- FN[pos]
FP <- FP[pos]
TP <- TP[pos]
Labels <- Labels[pos]
- L <- length(FN)
- omar <- par("mar")
+ l <- length(FN)
+
+ ## Plot the graph
+ omar <- par("mar")
on.exit(par(omar))
par(mar = mar)
- ## Plot the graph
- barplot(t(res), horiz = TRUE, col = col, xaxt = "n", las = 1, space = 0, ...)
- lines(c(50, 50), c(0, L), lwd = 1)
- xpos <- res[, 1] + res[, 2]/2
- text(xpos, 1:L - 0.5, round(TP), adj = c(0.5, 0.5), cex = cex)
- # Add recall and precision if enough place to print it...
- NotPlace <- res[,"tp"] <= min.width
- if (any(NotPlace)) {
- ## Special case if not enough place to print precision and recall
- ## Add Precision
- PrecTxt <- paste(round((1 - FP) * 100), "%", sep = "")
- PrecTxt[NotPlace] <- ""
- text(res[,"fp"] + 1, 1:L - 0.5, PrecTxt, adj = c(0, 0.5), cex = cex)
- ## Add FDR
- FDTxt <- paste(round((FP) * 100), "%", sep = "")
- FDTxt[!NotPlace] <- ""
- text(rep(1, length(FP)), 1:L - 0.5, FDTxt, adj = c(0, 0.5), cex = cex)
- ## Add Recall
- RecTxt <- paste(round((1 - FN) * 100), "%", sep = "")
- RecTxt[NotPlace] <- ""
- text(res[,"fp"] + res[, "tp"] - 5, 1:L - 0.5, RecTxt, adj = c(0, 0.5),
- cex = cex)
- ## Add FN
- FNTxt <- paste(round((FN) * 100), "%", sep = "")
- FNTxt[!NotPlace] <- ""
- text(rep(99, length(FN)), 1:L - 0.5, FNTxt, adj = c(1, 0.5), cex = cex)
- } else {
- ## Add Precision
- text(res[,"fp"] + 1, 1:L - 0.5, paste(round((1 - FP) *
- 100), "%", sep = ""), adj = c(0, 0.5), cex = cex)
- ## Add Recall
- text(res[,"fp"] + res[, "tp"] - 5, 1:L - 0.5, paste(round((1 - FN) *
- 100), "%", sep = ""), adj = c(0, 0.5), cex = cex)
- }
- legend(50, L * 1.05, legend = c("False Discovery : (1 - Prec.)",
- "True Positive (TP)", "False Negative : (1 - Rec.)"), cex = cex.legend,
- xjust = 0.5, yjust = 1, fill = col, bty = "n", horiz = TRUE)
- axis(2, 1:L - 0.5, tick = FALSE, las = 1, cex.axis = cex.axis,
- labels = Labels)
- title(main = main)
- text(50, -0.5, "< higher precision : TP/(TP+FP) - underestimate <=> overestimate - higher recall : TP/(TP+FN) > ",
- cex = cex)
+ ## The barplot
+ barplot(t(res), horiz = TRUE, col = col, xaxt = "n", las = 1, space = 0,
+ main = main, ...)
+ ## The line that shows where symmetry is
+ lines(c(50, 50), c(0, l), lwd = 1)
+
+ ## Do we add figures into the plot?
+ if (isTRUE(as.logical(numbers))) {
+ ## F-score is written in the middle of the central bar
+ xpos <- res[, 1] + res[, 2] / 2
+ text(xpos, 1:l - 0.5, paste("(", round(res[, 2]), "%)", sep = ""),
+ adj = c(0.5, 0.5), cex = cex)
+
+ ## Add the number of FP and FN to the left and right, respectively
+ text(rep(1, l), 1:l - 0.5, round(FP), adj = c(0, 0.5), cex = cex)
+ text(rep(99, l), 1:l - 0.5, round(FN), adj = c(1, 0.5), cex = cex)
+ }
+
+ ## Add a legend (if cex.legend is not NULL)
+ if (length(cex.legend)) {
+ legend(50, l * 1.05, legend = c("False Positives",
+ "2*TP (F-score %)", "False Negatives"), cex = cex.legend, xjust = 0.5, yjust = 1,
+ fill = col, bty = "n", horiz = TRUE)
+ }
+
+ ## Add axes if cex.axis is not NULL
+ if (length(cex.axis))
+ axis(2, 1:l - 0.5, tick = FALSE, las = 1, cex.axis = cex.axis,
+ labels = Labels)
+
invisible(res)
}
-## Precision vs Recall, alternate presentation
-## Note used, but saved for now
-#.confusionBar <- function (x, y = NULL,
-#col = c("PeachPuff2", "green", "green3", "lemonChiffon2"),
-#mar = c(2.1, 8.1, 4.1, 2.1), cex = 0.7, cex.axis = cex, cex.legend = cex,
-#main = "Precision versus Recall", ...)
-#{
-# if (!inherits(x, "confusion"))
-# stop("'x' must be a 'confusion' object")
-# if (!is.null(y))
-# stop("cannot use a second classifier 'y' for this plot")
-#
-# ## Calculation of statistics
-# Stats <- summary(x)
-# FDR <- Stats$FDR * 100
-# Precision <- Stats$Precision * 100
-# Recall <- Stats$Recall * 100
-# FNR <- Stats$FNR * 100
-# ## Order statistics according to Precision + recall
-# pos <- order(Recall + Precision, decreasing = TRUE)
-# ## Results to plot
-# res <- cbind(FDR, Precision, Recall, FNR)
-# ## Do the plot
-# omar <- par("mar")
-# on.exit(par(omar))
-# par(mar = mar)
-# barplot(t(res[pos, ]), horiz = TRUE, col = col, xaxt = "n", las = 1,
-# space = 0, ...)
-# ## Add information
-# n <- nrow(Stats)
-# Labels <- names(attr(x, "nbr.cols"))
-# axis(2, 1:n - 0.5, tick = FALSE, las = 1, cex.axis = cex.axis,
-# labels = Labels[pos])
-# title(main = main)
-# text(rep(1, n), 1:n - 0.5, paste(round(FDR[pos]), "%", sep = ""),
-# adj = c(0, 0.5), cex = cex)
-# text(FDR[pos] + Precision[pos]/2, 1:n - 0.5, paste(round(Precision[pos]),
-# "%", sep = ""), adj = c(0, 0.5), cex = cex)
-# text(FDR[pos] + Precision[pos] + Recall[pos]/2, 1:n - 0.5,
-# paste(round(Recall[pos]), "%", sep = ""), adj = c(0, 0.5), cex = cex)
-# text(rep(191, n), 1:n - 0.5, paste(round(FNR[pos]), "%", sep = ""),
-# adj = c(0, 0.5), cex = cex)
-# legend("top", legend = c("False Discovery ", "Precision", "Recall",
-# "False Negative"), cex = cex.legend, fill = col, bty = "n", horiz = TRUE)
-# text(96, -0.5, "< higher precision - underestimate <=> overestimate - higher recall > ",
-# cex = cex)
-# invisible(res)
-#}
-
-.confusionStars <- function(x, y = NULL, stat1 = "Recall", stat2 = "Precision",
-main = NULL, col = NULL, ...)
+## TODO: check the box around the legend
+confusionStars <- function(x, y = NULL, stat1 = "Recall", stat2 = "Precision",
+names, main, col = c("green2", "blue2", "green4", "blue4"), ...)
{
- if (!inherits(x, "confusion"))
+ ## Check objects
+ if (!inherits(x, "confusion"))
stop("'x' must be a 'confusion' object")
if (!is.null(y) && !inherits(x, "confusion"))
stop("'y' must be NULL or a 'confusion' object")
+ ## Check stats
SupportedStats <- c("Recall", "Precision", "Specificity",
"NPV", "FPR", "FNR", "FDR", "FOR")
if (!stat1 %in% SupportedStats)
- stop("stats1 must be one of followed stats: Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
+ stop("stats1 must be one of Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
if (!stat2 %in% SupportedStats)
- stop("stats2 must be one of followed stats: Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
- Blue <- topo.colors(16)
+ stop("stats2 must be one of Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
+
+ ## Choose colors TODO: add a colors argument!
+ Blue <- topo.colors(16)
Green <- terrain.colors(16)
Stat <- summary(x)
if (!is.null(y)) { # Comparison of two confusion matrices
@@ -430,32 +491,45 @@
Stat2[, stat2])
Data <- rbind(Data, rep(0, 4))
colnames(Data) <- paste(rep(c(stat1, stat2), each = 2), c(2, 1, 1, 2))
- if (!length(main))
- main <- paste("Groups comparison between classifier 1 and 2\nAccuracy 1 =",
- round(attr(Stat, "Accuracy") * 100), "%, accuracy 2 =",
- round(attr(Stat2, "Accuracy") * 100), "%")
- if (!length(col))
- col <- c("green", Green[1], Blue[2], Blue[6])
+ if (missing(main)) { # Calculate a suitable title
+ if (missing(names)) {
+ names <- c(substitute(x), substitute(y))
+ } else if (length(names) != 2)
+ stop("you must provide two nmaes for the two compared classifiers")
+ names <- as.character(names)
+ main <- paste("Groups comparison (1 =", names[1], ", 2 =",
+ names[2], ")")
+ }
+ if (length(col) >= 4) {
+ col <- col[c(3, 1, 2, 4)]
+ } else stop("you must provide four colors for the two statistics and the two classifiers")
} else { # Single confusion matrix
Data <- data.frame(Stat[, stat1], Stat[, stat2])
Data <- rbind(Data, rep(0, 2))
colnames(Data) <- c(stat1, stat2)
- if (!length(main))
- main <- paste("Groups comparison\nAccuracy =",
- round(attr(Stat, "Accuracy") * 100), "%")
- if (!length(col))
- col <- c(Green[1], Blue[2])
+ if (missing(main))
+ main <- paste("Groups comparison")
+ if (length(col) >= 2) {
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/zooimage -r 229
More information about the Zooimage-commits
mailing list