[Zooimage-commits] r192 - pkg/zooimage/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Sep 1 15:55:51 CEST 2010
Author: kevin
Date: 2010-09-01 15:55:50 +0200 (Wed, 01 Sep 2010)
New Revision: 192
Modified:
pkg/zooimage/R/ZIClass.R
pkg/zooimage/R/ZIConf.R
pkg/zooimage/R/ZITrain.R
pkg/zooimage/R/gui.R
Log:
- ZIClass.R: variables selection changed to select variables to avoid for the model
- ZIConf.R: add confuPlot function to plot a new version fo the confusion matrix and confuMatStats to print all statistics calculated by group
- ZITrain.R: Add.Vign modified with the new code of ZooImagev2
- gui.R: change in function to allow the new added functionnalities
Modified: pkg/zooimage/R/ZIClass.R
===================================================================
--- pkg/zooimage/R/ZIClass.R 2010-08-25 10:25:35 UTC (rev 191)
+++ pkg/zooimage/R/ZIClass.R 2010-09-01 13:55:50 UTC (rev 192)
@@ -210,30 +210,41 @@
}
# Formula calculation by variables selection for the classifier creation v1.2-2
-FormVarsSelect <- function (x)
+FormVarsSelect <- function (ZITrain)
{
- # x must be a ZItrain object
- if (!inherits(x, "ZITrain"))
- stop("'x' must be a 'ZITrain' object")
+ # ZITrain must be a ZItrain object
+ if (!inherits(ZITrain, "ZITrain"))
+ stop("'ZITrain' must be a 'ZITrain' object")
# Parameters measured on particles and new variables calculated
- mes <- as.vector(colnames(calc.vars(x)))
+ mes <- as.vector(colnames(calc.vars(ZITrain)))
# Selection of features for the creation of the classifier
- keep <- select.list(list = mes, preselect = c("ECD", "FIT_Area_ABD",
- "FIT_Diameter_ABD", "FIT_Volume_ABD", "FIT_Diameter_ESD",
- "FIT_Volume_ESD", "FIT_Length", "FIT_Width", "FIT_Aspect_Ratio",
- "FIT_Transparency", "FIT_Intensity", "FIT_Sigma_Intensity",
- "FIT_Sum_Intensity", "FIT_Compactness", "FIT_Elongation",
- "FIT_Perimeter", "FIT_Convex_Perimeter", "FIT_Roughness",
- "FIT_Ch1_Peak", "FIT_Ch1_TOF", "FIT_Ch2_Peak", "FIT_Ch2_TOF",
- "Area", "Mean", "StdDev", "Mode", "Min", "Max", "Perim.", "Width",
- "Height", "Major", "Minor", "Circ.", "Feret", "IntDen", "Median",
- "Skew", "Kurt", "Elongation", "CentBoxD", "GrayCentBoxD", "CentroidsD",
- "Range", "MeanPos", "SDNorm", "CV", "logArea", "logPerim.", "logMajor",
- "logMinor", "logFeret"),
- multiple = TRUE, title = "Select variables to keep")
+# keep <- select.list(list = mes, preselect = c("ECD", "FIT_Area_ABD",
+# "FIT_Diameter_ABD", "FIT_Volume_ABD", "FIT_Diameter_ESD",
+# "FIT_Volume_ESD", "FIT_Length", "FIT_Width", "FIT_Aspect_Ratio",
+# "FIT_Transparency", "FIT_Intensity", "FIT_Sigma_Intensity",
+# "FIT_Sum_Intensity", "FIT_Compactness", "FIT_Elongation",
+# "FIT_Perimeter", "FIT_Convex_Perimeter", "FIT_Roughness",
+# "FIT_Ch1_Peak", "FIT_Ch1_TOF", "FIT_Ch2_Peak", "FIT_Ch2_TOF",
+# "Area", "Mean", "StdDev", "Mode", "Min", "Max", "Perim.", "Width",
+# "Height", "Major", "Minor", "Circ.", "Feret", "IntDen", "Median",
+# "Skew", "Kurt", "Elongation", "CentBoxD", "GrayCentBoxD", "CentroidsD",
+# "Range", "MeanPos", "SDNorm", "CV", "logArea", "logPerim.", "logMajor",
+# "logMinor", "logFeret"),
+# multiple = TRUE, title = "Select variables to keep")
+ Notkeep <- select.list(list = mes, preselect = c("Id", "FIT_Cal_Const", "Item", "FIT_Raw_Area", "FIT_Raw_Feret_Max",
+ "FIT_Raw_Feret_Min", "FIT_Raw_Feret_Mean", "FIT_Raw_Perim", "FIT_Raw_Convex_Perim", "FIT_Feret_Max_Angle",
+ "FIT_Feret_Min_Angle", "FIT_Avg_Red", "FIT_Avg_Green", "FIT_Avg_Blue", "FIT_PPC", "FIT_Ch3_Peak", "FIT_Ch3_TOF",
+ "FIT_Ch4_Peak", "FIT_Ch4_TOF", "FIT_SaveX", "FIT_SaveY", "FIT_PixelW", "FIT_PixelH", "FIT_CaptureX",
+ "FIT_CaptureY", "FIT_Edge_Gradient", "FIT_Timestamp1", "FIT_Timestamp2", "FIT_Source_Image", "FIT_Calibration_Image",
+ "FIT_High_U32", "FIT_Low_U32", "FIT_Total", "FIT_Red_Green_Ratio", "FIT_Blue_Green_Ratio", "FIT_Red_Blue_Ratio",
+ "FIT_Ch2_Ch1_Ratio" , "X.Item.1", "X", "Y", "XM", "YM", "BX", "BY", "Width", "Height", "Angle", "XStart", "YStart",
+ "Count", "Label", "Dil", "Class"),
+ multiple = TRUE, title = "Select variables to avoid")
+
# Creation of one formula for classifier calculation
+ keep <- mes[!mes %in% Notkeep]
res <- as.formula(paste("Class ~ ", paste(keep, collapse= "+")))
return(res)
}
Modified: pkg/zooimage/R/ZIConf.R
===================================================================
--- pkg/zooimage/R/ZIConf.R 2010-08-25 10:25:35 UTC (rev 191)
+++ pkg/zooimage/R/ZIConf.R 2010-09-01 13:55:50 UTC (rev 192)
@@ -248,3 +248,153 @@
cex = cex)
return(invisible(res))
}
+
+# Modif K. Denis
+# Graphical representation of the confusion matrix
+"confuPlot" <- function (manual, automatic, label = "manual \\ auto",
+sort = "complete", cex = 1, left.mar = 10, colfun = NULL,
+ncols = 41, col0 = FALSE, grid.col = "gray", asp = 1, ...)
+{
+ # Default color function
+ rwb.colors <- function (n, alpha = 1, gamma = 1, s = 0.9, v = 0.9)
+ {
+ if ((n <- as.integer(n[1L])) <= 0) return(character(0L))
+ # Define the initial (red) and final (blue) colors with white in between
+ cols <- c(hsv(0, s, v, gamma, alpha), # Red
+ hsv(0, 0, v, gamma, alpha), # White
+ hsv(2/3, s, v, gamma, alpha)) # Blue
+ # Use a color ramp from red to white to blue
+ return(colorRampPalette(cols)(n))
+ }
+ if (is.null(colfun)) colfun <- rwb.colors
+
+ # Calculate margins
+ mar <- c(3, left.mar, 3, 3) + 0.1
+ # Check manual and automatic
+ if (missing(automatic) || is.null(automatic)) {
+ # This must be a ZIClass object, or something equivalent
+ automatic <- attr(manual, "kfold.predict")
+ manual <- attr(manual, "classes")
+ }
+ # Get levels
+ manuLev <- levels(manual)
+ autoLev <- levels(automatic)
+ if (!identical(manuLev, autoLev))
+ stop("Factor levels for 'manu' and 'auto' must be the same!")
+ l <- length(manuLev)
+ # Calculate confusion matrix
+ confu <- table(manual, automatic)
+ # Do we sort items?
+ if (!is.null(sort) && !is.na(sort) && sort != FALSE && sort != "") {
+ # Grouping of items
+ confuSim <- confu + t(confu)
+ confuSim <- max(confuSim) - confuSim
+ confuDist <- structure(confuSim[lower.tri(confuSim)], Size = l, Diag = FALSE,
+ Upper = FALSE, method = "confusion", call = "", class = "dist")
+ order <- hclust(confuDist, method = sort)$order
+ confu <- confu[order, order]
+ autoLev <- autoLev[order]
+ manuLev <- manuLev[order]
+ }
+ # Recode levels so that a number is used in front of manu labels
+ # and shown in auto
+ autoLev <- formatC(1:length(autoLev), width = 2, flag = "0")
+ manuLev <- paste(manuLev, autoLev, sep = "-")
+ row.names(confu) <- manuLev
+ colnames(confu) <- autoLev
+ # Calculate colors (use a transfo to get 0, 1, 2, 3, 4, 7, 10, 15, 25+)
+ confuCol <- confu
+ confuCol <- log(confuCol + .5) * 2.33
+ confuCol[confuCol < 0] <- if (isTRUE(col0)) 0 else NA
+ confuCol[confuCol > 10] <- 10
+ # Negative values (in blue) on the diagonal (correct IDs)
+ diag(confuCol) <- -diag(confuCol)
+ # Make an image of this matrix
+ omar <- par(no.readonly = TRUE)
+ on.exit(par(mar = omar))
+ par(mar = mar, cex = cex)
+ image(1:l, 1:l, -t(confuCol[nrow(confuCol):1, ]), zlim = c(-10, 10), asp = asp, bty = "n",
+ col = colfun(ncols), xaxt = "n", yaxt = "n", xlab = "", ylab = "", main = "")
+ # Print the actual numbers
+ confuTxt <- as.character(confu[l:1, ])
+ confuTxt[confuTxt == "0"] <- ""
+ text(rep(1:l, each = l), 1:l, labels = confuTxt)
+ # The grid
+ abline(h = 0:l + 0.5, col = grid.col)
+ abline(v = 0:l + 0.5, col = grid.col)
+ # The axis labels
+ axis(1, 1:l, labels = autoLev, tick = FALSE, padj = 0)
+ axis(2, 1:l, labels = manuLev[l:1], tick = FALSE, las = 1, hadj = 1)
+ axis(3, 1:l, labels = autoLev, tick = FALSE) #, cex.lab = cex)
+ axis(4, 1:l, labels = autoLev[l:1], tick = FALSE, las = 1, hadj = 0)
+ # Legend at top-left
+ mar[2] <- 1.1
+ par (mar = mar, new = TRUE)
+ plot(0, 0, type = "n", xaxt = "n", yaxt = "n", bty = "n")
+ mtext(label, adj = 0, line = 1, cex = cex)
+ # Return the confusion matrix, as displayed, in text format
+ return(invisible(confu))
+}
+
+# Table with stats per groupe precision, recall, etc
+ConfMatStats <- function(ZIClass, ZIConf = NULL, sort.by = "FN"){
+ if(is.null(ZIConf)){
+ ZIConf <- ZIConf(ZIClass)
+ }
+ # True positive --> all organism on diagonal!
+ TP <- diag(ZIConf)
+ # Sum of true positive
+ SumTP <- sum(TP)
+
+ # Sum rows and columns
+ SumRow <- rowSums(ZIConf) # TP + FN
+ SumCol <- colSums(ZIConf) # TP + FP
+
+ # Out of diagonal
+ # False negative item
+ FN <- SumRow - TP
+ # False positive items
+ FP <- SumCol - TP
+
+ # Total
+ Tot <- sum(ZIConf)
+
+ # General stats
+ # Accuracy (TN + TP) / (TP + TN + FP + FN)
+ Accuracy <- SumTP / Tot * 100
+ Error <- 100 - Accuracy
+
+ # Stats by group
+
+ # Proportion of false negative
+ FalseNeg <- FN / SumRow * 100
+
+ # Proportion of false positive
+ FalsePos <- FP / SumCol * 100
+
+ # Recall = True positive rate = Sensitivity = Probability of detection = TP / (TP + FN)
+ Recall <- TP / (TP + FN)
+
+ # Precision = TP / (TP + FP)
+ Precision <- TP / (TP + FP)
+
+ # Specificity = 1 - FP = TN / (TN + FP)
+ TN <- numeric()
+ for(i in 1:length(TP)){
+ TN[i] <- SumTP - TP[i]
+ }
+ Specificity = TN / (TN + FP) # 100 - FalsePos
+
+ # Bias
+ Bias <- SumCol - SumRow
+
+ res <- data.frame(FN = round(FalseNeg, digit = 3), FP = round(FalsePos, digit = 3),
+ Recall = round(Recall, digit = 3), Precision = round(Precision, digit = 3), SumTS = SumRow, SumPred = SumCol, Bias = Bias)
+
+ # Sort the table in function of one parameter by default FN
+ res <- res[order(res[, sort.by]), ]
+
+ attr(res, "GeneralStats") <- c(Accuracy = Accuracy, Error = Error)
+ cat(paste("Accuracy:", round(Accuracy, digits = 2), "%", "\n", "Error:", round(Error, digits = 2), "%", "\n"))
+ return(res)
+}
Modified: pkg/zooimage/R/ZITrain.R
===================================================================
--- pkg/zooimage/R/ZITrain.R 2010-08-25 10:25:35 UTC (rev 191)
+++ pkg/zooimage/R/ZITrain.R 2010-09-01 13:55:50 UTC (rev 192)
@@ -278,12 +278,12 @@
{
# Check if selected zid files are already classified in the training set
Rdata <- list.files(train, pattern = ".RData")
- Rdata_New <- paste(gsub(".zid", "", basename(zidfiles)),
- "_dat1.RData", sep = "")
+ Rdata_New <- paste(gsub(".zid", "", basename(zidfiles)), "_dat1.RData", sep = "")
NewZid <- Rdata_New %in% Rdata
+
if (length(unique(!NewZid)) == 1) {
# All new or existing zid files
- if (unique(NewZid) == TRUE) # All zids are already in the training set
+ if (isTRUE(unique(NewZid))) # All zids are already in the training set
stop("All selected zid files are already included in the training set")
} else {
# Select only new zid files
@@ -291,32 +291,41 @@
cat("You have selected only", length(zidfiles),
"new zid files. The others are already used in the training set")
}
+
# Extract vignettes to a new subdir in '_' and Rdata to the parental directory
# Create the new directory
NewDir <- "_/New_Vign_1"
+
# Check if the new directory name already exists
if (file.exists(file.path(train, NewDir))) {
Vign_lst <- dir(file.path(train, "_"), pattern = "New_Vign_")
NewDir <- paste("_/New_Vign_", (length(Vign_lst)+1), sep = "")
}
+
+ # Check if NewDir exist
+ ToPath <- file.path(dir, NewDir)
+ if(!file.exists(ToPath)){
+ # Create this directory
+ force.dir.create(ToPath)
+ }
+
zmax <- length(zidfiles)
# Extract Rdata in the root directory
for (i in 1:zmax) {
logProcess("data", zidfiles[i])
Progress(i, zmax)
- # Unzip data (*.RData files) there
- cmd <- paste('"', ZIpgm("unzip", "misc"), '" -jqq "', zidfiles[i],
- '" *.RData -d "', dir = train, '"', sep = "")
- system(cmd, show.output.on.console = TRUE, invisible = TRUE)
+ # Using a temporary directory to unzip all files and then copy
+ # the RData files to the train directory
+ td <- tempfile()
+ unzip(zipfile = zidfiles[i], exdir = td, overwrite = FALSE)
+ datafiles <- file.path(td, list.files(td,
+ pattern = extensionPattern(".RData"), recursive = TRUE))
+ if (length(datafiles)) file.copy(datafiles, dir)
+ vignettes <- file.path(td, list.files(td,
+ pattern = extensionPattern(".jpg"), recursive = TRUE))
+ if (length(vignettes)) file.copy(vignettes, file.path(ToPath, basename(vignettes)))
+ unlink(td, recursive = TRUE)
}
- # Extract vignettes in the new directory
- for (i in 1:zmax) {
- logProcess("vignettes", zidfiles[i])
- Progress(i, zmax)
- # Unzip vignettes (*.jpg files) there
- cmd <- paste('"', ZIpgm("unzip", "misc"), '" -jqq "', zidfiles[i],
- '" *.jpg -d "', file.path(train, NewDir), '"', sep = "")
- system(cmd, show.output.on.console = TRUE, invisible = TRUE)
- }
+ ClearProgress()
cat("-- Done --\n")
}
Modified: pkg/zooimage/R/gui.R
===================================================================
--- pkg/zooimage/R/gui.R 2010-08-25 10:25:35 UTC (rev 191)
+++ pkg/zooimage/R/gui.R 2010-09-01 13:55:50 UTC (rev 192)
@@ -703,9 +703,12 @@
{
# Analyze a classifier, using a ZI1Class object (new version)
# Ask for an option of analysis
- defval <- "Confusion matrix"
- opts <- c("Print", "Plot (simple)", "Plot (with tree)",
- "Precision/recall")
+# Modif K. Denis
+ defval <- "Print Confusion Matrix"
+# opts <- c("Print", "Plot (simple)", "Plot (with tree)",
+# "Precision/recall")
+ opts <- c("Print Confusion Matrix", "Plot Confusion Matrix", "Print Precision/recall",
+ "Plot Precision/recall")
# Then, show the dialog box
res <- modalAssistant(paste(getTemp("ZIClass"), "Analyze a classifier"),
c("This is a simplified version of the analysis of classifiers",
@@ -722,10 +725,15 @@
if (is.null(ZIC)) stop("No current classifier. Please, make one first!")
ZIC <- get(ZIC, envir = .GlobalEnv)
conf <- ZIConf(ZIC)
+# switch(res,
+# `Print` = print(conf),
+# `Plot (simple)` = plot(conf, type = "image"),
+# `Plot (with tree)` = plot(conf, type = "tree_image"),
+# `Precision/recall` = plot(conf, type = "precision_recall"))
switch(res,
- `Print` = print(conf),
- `Plot (simple)` = plot(conf, type = "image"),
- `Plot (with tree)` = plot(conf, type = "tree_image"),
+ `Print Confusion Matrix` = print(conf),
+ `Plot Confusion Matrix` = confuPlot(ZIC),
+ `Print Precision/recall` = print(ConfMatStats(ZIConf = conf)),
`Precision/recall` = plot(conf, type = "precision_recall"))
return(invisible(res))
}
More information about the Zooimage-commits
mailing list