[Zooimage-commits] r192 - pkg/zooimage/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Sep 1 15:55:51 CEST 2010


Author: kevin
Date: 2010-09-01 15:55:50 +0200 (Wed, 01 Sep 2010)
New Revision: 192

Modified:
   pkg/zooimage/R/ZIClass.R
   pkg/zooimage/R/ZIConf.R
   pkg/zooimage/R/ZITrain.R
   pkg/zooimage/R/gui.R
Log:
- ZIClass.R: variables selection changed to select variables to avoid for the model
- ZIConf.R: add confuPlot function to plot a new version fo the confusion matrix and confuMatStats to print all statistics calculated by group
- ZITrain.R: Add.Vign modified with the new code of ZooImagev2
- gui.R: change in function to allow the new added functionnalities

Modified: pkg/zooimage/R/ZIClass.R
===================================================================
--- pkg/zooimage/R/ZIClass.R	2010-08-25 10:25:35 UTC (rev 191)
+++ pkg/zooimage/R/ZIClass.R	2010-09-01 13:55:50 UTC (rev 192)
@@ -210,30 +210,41 @@
 }
 
 # Formula calculation by variables selection for the classifier creation v1.2-2
-FormVarsSelect <- function (x)
+FormVarsSelect <- function (ZITrain)
 {
-	# x must be a ZItrain object
-	if (!inherits(x, "ZITrain"))
-		stop("'x' must be a 'ZITrain' object")
+	# ZITrain must be a ZItrain object
+	if (!inherits(ZITrain, "ZITrain"))
+		stop("'ZITrain' must be a 'ZITrain' object")
 
 	# Parameters measured on particles and new variables calculated
-	mes <- as.vector(colnames(calc.vars(x)))
+	mes <- as.vector(colnames(calc.vars(ZITrain)))
 
 	# Selection of features for the creation of the classifier
-	keep <- select.list(list = mes, preselect = c("ECD", "FIT_Area_ABD",
-		"FIT_Diameter_ABD", "FIT_Volume_ABD", "FIT_Diameter_ESD",
-		"FIT_Volume_ESD", "FIT_Length", "FIT_Width", "FIT_Aspect_Ratio",
-		"FIT_Transparency", "FIT_Intensity", "FIT_Sigma_Intensity",
-		"FIT_Sum_Intensity", "FIT_Compactness", "FIT_Elongation",
-		"FIT_Perimeter", "FIT_Convex_Perimeter", "FIT_Roughness",
-		"FIT_Ch1_Peak", "FIT_Ch1_TOF", "FIT_Ch2_Peak", "FIT_Ch2_TOF",
-		"Area", "Mean", "StdDev", "Mode", "Min", "Max", "Perim.", "Width",
-		"Height", "Major", "Minor", "Circ.", "Feret", "IntDen", "Median",
-		"Skew", "Kurt", "Elongation", "CentBoxD", "GrayCentBoxD", "CentroidsD",
-		"Range", "MeanPos", "SDNorm", "CV", "logArea", "logPerim.", "logMajor",
-		"logMinor", "logFeret"),
-		multiple = TRUE, title = "Select variables to keep")
+#	keep <- select.list(list = mes, preselect = c("ECD", "FIT_Area_ABD",
+#		"FIT_Diameter_ABD", "FIT_Volume_ABD", "FIT_Diameter_ESD",
+#		"FIT_Volume_ESD", "FIT_Length", "FIT_Width", "FIT_Aspect_Ratio",
+#		"FIT_Transparency", "FIT_Intensity", "FIT_Sigma_Intensity",
+#		"FIT_Sum_Intensity", "FIT_Compactness", "FIT_Elongation",
+#		"FIT_Perimeter", "FIT_Convex_Perimeter", "FIT_Roughness",
+#		"FIT_Ch1_Peak", "FIT_Ch1_TOF", "FIT_Ch2_Peak", "FIT_Ch2_TOF",
+#		"Area", "Mean", "StdDev", "Mode", "Min", "Max", "Perim.", "Width",
+#		"Height", "Major", "Minor", "Circ.", "Feret", "IntDen", "Median",
+#		"Skew", "Kurt", "Elongation", "CentBoxD", "GrayCentBoxD", "CentroidsD",
+#		"Range", "MeanPos", "SDNorm", "CV", "logArea", "logPerim.", "logMajor",
+#		"logMinor", "logFeret"),
+#		multiple = TRUE, title = "Select variables to keep")
+	Notkeep <- select.list(list = mes, preselect = c("Id", "FIT_Cal_Const", "Item", "FIT_Raw_Area", "FIT_Raw_Feret_Max",
+		"FIT_Raw_Feret_Min", "FIT_Raw_Feret_Mean", "FIT_Raw_Perim", "FIT_Raw_Convex_Perim", "FIT_Feret_Max_Angle",
+		"FIT_Feret_Min_Angle", "FIT_Avg_Red", "FIT_Avg_Green", "FIT_Avg_Blue", "FIT_PPC", "FIT_Ch3_Peak", "FIT_Ch3_TOF",
+		"FIT_Ch4_Peak", "FIT_Ch4_TOF", "FIT_SaveX", "FIT_SaveY", "FIT_PixelW", "FIT_PixelH", "FIT_CaptureX",         
+		"FIT_CaptureY", "FIT_Edge_Gradient", "FIT_Timestamp1", "FIT_Timestamp2", "FIT_Source_Image", "FIT_Calibration_Image",
+		"FIT_High_U32", "FIT_Low_U32", "FIT_Total", "FIT_Red_Green_Ratio", "FIT_Blue_Green_Ratio", "FIT_Red_Blue_Ratio",   
+		"FIT_Ch2_Ch1_Ratio" , "X.Item.1", "X", "Y", "XM", "YM", "BX", "BY", "Width", "Height", "Angle", "XStart", "YStart",
+		"Count",  "Label", "Dil", "Class"),
+		multiple = TRUE, title = "Select variables to avoid")
+	
 	# Creation of one formula for classifier calculation
+	keep <- mes[!mes %in% Notkeep]
 	res <- as.formula(paste("Class ~ ", paste(keep, collapse= "+")))
 	return(res)
 }

Modified: pkg/zooimage/R/ZIConf.R
===================================================================
--- pkg/zooimage/R/ZIConf.R	2010-08-25 10:25:35 UTC (rev 191)
+++ pkg/zooimage/R/ZIConf.R	2010-09-01 13:55:50 UTC (rev 192)
@@ -248,3 +248,153 @@
 		cex = cex)
 	return(invisible(res))
 }
+
+# Modif K. Denis
+# Graphical representation of the confusion matrix
+"confuPlot" <- function (manual, automatic, label = "manual \\ auto",
+sort = "complete", cex = 1, left.mar = 10, colfun = NULL,
+ncols = 41, col0 = FALSE, grid.col = "gray", asp = 1, ...)
+{
+	# Default color function
+	rwb.colors <- function (n, alpha = 1, gamma = 1, s = 0.9, v = 0.9)
+	{
+		if ((n <- as.integer(n[1L])) <= 0) return(character(0L))
+		# Define the initial (red) and final (blue) colors with white in between
+		cols <- c(hsv(0, s, v, gamma, alpha),   # Red
+				  hsv(0, 0, v, gamma, alpha),   # White
+				  hsv(2/3, s, v, gamma, alpha)) # Blue
+		# Use a color ramp from red to white to blue
+		return(colorRampPalette(cols)(n))
+	}
+	if (is.null(colfun)) colfun <- rwb.colors
+	
+	# Calculate margins
+	mar <- c(3, left.mar, 3, 3) + 0.1
+	# Check manual and automatic
+	if (missing(automatic) || is.null(automatic)) {
+		# This must be a ZIClass object, or something equivalent
+		automatic <- attr(manual, "kfold.predict")
+		manual <- attr(manual, "classes")
+	}	
+	# Get levels
+	manuLev <- levels(manual)
+	autoLev <- levels(automatic)
+	if (!identical(manuLev, autoLev))
+		stop("Factor levels for 'manu' and 'auto' must be the same!")
+	l <- length(manuLev)
+	# Calculate confusion matrix
+	confu <- table(manual, automatic)
+	# Do we sort items?
+	if (!is.null(sort) && !is.na(sort) && sort != FALSE && sort != "") {
+		# Grouping of items
+		confuSim <- confu + t(confu)
+		confuSim <- max(confuSim) - confuSim
+		confuDist <- structure(confuSim[lower.tri(confuSim)], Size = l, Diag = FALSE,
+			Upper = FALSE, method = "confusion", call = "", class = "dist")
+		order <- hclust(confuDist, method = sort)$order
+		confu <- confu[order, order]
+		autoLev <- autoLev[order]
+		manuLev <- manuLev[order]
+	}
+	# Recode levels so that a number is used in front of manu labels
+	# and shown in auto
+	autoLev <- formatC(1:length(autoLev), width = 2, flag = "0")
+	manuLev <- paste(manuLev, autoLev, sep = "-")
+	row.names(confu) <- manuLev
+	colnames(confu) <- autoLev
+	# Calculate colors (use a transfo to get 0, 1, 2, 3, 4, 7, 10, 15, 25+)
+	confuCol <- confu
+	confuCol <- log(confuCol + .5) * 2.33
+	confuCol[confuCol < 0] <- if (isTRUE(col0)) 0 else NA
+	confuCol[confuCol > 10] <- 10
+	# Negative values (in blue) on the diagonal (correct IDs)
+	diag(confuCol) <- -diag(confuCol)	
+	# Make an image of this matrix
+	omar <- par(no.readonly = TRUE)
+	on.exit(par(mar = omar))
+	par(mar = mar, cex = cex)
+	image(1:l, 1:l, -t(confuCol[nrow(confuCol):1, ]), zlim = c(-10, 10), asp = asp, bty = "n",
+		col = colfun(ncols), xaxt = "n", yaxt = "n", xlab = "", ylab = "", main = "")
+	# Print the actual numbers
+	confuTxt <- as.character(confu[l:1, ])
+	confuTxt[confuTxt == "0"] <- ""
+	text(rep(1:l, each = l), 1:l, labels = confuTxt)
+	# The grid
+	abline(h = 0:l + 0.5, col = grid.col)
+	abline(v = 0:l + 0.5, col = grid.col)
+	# The axis labels
+	axis(1, 1:l, labels = autoLev, tick =  FALSE, padj = 0)
+	axis(2, 1:l, labels = manuLev[l:1], tick =  FALSE, las = 1, hadj = 1)
+	axis(3, 1:l, labels = autoLev, tick =  FALSE) #, cex.lab = cex)
+	axis(4, 1:l, labels = autoLev[l:1], tick =  FALSE, las = 1, hadj = 0)
+	# Legend at top-left
+	mar[2] <- 1.1
+	par (mar = mar, new = TRUE)
+	plot(0, 0, type = "n", xaxt = "n", yaxt = "n", bty = "n")
+	mtext(label, adj = 0, line = 1, cex = cex)
+	# Return the confusion matrix, as displayed, in text format
+	return(invisible(confu))
+}
+
+# Table with stats per groupe precision, recall, etc
+ConfMatStats <- function(ZIClass, ZIConf = NULL, sort.by = "FN"){
+    if(is.null(ZIConf)){
+        ZIConf <- ZIConf(ZIClass)
+    }
+    # True positive --> all organism on diagonal!
+    TP <- diag(ZIConf)
+    # Sum of true positive
+    SumTP <- sum(TP)
+    
+    # Sum rows and columns
+    SumRow <- rowSums(ZIConf) # TP + FN
+    SumCol <- colSums(ZIConf) # TP + FP
+    
+    # Out of diagonal
+    # False negative item
+    FN <- SumRow - TP
+    # False positive items
+    FP <- SumCol - TP
+    
+    # Total
+    Tot <- sum(ZIConf)
+    
+    # General stats
+    # Accuracy (TN + TP) / (TP + TN + FP + FN)
+    Accuracy <- SumTP / Tot * 100
+    Error <- 100 - Accuracy
+    
+    # Stats by group
+
+    # Proportion of false negative
+    FalseNeg <- FN / SumRow * 100
+
+    # Proportion of false positive
+    FalsePos <- FP / SumCol * 100
+
+    # Recall = True positive rate = Sensitivity = Probability of detection = TP / (TP + FN)
+    Recall <- TP / (TP + FN)
+
+    # Precision = TP / (TP + FP)
+    Precision <- TP / (TP + FP)
+
+    # Specificity = 1 - FP = TN / (TN + FP)
+    TN <- numeric()
+    for(i in 1:length(TP)){
+        TN[i] <- SumTP - TP[i]
+    }
+    Specificity = TN / (TN + FP) # 100 - FalsePos
+
+    # Bias
+    Bias <- SumCol - SumRow
+    
+    res <- data.frame(FN = round(FalseNeg, digit = 3), FP = round(FalsePos, digit = 3),
+        Recall = round(Recall, digit = 3), Precision = round(Precision, digit = 3), SumTS = SumRow, SumPred = SumCol, Bias = Bias)
+    
+    # Sort the table in function of one parameter by default FN
+    res <- res[order(res[, sort.by]), ]
+    
+    attr(res, "GeneralStats") <- c(Accuracy = Accuracy, Error = Error)
+    cat(paste("Accuracy:", round(Accuracy, digits = 2), "%", "\n", "Error:", round(Error, digits = 2), "%", "\n"))
+    return(res)
+}

Modified: pkg/zooimage/R/ZITrain.R
===================================================================
--- pkg/zooimage/R/ZITrain.R	2010-08-25 10:25:35 UTC (rev 191)
+++ pkg/zooimage/R/ZITrain.R	2010-09-01 13:55:50 UTC (rev 192)
@@ -278,12 +278,12 @@
 {
 	# Check if selected zid files are already classified in the training set
 	Rdata <- list.files(train, pattern = ".RData")
-	Rdata_New <- paste(gsub(".zid", "", basename(zidfiles)),
-		"_dat1.RData", sep = "")
+	Rdata_New <- paste(gsub(".zid", "", basename(zidfiles)), "_dat1.RData", sep = "")
 	NewZid <- Rdata_New %in% Rdata
+	
 	if (length(unique(!NewZid)) == 1) {
 		# All new or existing zid files
-		if (unique(NewZid) == TRUE) # All zids are already in the training set
+		if (isTRUE(unique(NewZid))) # All zids are already in the training set
 			stop("All selected zid files are already included in the training set")
 	} else {
 		# Select only new zid files
@@ -291,32 +291,41 @@
 		cat("You have selected only", length(zidfiles),
 			"new zid files. The others are already used in the training set")
 	}
+	
 	# Extract vignettes to a new subdir in '_' and Rdata to the parental directory
 	# Create the new directory
 	NewDir <- "_/New_Vign_1"
+	
 	# Check if the new directory name already exists
 	if (file.exists(file.path(train, NewDir))) {
 		Vign_lst <- dir(file.path(train, "_"), pattern = "New_Vign_")
 		NewDir <- paste("_/New_Vign_", (length(Vign_lst)+1), sep = "")
 	}
+	
+	# Check if NewDir exist
+	ToPath <- file.path(dir, NewDir)
+	if(!file.exists(ToPath)){
+		# Create this directory
+		force.dir.create(ToPath)
+	}
+	
 	zmax <- length(zidfiles)
 	# Extract Rdata in the root directory
 	for (i in 1:zmax) {
 		logProcess("data", zidfiles[i])
 		Progress(i, zmax)
-		# Unzip data (*.RData files) there
-		cmd <- paste('"', ZIpgm("unzip", "misc"), '" -jqq "', zidfiles[i],
-			'" *.RData -d "', dir = train, '"', sep = "")
-		system(cmd, show.output.on.console = TRUE, invisible = TRUE)
+		# Using a temporary directory to unzip all files and then copy
+		# the RData files to the train directory
+		td <- tempfile()
+		unzip(zipfile = zidfiles[i], exdir = td, overwrite = FALSE)
+		datafiles <- file.path(td, list.files(td,
+			pattern = extensionPattern(".RData"), recursive = TRUE))
+		if (length(datafiles)) file.copy(datafiles, dir)
+		vignettes <- file.path(td, list.files(td,
+			pattern = extensionPattern(".jpg"), recursive = TRUE))
+		if (length(vignettes)) file.copy(vignettes, file.path(ToPath, basename(vignettes)))
+		unlink(td, recursive = TRUE)
 	}
-	# Extract vignettes in the new directory
-	for (i in 1:zmax) {
-		logProcess("vignettes", zidfiles[i])
-		Progress(i, zmax)
-		# Unzip vignettes (*.jpg files) there
-		cmd <- paste('"', ZIpgm("unzip", "misc"), '" -jqq "', zidfiles[i],
-			'" *.jpg -d "', file.path(train, NewDir), '"', sep = "")
-		system(cmd, show.output.on.console = TRUE, invisible = TRUE)
-	}
+	ClearProgress()
 	cat("-- Done --\n")
 }

Modified: pkg/zooimage/R/gui.R
===================================================================
--- pkg/zooimage/R/gui.R	2010-08-25 10:25:35 UTC (rev 191)
+++ pkg/zooimage/R/gui.R	2010-09-01 13:55:50 UTC (rev 192)
@@ -703,9 +703,12 @@
 {
 	# Analyze a classifier, using a ZI1Class object (new version)
 	# Ask for an option of analysis
- 	defval <- "Confusion matrix"
-	opts <- c("Print", "Plot (simple)", "Plot (with tree)",
-		"Precision/recall")
+# Modif K. Denis
+ 	defval <- "Print Confusion Matrix"
+#	opts <- c("Print", "Plot (simple)", "Plot (with tree)",
+#		"Precision/recall")
+	opts <- c("Print Confusion Matrix", "Plot Confusion Matrix", "Print Precision/recall",
+		"Plot Precision/recall")
 	# Then, show the dialog box
  	res <- modalAssistant(paste(getTemp("ZIClass"), "Analyze a classifier"),
 		c("This is a simplified version of the analysis of classifiers",
@@ -722,10 +725,15 @@
 	if (is.null(ZIC)) stop("No current classifier. Please, make one first!")
 	ZIC <- get(ZIC, envir = .GlobalEnv)
 	conf <- ZIConf(ZIC)
+#	switch(res,
+#		`Print` = print(conf),
+#		`Plot (simple)` = plot(conf, type = "image"),
+#		`Plot (with tree)` = plot(conf, type = "tree_image"),
+#		`Precision/recall` = plot(conf, type = "precision_recall"))
 	switch(res,
-		`Print` = print(conf),
-		`Plot (simple)` = plot(conf, type = "image"),
-		`Plot (with tree)` = plot(conf, type = "tree_image"),
+		`Print Confusion Matrix` = print(conf),
+		`Plot Confusion Matrix` = confuPlot(ZIC),
+		`Print Precision/recall` = print(ConfMatStats(ZIConf = conf)),
 		`Precision/recall` = plot(conf, type = "precision_recall"))
 	return(invisible(res))
 }



More information about the Zooimage-commits mailing list