[Zooimage-commits] r253 - in pkg/zooimage: . R inst/etc inst/gui inst/gui/errorcorrection inst/planktonSorter man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Sep 29 15:22:00 CEST 2015


Author: phgrosjean
Date: 2015-09-29 15:21:59 +0200 (Tue, 29 Sep 2015)
New Revision: 253

Modified:
   pkg/zooimage/DESCRIPTION
   pkg/zooimage/NAMESPACE
   pkg/zooimage/NEWS
   pkg/zooimage/R/ZIClass.R
   pkg/zooimage/R/ZIRes.R
   pkg/zooimage/R/ZITrain.R
   pkg/zooimage/R/correction.R
   pkg/zooimage/R/fileutils.R
   pkg/zooimage/R/gui.R
   pkg/zooimage/R/guiutils.R
   pkg/zooimage/R/import.R
   pkg/zooimage/R/planktonSorter.R
   pkg/zooimage/R/utilities.R
   pkg/zooimage/R/zic.R
   pkg/zooimage/R/zid.R
   pkg/zooimage/R/zidb.R
   pkg/zooimage/R/zie.R
   pkg/zooimage/R/zim.R
   pkg/zooimage/R/zip.R
   pkg/zooimage/R/zis.R
   pkg/zooimage/R/zzz.R
   pkg/zooimage/inst/etc/ZooImageManual.pdf
   pkg/zooimage/inst/gui/Menus.txt
   pkg/zooimage/inst/gui/MenusZIDlgWin.txt
   pkg/zooimage/inst/gui/ToolbarsZIDlgWin.txt
   pkg/zooimage/inst/gui/errorcorrection/global.R
   pkg/zooimage/inst/gui/errorcorrection/server.R
   pkg/zooimage/inst/gui/errorcorrection/ui.R
   pkg/zooimage/inst/planktonSorter/planktonSorter.js
   pkg/zooimage/man/ZITrain.Rd
   pkg/zooimage/man/correctError.Rd
   pkg/zooimage/man/gui.Rd
   pkg/zooimage/man/import.Rd
   pkg/zooimage/man/utilities.Rd
   pkg/zooimage/man/zip.Rd
   pkg/zooimage/man/zooimage.package.Rd
Log:
Version 5.4-1 with lots of changes preparing version 6.0-0

Modified: pkg/zooimage/DESCRIPTION
===================================================================
--- pkg/zooimage/DESCRIPTION	2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/DESCRIPTION	2015-09-29 13:21:59 UTC (rev 253)
@@ -1,22 +1,25 @@
 Package: zooimage
 Type: Package
-Title: Analysis of numerical zooplankton images
-Version: 5.1-2
-Date: 2014-12-09
+Title: Analysis of Numerical Zooplankton Images
+Version: 5.4-1
+Date: 2015-09-29
 Author: Philippe Grosjean [aut, cre],
-  Kevin Denis [aut]
+  Kevin Denis [aut], Guillaume Wacquet [aut]
 Authors at R: c(person("Philippe", "Grosjean", role = c("aut", "cre"),
   email = "phgrosjean at sciviews.org"),
   person("Kevin", "Denis", role = "aut",
-  email = "kevin.denis at umons.ac.be"))
+  email = "kevin.denis at umons.ac.be"),
+  person("Guillaume", "Wacquet", role = "aut",
+  email = "guillaume.wacquet at umons.ac.be"))
 Maintainer: Philippe Grosjean <phgrosjean at sciviews.org>
 Depends: R (>= 2.14.0), svMisc (>= 0.9-67), svDialogs (>= 0.9-53), mlearning
-Imports: filehash, jpeg, png, tiff, utils, digest, tools, shiny
+Imports: filehash, jpeg, png, tiff, utils, digest, tools, MASS, mda, shiny, DT
 Suggests: rJava, mlbench
-Description: ZooImage is a free (open source) solution for analyzing digital
+Description: A free (open source) solution for analyzing digital
 	images of zooplankton. In combination with ImageJ, a free image analysis
 	system, it processes digital images, measures individuals, trains for
 	automatic classification of taxa, and finally, measures zooplankton samples
-	(abundances, total and partial size spectra or biomasses, etc.)
+	(abundances, total and partial size spectra or biomasses, etc.).
 License: GPL (>= 2)
 URL: http://www.sciviews.org/zooimage
+NeedsCompilation: no

Modified: pkg/zooimage/NAMESPACE
===================================================================
--- pkg/zooimage/NAMESPACE	2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/NAMESPACE	2015-09-29 13:21:59 UTC (rev 253)
@@ -21,7 +21,10 @@
 #import(RWeka)
 import(mlearning)
 #import(party)
-import(shiny)
+importFrom(MASS, lda)
+importFrom(mda, mda, gen.ridge)
+importFrom(shiny, updateSelectInput, shinyServer, reactive, isolate, renderPrint, updateTabsetPanel, renderPlot, shinyUI, fluidPage, fluidRow, sidebarPanel, selectInput, actionButton, mainPanel, tabsetPanel, tabPanel, verbatimTextOutput, plotOutput)
+importFrom(DT, dataTableOutput, renderDataTable)
 
 # planktonSorter
 export(correctError)
@@ -87,13 +90,22 @@
 # ZITrain/ZITest
 export(prepareTrain)
 export(addToTrain)
+export(compTrain)
 export(getTrain)
 export(prepareTest)
 export(addToTest)
 export(getTest)
 export(recode)
 export(template)
+export(contextSelection)
+export(addItemsToTrain)
+export(dropItemsToTrain)
 
+# ZICell
+export(cellCount)
+export(cellModel)
+export(cellCompute)
+
 # ZIRes
 export(processSample)
 export(processSampleAll)
@@ -101,7 +113,6 @@
 # Utilities
 export(addClass)
 export(calcVars)
-export(calcVarsVIS)
 export(calibrate)
 export(dropVars)
 export(ecd)
@@ -136,9 +147,11 @@
 # GUI
 export(aboutZI)
 export(acquireImg)
+export(activeLearningGUI)
 export(addVigsToTrain)
 export(analyzeClass)
 export(calib)
+export(countCellsGUI)
 export(closeAssistant)
 export(closeZooImage)
 export(collectTrain)
@@ -152,6 +165,8 @@
 export(loadObjects)
 export(makeClass)
 export(makeZid)
+export(makeZidb)
+export(makeZidbFlowCAM)
 export(makeTrain)
 export(optInOutDecimalSep)
 export(processImg)

Modified: pkg/zooimage/NEWS
===================================================================
--- pkg/zooimage/NEWS	2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/NEWS	2015-09-29 13:21:59 UTC (rev 253)
@@ -1,5 +1,89 @@
 = zooimage News
 
+== Changes in zooimage 5.4-1
+
+* Correction of various bugs by Guillaume Wacquet.
+
+
+== Changes in zooimage 5.4-0
+
+* Functions to count cells in a particle (colony): countCells() and the
+  corresponding countCellsGUI() function for an access through the menu.
+
+* Function to build predictive models for cells in particles (colonies) after
+  counting: cellModel().
+
+* Function to compute the number of cells in particles in a new sample:
+  cellCompute().
+  
+* Function to make zidb file for FlowCAM data through the menu:
+  makeZidbMakeFlowCAM().
+
+* Correct makeClass() function in the menu (missing formula).
+  
+* Functions to process active learning: contextSelection(), the associated
+  addItemsToTrain() and dropItemsToTrain() functions to complete the training
+  set with validated items, and the corresponding activeLearningGUI() function
+  for an access through the menu. Integration in the classification process.
+  
+* Function compTrain() to compare two training sets and highlight differences.
+
+* Translation of the user manual in French and update to include all changes.
+
+* Adaptation of menus to include also the new features.
+
+
+== Changes in zooimage 5.3-0
+
+* calcVarsVIS() now included in calcVars() and not accessible any more as top
+  function (to avoid duplicate code).
+
+* ecd() and ecdCell() are now merged in ecd() using arguments cells = 1.
+
+* errorCorrection() and planktonSorterPage() functions now use the new interface
+  to get R httpd server port, for R svn rev >= 67550.
+  
+* In the shiny app, dataTableOutput is now called from the DT package.
+  Dependency to DT is now added.
+  
+* The planktonSorter html page was sometimes too small, with the last group
+  being wrapped consequently. Solved by increasing the page by 1 pixel * groups.
+  
+* A bug in processSample() prevented for calculation with both 'keep=' and a
+  data frame for 'biomass=' provided simultaneously.
+
+
+== Changes in zooimage 5.2-0
+
+* importFlowCAM() now can deal with color FlowCAM images (but they are first
+  converted into grayscale because background calibration images are recorded
+  as grayscale by Visual Spreadsheet -at least, the tested version 3.2.3-, hence
+  we cannot subtract the background of the vignettes in color mode)!
+  
+* importFlowCAM() now iterates a message to indicate progression of vignettes
+  importation.
+  
+* For importFlowCAM(), the default value of the argument 'rgb.vigs' is changed
+  from 'TRUE' to 'FALSE'.
+  
+* Functions to count cells in a particle (colony): cellCount() and the
+  corresponding cellCountGUI() function for an access through the menu.
+
+* New utility function ecdCell() to calculate the ECD for one cell in a colony.
+
+## TODO:
+* Functions to build predictive models for cells in particles (colonies) after counting:   
+  cellModel().
+
+* Function to compute the number of cells in particles in a new sample: cellCompute().
+  
+
+
+* Function to make zidb file for FlowCAM data through the menu: zidbMakeFlowCAMGUI().
+
+* Correct makeClass() function in the menu (missing formula).
+
+
 == Changes in zooimage 5.1-0
 
 * calcVars()/calcVarsVIS() and dropVars() are reworked to used only FIT_xxx

Modified: pkg/zooimage/R/ZIClass.R
===================================================================
--- pkg/zooimage/R/ZIClass.R	2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/R/ZIClass.R	2015-09-29 13:21:59 UTC (rev 253)
@@ -1,4 +1,4 @@
-## Copyright (c) 2004-2012, Ph. Grosjean <phgrosjean at sciviews.org>
+## Copyright (c) 2004-2015, Ph. Grosjean <phgrosjean at sciviews.org>
 ##
 ## This file is part of ZooImage
 ##

Modified: pkg/zooimage/R/ZIRes.R
===================================================================
--- pkg/zooimage/R/ZIRes.R	2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/R/ZIRes.R	2015-09-29 13:21:59 UTC (rev 253)
@@ -1,4 +1,4 @@
-## Copyright (c) 2004-2012, Ph. Grosjean <phgrosjean at sciviews.org>
+## Copyright (c) 2004-2015, Ph. Grosjean <phgrosjean at sciviews.org>
 ##
 ## This file is part of ZooImage
 ## 
@@ -103,12 +103,33 @@
 }
 
 ## Calculate abundances, biomasses and size spectra per class in a sample
+#processSample <- function (x, sample, keep = NULL, detail = NULL, classes = "both",
+#header = c("Abd", "Bio"), cells = NULL, biomass = NULL, breaks = NULL)
 processSample <- function (x, sample, keep = NULL, detail = NULL, classes = "both",
 header = c("Abd", "Bio"), biomass = NULL, breaks = NULL)
 {
 	## Fix ECD in case of FIT_VIS data
 	if ("FIT_Area_ABD" %in% names(x)) x$ECD <- ecd(x$FIT_Area_ABD)
 	
+	## Do we compute the number of cells and the ECD per cell?
+	## But see version hereunder!
+#### TODO: compute ECD using number of cells per colonies!
+####	if (!is.null(cells)) {
+####		x$Nb_cells <- computeNbCells(x, cells)
+####		x$ECD_cells <- ecd(x$FIT_Area_ABD, x$Nb_cells)
+####	}
+#### PhG: here, computation before argument checking is not good!
+#### PhG: cells points to a file. Not good! We ask for a specific object instead
+		
+	## Do we compute the number of cells and the ECD per cell?
+	## PhG: should not rely on a filehere!
+####	if (!is.null(cells) && file.exists(cells)) {
+####		## Must be a ZICell model here! predict() iterates on all items
+####		## of the list to compute cells for all classes!
+####		x$Nb_cells <- predict(cells, x)
+####		x$ECD_cells <- ecd(x$FIT_Area_ABD, x$Nb_cells)
+####}
+		
 	## Check arguments
 	if (missing(sample)) {
 		sample <- unique(sampleInfo(x$Label, type = "sample", ext = ""))
@@ -172,6 +193,7 @@
 		}
 		x <- x[x$Cl %in% keep, ] # Select keep levels
 	}
+	Cl <- as.character(x$Cl)
 	if (NROW(x) == 0) {
 		warning("no data left for this sample in 'x' when 'keep' is applied")
 		return(NULL)
@@ -211,12 +233,16 @@
 			x$P2 <- biomass[2]
 			x$P3 <- biomass[3]
 		} else stop("wrong 'biomass', must be NULL, a vector of 3 values or a data frame with Class, P1, P2 and P3")
-		if (!is.numeric(x$ECD)) stop("'ECD' required for biomasses")
-		x$BioWeight <- (x$P1 * x$ECD^x$P3 + x$P2) * x$Dil
+		## Prefer using ECD_cells and Nb_cells if it exists
+		if (is.numeric(x$ECD_cells)) {
+			x$BioWeight <- (x$P1 * x$ECD_cells^x$P3 + x$P2) * x$Dil * x$Nb_cells
+		} else {
+			if (!is.numeric(x$ECD)) stop("'ECD' required for biomasses")
+			x$BioWeight <- (x$P1 * x$ECD^x$P3 + x$P2) * x$Dil
+		}
 	}
 	
 	## Split among detail, if provided
-	Cl <- as.character(x$Cl)
 	if (length(detail)) {
 		# We want more details for one ore more groups...
 		detail <- as.character(detail)
@@ -225,9 +251,15 @@
 		
 		Cl[!Cl %in% detail] <- "[other]"
 		x$Cl <- Cl
-		res <- tapply(x$Dil, Cl, sum, na.rm = TRUE)
-		res <- res[c(detail, "[other]")]
-		res <- c(res, '[total]' = sum(x$Dil, na.rm = TRUE))
+		if (is.numeric(x$Nb_cells)) {
+			res <- tapply(x$Dil * x$Nb_cells, Cl, sum, na.rm = TRUE)
+			res <- res[c(detail, "[other]")]
+			res <- c(res, '[total]' = sum(x$Dil  * x$Nb_cells, na.rm = TRUE))
+		} else {
+			res <- tapply(x$Dil, Cl, sum, na.rm = TRUE)
+			res <- res[c(detail, "[other]")]
+			res <- c(res, '[total]' = sum(x$Dil, na.rm = TRUE))
+		}
 		names(res) <- paste(header[1], names(res))
 		
 		if (!missing(biomass)) {
@@ -239,7 +271,11 @@
 		}
 		
 	} else { # Total abundance (and biomass) only
-		res <- sum(x$Dil, na.rm = TRUE)
+		if (is.numeric(x$Nb_cells)) {
+			res <- sum(x$Dil * x$Nb_cells, na.rm = TRUE)
+		} else {
+			res <- sum(x$Dil, na.rm = TRUE)
+		}
 		if (!missing(biomass))
 			res <- c(res, sum(x$BioWeight, na.rm = TRUE))
 		names(res) <- paste(header, "[total]")
@@ -250,7 +286,7 @@
 	res <- structure(data.frame(Id = sample, t(res), check.names = FALSE),
 		class = c("ZI3Res", "ZIRes", "data.frame"))
 	
-	## Do we calculate size spectra?
+	## Do we calculate size spectra? (always by colonies, only)!
 	if (length(breaks)) {
 		if (!is.numeric(breaks) || length(breaks) < 2)
 			stop("'breaks' must be a vector of two or more numerics or NULL")

Modified: pkg/zooimage/R/ZITrain.R
===================================================================
--- pkg/zooimage/R/ZITrain.R	2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/R/ZITrain.R	2015-09-29 13:21:59 UTC (rev 253)
@@ -1,4 +1,4 @@
-## Copyright (c) 2004-2012, Ph. Grosjean <phgrosjean at sciviews.org>
+## Copyright (c) 2004-2015, Ph. Grosjean <phgrosjean at sciviews.org>
 ##
 ## This file is part of ZooImage
 ## 
@@ -160,7 +160,7 @@
             ## Link .zidb database to R objects in memory
             Zidb <- zidbLink(zidbfiles[i])
             AllItems <- ls(Zidb)
-            Vigns <- AllItems[-grep("_dat1", AllItems)]
+            Vigns <- AllItems[!grepl("_dat1", AllItems)]
             ## Extract all vignettes in their class subdirectory
             imgext <- Zidb[[".ImageType"]]
 			## Get path for the vignettes and copy them there
@@ -244,7 +244,7 @@
 			## Link .zidb database to R objects in memory
             Zidb <- zidbLink(zidbfile)
             AllItems <- ls(Zidb)
-            Vigns <- AllItems[-grep("_dat1", AllItems)]
+            Vigns <- AllItems[!grepl("_dat1", AllItems)]
             ## Copy all vignettes in the TopPath directory
             imgext <- Zidb[[".ImageType"]]
 			## Get path for the vignettes and copy them there

Modified: pkg/zooimage/R/correction.R
===================================================================
--- pkg/zooimage/R/correction.R	2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/R/correction.R	2015-09-29 13:21:59 UTC (rev 253)
@@ -1,3 +1,20 @@
+## Copyright (c) 2004-2015, Ph. Grosjean <phgrosjean at sciviews.org>
+##
+## This file is part of ZooImage
+## 
+## ZooImage is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 2 of the License, or
+## (at your option) any later version.
+## 
+## ZooImage is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+## 
+## You should have received a copy of the GNU General Public License
+## along with ZooImage. If not, see <http://www.gnu.org/licenses/>.
+
 ## TODO: disable next button when in last fraction
 ## TODO: save temporary and final results in the zidb file
 ## TODO: correct bugs with the back button
@@ -97,7 +114,6 @@
 	}
 }
 
-
 #### Functions used only by errorCorrection() ##################################
 ## Calculation of the global error based on validated items
 ## /!\ NOT based on items randomly selected --> Bad approximation
@@ -348,9 +364,9 @@
 ## and how to retrieve class then???
 ## Group options in a single object too
 errorCorrection <- function (data, classifier, mode = "validation",
-fraction = 0.05, sample.min = 100, grp.min = 2, random.sample = 0.1,
-algorithm = "rf", diff.max = 0.2, prop.bio = NULL,
-zidb = NULL, testdir = NULL, id = NULL,
+fraction = 0.05, sample.min = 100, sample.max = 200, grp.min = 2,
+random.sample = 0.1, algorithm = "rf", diff.max = 0.2, prop.bio = NULL,
+rare = 0.01, zidb = NULL, testdir = NULL, id = NULL,
 result = ".last_valid", envir = parent.frame()) {
 	#### Parameters explanations
 	#### Data and classifier
@@ -379,6 +395,12 @@
 	sample.min <- as.integer(sample.min)[1]
 	if (sample.min < 1)
 		stop("sample.min must be a positive integer")
+	## sample.max -- maximum number of particles to validate ate each step
+	sample.max <- as.integer(sample.max)[1]
+	if (sample.max < 1)
+		stop("sample.max must be a positive integer")
+	if (sample.max < sample.min)
+		stop("sample.max must be higher or equal than sample.min")
 	## grp.min -- minimum number of particles of each group to validate
 	grp.min <- as.integer(grp.min)[1]
 	if (grp.min < 1 || grp.min > sample.min)
@@ -390,13 +412,17 @@
 	
 	#### Parameters for the detection of suspects
 	## algorithm -- algorithm used to detect suspect particles
-	## diff.max -- maximum toalerated difference in probabilities for class identification
+	## diff.max -- maximum tolerated difference in probabilities for class identification
 	diff.max <- as.numeric(diff.max)[1]
 	if (diff.max < 0)
 		stop("diff.max must be a positive number or zero")
 	## proba.bio -- groups probabilities, using biological information
 	if (length(prop.bio) && (!is.numeric(prop.bio) || is.null(names(prop.bio))))
 		stop("prop.bio must be a named vector (groups) of numbers")
+	## rare -- detection of rare items
+    rare <- as.numeric(rare)[1]
+    if (rare < 0 || rare > 0.2)
+		stop("rare must be between 0 and 0.2")
 	
 	## zidb -- path to the zidbfile to manually validate
 	## testdir -- path of the directory used for manual validation
@@ -431,7 +457,12 @@
 	manual.history <- NULL		# history of the manual confusion matrix
 	manual2.history <- NULL		# history of manual + 2nd ident confusion matrix
 	corr.confusion <- NULL 		# corrected confusion matrix
+	classRare <- NULL			# String with the list of classes considered as rare
+	cell.confusion <- NULL   	# corrected confusion matrix for cells
+    bioweight.confusion <- NULL # corrected confusion matrix for bioweight
 	correction.history <- NULL 	# history of the correction confusion matrix
+	correctionCell.history <- NULL   # history of the correction confusion matrix for cells
+    correctionBio.history <- NULL   # history of the correction confusion matrix for bioweight
 	error.estim.data <- NULL 	# data used to estimate the error
 	error.estim <- NULL			# history of the error estimation
 	error.estim.history <- NULL # evolution of the error estimation
@@ -523,11 +554,16 @@
 		}
 		predicted2 <- secondIdent(levels(predicted))
 		
+		predTable <- table(predicted)
+		prop <- predTable / sum(predTable)
+		classRare <<- names(which(prop < rare))
+		
 		## Creation of corr object
 		corr <<- data.frame(Actual = predicted, Actual2 = predicted2,
 			Predicted = predicted, Predicted2 = predicted2, Validated = FALSE,
 			Error = error, Step = step, Suspect = rep(TRUE, nobs),
-			RdValidated = rep(Inf, nobs), OtherGp = rep(FALSE, nobs))
+			Rare = predicted %in% classRare, RdValidated = rep(Inf, nobs),
+			OtherGp = rep(FALSE, nobs))
     
 		## Confusion matrix of original classifier
 		train.validated <- attr(classifier, "response")
@@ -575,18 +611,23 @@
 	
 		## Increment step (should be 0 now, because initial value is -1)
 		step <<- step + 1
+		
 		## Determine the number of vignettes to manually validate
 		setSampleSize()
 	}
 
 	## Compute the size of the next subsample: update sample.size
 	setSampleSize <- function () {
-		sample.size <<- round(min(nrow(corr[!corr$Validated, ]), # How much remains?
-			## Or number of items we want to take
-			max(nrow(data) * fraction, # Items to take
-				sample.min, # Minimum items we can take
-				## TODO: check the following one!
-				grp.min * length(table(predicted))))) # Minimum from groups
+		## Number of items we want to take
+		sample.size <<- ceiling(nrow(data) * fraction)
+		## No less than sample.min
+		sample.size <<- max(sample.size, sample.min)
+		## According to complexity of the training set, take possibly more
+		sample.size <<- max(sample.size, grp.min * length(levels(predicted)))
+		## ... but no more than sample.max
+		sample.size <<- min(sample.size, sample.max)
+		## Or how much remains?
+		sample.size <<- min(sample.size, nrow(corr[!corr$Validated, ]))
 	}	
 	
 	## Determine the subsample to validate
@@ -628,7 +669,7 @@
 			newstep[newstep == -1] <- step
 			corr$Step[randomsample.ids] <<- newstep
 			notvalid.ids <- ids[!corr$Validated & corr$RdValidated == step]
-			## Number of items to valid in order to acheive sample.size
+			## Number of items to valid in order to achieve sample.size
 			numSample <- sample.size - length(notvalid.ids) 
 			if (numSample > 0) {
 				## Randomly select suspect items not validated
@@ -650,7 +691,63 @@
 						corr$Step[trustsample.ids] <<- step
 					}
 				}
-			} 
+			}
+					            
+      ############### stratified random sampling ############### 
+#       if (numSample > 0) {
+#     		## Select the same number of suspect items not validated in each class
+#   			suspnotval.ids <- ids[!corr$Validated & corr$Suspect &
+#   			  is.infinite(corr$RdValidated) & corr$Step == -1]
+#   		  if (length(suspnotval.ids)) {
+#   		    splitGp <- split(suspnotval.ids, list(corr[suspnotval.ids,]$Predicted))
+#   		    strat.samples <- lapply(splitGp, function(x) x[sample(1:NROW(x), 
+#   		                      min(NROW(x), round(numSample/length(unique(corr$Predicted[as.numeric(suspnotval.ids)])))), 
+#   		                      replace = FALSE)])
+#   		    suspsample.ids <- as.numeric(do.call(c, strat.samples))
+#   			  corr$Step[suspsample.ids] <<- step
+#   			  numSample <- numSample - length(suspsample.ids)        
+#   			}
+#         
+#         if (numSample > 0) {
+#           ## If not completed, randomly select suspects items not validated
+#           suspnotval.ids <- ids[!corr$Validated & corr$Suspect &
+#                                   is.infinite(corr$RdValidated) & corr$Step == -1]
+#           if (length(suspnotval.ids)) {
+#             suspsample.ids <- as.numeric(sample(suspnotval.ids,
+#                        size = min(numSample, length(suspnotval.ids))))
+#             corr$Step[suspsample.ids] <<- step
+#             numSample <- numSample - length(suspsample.ids)        
+#           }
+#         }
+#         
+#   			if (numSample > 0) {
+#   			  ## If not completed, Select the same number of trusted items not validated in each class
+#   			  trustnotval.ids <- ids[!corr$Validated & !corr$Suspect  &
+#   			    is.infinite(corr$RdValidated) & corr$Step == -1]
+#   			  if (length(trustnotval.ids)) {
+#   			    splitGp <- split(trustnotval.ids, list(corr[trustnotval.ids,]$Predicted))
+#   			    strat.samples <- lapply(splitGp, function(x) x[sample(1:NROW(x), 
+#   			                          min(NROW(x), round(numSample/length(unique(corr$Predicted[as.numeric(trustnotval.ids)])))), 
+#   			                          replace = FALSE)])
+#   			    trustsample.ids <- as.numeric(do.call(c, strat.samples))
+#   			    corr$Step[trustsample.ids] <<- step
+#   			    numSample <- numSample - length(trustsample.ids)
+#   			  }
+#   			}
+#         
+#   			if (numSample > 0) {
+#           ## If not completed, randomly select trusted items not validated
+#   			  trustnotval.ids <- ids[!corr$Validated & !corr$Suspect &
+#   			                          is.infinite(corr$RdValidated) & corr$Step == -1]
+#   			  if (length(trustnotval.ids)) {
+#   			    trustsample.ids <- as.numeric(sample(trustnotval.ids,
+#   			                                        size = min(numSample, length(trustnotval.ids))))
+#   			    corr$Step[trustsample.ids] <<- step
+#   			    numSample <- numSample - length(trustsample.ids)        
+#   			  }
+#   			}
+      ############### ############### ###############
+			
 			nsuspect.tovalid <- length(ids[corr$Step == step & corr$Suspect])
 			ntrusted.tovalid <- length(ids[corr$Step == step & !corr$Suspect])
 			nsuspect.history <<- c(nsuspect.history, nsuspect)
@@ -662,7 +759,11 @@
 		if (mode != "stat") {
 			## Make sure the R Httpd server is started
 			tools <- getNamespace("tools")
-			port <- tools$httpdPort
+			if (R.Version()$`svn rev` >= 67550) {
+				port <- tools::startDynamicHelp(NA)
+			} else {
+				port <- tools$httpdPort
+			}
 			if (port == 0) port <- startDynamicHelp(TRUE)
 			if (port == 0) stop("Impossible to start the R httpd server")
 			
@@ -817,6 +918,27 @@
 				error.estim.data$Actual, useNA = "no") # remove NAs
 			corr.confusion <<- error.conf / sum(error.conf) *
 				(nrow(data) - sum(corr$OtherGp)) # remove NAs
+				
+			## For cells
+			if ("Nb_cells" %in% names(data)) {
+				error.conf.cell <- xtabs(data$Nb_cells[corr$Step==step] ~
+                    error.estim.data$Actual + error.estim.data$Predicted,
+					exclude = c(NA, NaN))
+				cell.confusion <<- error.conf.cell /
+					sum(error.conf.cell) * (sum(data$Nb_cells) -
+					sum(data$Nb_cells[corr$OtherGp])) # remove NAs
+			}
+
+			## For biovolumes
+			if ("BioWeight" %in% names(data)) {
+				error.conf.bioweight <- xtabs(data$BioWeight[corr$Step==step] ~
+			        error.estim.data$Actual + error.estim.data$Predicted,
+					exclude = c(NA, NaN))
+				bioweight.confusion <<- error.conf.bioweight /
+					sum(error.conf.bioweight) * (sum(data$BioWeight) -
+					sum(data$BioWeight[corr$OtherGp])) # remove NAs
+			}	
+				
 			## Calculate error in valid data and in both suspect and trusted parts
 			error.valid.history[[step + 1]] <<-
 				error.estim.data$Actual != error.estim.data$Predicted 
@@ -841,6 +963,45 @@
 				corr$Actual[notValTrustIdx]) 
 			corr.confusion <<- confSusp.w + confTrustVal + confNotValTrust
 
+			## For cells
+			if ("Nb_cells" %in% names(data)) {
+				nCellSuspTot <- sum(data$Nb_cells[corr$Suspect &
+					!corr$OtherGp])
+				nCellSuspVal <- sum(data$Nb_cells[valSuspIdx])
+				nCellTrustVal <- sum(data$Nb_cells[valTrustIdx])
+				confSuspValCell <- xtabs(data$Nb_cells[valSuspIdx] ~
+			        corr$Actual[valSuspIdx] + corr$Predicted[valSuspIdx],
+					exclude = c(NA, NaN))
+				confTrustValCell <- xtabs(data$Nb_cells[valTrustIdx] ~
+			        corr$Actual[valTrustIdx] + corr$Predicted[valTrustIdx],
+					exclude = c(NA, NaN))
+				confSuspCell.w <- confSuspValCell / nCellSuspVal * nCellSuspTot
+				confNotValTrustCell <- xtabs(data$Nb_cells[notValTrustIdx] ~
+			        corr$Actual[notValTrustIdx] + corr$Predicted[notValTrustIdx],
+					exclude = c(NA, NaN))
+				cell.confusion <<-
+					confSuspCell.w + confTrustValCell + confNotValTrustCell
+			}
+
+			## For biovolumes
+			if ("BioWeight" %in% names(data)) {
+				nBioSuspTot <- sum(data$BioWeight[corr$Suspect & !corr$OtherGp])
+				nBioSuspVal <- sum(data$BioWeight[valSuspIdx])
+				nBioTrustVal <- sum(data$BioWeight[valTrustIdx])
+				confSuspValBio <- xtabs(data$BioWeight[valSuspIdx] ~
+			        corr$Actual[valSuspIdx] + corr$Predicted[valSuspIdx],
+					exclude = c(NA, NaN))
+				confTrustValBio <- xtabs(data$BioWeight[valTrustIdx] ~
+			        corr$Actual[valTrustIdx] + corr$Predicted[valTrustIdx],
+					exclude = c(NA, NaN))
+				confSuspBio.w <- confSuspValBio / nBioSuspVal * nBioSuspTot
+				confNotValTrustBio <- xtabs(data$BioWeight[notValTrustIdx] ~
+					corr$Actual[notValTrustIdx] + corr$Predicted[notValTrustIdx],
+					exclude = c(NA, NaN))
+				bioweight.confusion <<-
+					confSuspBio.w + confTrustValBio + confNotValTrustBio
+			}
+			
 			error.valid.history[[step + 1]] <<- testset$Actual != testset$Predicted 
 			if  (nsuspect > 0) {
 				error.suspect.history[[step + 1]] <<-
@@ -855,6 +1016,32 @@
 		}
 	}
 
+	## Compute the corrected coefficients for particles, cells, biovolume
+#   estimateCoeffs <- function () {
+#     ## For particles (colonies)
+#     col.confusion <- table(corr$Predicted[corr$Validated], corr$Actual[corr$Validated], useNA = "no") # remove NAs
+#     corr.coeffs <- ifelse(!colSums(col.confusion), rowSums(col.confusion), 
+#                           rowSums(col.confusion)/colSums(col.confusion))
+#     ## For cells
+#     if ("Nb_cells" %in% names(data)) {
+#       cell.confusion <- xtabs(data$Nb_cells[corr$Validated] ~ 
+#                                 corr$Predicted[corr$Validated] + 
+#                                 corr$Actual[corr$Validated], exclude = c(NA, NaN))
+#       corr.coeffs <- cbind(corr.coeffs, ifelse(!colSums(cell.confusion), rowSums(cell.confusion), 
+#                                              rowSums(cell.confusion)/colSums(cell.confusion)))
+#     }
+#
+#     ## For biovolumes
+#     if ("BioWeight" %in% names(data)) {
+#       bioweight.confusion <- xtabs(data$BioWeight[corr$Validated] ~ 
+#                                      corr$Predicted[corr$Validated] + 
+#                                      corr$Actual[corr$Validated], exclude = c(NA, NaN))
+#       corr.coeffs <- cbind(corr.coeffs, ifelse(!colSums(bioweight.confusion), rowSums(bioweight.confusion), 
+#                                              rowSums(bioweight.confusion)/colSums(bioweight.confusion)))
+#     }
+#     corr.coeffs
+#   }
+	
 	## Estimate error and abundance
 	## Update Validated, training set and histories
 	correct <- function () {
@@ -872,10 +1059,20 @@
 
 		estimateError()
 		estimateAbundance()
+		#estimateCoeffs()
 		
 		validated.fracs <<- c(validated.fracs, sample.size)
 		correction.history <<- cbind(correction.history,
 			rowSums(corr.confusion))
+		if ("Nb_cells" %in% names(data)) {
+			correctionCell.history <<- cbind(correctionCell.history,
+		        rowSums(cell.confusion))
+		}
+		if ("BioWeight" %in% names(data)) {
+			correctionBio.history <<- cbind(correctionBio.history,
+		        rowSums(bioweight.confusion))
+		}
+		
 		manual.history <<- cbind(manual.history, table(corr$Actual))
 		manual2.history <<- cbind(manual2.history, table(corr$Actual2))
 		setSampleSize() # Set the next subsample size
@@ -999,7 +1196,8 @@
 		print(abd)
 		
 		## Create an object with these results...
-		test <- data.frame(Id = makeId(data), data, Class = corr$Actual)
+		test <- data.frame(Id = makeId(data), data, Class = corr$Actual, Validated = corr$Validated, Suspect = corr$Suspect)
+		#test <- data.frame(Id = makeId(data), data, Class = corr$Actual)
 		attr(test, "path") <- attr(classifier, "path")
 		class(test) <- unique(c("ZI3Test", "ZITest", class(data)))
 		assign(result, test, envir = envir)
@@ -1054,6 +1252,7 @@
 			}
 			error1 <- dissimilarity(abundances, manual.history, na.rm = TRUE) * 100
 			error3 <- dissimilarity(abundances, correction.history, na.rm = TRUE) * 100
+			par(mar = c(5, 4, 4, 4) + 0.1)
 			plot(cumsum(validated.fracs) / nrow(corr) * 100, error1,
 				type = "l", xlab = "Validated fraction (%)",
 				ylab = "Dissimilarity (%)", col = "green", xlim = c(0, 100),
@@ -1074,6 +1273,7 @@
 				validated.fracs[-1]
 			suspByFrac <- nsuspect.tovalid.history / validated.fracs[-1]
 			suspByFrac[1] <- 0
+			par(mar = c(5, 4, 4, 4) + 0.1)
 			plot(fracs * 100, errByFrac * 100, type = "l", xlab = "Validated fraction (%)",
 				ylab = "Suspect and error (%)", xlim = c(0, 100), ylim = c(0, 100), col = "red",
 				main = "Suspects and error at each iteration")
@@ -1083,30 +1283,109 @@
 				col = c("black", "red"), cex = 0.8, lwd = 2)
 		
 		} else { # Should be type == "barplot"
-			fracs <- cumsum(validated.fracs[-1]) / nrow(corr)
+			thresholdDiffDiss <- 5  # Differential dissimilarity <= 5%
+			nbStep <- ceiling(nrow(data) / validated.fracs[-1][1])
 			errByFrac <- sapply(error.valid.history, sum, na.rm = TRUE) /
 				validated.fracs[-1]
 			suspByFrac <- nsuspect.tovalid.history / validated.fracs[-1]
 			#suspByFrac[1] <- 0
 			## case 1 item => projection, case more => another projection...
 			dat <- rbind(suspByFrac * 100, errByFrac * 100)
-			barplot(dat, xlab = "Validated fraction", beside = TRUE,
-				ylab = "Suspect and corrected error (%)", xlim = c(1, (1/fraction + 1)*2),
+			
+			diffDiss <- sapply(2:ncol(correction.history), function (x)
+				dissimilarity(correction.history[, x - 1], correction.history[, x],
+				na.rm = TRUE) * 100
+			)
+			xcoord <-
+				seq(0.7, ceiling(nrow(data) / validated.fracs[-1][1]) * 1.2, by = 1.2)
+			if (step < 1) {
+				suspRemain <- NA
+				stepSD <- round((errByFrac*nsuspect.history -
+					errByFrac*nsuspect.tovalid.history) / 
+                    nsuspect.tovalid.history) + (step+1)
+				idxStepSD <- stepSD
+				coordStepSD <- mean(c(xcoord[idxStepSD], xcoord[idxStepSD + 1]))
+			} else {
+				suspRemain <- c(NA, nsuspect.history[2:(step+1)] -
+					nsuspect.tovalid.history[2:(step+1)])
+				stepSD <- round(suspRemain / nsuspect.tovalid.history) + 1:(step+1)
+				if (length(which(suspRemain == 0)) > 0) {
+					idxStepSD <- which(suspRemain == 0)[1]
+				} else {
+					idxStepSD <- tail(stepSD,1)
+				}
+				coordStepSD <- mean(c(xcoord[idxStepSD], xcoord[idxStepSD + 1]))
+			}
+      		      
+			par(mfrow = c(2, 1), mar = c(4, 4, 1, 4) + 0.1)
+			bp1 <- barplot(suspRemain, #xlab = "Validation step",
+			    ylab = "Nb remaining suspects", xlim = c(0.2,
+				xcoord[ceiling(idxStepSD + (length(xcoord) - idxStepSD) / 3)]),
+			    ylim = c(0, max(suspRemain, diffDiss, na.rm = TRUE)), yaxs = "r",
+                col = "grey10", cex.axis = .7, cex.main = 1, ann = FALSE,
+				yaxt = "n", #main = "Remaining suspects and differential dissimilarity")
+			)
+			title(expression(bold("Remaining suspects") *
+				phantom("\tand\tdifferential dissimilarity")), 
+				col.main = "grey10", cex.main = 1)
+			title(expression(phantom("Remaining suspects\t") * "and" *
+				phantom("\tdifferential dissimilarity")), 
+				col.main = "black", cex.main = 1)
+			title(expression(phantom("Remaining suspects\tand\t") *
+				bold("differential dissimilarity")), 
+				col.main = "blue", cex.main = 1)
+# 			legend("top", legend = c("Remaining suspects","Diff dissimilarity"),
+# 			     fill = c("grey20","blue"), cex = .6, bty = "n", adj = c(0,0))
+			axis(side = 1, at = seq(bp1[1], by = 1.2, length.out = nbStep),
+				labels = 1:nbStep, cex.axis = .7)
+			if (step > 0) axis(side = 2, cex.axis = .7)
+      
+			par(new = TRUE)
+			plot(bp1, diffDiss, type = "o", col = "blue", ylim = c(0, 100), 
+				xlim = c(0.2, xcoord[ceiling(idxStepSD + (length(xcoord) -
+				## TODO: why '+' at the end of next line???
+				idxStepSD) / 3)]), lwd = 3, axes = FALSE, ann = FALSE) +
+			## TODO: why '+' at the end of next line???
+			axis(side = 4, col = "blue", col.axis = "blue", cex.axis = .7) + 
+			mtext("Differential dissimilarity (%)", side = 4, line = 3,
+				col = "blue")
+			abline(v = coordStepSD, lwd = 2, lty = 2, col = "dimgrey")
+			text(x = coordStepSD + .5, y = 90, "SD", srt = -90, pos = 4,
+				cex = 0.6, col = "dimgrey")
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/zooimage -r 253


More information about the Zooimage-commits mailing list