From noreply at r-forge.r-project.org  Tue Sep 29 15:22:00 2015
From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org)
Date: Tue, 29 Sep 2015 15:22:00 +0200 (CEST)
Subject: [Zooimage-commits] r253 - in pkg/zooimage: . R inst/etc inst/gui inst/gui/errorcorrection inst/planktonSorter man
Message-ID: <20150929132200.74AF31868CC@r-forge.r-project.org>

Author: phgrosjean
Date: 2015-09-29 15:21:59 +0200 (Tue, 29 Sep 2015)
New Revision: 253

Modified:
   pkg/zooimage/DESCRIPTION
   pkg/zooimage/NAMESPACE
   pkg/zooimage/NEWS
   pkg/zooimage/R/ZIClass.R
   pkg/zooimage/R/ZIRes.R
   pkg/zooimage/R/ZITrain.R
   pkg/zooimage/R/correction.R
   pkg/zooimage/R/fileutils.R
   pkg/zooimage/R/gui.R
   pkg/zooimage/R/guiutils.R
   pkg/zooimage/R/import.R
   pkg/zooimage/R/planktonSorter.R
   pkg/zooimage/R/utilities.R
   pkg/zooimage/R/zic.R
   pkg/zooimage/R/zid.R
   pkg/zooimage/R/zidb.R
   pkg/zooimage/R/zie.R
   pkg/zooimage/R/zim.R
   pkg/zooimage/R/zip.R
   pkg/zooimage/R/zis.R
   pkg/zooimage/R/zzz.R
   pkg/zooimage/inst/etc/ZooImageManual.pdf
   pkg/zooimage/inst/gui/Menus.txt
   pkg/zooimage/inst/gui/MenusZIDlgWin.txt
   pkg/zooimage/inst/gui/ToolbarsZIDlgWin.txt
   pkg/zooimage/inst/gui/errorcorrection/global.R
   pkg/zooimage/inst/gui/errorcorrection/server.R
   pkg/zooimage/inst/gui/errorcorrection/ui.R
   pkg/zooimage/inst/planktonSorter/planktonSorter.js
   pkg/zooimage/man/ZITrain.Rd
   pkg/zooimage/man/correctError.Rd
   pkg/zooimage/man/gui.Rd
   pkg/zooimage/man/import.Rd
   pkg/zooimage/man/utilities.Rd
   pkg/zooimage/man/zip.Rd
   pkg/zooimage/man/zooimage.package.Rd
Log:
Version 5.4-1 with lots of changes preparing version 6.0-0

Modified: pkg/zooimage/DESCRIPTION
===================================================================
--- pkg/zooimage/DESCRIPTION	2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/DESCRIPTION	2015-09-29 13:21:59 UTC (rev 253)
@@ -1,22 +1,25 @@
 Package: zooimage
 Type: Package
-Title: Analysis of numerical zooplankton images
-Version: 5.1-2
-Date: 2014-12-09
+Title: Analysis of Numerical Zooplankton Images
+Version: 5.4-1
+Date: 2015-09-29
 Author: Philippe Grosjean [aut, cre],
-  Kevin Denis [aut]
+  Kevin Denis [aut], Guillaume Wacquet [aut]
 Authors@R: c(person("Philippe", "Grosjean", role = c("aut", "cre"),
   email = "phgrosjean@sciviews.org"),
   person("Kevin", "Denis", role = "aut",
-  email = "kevin.denis@umons.ac.be"))
+  email = "kevin.denis@umons.ac.be"),
+  person("Guillaume", "Wacquet", role = "aut",
+  email = "guillaume.wacquet@umons.ac.be"))
 Maintainer: Philippe Grosjean
 Depends: R (>= 2.14.0), svMisc (>= 0.9-67), svDialogs (>= 0.9-53), mlearning
-Imports: filehash, jpeg, png, tiff, utils, digest, tools, shiny
+Imports: filehash, jpeg, png, tiff, utils, digest, tools, MASS, mda, shiny, DT
 Suggests: rJava, mlbench
-Description: ZooImage is a free (open source) solution for analyzing digital
+Description: A free (open source) solution for analyzing digital
   images of zooplankton. In combination with ImageJ, a free image analysis
   system, it processes digital images, measures individuals, trains for
   automatic classification of taxa, and finally, measures zooplankton samples
-  (abundances, total and partial size spectra or biomasses, etc.)
+  (abundances, total and partial size spectra or biomasses, etc.).
 License: GPL (>= 2)
 URL: http://www.sciviews.org/zooimage
+NeedsCompilation: no


Modified: pkg/zooimage/NAMESPACE
===================================================================
--- pkg/zooimage/NAMESPACE	2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/NAMESPACE	2015-09-29 13:21:59 UTC (rev 253)
@@ -21,7 +21,10 @@
 #import(RWeka)
 import(mlearning)
 #import(party)
-import(shiny)
+importFrom(MASS, lda)
+importFrom(mda, mda, gen.ridge)
+importFrom(shiny, updateSelectInput, shinyServer, reactive, isolate, renderPrint, updateTabsetPanel, renderPlot, shinyUI, fluidPage, fluidRow, sidebarPanel, selectInput, actionButton, mainPanel, tabsetPanel, tabPanel, verbatimTextOutput, plotOutput)
+importFrom(DT, dataTableOutput, renderDataTable)
 
 # planktonSorter
 export(correctError)
@@ -87,13 +90,22 @@
 # ZITrain/ZITest
 export(prepareTrain)
 export(addToTrain)
+export(compTrain)
 export(getTrain)
 export(prepareTest)
 export(addToTest)
 export(getTest)
 export(recode)
 export(template)
+export(contextSelection)
+export(addItemsToTrain)
+export(dropItemsToTrain)
+
+# ZICell
+export(cellCount)
+export(cellModel)
+export(cellCompute)
+
 # ZIRes
 export(processSample)
 export(processSampleAll)
@@ -101,7 +113,6 @@
 # Utilities
 export(addClass)
 export(calcVars)
-export(calcVarsVIS)
 export(calibrate)
 export(dropVars)
 export(ecd)
@@ -136,9 +147,11 @@
 # GUI
 export(aboutZI)
 export(acquireImg)
+export(activeLearningGUI)
 export(addVigsToTrain)
 export(analyzeClass)
 export(calib)
+export(countCellsGUI)
 export(closeAssistant)
 export(closeZooImage)
 export(collectTrain)
@@ -152,6 +165,8 @@
 export(loadObjects)
 export(makeClass)
 export(makeZid)
+export(makeZidb)
+export(makeZidbFlowCAM)
 export(makeTrain)
 export(optInOutDecimalSep)
 export(processImg)
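A note on the NAMESPACE hunk above: replacing the blanket import(shiny) with importFrom() directives pulls only the named objects into the package namespace, which avoids clashes when many packages are loaded. A minimal sketch of the same pattern, for a hypothetical package (not zooimage's actual NAMESPACE):

    # NAMESPACE of a hypothetical package using selective imports
    importFrom(MASS, lda)                    # only lda() becomes visible, not all of MASS
    importFrom(shiny, fluidPage, plotOutput) # only the UI helpers actually used
    export(myFunction)                       # myFunction is an illustrative name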
Modified: pkg/zooimage/NEWS
===================================================================
--- pkg/zooimage/NEWS	2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/NEWS	2015-09-29 13:21:59 UTC (rev 253)
@@ -1,5 +1,89 @@
 = zooimage News
 
+== Changes in zooimage 5.4-1
+
+* Correction of various bugs by Guillaume Wacquet.
+
+
+== Changes in zooimage 5.4-0
+
+* Functions to count cells in a particle (colony): cellCount() and the
+  corresponding countCellsGUI() function for access through the menu.
+
+* Function to build predictive models for the cells in particles (colonies)
+  after counting: cellModel().
+
+* Function to compute the number of cells in the particles of a new sample:
+  cellCompute().
+
+* Function to make a zidb file for FlowCAM data through the menu:
+  makeZidbFlowCAM().
+
+* Corrected the makeClass() function in the menu (missing formula).
+
+* Functions to process active learning: contextSelection(), the associated
+  addItemsToTrain() and dropItemsToTrain() functions to complete the training
+  set with validated items, and the corresponding activeLearningGUI() function
+  for access through the menu. Integrated in the classification process.
+
+* Function compTrain() to compare two training sets and highlight differences.
+
+* Translation of the user manual into French, and update to include all the
+  changes.
+
+* Adaptation of the menus to also include the new features.
+
+
+== Changes in zooimage 5.3-0
+
+* calcVarsVIS() is now included in calcVars() and no longer accessible as a
+  top-level function (to avoid duplicate code).
+
+* ecd() and ecdCell() are now merged into ecd(), using the argument cells = 1.
+
+* The errorCorrection() and planktonSorterPage() functions now use the new
+  interface to get the R httpd server port, for R svn rev >= 67550.
+
+* In the shiny app, dataTableOutput() is now called from the DT package. A
+  dependency on DT has been added.
+
+* The planktonSorter HTML page was sometimes too small, so that the last group
+  wrapped. Solved by enlarging the page by 1 pixel per group.
+
+* A bug in processSample() prevented calculation when both 'keep=' and a data
+  frame for 'biomass=' were provided simultaneously.
+
+
+== Changes in zooimage 5.2-0
+
+* importFlowCAM() can now deal with color FlowCAM images (but they are first
+  converted into grayscale, because background calibration images are recorded
+  as grayscale by Visual Spreadsheet -- at least in the tested version, 3.2.3 --
+  hence we cannot subtract the background of the vignettes in color mode)!
+
+* importFlowCAM() now prints a message to indicate the progression of the
+  vignette importation.
+
+* For importFlowCAM(), the default value of the argument 'rgb.vigs' is changed
+  from 'TRUE' to 'FALSE'.
+
+* Functions to count cells in a particle (colony): cellCount() and the
+  corresponding cellCountGUI() function for access through the menu.
+
+* New utility function ecdCell() to calculate the ECD for one cell in a colony.
+
+## TODO:
+* Functions to build predictive models for cells in particles (colonies) after
+  counting: cellModel().
+
+* Function to compute the number of cells in the particles of a new sample:
+  cellCompute().
+
+
+
+* Function to make a zidb file for FlowCAM data through the menu:
+  zidbMakeFlowCAMGUI().
+
+* Correct the makeClass() function in the menu (missing formula).
+
+
 == Changes in zooimage 5.1-0
 
 * calcVars()/calcVarsVIS() and dropVars() are reworked to use only FIT_xxx
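The cell-counting entries above describe a three-step workflow: count cells manually, fit a model on the counts, then predict counts for new samples. The sketch below shows how the exported functions could plausibly chain together; the argument names and file names are illustrative assumptions, not the documented signatures (see the package man pages for those):

    ## Hypothetical chaining of the new cell-counting functions; argument
    ## names and sample files are assumptions for illustration only.
    library(zooimage)
    counts <- cellCount("Sample01.zidb")        # 1) count cells per particle manually
    mod    <- cellModel(counts)                 # 2) fit a predictive model on the counts
    cells  <- cellCompute("Sample02.zidb", mod) # 3) estimate cells/particle in a new sample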
Modified: pkg/zooimage/R/ZIClass.R
===================================================================
--- pkg/zooimage/R/ZIClass.R	2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/R/ZIClass.R	2015-09-29 13:21:59 UTC (rev 253)
@@ -1,4 +1,4 @@
-## Copyright (c) 2004-2012, Ph. Grosjean
+## Copyright (c) 2004-2015, Ph. Grosjean
 ##
 ## This file is part of ZooImage
 ##

Modified: pkg/zooimage/R/ZIRes.R
===================================================================
--- pkg/zooimage/R/ZIRes.R	2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/R/ZIRes.R	2015-09-29 13:21:59 UTC (rev 253)
@@ -1,4 +1,4 @@
-## Copyright (c) 2004-2012, Ph. Grosjean
+## Copyright (c) 2004-2015, Ph. Grosjean
 ##
 ## This file is part of ZooImage
 ##
@@ -103,12 +103,33 @@
 }
 
 ## Calculate abundances, biomasses and size spectra per class in a sample
+#processSample <- function (x, sample, keep = NULL, detail = NULL, classes = "both",
+#header = c("Abd", "Bio"), cells = NULL, biomass = NULL, breaks = NULL)
 processSample <- function (x, sample, keep = NULL, detail = NULL, classes = "both",
 header = c("Abd", "Bio"), biomass = NULL, breaks = NULL)
 {
     ## Fix ECD in case of FIT_VIS data
     if ("FIT_Area_ABD" %in% names(x))
         x$ECD <- ecd(x$FIT_Area_ABD)
 
+    ## Do we compute the number of cells and the ECD per cell?
+    ## But see version hereunder!
+#### TODO: compute ECD using number of cells per colonies!
+####    if (!is.null(cells)) {
+####        x$Nb_cells <- computeNbCells(x, cells)
+####        x$ECD_cells <- ecd(x$FIT_Area_ABD, x$Nb_cells)
+####    }
+#### PhG: here, computation before argument checking is not good!
+#### PhG: cells points to a file. Not good! We ask for a specific object instead
+
+    ## Do we compute the number of cells and the ECD per cell?
+    ## PhG: should not rely on a file here!
+####    if (!is.null(cells) && file.exists(cells)) {
+####        ## Must be a ZICell model here! predict() iterates on all items
+####        ## of the list to compute cells for all classes!
+####        x$Nb_cells <- predict(cells, x)
+####        x$ECD_cells <- ecd(x$FIT_Area_ABD, x$Nb_cells)
+####    }
+
     ## Check arguments
     if (missing(sample)) {
         sample <- unique(sampleInfo(x$Label, type = "sample", ext = ""))
@@ -172,6 +193,7 @@
         }
         x <- x[x$Cl %in% keep, ] # Select keep levels
     }
+    Cl <- as.character(x$Cl)
     if (NROW(x) == 0) {
         warning("no data left for this sample in 'x' when 'keep' is applied")
         return(NULL)
@@ -211,12 +233,16 @@
             x$P2 <- biomass[2]
             x$P3 <- biomass[3]
         } else stop("wrong 'biomass', must be NULL, a vector of 3 values or a data frame with Class, P1, P2 and P3")
-        if (!is.numeric(x$ECD)) stop("'ECD' required for biomasses")
-        x$BioWeight <- (x$P1 * x$ECD^x$P3 + x$P2) * x$Dil
+        ## Prefer using ECD_cells and Nb_cells if they exist
+        if (is.numeric(x$ECD_cells)) {
+            x$BioWeight <- (x$P1 * x$ECD_cells^x$P3 + x$P2) * x$Dil * x$Nb_cells
+        } else {
+            if (!is.numeric(x$ECD)) stop("'ECD' required for biomasses")
+            x$BioWeight <- (x$P1 * x$ECD^x$P3 + x$P2) * x$Dil
+        }
     }
 
     ## Split among detail, if provided
-    Cl <- as.character(x$Cl)
     if (length(detail)) { # We want more details for one or more groups...
         detail <- as.character(detail)
@@ -225,9 +251,15 @@
         Cl[!Cl %in% detail] <- "[other]"
         x$Cl <- Cl
 
-        res <- tapply(x$Dil, Cl, sum, na.rm = TRUE)
-        res <- res[c(detail, "[other]")]
-        res <- c(res, '[total]' = sum(x$Dil, na.rm = TRUE))
+        if (is.numeric(x$Nb_cells)) {
+            res <- tapply(x$Dil * x$Nb_cells, Cl, sum, na.rm = TRUE)
+            res <- res[c(detail, "[other]")]
+            res <- c(res, '[total]' = sum(x$Dil * x$Nb_cells, na.rm = TRUE))
+        } else {
+            res <- tapply(x$Dil, Cl, sum, na.rm = TRUE)
+            res <- res[c(detail, "[other]")]
+            res <- c(res, '[total]' = sum(x$Dil, na.rm = TRUE))
+        }
         names(res) <- paste(header[1], names(res))
 
         if (!missing(biomass)) {
@@ -239,7 +271,11 @@
         }
     } else {
         # Total abundance (and biomass) only
-        res <- sum(x$Dil, na.rm = TRUE)
+        if (is.numeric(x$Nb_cells)) {
+            res <- sum(x$Dil * x$Nb_cells, na.rm = TRUE)
+        } else {
+            res <- sum(x$Dil, na.rm = TRUE)
+        }
         if (!missing(biomass))
             res <- c(res, sum(x$BioWeight, na.rm = TRUE))
         names(res) <- paste(header, "[total]")
@@ -250,7 +286,7 @@
     res <- structure(data.frame(Id = sample, t(res), check.names = FALSE),
         class = c("ZI3Res", "ZIRes", "data.frame"))
 
-    ## Do we calculate size spectra?
+    ## Do we calculate size spectra? (always by colonies, only)!
     if (length(breaks)) {
         if (!is.numeric(breaks) || length(breaks) < 2)
             stop("'breaks' must be a vector of two or more numerics or NULL")
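Two formulas drive the ZIRes.R changes above. First, the equivalent circular diameter of a particle of area A is 2*sqrt(A/pi); dividing the area by the number of cells before taking the square root gives a per-cell ECD, which is what the ecd(x$FIT_Area_ABD, x$Nb_cells) calls compute. Second, abundance and biomass are weighted by Nb_cells when available, so a colony contributes once per cell. A self-contained numerical sketch of both ideas (toy data; the ecd() body shown is an assumption consistent with the NEWS entry, not necessarily the package's exact code):

    ## Per-cell equivalent circular diameter: ecd(area) == ecd(area, cells = 1)
    ecd <- function(area, cells = 1)
        2 * sqrt(area / cells / pi)

    Dil      <- c(2, 2, 2)   # dilution coefficient of each particle
    Nb_cells <- c(1, 5, 1)   # the second particle is a 5-cell colony
    sum(Dil)                 # 6  -- abundance counted by colonies
    sum(Dil * Nb_cells)      # 14 -- abundance counted by cells

    ## Biomass: (P1 * ECD^P3 + P2) * Dil, multiplied by Nb_cells when
    ## per-cell ECDs are used (cf. the BioWeight lines in the hunk above)
    P1 <- 0.1; P2 <- 0; P3 <- 2.5
    ECD_cells <- ecd(c(120, 250, 80), Nb_cells)
    sum((P1 * ECD_cells^P3 + P2) * Dil * Nb_cells)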
Modified: pkg/zooimage/R/ZITrain.R
===================================================================
--- pkg/zooimage/R/ZITrain.R	2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/R/ZITrain.R	2015-09-29 13:21:59 UTC (rev 253)
@@ -1,4 +1,4 @@
-## Copyright (c) 2004-2012, Ph. Grosjean
+## Copyright (c) 2004-2015, Ph. Grosjean
 ##
 ## This file is part of ZooImage
 ##
@@ -160,7 +160,7 @@
         ## Link .zidb database to R objects in memory
         Zidb <- zidbLink(zidbfiles[i])
         AllItems <- ls(Zidb)
-        Vigns <- AllItems[-grep("_dat1", AllItems)]
+        Vigns <- AllItems[!grepl("_dat1", AllItems)]
         ## Extract all vignettes in their class subdirectory
         imgext <- Zidb[[".ImageType"]]
         ## Get path for the vignettes and copy them there
@@ -244,7 +244,7 @@
         ## Link .zidb database to R objects in memory
         Zidb <- zidbLink(zidbfile)
         AllItems <- ls(Zidb)
-        Vigns <- AllItems[-grep("_dat1", AllItems)]
+        Vigns <- AllItems[!grepl("_dat1", AllItems)]
         ## Copy all vignettes in the TopPath directory
         imgext <- Zidb[[".ImageType"]]
         ## Get path for the vignettes and copy them there
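The -grep() to !grepl() replacement above fixes a classic R trap rather than style: when the pattern matches nothing, grep() returns integer(0), and x[-integer(0)] selects nothing at all, silently dropping every vignette; negating grepl() keeps them. A quick demonstration:

    x <- c("img1", "img2", "sample_dat1")
    x[-grep("_dat1", x)]    # "img1" "img2"  -- fine while there is a match
    x[!grepl("_dat1", x)]   # "img1" "img2"  -- same result

    y <- c("img1", "img2")  # no "_dat1" entry at all
    y[-grep("_dat1", y)]    # character(0)   -- everything dropped!
    y[!grepl("_dat1", y)]   # "img1" "img2"  -- correct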
Modified: pkg/zooimage/R/correction.R
===================================================================
--- pkg/zooimage/R/correction.R	2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/R/correction.R	2015-09-29 13:21:59 UTC (rev 253)
@@ -1,3 +1,20 @@
+## Copyright (c) 2004-2015, Ph. Grosjean
+##
+## This file is part of ZooImage
+##
+## ZooImage is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 2 of the License, or
+## (at your option) any later version.
+##
+## ZooImage is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with ZooImage. If not, see <http://www.gnu.org/licenses/>.
+
 ## TODO: disable next button when in last fraction
 ## TODO: save temporary and final results in the zidb file
 ## TODO: correct bugs with the back button
@@ -97,7 +114,6 @@
     }
 }
 
-
 #### Functions used only by errorCorrection() ##################################
 ## Calculation of the global error based on validated items
 ## /!\ NOT based on items randomly selected --> Bad approximation
@@ -348,9 +364,9 @@
 ## and how to retrieve class then???
 ## Group options in a single object too
 errorCorrection <- function (data, classifier, mode = "validation",
-fraction = 0.05, sample.min = 100, grp.min = 2, random.sample = 0.1,
-algorithm = "rf", diff.max = 0.2, prop.bio = NULL,
-zidb = NULL, testdir = NULL, id = NULL,
+fraction = 0.05, sample.min = 100, sample.max = 200, grp.min = 2,
+random.sample = 0.1, algorithm = "rf", diff.max = 0.2, prop.bio = NULL,
+rare = 0.01, zidb = NULL, testdir = NULL, id = NULL,
 result = ".last_valid", envir = parent.frame())
 {
     #### Parameters explanations
     #### Data and classifier
@@ -379,6 +395,12 @@
     sample.min <- as.integer(sample.min)[1]
     if (sample.min < 1)
         stop("sample.min must be a positive integer")
+    ## sample.max -- maximum number of particles to validate at each step
+    sample.max <- as.integer(sample.max)[1]
+    if (sample.max < 1)
+        stop("sample.max must be a positive integer")
+    if (sample.max < sample.min)
+        stop("sample.max must be greater than or equal to sample.min")
     ## grp.min -- minimum number of particles of each group to validate
     grp.min <- as.integer(grp.min)[1]
     if (grp.min < 1 || grp.min > sample.min)
@@ -390,13 +412,17 @@
 
     #### Parameters for the detection of suspects
     ## algorithm -- algorithm used to detect suspect particles
-    ## diff.max -- maximum toalerated difference in probabilities for class identification
+    ## diff.max -- maximum tolerated difference in probabilities for class identification
     diff.max <- as.numeric(diff.max)[1]
     if (diff.max < 0)
        stop("diff.max must be a positive number or zero")
     ## proba.bio -- groups probabilities, using biological information
     if (length(prop.bio) && (!is.numeric(prop.bio) || is.null(names(prop.bio))))
         stop("prop.bio must be a named vector (groups) of numbers")
+    ## rare -- detection of rare items
+    rare <- as.numeric(rare)[1]
+    if (rare < 0 || rare > 0.2)
+        stop("rare must be between 0 and 0.2")
     ## zidb -- path to the zidbfile to manually validate
     ## testdir -- path of the directory used for manual validation
@@ -431,7 +457,12 @@
     manual.history <- NULL      # history of the manual confusion matrix
     manual2.history <- NULL     # history of manual + 2nd ident confusion matrix
     corr.confusion <- NULL      # corrected confusion matrix
+    classRare <- NULL           # string with the list of classes considered as rare
+    cell.confusion <- NULL      # corrected confusion matrix for cells
+    bioweight.confusion <- NULL # corrected confusion matrix for bioweight
     correction.history <- NULL  # history of the correction confusion matrix
+    correctionCell.history <- NULL # history of the correction confusion matrix for cells
+    correctionBio.history <- NULL  # history of the correction confusion matrix for bioweight
     error.estim.data <- NULL    # data used to estimate the error
     error.estim <- NULL         # history of the error estimation
     error.estim.history <- NULL # evolution of the error estimation
@@ -523,11 +554,16 @@
         }
         predicted2 <- secondIdent(levels(predicted))
 
+        predTable <- table(predicted)
+        prop <- predTable / sum(predTable)
+        classRare <<- names(which(prop < rare))
+
         ## Creation of corr object
         corr <<- data.frame(Actual = predicted, Actual2 = predicted2,
             Predicted = predicted, Predicted2 = predicted2,
             Validated = FALSE, Error = error, Step = step,
             Suspect = rep(TRUE, nobs),
-            RdValidated = rep(Inf, nobs), OtherGp = rep(FALSE, nobs))
+            Rare = predicted %in% classRare, RdValidated = rep(Inf, nobs),
+            OtherGp = rep(FALSE, nobs))
 
         ## Confusion matrix of original classifier
         train.validated <- attr(classifier, "response")
@@ -575,18 +611,23 @@
         ## Increment step (should be 0 now, because initial value is -1)
         step <<- step + 1
+        ## Determine the number of vignettes to manually validate
         setSampleSize()
     }
 
     ## Compute the size of the next subsample: update sample.size
     setSampleSize <- function () {
-        sample.size <<- round(min(nrow(corr[!corr$Validated, ]), # How much remains?
-            ## Or number of items we want to take
-            max(nrow(data) * fraction, # Items to take
-            sample.min, # Minimum items we can take
-            ## TODO: check the following one!
-            grp.min * length(table(predicted))))) # Minimum from groups
+        ## Number of items we want to take
+        sample.size <<- ceiling(nrow(data) * fraction)
+        ## No less than sample.min
+        sample.size <<- max(sample.size, sample.min)
+        ## According to the complexity of the training set, possibly take more
+        sample.size <<- max(sample.size, grp.min * length(levels(predicted)))
+        ## ... but no more than sample.max
+        sample.size <<- min(sample.size, sample.max)
+        ## Or however much remains to validate
+        sample.size <<- min(sample.size, nrow(corr[!corr$Validated, ]))
     }
 
     ## Determine the subsample to validate
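The rewritten setSampleSize() above clamps the subsample in a fixed order: target fraction of the data, raised to sample.min and to grp.min items per group, capped at sample.max, then at what remains to validate. The same logic as a standalone function with a worked call (toy values; the real code updates sample.size in the enclosing environment via <<-):

    nextSampleSize <- function(n, remaining, ngroups, fraction = 0.05,
        sample.min = 100, sample.max = 200, grp.min = 2)
    {
        size <- ceiling(n * fraction)        # number of items we want to take
        size <- max(size, sample.min)        # no less than sample.min
        size <- max(size, grp.min * ngroups) # more if many groups to cover
        size <- min(size, sample.max)        # ... but no more than sample.max
        min(size, remaining)                 # or however much remains
    }
    nextSampleSize(n = 1000, remaining = 950, ngroups = 30)
    ## [1] 100  (max(50, 100, 60) = 100, under both caps of 200 and 950)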
@@ -628,7 +669,7 @@
         newstep[newstep == -1] <- step
         corr$Step[randomsample.ids] <<- newstep
         notvalid.ids <- ids[!corr$Validated & corr$RdValidated == step]
-        ## Number of items to valid in order to acheive sample.size
+        ## Number of items to validate in order to achieve sample.size
         numSample <- sample.size - length(notvalid.ids)
         if (numSample > 0) {
             ## Randomly select suspect items not validated
@@ -650,7 +691,63 @@
                 corr$Step[trustsample.ids] <<- step
             }
         }
-    }
+    }
+
+    ############### stratified random sampling ###############
+#   if (numSample > 0) {
+#       ## Select the same number of suspect items not validated in each class
+#       suspnotval.ids <- ids[!corr$Validated & corr$Suspect &
+#           is.infinite(corr$RdValidated) & corr$Step == -1]
+#       if (length(suspnotval.ids)) {
+#           splitGp <- split(suspnotval.ids, list(corr[suspnotval.ids,]$Predicted))
+#           strat.samples <- lapply(splitGp, function(x) x[sample(1:NROW(x),
+#               min(NROW(x), round(numSample/length(unique(corr$Predicted[as.numeric(suspnotval.ids)])))),
+#               replace = FALSE)])
+#           suspsample.ids <- as.numeric(do.call(c, strat.samples))
+#           corr$Step[suspsample.ids] <<- step
+#           numSample <- numSample - length(suspsample.ids)
+#       }
+#
+#       if (numSample > 0) {
+#           ## If not completed, randomly select suspect items not validated
+#           suspnotval.ids <- ids[!corr$Validated & corr$Suspect &
+#               is.infinite(corr$RdValidated) & corr$Step == -1]
+#           if (length(suspnotval.ids)) {
+#               suspsample.ids <- as.numeric(sample(suspnotval.ids,
+#                   size = min(numSample, length(suspnotval.ids))))
+#               corr$Step[suspsample.ids] <<- step
+#               numSample <- numSample - length(suspsample.ids)
+#           }
+#       }
+#
+#       if (numSample > 0) {
+#           ## If not completed, select the same number of trusted items not validated in each class
+#           trustnotval.ids <- ids[!corr$Validated & !corr$Suspect &
+#               is.infinite(corr$RdValidated) & corr$Step == -1]
+#           if (length(trustnotval.ids)) {
+#               splitGp <- split(trustnotval.ids, list(corr[trustnotval.ids,]$Predicted))
+#               strat.samples <- lapply(splitGp, function(x) x[sample(1:NROW(x),
+#                   min(NROW(x), round(numSample/length(unique(corr$Predicted[as.numeric(trustnotval.ids)])))),
+#                   replace = FALSE)])
+#               trustsample.ids <- as.numeric(do.call(c, strat.samples))
+#               corr$Step[trustsample.ids] <<- step
+#               numSample <- numSample - length(trustsample.ids)
+#           }
+#       }
+#
+#       if (numSample > 0) {
+#           ## If not completed, randomly select trusted items not validated
+#           trustnotval.ids <- ids[!corr$Validated & !corr$Suspect &
+#               is.infinite(corr$RdValidated) & corr$Step == -1]
+#           if (length(trustnotval.ids)) {
+#               trustsample.ids <- as.numeric(sample(trustnotval.ids,
+#                   size = min(numSample, length(trustnotval.ids))))
+#               corr$Step[trustsample.ids] <<- step
+#               numSample <- numSample - length(trustsample.ids)
+#           }
+#       }
+#   }
+    ############### ############### ###############
+
     nsuspect.tovalid <- length(ids[corr$Step == step & corr$Suspect])
     ntrusted.tovalid <- length(ids[corr$Step == step & !corr$Suspect])
     nsuspect.history <<- c(nsuspect.history, nsuspect)
@@ -662,7 +759,11 @@
     if (mode != "stat") {
         ## Make sure the R Httpd server is started
         tools <- getNamespace("tools")
-        port <- tools$httpdPort
+        if (R.Version()$`svn rev` >= 67550) {
+            port <- tools::startDynamicHelp(NA)
+        } else {
+            port <- tools$httpdPort
+        }
         if (port == 0)
             port <- startDynamicHelp(TRUE)
         if (port == 0)
             stop("Impossible to start the R httpd server")
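Context for the port hunk above: around svn rev 67550 (during R 3.2.0 development), the way the tools namespace exposes the port of R's internal httpd server changed, which is why the old tools$httpdPort lookup is kept only for older R, while newer R queries the port via tools::startDynamicHelp(NA). A version-safe accessor in the spirit of that hunk (a sketch under that assumption, not the package's exact code):

    ## Sketch: get the R httpd server port across R versions
    getHttpdPort <- function() {
        tools <- getNamespace("tools")
        svnrev <- as.integer(R.Version()$`svn rev`)
        port <- if (!is.na(svnrev) && svnrev >= 67550) {
            tools::startDynamicHelp(NA)  # query the port, starting only if needed
        } else {
            tools$httpdPort              # older R stored the port as a variable
        }
        if (port == 0)
            port <- tools::startDynamicHelp(TRUE)  # force-start the server
        port
    }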
@@ -817,6 +918,27 @@
             error.estim.data$Actual, useNA = "no") # remove NAs
         corr.confusion <<- error.conf / sum(error.conf) *
             (nrow(data) - sum(corr$OtherGp)) # remove NAs
+
+        ## For cells
+        if ("Nb_cells" %in% names(data)) {
+            error.conf.cell <- xtabs(data$Nb_cells[corr$Step == step] ~
+                error.estim.data$Actual + error.estim.data$Predicted,
+                exclude = c(NA, NaN))
+            cell.confusion <<- error.conf.cell /
+                sum(error.conf.cell) * (sum(data$Nb_cells) -
+                sum(data$Nb_cells[corr$OtherGp])) # remove NAs
+        }
+
+        ## For biovolumes
+        if ("BioWeight" %in% names(data)) {
+            error.conf.bioweight <- xtabs(data$BioWeight[corr$Step == step] ~
+                error.estim.data$Actual + error.estim.data$Predicted,
+                exclude = c(NA, NaN))
+            bioweight.confusion <<- error.conf.bioweight /
+                sum(error.conf.bioweight) * (sum(data$BioWeight) -
+                sum(data$BioWeight[corr$OtherGp])) # remove NAs
+        }
+
         ## Calculate error in valid data and in both suspect and trusted parts
         error.valid.history[[step + 1]] <<-
             error.estim.data$Actual != error.estim.data$Predicted
@@ -841,6 +963,45 @@
             corr$Actual[notValTrustIdx])
         corr.confusion <<- confSusp.w + confTrustVal + confNotValTrust
 
+        ## For cells
+        if ("Nb_cells" %in% names(data)) {
+            nCellSuspTot <- sum(data$Nb_cells[corr$Suspect &
+                !corr$OtherGp])
+            nCellSuspVal <- sum(data$Nb_cells[valSuspIdx])
+            nCellTrustVal <- sum(data$Nb_cells[valTrustIdx])
+            confSuspValCell <- xtabs(data$Nb_cells[valSuspIdx] ~
+                corr$Actual[valSuspIdx] + corr$Predicted[valSuspIdx],
+                exclude = c(NA, NaN))
+            confTrustValCell <- xtabs(data$Nb_cells[valTrustIdx] ~
+                corr$Actual[valTrustIdx] + corr$Predicted[valTrustIdx],
+                exclude = c(NA, NaN))
+            confSuspCell.w <- confSuspValCell / nCellSuspVal * nCellSuspTot
+            confNotValTrustCell <- xtabs(data$Nb_cells[notValTrustIdx] ~
+                corr$Actual[notValTrustIdx] + corr$Predicted[notValTrustIdx],
+                exclude = c(NA, NaN))
+            cell.confusion <<-
+                confSuspCell.w + confTrustValCell + confNotValTrustCell
+        }
+
+        ## For biovolumes
+        if ("BioWeight" %in% names(data)) {
+            nBioSuspTot <- sum(data$BioWeight[corr$Suspect & !corr$OtherGp])
+            nBioSuspVal <- sum(data$BioWeight[valSuspIdx])
+            nBioTrustVal <- sum(data$BioWeight[valTrustIdx])
+            confSuspValBio <- xtabs(data$BioWeight[valSuspIdx] ~
+                corr$Actual[valSuspIdx] + corr$Predicted[valSuspIdx],
+                exclude = c(NA, NaN))
+            confTrustValBio <- xtabs(data$BioWeight[valTrustIdx] ~
+                corr$Actual[valTrustIdx] + corr$Predicted[valTrustIdx],
+                exclude = c(NA, NaN))
+            confSuspBio.w <- confSuspValBio / nBioSuspVal * nBioSuspTot
+            confNotValTrustBio <- xtabs(data$BioWeight[notValTrustIdx] ~
+                corr$Actual[notValTrustIdx] + corr$Predicted[notValTrustIdx],
+                exclude = c(NA, NaN))
+            bioweight.confusion <<-
+                confSuspBio.w + confTrustValBio + confNotValTrustBio
+        }
+
         error.valid.history[[step + 1]] <<- testset$Actual != testset$Predicted
         if (nsuspect > 0) {
             error.suspect.history[[step + 1]] <<-
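All the xtabs() calls in this hunk rely on the same base-R feature: with a numeric vector on the left-hand side of the formula, xtabs() sums those values per Actual x Predicted combination, so the confusion matrix is expressed in cells (or biomass) instead of particles. A self-contained illustration:

    actual    <- factor(c("A", "A", "B", "B"))
    predicted <- factor(c("A", "B", "B", "B"))
    cells     <- c(10, 5, 2, 8)        # cells counted in each particle

    table(actual, predicted)           # particle-level confusion matrix
    xtabs(cells ~ actual + predicted)  # same table, weighted by cell counts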
confNotValTrustBio + } + error.valid.history[[step + 1]] <<- testset$Actual != testset$Predicted if (nsuspect > 0) { error.suspect.history[[step + 1]] <<- @@ -855,6 +1016,32 @@ } } + ## Compute the corrected coefficients for particles, cells, biovolume +# estimateCoeffs <- function () { +# ## For particles (colonies) +# col.confusion <- table(corr$Predicted[corr$Validated], corr$Actual[corr$Validated], useNA = "no") # remove NAs +# corr.coeffs <- ifelse(!colSums(col.confusion), rowSums(col.confusion), +# rowSums(col.confusion)/colSums(col.confusion)) +# ## For cells +# if ("Nb_cells" %in% names(data)) { +# cell.confusion <- xtabs(data$Nb_cells[corr$Validated] ~ +# corr$Predicted[corr$Validated] + +# corr$Actual[corr$Validated], exclude = c(NA, NaN)) +# corr.coeffs <- cbind(corr.coeffs, ifelse(!colSums(cell.confusion), rowSums(cell.confusion), +# rowSums(cell.confusion)/colSums(cell.confusion))) +# } +# +# ## For biovolumes +# if ("BioWeight" %in% names(data)) { +# bioweight.confusion <- xtabs(data$BioWeight[corr$Validated] ~ +# corr$Predicted[corr$Validated] + +# corr$Actual[corr$Validated], exclude = c(NA, NaN)) +# corr.coeffs <- cbind(corr.coeffs, ifelse(!colSums(bioweight.confusion), rowSums(bioweight.confusion), +# rowSums(bioweight.confusion)/colSums(bioweight.confusion))) +# } +# corr.coeffs +# } + ## Estimate error and abundance ## Update Validated, training set and histories correct <- function () { @@ -872,10 +1059,20 @@ estimateError() estimateAbundance() + #estimateCoeffs() validated.fracs <<- c(validated.fracs, sample.size) correction.history <<- cbind(correction.history, rowSums(corr.confusion)) + if ("Nb_cells" %in% names(data)) { + correctionCell.history <<- cbind(correctionCell.history, + rowSums(cell.confusion)) + } + if ("BioWeight" %in% names(data)) { + correctionBio.history <<- cbind(correctionBio.history, + rowSums(bioweight.confusion)) + } + manual.history <<- cbind(manual.history, table(corr$Actual)) manual2.history <<- cbind(manual2.history, table(corr$Actual2)) setSampleSize() # Set the next subsample size @@ -999,7 +1196,8 @@ print(abd) ## Create an object with these results... 
-        test <- data.frame(Id = makeId(data), data, Class = corr$Actual)
+        test <- data.frame(Id = makeId(data), data, Class = corr$Actual, Validated = corr$Validated, Suspect = corr$Suspect)
+        #test <- data.frame(Id = makeId(data), data, Class = corr$Actual)
         attr(test, "path") <- attr(classifier, "path")
         class(test) <- unique(c("ZI3Test", "ZITest", class(data)))
         assign(result, test, envir = envir)
@@ -1054,6 +1252,7 @@
         }
         error1 <- dissimilarity(abundances, manual.history, na.rm = TRUE) * 100
         error3 <- dissimilarity(abundances, correction.history, na.rm = TRUE) * 100
+        par(mar = c(5, 4, 4, 4) + 0.1)
         plot(cumsum(validated.fracs) / nrow(corr) * 100, error1, type = "l",
             xlab = "Validated fraction (%)", ylab = "Dissimilarity (%)",
             col = "green", xlim = c(0, 100),
@@ -1074,6 +1273,7 @@
             validated.fracs[-1]
         suspByFrac <- nsuspect.tovalid.history / validated.fracs[-1]
         suspByFrac[1] <- 0
+        par(mar = c(5, 4, 4, 4) + 0.1)
         plot(fracs * 100, errByFrac * 100, type = "l",
             xlab = "Validated fraction (%)", ylab = "Suspect and error (%)",
             xlim = c(0, 100), ylim = c(0, 100), col = "red",
             main = "Suspects and error at each iteration")
@@ -1083,30 +1283,109 @@
             col = c("black", "red"), cex = 0.8, lwd = 2)
     } else {
         # Should be type == "barplot"
-        fracs <- cumsum(validated.fracs[-1]) / nrow(corr)
+        thresholdDiffDiss <- 5 # Differential dissimilarity <= 5%
+        nbStep <- ceiling(nrow(data) / validated.fracs[-1][1])
         errByFrac <- sapply(error.valid.history, sum, na.rm = TRUE) /
             validated.fracs[-1]
         suspByFrac <- nsuspect.tovalid.history / validated.fracs[-1]
         #suspByFrac[1] <- 0
         ## case 1 item => projection, case more => another projection...
         dat <- rbind(suspByFrac * 100, errByFrac * 100)
-        barplot(dat, xlab = "Validated fraction", beside = TRUE,
-            ylab = "Suspect and corrected error (%)", xlim = c(1, (1/fraction + 1)*2),
+
+        diffDiss <- sapply(2:ncol(correction.history), function (x)
+            dissimilarity(correction.history[, x - 1], correction.history[, x],
+                na.rm = TRUE) * 100
+        )
+        xcoord <-
+            seq(0.7, ceiling(nrow(data) / validated.fracs[-1][1]) * 1.2, by = 1.2)
+        if (step < 1) {
+            suspRemain <- NA
+            stepSD <- round((errByFrac*nsuspect.history -
+                errByFrac*nsuspect.tovalid.history) /
+                nsuspect.tovalid.history) + (step+1)
+            idxStepSD <- stepSD
+            coordStepSD <- mean(c(xcoord[idxStepSD], xcoord[idxStepSD + 1]))
+        } else {
+            suspRemain <- c(NA, nsuspect.history[2:(step+1)] -
+                nsuspect.tovalid.history[2:(step+1)])
+            stepSD <- round(suspRemain / nsuspect.tovalid.history) + 1:(step+1)
+            if (length(which(suspRemain == 0)) > 0) {
+                idxStepSD <- which(suspRemain == 0)[1]
+            } else {
+                idxStepSD <- tail(stepSD, 1)
+            }
+            coordStepSD <- mean(c(xcoord[idxStepSD], xcoord[idxStepSD + 1]))
+        }
+
+        par(mfrow = c(2, 1), mar = c(4, 4, 1, 4) + 0.1)
+        bp1 <- barplot(suspRemain, #xlab = "Validation step",
+            ylab = "Nb remaining suspects", xlim = c(0.2,
+            xcoord[ceiling(idxStepSD + (length(xcoord) - idxStepSD) / 3)]),
+            ylim = c(0, max(suspRemain, diffDiss, na.rm = TRUE)), yaxs = "r",
+            col = "grey10", cex.axis = .7, cex.main = 1, ann = FALSE,
+            yaxt = "n", #main = "Remaining suspects and differential dissimilarity")
+        )
+        title(expression(bold("Remaining suspects") *
+            phantom("\tand\tdifferential dissimilarity")),
+            col.main = "grey10", cex.main = 1)
+        title(expression(phantom("Remaining suspects\t") * "and" *
+            phantom("\tdifferential dissimilarity")),
+            col.main = "black", cex.main = 1)
+        title(expression(phantom("Remaining suspects\tand\t") *
+            bold("differential dissimilarity")),
+            col.main = "blue", cex.main = 1)
c("Remaining suspects","Diff dissimilarity"), +# fill = c("grey20","blue"), cex = .6, bty = "n", adj = c(0,0)) + axis(side = 1, at = seq(bp1[1], by = 1.2, length.out = nbStep), + labels = 1:nbStep, cex.axis = .7) + if (step > 0) axis(side = 2, cex.axis = .7) + + par(new = TRUE) + plot(bp1, diffDiss, type = "o", col = "blue", ylim = c(0, 100), + xlim = c(0.2, xcoord[ceiling(idxStepSD + (length(xcoord) - + ## TODO: why '+' at the end of next line??? + idxStepSD) / 3)]), lwd = 3, axes = FALSE, ann = FALSE) + + ## TODO: why '+' at the end of next line??? + axis(side = 4, col = "blue", col.axis = "blue", cex.axis = .7) + + mtext("Differential dissimilarity (%)", side = 4, line = 3, + col = "blue") + abline(v = coordStepSD, lwd = 2, lty = 2, col = "dimgrey") + text(x = coordStepSD + .5, y = 90, "SD", srt = -90, pos = 4, + cex = 0.6, col = "dimgrey") [TRUNCATED] To get the complete diff run: svnlook diff /svnroot/zooimage -r 253