[Zooimage-commits] r253 - in pkg/zooimage: . R inst/etc inst/gui inst/gui/errorcorrection inst/planktonSorter man
noreply@r-forge.r-project.org
Tue Sep 29 15:22:00 CEST 2015
Author: phgrosjean
Date: 2015-09-29 15:21:59 +0200 (Tue, 29 Sep 2015)
New Revision: 253
Modified:
pkg/zooimage/DESCRIPTION
pkg/zooimage/NAMESPACE
pkg/zooimage/NEWS
pkg/zooimage/R/ZIClass.R
pkg/zooimage/R/ZIRes.R
pkg/zooimage/R/ZITrain.R
pkg/zooimage/R/correction.R
pkg/zooimage/R/fileutils.R
pkg/zooimage/R/gui.R
pkg/zooimage/R/guiutils.R
pkg/zooimage/R/import.R
pkg/zooimage/R/planktonSorter.R
pkg/zooimage/R/utilities.R
pkg/zooimage/R/zic.R
pkg/zooimage/R/zid.R
pkg/zooimage/R/zidb.R
pkg/zooimage/R/zie.R
pkg/zooimage/R/zim.R
pkg/zooimage/R/zip.R
pkg/zooimage/R/zis.R
pkg/zooimage/R/zzz.R
pkg/zooimage/inst/etc/ZooImageManual.pdf
pkg/zooimage/inst/gui/Menus.txt
pkg/zooimage/inst/gui/MenusZIDlgWin.txt
pkg/zooimage/inst/gui/ToolbarsZIDlgWin.txt
pkg/zooimage/inst/gui/errorcorrection/global.R
pkg/zooimage/inst/gui/errorcorrection/server.R
pkg/zooimage/inst/gui/errorcorrection/ui.R
pkg/zooimage/inst/planktonSorter/planktonSorter.js
pkg/zooimage/man/ZITrain.Rd
pkg/zooimage/man/correctError.Rd
pkg/zooimage/man/gui.Rd
pkg/zooimage/man/import.Rd
pkg/zooimage/man/utilities.Rd
pkg/zooimage/man/zip.Rd
pkg/zooimage/man/zooimage.package.Rd
Log:
Version 5.4-1 with many changes in preparation for version 6.0-0
Modified: pkg/zooimage/DESCRIPTION
===================================================================
--- pkg/zooimage/DESCRIPTION 2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/DESCRIPTION 2015-09-29 13:21:59 UTC (rev 253)
@@ -1,22 +1,25 @@
Package: zooimage
Type: Package
-Title: Analysis of numerical zooplankton images
-Version: 5.1-2
-Date: 2014-12-09
+Title: Analysis of Numerical Zooplankton Images
+Version: 5.4-1
+Date: 2015-09-29
Author: Philippe Grosjean [aut, cre],
- Kevin Denis [aut]
+ Kevin Denis [aut], Guillaume Wacquet [aut]
Authors@R: c(person("Philippe", "Grosjean", role = c("aut", "cre"),
email = "phgrosjean@sciviews.org"),
person("Kevin", "Denis", role = "aut",
- email = "kevin.denis@umons.ac.be"))
+ email = "kevin.denis@umons.ac.be"),
+ person("Guillaume", "Wacquet", role = "aut",
+ email = "guillaume.wacquet@umons.ac.be"))
Maintainer: Philippe Grosjean <phgrosjean@sciviews.org>
Depends: R (>= 2.14.0), svMisc (>= 0.9-67), svDialogs (>= 0.9-53), mlearning
-Imports: filehash, jpeg, png, tiff, utils, digest, tools, shiny
+Imports: filehash, jpeg, png, tiff, utils, digest, tools, MASS, mda, shiny, DT
Suggests: rJava, mlbench
-Description: ZooImage is a free (open source) solution for analyzing digital
+Description: A free (open source) solution for analyzing digital
images of zooplankton. In combination with ImageJ, a free image analysis
system, it processes digital images, measures individuals, trains for
automatic classification of taxa, and finally, measures zooplankton samples
- (abundances, total and partial size spectra or biomasses, etc.)
+ (abundances, total and partial size spectra or biomasses, etc.).
License: GPL (>= 2)
URL: http://www.sciviews.org/zooimage
+NeedsCompilation: no
Modified: pkg/zooimage/NAMESPACE
===================================================================
--- pkg/zooimage/NAMESPACE 2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/NAMESPACE 2015-09-29 13:21:59 UTC (rev 253)
@@ -21,7 +21,10 @@
#import(RWeka)
import(mlearning)
#import(party)
-import(shiny)
+importFrom(MASS, lda)
+importFrom(mda, mda, gen.ridge)
+importFrom(shiny, updateSelectInput, shinyServer, reactive, isolate, renderPrint, updateTabsetPanel, renderPlot, shinyUI, fluidPage, fluidRow, sidebarPanel, selectInput, actionButton, mainPanel, tabsetPanel, tabPanel, verbatimTextOutput, plotOutput)
+importFrom(DT, dataTableOutput, renderDataTable)
# planktonSorter
export(correctError)
@@ -87,13 +90,22 @@
# ZITrain/ZITest
export(prepareTrain)
export(addToTrain)
+export(compTrain)
export(getTrain)
export(prepareTest)
export(addToTest)
export(getTest)
export(recode)
export(template)
+export(contextSelection)
+export(addItemsToTrain)
+export(dropItemsToTrain)
+# ZICell
+export(cellCount)
+export(cellModel)
+export(cellCompute)
+
# ZIRes
export(processSample)
export(processSampleAll)
@@ -101,7 +113,6 @@
# Utilities
export(addClass)
export(calcVars)
-export(calcVarsVIS)
export(calibrate)
export(dropVars)
export(ecd)
@@ -136,9 +147,11 @@
# GUI
export(aboutZI)
export(acquireImg)
+export(activeLearningGUI)
export(addVigsToTrain)
export(analyzeClass)
export(calib)
+export(countCellsGUI)
export(closeAssistant)
export(closeZooImage)
export(collectTrain)
@@ -152,6 +165,8 @@
export(loadObjects)
export(makeClass)
export(makeZid)
+export(makeZidb)
+export(makeZidbFlowCAM)
export(makeTrain)
export(optInOutDecimalSep)
export(processImg)
Modified: pkg/zooimage/NEWS
===================================================================
--- pkg/zooimage/NEWS 2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/NEWS 2015-09-29 13:21:59 UTC (rev 253)
@@ -1,5 +1,89 @@
= zooimage News
+== Changes in zooimage 5.4-1
+
+* Correction of various bugs by Guillaume Wacquet.
+
+
+== Changes in zooimage 5.4-0
+
+* Functions to count cells in a particle (colony): countCells() and the
+  corresponding countCellsGUI() function for access through the menu.
+
+* Function to build predictive models for cells in particles (colonies) after
+ counting: cellModel().
+
+* Function to compute the number of cells in particles in a new sample:
+  cellCompute() (see the sketch after this list).
+
+* Function to make a zidb file for FlowCAM data through the menu:
+  makeZidbFlowCAM().
+
+* Correction of the makeClass() function in the menu (missing formula).
+
+* Functions to process active learning: contextSelection(), the associated
+  addItemsToTrain() and dropItemsToTrain() functions to complete the training
+  set with validated items, and the corresponding activeLearningGUI() function
+  for access through the menu. They are integrated in the classification
+  process (see the sketch after this list).
+
+* Function compTrain() to compare two training sets and highlight differences.
+
+* Translation of the user manual into French, and update to include all changes.
+
+* Adaptation of the menus to also include the new features.
+
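A rough sketch of how these new functions chain together (the names are those exported in the NAMESPACE above; every argument and return value below is an assumption, not the documented API):

## Hypothetical workflow -- all signatures are assumptions, check the help pages:
library(zooimage)
counts <- cellCount("Sample01.zidb")   # assumed: manual cell counts per particle
mod <- cellModel(counts)               # predictive model of cells per particle
smp <- cellCompute(smp, mod)           # assumed: adds a Nb_cells column to a sample
## Active learning: pick informative items, validate them, grow the training set:
sel <- contextSelection(smp)           # assumed: returns candidate items
train <- addItemsToTrain(train, sel)   # add validated items to the training set
train <- dropItemsToTrain(train, sel)  # ...or drop doubtful ones from it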
+
+== Changes in zooimage 5.3-0
+
+* calcVarsVIS() is now included in calcVars() and is no longer accessible as
+  a top-level function (to avoid duplicate code).
+
+* ecd() and ecdCell() are now merged into ecd(), using the argument cells = 1
+  (see the sketch after this list).
+
+* The errorCorrection() and planktonSorterPage() functions now use the new
+  interface to get the R httpd server port, for R svn rev >= 67550.
+
+* In the shiny app, dataTableOutput() is now called from the DT package.
+  A dependency on DT has been added.
+
+* The planktonSorter HTML page was sometimes too small, causing the last
+  group to wrap. Solved by increasing the page height by 1 pixel per group.
+
+* A bug in processSample() prevented calculation when both 'keep=' and a data
+  frame for 'biomass=' were provided simultaneously.
+
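A minimal sketch of the merged interface, assuming ecd() returns the diameter of a disc of equivalent area and divides the area by the number of cells for colonies (check the package sources for the exact formula):

## Assumed semantics of the merged ecd():
ecd <- function (area, cells = 1)
    2 * sqrt((area / cells) / pi)  # diameter of a disc of area/cells
ecd(12.5)             # ECD of a single-cell particle
ecd(12.5, cells = 4)  # per-cell ECD within a four-cell colony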
+
+== Changes in zooimage 5.2-0
+
+* importFlowCAM() can now deal with color FlowCAM images (but they are first
+  converted into grayscale, because background calibration images are recorded
+  as grayscale by Visual Spreadsheet - at least in the tested version 3.2.3 -
+  hence the background cannot be subtracted from the vignettes in color mode).
+
+* importFlowCAM() now prints messages indicating the progression of the
+  vignette import.
+
+* For importFlowCAM(), the default value of the argument 'rgb.vigs' is changed
+ from 'TRUE' to 'FALSE'.
+
+* Functions to count cells in a particle (colony): cellCount() and the
+  corresponding cellCountGUI() function for access through the menu.
+
+* New utility function ecdCell() to calculate the ECD for one cell in a colony.
+
+## TODO:
+* Functions to build predictive models for cells in particles (colonies)
+  after counting: cellModel().
+
+* Function to compute the number of cells in particles in a new sample:
+  cellCompute().
+
+* Function to make a zidb file for FlowCAM data through the menu:
+  zidbMakeFlowCAMGUI().
+
+* Correction of the makeClass() function in the menu (missing formula).
+
+
== Changes in zooimage 5.1-0
* calcVars()/calcVarsVIS() and dropVars() are reworked to use only FIT_xxx
Modified: pkg/zooimage/R/ZIClass.R
===================================================================
--- pkg/zooimage/R/ZIClass.R 2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/R/ZIClass.R 2015-09-29 13:21:59 UTC (rev 253)
@@ -1,4 +1,4 @@
-## Copyright (c) 2004-2012, Ph. Grosjean <phgrosjean at sciviews.org>
+## Copyright (c) 2004-2015, Ph. Grosjean <phgrosjean at sciviews.org>
##
## This file is part of ZooImage
##
Modified: pkg/zooimage/R/ZIRes.R
===================================================================
--- pkg/zooimage/R/ZIRes.R 2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/R/ZIRes.R 2015-09-29 13:21:59 UTC (rev 253)
@@ -1,4 +1,4 @@
-## Copyright (c) 2004-2012, Ph. Grosjean <phgrosjean at sciviews.org>
+## Copyright (c) 2004-2015, Ph. Grosjean <phgrosjean at sciviews.org>
##
## This file is part of ZooImage
##
@@ -103,12 +103,33 @@
}
## Calculate abundances, biomasses and size spectra per class in a sample
+#processSample <- function (x, sample, keep = NULL, detail = NULL, classes = "both",
+#header = c("Abd", "Bio"), cells = NULL, biomass = NULL, breaks = NULL)
processSample <- function (x, sample, keep = NULL, detail = NULL, classes = "both",
header = c("Abd", "Bio"), biomass = NULL, breaks = NULL)
{
## Fix ECD in case of FIT_VIS data
if ("FIT_Area_ABD" %in% names(x)) x$ECD <- ecd(x$FIT_Area_ABD)
+ ## Do we compute the number of cells and the ECD per cell?
+ ## But see version hereunder!
+#### TODO: compute ECD using number of cells per colonies!
+#### if (!is.null(cells)) {
+#### x$Nb_cells <- computeNbCells(x, cells)
+#### x$ECD_cells <- ecd(x$FIT_Area_ABD, x$Nb_cells)
+#### }
+#### PhG: here, computation before argument checking is not good!
+#### PhG: cells points to a file. Not good! We ask for a specific object instead
+
+ ## Do we compute the number of cells and the ECD per cell?
+ ## PhG: should not rely on a file here!
+#### if (!is.null(cells) && file.exists(cells)) {
+#### ## Must be a ZICell model here! predict() iterates on all items
+#### ## of the list to compute cells for all classes!
+#### x$Nb_cells <- predict(cells, x)
+#### x$ECD_cells <- ecd(x$FIT_Area_ABD, x$Nb_cells)
+####}
+
## Check arguments
if (missing(sample)) {
sample <- unique(sampleInfo(x$Label, type = "sample", ext = ""))
@@ -172,6 +193,7 @@
}
x <- x[x$Cl %in% keep, ] # Select keep levels
}
+ Cl <- as.character(x$Cl)
if (NROW(x) == 0) {
warning("no data left for this sample in 'x' when 'keep' is applied")
return(NULL)
@@ -211,12 +233,16 @@
x$P2 <- biomass[2]
x$P3 <- biomass[3]
} else stop("wrong 'biomass', must be NULL, a vector of 3 values or a data frame with Class, P1, P2 and P3")
- if (!is.numeric(x$ECD)) stop("'ECD' required for biomasses")
- x$BioWeight <- (x$P1 * x$ECD^x$P3 + x$P2) * x$Dil
+ ## Prefer using ECD_cells and Nb_cells if it exists
+ if (is.numeric(x$ECD_cells)) {
+ x$BioWeight <- (x$P1 * x$ECD_cells^x$P3 + x$P2) * x$Dil * x$Nb_cells
+ } else {
+ if (!is.numeric(x$ECD)) stop("'ECD' required for biomasses")
+ x$BioWeight <- (x$P1 * x$ECD^x$P3 + x$P2) * x$Dil
+ }
}
## Split among detail, if provided
- Cl <- as.character(x$Cl)
if (length(detail)) {
# We want more details for one or more groups...
detail <- as.character(detail)
@@ -225,9 +251,15 @@
Cl[!Cl %in% detail] <- "[other]"
x$Cl <- Cl
- res <- tapply(x$Dil, Cl, sum, na.rm = TRUE)
- res <- res[c(detail, "[other]")]
- res <- c(res, '[total]' = sum(x$Dil, na.rm = TRUE))
+ if (is.numeric(x$Nb_cells)) {
+ res <- tapply(x$Dil * x$Nb_cells, Cl, sum, na.rm = TRUE)
+ res <- res[c(detail, "[other]")]
+ res <- c(res, '[total]' = sum(x$Dil * x$Nb_cells, na.rm = TRUE))
+ } else {
+ res <- tapply(x$Dil, Cl, sum, na.rm = TRUE)
+ res <- res[c(detail, "[other]")]
+ res <- c(res, '[total]' = sum(x$Dil, na.rm = TRUE))
+ }
names(res) <- paste(header[1], names(res))
if (!missing(biomass)) {
@@ -239,7 +271,11 @@
}
} else { # Total abundance (and biomass) only
- res <- sum(x$Dil, na.rm = TRUE)
+ if (is.numeric(x$Nb_cells)) {
+ res <- sum(x$Dil * x$Nb_cells, na.rm = TRUE)
+ } else {
+ res <- sum(x$Dil, na.rm = TRUE)
+ }
if (!missing(biomass))
res <- c(res, sum(x$BioWeight, na.rm = TRUE))
names(res) <- paste(header, "[total]")
@@ -250,7 +286,7 @@
res <- structure(data.frame(Id = sample, t(res), check.names = FALSE),
class = c("ZI3Res", "ZIRes", "data.frame"))
- ## Do we calculate size spectra?
+ ## Do we calculate size spectra? (always by colonies only)
if (length(breaks)) {
if (!is.numeric(breaks) || length(breaks) < 2)
stop("'breaks' must be a vector of two or more numerics or NULL")
Modified: pkg/zooimage/R/ZITrain.R
===================================================================
--- pkg/zooimage/R/ZITrain.R 2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/R/ZITrain.R 2015-09-29 13:21:59 UTC (rev 253)
@@ -1,4 +1,4 @@
-## Copyright (c) 2004-2012, Ph. Grosjean <phgrosjean at sciviews.org>
+## Copyright (c) 2004-2015, Ph. Grosjean <phgrosjean at sciviews.org>
##
## This file is part of ZooImage
##
@@ -160,7 +160,7 @@
## Link .zidb database to R objects in memory
Zidb <- zidbLink(zidbfiles[i])
AllItems <- ls(Zidb)
- Vigns <- AllItems[-grep("_dat1", AllItems)]
+ Vigns <- AllItems[!grepl("_dat1", AllItems)]
## Extract all vignettes in their class subdirectory
imgext <- Zidb[[".ImageType"]]
## Get path for the vignettes and copy them there
@@ -244,7 +244,7 @@
## Link .zidb database to R objects in memory
Zidb <- zidbLink(zidbfile)
AllItems <- ls(Zidb)
- Vigns <- AllItems[-grep("_dat1", AllItems)]
+ Vigns <- AllItems[!grepl("_dat1", AllItems)]
## Copy all vignettes in the TopPath directory
imgext <- Zidb[[".ImageType"]]
## Get path for the vignettes and copy them there
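The switch from -grep() to !grepl() in the two hunks above guards against the empty-match case, where negative indexing with integer(0) silently drops everything:

## Why !grepl() is safer than -grep() for exclusion:
x <- c("A1_dat1", "A1")
x[-grep("_dat1", x)]   # "A1" -- fine while at least one item matches
y <- c("A1", "A2")
y[-grep("_dat1", y)]   # character(0): y[-integer(0)] selects nothing!
y[!grepl("_dat1", y)]  # "A1" "A2" -- robust when nothing matches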
Modified: pkg/zooimage/R/correction.R
===================================================================
--- pkg/zooimage/R/correction.R 2014-12-09 09:07:58 UTC (rev 252)
+++ pkg/zooimage/R/correction.R 2015-09-29 13:21:59 UTC (rev 253)
@@ -1,3 +1,20 @@
+## Copyright (c) 2004-2015, Ph. Grosjean <phgrosjean at sciviews.org>
+##
+## This file is part of ZooImage
+##
+## ZooImage is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 2 of the License, or
+## (at your option) any later version.
+##
+## ZooImage is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with ZooImage. If not, see <http://www.gnu.org/licenses/>.
+
## TODO: disable next button when in last fraction
## TODO: save temporary and final results in the zidb file
## TODO: correct bugs with the back button
@@ -97,7 +114,6 @@
}
}
-
#### Functions used only by errorCorrection() ##################################
## Calculation of the global error based on validated items
## /!\ NOT based on items randomly selected --> Bad approximation
@@ -348,9 +364,9 @@
## and how to retrieve class then???
## Group options in a single object too
errorCorrection <- function (data, classifier, mode = "validation",
-fraction = 0.05, sample.min = 100, grp.min = 2, random.sample = 0.1,
-algorithm = "rf", diff.max = 0.2, prop.bio = NULL,
-zidb = NULL, testdir = NULL, id = NULL,
+fraction = 0.05, sample.min = 100, sample.max = 200, grp.min = 2,
+random.sample = 0.1, algorithm = "rf", diff.max = 0.2, prop.bio = NULL,
+rare = 0.01, zidb = NULL, testdir = NULL, id = NULL,
result = ".last_valid", envir = parent.frame()) {
#### Parameter explanations
#### Data and classifier
@@ -379,6 +395,12 @@
sample.min <- as.integer(sample.min)[1]
if (sample.min < 1)
stop("sample.min must be a positive integer")
+ ## sample.max -- maximum number of particles to validate at each step
+ sample.max <- as.integer(sample.max)[1]
+ if (sample.max < 1)
+ stop("sample.max must be a positive integer")
+ if (sample.max < sample.min)
+ stop("sample.max must be higher or equal than sample.min")
## grp.min -- minimum number of particles of each group to validate
grp.min <- as.integer(grp.min)[1]
if (grp.min < 1 || grp.min > sample.min)
@@ -390,13 +412,17 @@
#### Parameters for the detection of suspects
## algorithm -- algorithm used to detect suspect particles
- ## diff.max -- maximum toalerated difference in probabilities for class identification
+ ## diff.max -- maximum tolerated difference in probabilities for class identification
diff.max <- as.numeric(diff.max)[1]
if (diff.max < 0)
stop("diff.max must be a positive number or zero")
## prop.bio -- group probabilities, using biological information
if (length(prop.bio) && (!is.numeric(prop.bio) || is.null(names(prop.bio))))
stop("prop.bio must be a named vector (groups) of numbers")
+ ## rare -- detection of rare items
+ rare <- as.numeric(rare)[1]
+ if (rare < 0 || rare > 0.2)
+ stop("rare must be between 0 and 0.2")
## zidb -- path to the zidbfile to manually validate
## testdir -- path of the directory used for manual validation
@@ -431,7 +457,12 @@
manual.history <- NULL # history of the manual confusion matrix
manual2.history <- NULL # history of manual + 2nd ident confusion matrix
corr.confusion <- NULL # corrected confusion matrix
+ classRare <- NULL # String with the list of classes considered as rare
+ cell.confusion <- NULL # corrected confusion matrix for cells
+ bioweight.confusion <- NULL # corrected confusion matrix for bioweight
correction.history <- NULL # history of the correction confusion matrix
+ correctionCell.history <- NULL # history of the correction confusion matrix for cells
+ correctionBio.history <- NULL # history of the correction confusion matrix for bioweight
error.estim.data <- NULL # data used to estimate the error
error.estim <- NULL # history of the error estimation
error.estim.history <- NULL # evolution of the error estimation
@@ -523,11 +554,16 @@
}
predicted2 <- secondIdent(levels(predicted))
+ predTable <- table(predicted)
+ prop <- predTable / sum(predTable)
+ classRare <<- names(which(prop < rare))
+
## Creation of corr object
corr <<- data.frame(Actual = predicted, Actual2 = predicted2,
Predicted = predicted, Predicted2 = predicted2, Validated = FALSE,
Error = error, Step = step, Suspect = rep(TRUE, nobs),
- RdValidated = rep(Inf, nobs), OtherGp = rep(FALSE, nobs))
+ Rare = predicted %in% classRare, RdValidated = rep(Inf, nobs),
+ OtherGp = rep(FALSE, nobs))
## Confusion matrix of original classifier
train.validated <- attr(classifier, "response")
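The classRare computation above flags every class whose predicted proportion falls under the rare threshold; in miniature:

## Toy example of the rare-class detection (rare = 0.02 here):
predicted <- factor(c(rep("copepod", 95), rep("appendicularia", 4), "egg"))
prop <- table(predicted) / length(predicted)
names(which(prop < 0.02))  # "egg" (1% of the items) is flagged as rare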
@@ -575,18 +611,23 @@
## Increment step (should be 0 now, because initial value is -1)
step <<- step + 1
+
## Determine the number of vignettes to manually validate
setSampleSize()
}
## Compute the size of the next subsample: update sample.size
setSampleSize <- function () {
- sample.size <<- round(min(nrow(corr[!corr$Validated, ]), # How much remains?
- ## Or number of items we want to take
- max(nrow(data) * fraction, # Items to take
- sample.min, # Minimum items we can take
- ## TODO: check the following one!
- grp.min * length(table(predicted))))) # Minimum from groups
+ ## Number of items we want to take
+ sample.size <<- ceiling(nrow(data) * fraction)
+ ## No less than sample.min
+ sample.size <<- max(sample.size, sample.min)
+ ## According to complexity of the training set, take possibly more
+ sample.size <<- max(sample.size, grp.min * length(levels(predicted)))
+ ## ... but no more than sample.max
+ sample.size <<- min(sample.size, sample.max)
+ ## Or how much remains?
+ sample.size <<- min(sample.size, nrow(corr[!corr$Validated, ]))
}
## Determine the subsample to validate
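With assumed values, the new clamping logic in setSampleSize() resolves like this:

## Worked example of the sample-size clamping (all values assumed):
n <- 1000; fraction <- 0.05; sample.min <- 100; sample.max <- 200
grp.min <- 2; ngroups <- 15; remaining <- 180
size <- ceiling(n * fraction)         # 50 items wanted
size <- max(size, sample.min)         # at least 100
size <- max(size, grp.min * ngroups)  # at least 30, so still 100
size <- min(size, sample.max)         # at most 200, so still 100
size <- min(size, remaining)          # no more than what remains: 100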
@@ -628,7 +669,7 @@
newstep[newstep == -1] <- step
corr$Step[randomsample.ids] <<- newstep
notvalid.ids <- ids[!corr$Validated & corr$RdValidated == step]
- ## Number of items to valid in order to acheive sample.size
+ ## Number of items to valid in order to achieve sample.size
numSample <- sample.size - length(notvalid.ids)
if (numSample > 0) {
## Randomly select suspect items not validated
@@ -650,7 +691,63 @@
corr$Step[trustsample.ids] <<- step
}
}
- }
+ }
+
+ ############### stratified random sampling ###############
+# if (numSample > 0) {
+# ## Select the same number of suspect items not validated in each class
+# suspnotval.ids <- ids[!corr$Validated & corr$Suspect &
+# is.infinite(corr$RdValidated) & corr$Step == -1]
+# if (length(suspnotval.ids)) {
+# splitGp <- split(suspnotval.ids, list(corr[suspnotval.ids,]$Predicted))
+# strat.samples <- lapply(splitGp, function(x) x[sample(1:NROW(x),
+# min(NROW(x), round(numSample/length(unique(corr$Predicted[as.numeric(suspnotval.ids)])))),
+# replace = FALSE)])
+# suspsample.ids <- as.numeric(do.call(c, strat.samples))
+# corr$Step[suspsample.ids] <<- step
+# numSample <- numSample - length(suspsample.ids)
+# }
+#
+# if (numSample > 0) {
+# ## If not completed, randomly select suspects items not validated
+# suspnotval.ids <- ids[!corr$Validated & corr$Suspect &
+# is.infinite(corr$RdValidated) & corr$Step == -1]
+# if (length(suspnotval.ids)) {
+# suspsample.ids <- as.numeric(sample(suspnotval.ids,
+# size = min(numSample, length(suspnotval.ids))))
+# corr$Step[suspsample.ids] <<- step
+# numSample <- numSample - length(suspsample.ids)
+# }
+# }
+#
+# if (numSample > 0) {
+# ## If not completed, Select the same number of trusted items not validated in each class
+# trustnotval.ids <- ids[!corr$Validated & !corr$Suspect &
+# is.infinite(corr$RdValidated) & corr$Step == -1]
+# if (length(trustnotval.ids)) {
+# splitGp <- split(trustnotval.ids, list(corr[trustnotval.ids,]$Predicted))
+# strat.samples <- lapply(splitGp, function(x) x[sample(1:NROW(x),
+# min(NROW(x), round(numSample/length(unique(corr$Predicted[as.numeric(trustnotval.ids)])))),
+# replace = FALSE)])
+# trustsample.ids <- as.numeric(do.call(c, strat.samples))
+# corr$Step[trustsample.ids] <<- step
+# numSample <- numSample - length(trustsample.ids)
+# }
+# }
+#
+# if (numSample > 0) {
+# ## If not completed, randomly select trusted items not validated
+# trustnotval.ids <- ids[!corr$Validated & !corr$Suspect &
+# is.infinite(corr$RdValidated) & corr$Step == -1]
+# if (length(trustnotval.ids)) {
+# trustsample.ids <- as.numeric(sample(trustnotval.ids,
+# size = min(numSample, length(trustnotval.ids))))
+# corr$Step[trustsample.ids] <<- step
+# numSample <- numSample - length(trustsample.ids)
+# }
+# }
+ ############### ############### ###############
+
nsuspect.tovalid <- length(ids[corr$Step == step & corr$Suspect])
ntrusted.tovalid <- length(ids[corr$Step == step & !corr$Suspect])
nsuspect.history <<- c(nsuspect.history, nsuspect)
@@ -662,7 +759,11 @@
if (mode != "stat") {
## Make sure the R Httpd server is started
tools <- getNamespace("tools")
- port <- tools$httpdPort
+ if (R.Version()$`svn rev` >= 67550) {
+ port <- tools::startDynamicHelp(NA)
+ } else {
+ port <- tools$httpdPort
+ }
if (port == 0) port <- startDynamicHelp(TRUE)
if (port == 0) stop("Impossible to start the R httpd server")
@@ -817,6 +918,27 @@
error.estim.data$Actual, useNA = "no") # remove NAs
corr.confusion <<- error.conf / sum(error.conf) *
(nrow(data) - sum(corr$OtherGp)) # remove NAs
+
+ ## For cells
+ if ("Nb_cells" %in% names(data)) {
+ error.conf.cell <- xtabs(data$Nb_cells[corr$Step==step] ~
+ error.estim.data$Actual + error.estim.data$Predicted,
+ exclude = c(NA, NaN))
+ cell.confusion <<- error.conf.cell /
+ sum(error.conf.cell) * (sum(data$Nb_cells) -
+ sum(data$Nb_cells[corr$OtherGp])) # remove NAs
+ }
+
+ ## For biovolumes
+ if ("BioWeight" %in% names(data)) {
+ error.conf.bioweight <- xtabs(data$BioWeight[corr$Step==step] ~
+ error.estim.data$Actual + error.estim.data$Predicted,
+ exclude = c(NA, NaN))
+ bioweight.confusion <<- error.conf.bioweight /
+ sum(error.conf.bioweight) * (sum(data$BioWeight) -
+ sum(data$BioWeight[corr$OtherGp])) # remove NAs
+ }
+
## Calculate error in valid data and in both suspect and trusted parts
error.valid.history[[step + 1]] <<-
error.estim.data$Actual != error.estim.data$Predicted
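The xtabs() calls above build confusion matrices weighted by cell counts or biomass instead of counting particles; in miniature:

## Weighted confusion matrix via xtabs():
act  <- factor(c("A", "A", "B"))
pred <- factor(c("A", "B", "B"))
w    <- c(10, 4, 6)      # e.g. Nb_cells per particle
xtabs(w ~ act + pred)    # cross-tabulation summing cells, not particles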
@@ -841,6 +963,45 @@
corr$Actual[notValTrustIdx])
corr.confusion <<- confSusp.w + confTrustVal + confNotValTrust
+ ## For cells
+ if ("Nb_cells" %in% names(data)) {
+ nCellSuspTot <- sum(data$Nb_cells[corr$Suspect &
+ !corr$OtherGp])
+ nCellSuspVal <- sum(data$Nb_cells[valSuspIdx])
+ nCellTrustVal <- sum(data$Nb_cells[valTrustIdx])
+ confSuspValCell <- xtabs(data$Nb_cells[valSuspIdx] ~
+ corr$Actual[valSuspIdx] + corr$Predicted[valSuspIdx],
+ exclude = c(NA, NaN))
+ confTrustValCell <- xtabs(data$Nb_cells[valTrustIdx] ~
+ corr$Actual[valTrustIdx] + corr$Predicted[valTrustIdx],
+ exclude = c(NA, NaN))
+ confSuspCell.w <- confSuspValCell / nCellSuspVal * nCellSuspTot
+ confNotValTrustCell <- xtabs(data$Nb_cells[notValTrustIdx] ~
+ corr$Actual[notValTrustIdx] + corr$Predicted[notValTrustIdx],
+ exclude = c(NA, NaN))
+ cell.confusion <<-
+ confSuspCell.w + confTrustValCell + confNotValTrustCell
+ }
+
+ ## For biovolumes
+ if ("BioWeight" %in% names(data)) {
+ nBioSuspTot <- sum(data$BioWeight[corr$Suspect & !corr$OtherGp])
+ nBioSuspVal <- sum(data$BioWeight[valSuspIdx])
+ nBioTrustVal <- sum(data$BioWeight[valTrustIdx])
+ confSuspValBio <- xtabs(data$BioWeight[valSuspIdx] ~
+ corr$Actual[valSuspIdx] + corr$Predicted[valSuspIdx],
+ exclude = c(NA, NaN))
+ confTrustValBio <- xtabs(data$BioWeight[valTrustIdx] ~
+ corr$Actual[valTrustIdx] + corr$Predicted[valTrustIdx],
+ exclude = c(NA, NaN))
+ confSuspBio.w <- confSuspValBio / nBioSuspVal * nBioSuspTot
+ confNotValTrustBio <- xtabs(data$BioWeight[notValTrustIdx] ~
+ corr$Actual[notValTrustIdx] + corr$Predicted[notValTrustIdx],
+ exclude = c(NA, NaN))
+ bioweight.confusion <<-
+ confSuspBio.w + confTrustValBio + confNotValTrustBio
+ }
+
error.valid.history[[step + 1]] <<- testset$Actual != testset$Predicted
if (nsuspect > 0) {
error.suspect.history[[step + 1]] <<-
@@ -855,6 +1016,32 @@
}
}
+ ## Compute the corrected coefficients for particles, cells, biovolume
+# estimateCoeffs <- function () {
+# ## For particles (colonies)
+# col.confusion <- table(corr$Predicted[corr$Validated], corr$Actual[corr$Validated], useNA = "no") # remove NAs
+# corr.coeffs <- ifelse(!colSums(col.confusion), rowSums(col.confusion),
+# rowSums(col.confusion)/colSums(col.confusion))
+# ## For cells
+# if ("Nb_cells" %in% names(data)) {
+# cell.confusion <- xtabs(data$Nb_cells[corr$Validated] ~
+# corr$Predicted[corr$Validated] +
+# corr$Actual[corr$Validated], exclude = c(NA, NaN))
+# corr.coeffs <- cbind(corr.coeffs, ifelse(!colSums(cell.confusion), rowSums(cell.confusion),
+# rowSums(cell.confusion)/colSums(cell.confusion)))
+# }
+#
+# ## For biovolumes
+# if ("BioWeight" %in% names(data)) {
+# bioweight.confusion <- xtabs(data$BioWeight[corr$Validated] ~
+# corr$Predicted[corr$Validated] +
+# corr$Actual[corr$Validated], exclude = c(NA, NaN))
+# corr.coeffs <- cbind(corr.coeffs, ifelse(!colSums(bioweight.confusion), rowSums(bioweight.confusion),
+# rowSums(bioweight.confusion)/colSums(bioweight.confusion)))
+# }
+# corr.coeffs
+# }
+
## Estimate error and abundance
## Update Validated, training set and histories
correct <- function () {
@@ -872,10 +1059,20 @@
estimateError()
estimateAbundance()
+ #estimateCoeffs()
validated.fracs <<- c(validated.fracs, sample.size)
correction.history <<- cbind(correction.history,
rowSums(corr.confusion))
+ if ("Nb_cells" %in% names(data)) {
+ correctionCell.history <<- cbind(correctionCell.history,
+ rowSums(cell.confusion))
+ }
+ if ("BioWeight" %in% names(data)) {
+ correctionBio.history <<- cbind(correctionBio.history,
+ rowSums(bioweight.confusion))
+ }
+
manual.history <<- cbind(manual.history, table(corr$Actual))
manual2.history <<- cbind(manual2.history, table(corr$Actual2))
setSampleSize() # Set the next subsample size
@@ -999,7 +1196,8 @@
print(abd)
## Create an object with these results...
- test <- data.frame(Id = makeId(data), data, Class = corr$Actual)
+ test <- data.frame(Id = makeId(data), data, Class = corr$Actual, Validated = corr$Validated, Suspect = corr$Suspect)
+ #test <- data.frame(Id = makeId(data), data, Class = corr$Actual)
attr(test, "path") <- attr(classifier, "path")
class(test) <- unique(c("ZI3Test", "ZITest", class(data)))
assign(result, test, envir = envir)
@@ -1054,6 +1252,7 @@
}
error1 <- dissimilarity(abundances, manual.history, na.rm = TRUE) * 100
error3 <- dissimilarity(abundances, correction.history, na.rm = TRUE) * 100
+ par(mar = c(5, 4, 4, 4) + 0.1)
plot(cumsum(validated.fracs) / nrow(corr) * 100, error1,
type = "l", xlab = "Validated fraction (%)",
ylab = "Dissimilarity (%)", col = "green", xlim = c(0, 100),
@@ -1074,6 +1273,7 @@
validated.fracs[-1]
suspByFrac <- nsuspect.tovalid.history / validated.fracs[-1]
suspByFrac[1] <- 0
+ par(mar = c(5, 4, 4, 4) + 0.1)
plot(fracs * 100, errByFrac * 100, type = "l", xlab = "Validated fraction (%)",
ylab = "Suspect and error (%)", xlim = c(0, 100), ylim = c(0, 100), col = "red",
main = "Suspects and error at each iteration")
@@ -1083,30 +1283,109 @@
col = c("black", "red"), cex = 0.8, lwd = 2)
} else { # Should be type == "barplot"
- fracs <- cumsum(validated.fracs[-1]) / nrow(corr)
+ thresholdDiffDiss <- 5 # Differential dissimilarity <= 5%
+ nbStep <- ceiling(nrow(data) / validated.fracs[-1][1])
errByFrac <- sapply(error.valid.history, sum, na.rm = TRUE) /
validated.fracs[-1]
suspByFrac <- nsuspect.tovalid.history / validated.fracs[-1]
#suspByFrac[1] <- 0
## case 1 item => projection, case more => another projection...
dat <- rbind(suspByFrac * 100, errByFrac * 100)
- barplot(dat, xlab = "Validated fraction", beside = TRUE,
- ylab = "Suspect and corrected error (%)", xlim = c(1, (1/fraction + 1)*2),
+
+ diffDiss <- sapply(2:ncol(correction.history), function (x)
+ dissimilarity(correction.history[, x - 1], correction.history[, x],
+ na.rm = TRUE) * 100
+ )
+ xcoord <-
+ seq(0.7, ceiling(nrow(data) / validated.fracs[-1][1]) * 1.2, by = 1.2)
+ if (step < 1) {
+ suspRemain <- NA
+ stepSD <- round((errByFrac*nsuspect.history -
+ errByFrac*nsuspect.tovalid.history) /
+ nsuspect.tovalid.history) + (step+1)
+ idxStepSD <- stepSD
+ coordStepSD <- mean(c(xcoord[idxStepSD], xcoord[idxStepSD + 1]))
+ } else {
+ suspRemain <- c(NA, nsuspect.history[2:(step+1)] -
+ nsuspect.tovalid.history[2:(step+1)])
+ stepSD <- round(suspRemain / nsuspect.tovalid.history) + 1:(step+1)
+ if (length(which(suspRemain == 0)) > 0) {
+ idxStepSD <- which(suspRemain == 0)[1]
+ } else {
+ idxStepSD <- tail(stepSD,1)
+ }
+ coordStepSD <- mean(c(xcoord[idxStepSD], xcoord[idxStepSD + 1]))
+ }
+
+ par(mfrow = c(2, 1), mar = c(4, 4, 1, 4) + 0.1)
+ bp1 <- barplot(suspRemain, #xlab = "Validation step",
+ ylab = "Nb remaining suspects", xlim = c(0.2,
+ xcoord[ceiling(idxStepSD + (length(xcoord) - idxStepSD) / 3)]),
+ ylim = c(0, max(suspRemain, diffDiss, na.rm = TRUE)), yaxs = "r",
+ col = "grey10", cex.axis = .7, cex.main = 1, ann = FALSE,
+ yaxt = "n", #main = "Remaining suspects and differential dissimilarity")
+ )
+ title(expression(bold("Remaining suspects") *
+ phantom("\tand\tdifferential dissimilarity")),
+ col.main = "grey10", cex.main = 1)
+ title(expression(phantom("Remaining suspects\t") * "and" *
+ phantom("\tdifferential dissimilarity")),
+ col.main = "black", cex.main = 1)
+ title(expression(phantom("Remaining suspects\tand\t") *
+ bold("differential dissimilarity")),
+ col.main = "blue", cex.main = 1)
+# legend("top", legend = c("Remaining suspects","Diff dissimilarity"),
+# fill = c("grey20","blue"), cex = .6, bty = "n", adj = c(0,0))
+ axis(side = 1, at = seq(bp1[1], by = 1.2, length.out = nbStep),
+ labels = 1:nbStep, cex.axis = .7)
+ if (step > 0) axis(side = 2, cex.axis = .7)
+
+ par(new = TRUE)
+ plot(bp1, diffDiss, type = "o", col = "blue", ylim = c(0, 100),
+ xlim = c(0.2, xcoord[ceiling(idxStepSD + (length(xcoord) -
+ ## TODO: why '+' at the end of next line???
+ idxStepSD) / 3)]), lwd = 3, axes = FALSE, ann = FALSE) +
+ ## TODO: why '+' at the end of next line???
+ axis(side = 4, col = "blue", col.axis = "blue", cex.axis = .7) +
+ mtext("Differential dissimilarity (%)", side = 4, line = 3,
+ col = "blue")
+ abline(v = coordStepSD, lwd = 2, lty = 2, col = "dimgrey")
+ text(x = coordStepSD + .5, y = 90, "SD", srt = -90, pos = 4,
+ cex = 0.6, col = "dimgrey")
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/zooimage -r 253