[Zooimage-commits] r233 - in pkg/zooimage: . R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Aug 10 16:09:22 CEST 2012
Author: phgrosjean
Date: 2012-08-10 16:09:22 +0200 (Fri, 10 Aug 2012)
New Revision: 233
Modified:
pkg/zooimage/NAMESPACE
pkg/zooimage/R/ZIClass.R
pkg/zooimage/R/ZIRes.R
pkg/zooimage/R/ZITrain.R
pkg/zooimage/man/ZITrain.Rd
Log:
Correction of bugs in ZIClass and ZITrain + refactoring of ZIRes initiated
Modified: pkg/zooimage/NAMESPACE
===================================================================
--- pkg/zooimage/NAMESPACE 2012-08-10 08:25:11 UTC (rev 232)
+++ pkg/zooimage/NAMESPACE 2012-08-10 14:09:22 UTC (rev 233)
@@ -85,11 +85,15 @@
# ZIClass
export(ZIClass)
-# ZITrain
+# ZITrain/ZITest
export(prepareTrain)
export(addToTrain)
export(getTrain)
+export(prepareTest)
+export(addToTest)
+export(getTest)
export(recode)
+export(template)
# Utilities
export(calcVars)
@@ -166,7 +170,9 @@
export(modalAssistant)
# S3 methods
+S3method(template, default)
S3method(recode, ZITrain)
+S3method(recode, ZITest)
S3method(print, ZIClass)
S3method(predict, ZIClass)
Modified: pkg/zooimage/R/ZIClass.R
===================================================================
--- pkg/zooimage/R/ZIClass.R 2012-08-10 08:25:11 UTC (rev 232)
+++ pkg/zooimage/R/ZIClass.R 2012-08-10 14:09:22 UTC (rev 233)
@@ -37,20 +37,24 @@
class(ZI.class) <- c("ZIClass", class(ZI.class))
attr(ZI.class, "calc.vars") <- calc.vars
+ ## Get useful attrobutes from ZITrain
+ attr(ZI.class, "traindir") <- attr(data, "traindir")
+ attr(ZI.class, "path") <- attr(data, "path")
+
## Calculate predictions with full training set
attr(ZI.class, "predict") <- predict(ZI.class, data, calc = FALSE)
## Possibly make a k-fold cross-validation and check results
if (length(cv.k)) {
- attr(ZI.class, "cvpredict") <- cvpredict(ZI.class, type = "both",
+ attr(ZI.class, "cvpredict") <- cvpredict(ZI.class, type = "class",
cv.k = cv.k, cv.strat = cv.strat)
attr(ZI.class, "k") <- cv.k
attr(ZI.class, "strat") <- cv.strat
}
- ## Make sure the '+other+' group exists
+ ## Make sure the '+others+' group exists
lev <- levels(ZI.class)
- if (!"+other+" %in% lev) attr(ZI.class, "levels") <- c(lev, "+other+")
+ if (!"+others+" %in% lev) attr(ZI.class, "levels") <- c(lev, "+others+")
ZI.class
}
Modified: pkg/zooimage/R/ZIRes.R
===================================================================
--- pkg/zooimage/R/ZIRes.R 2012-08-10 08:25:11 UTC (rev 232)
+++ pkg/zooimage/R/ZIRes.R 2012-08-10 14:09:22 UTC (rev 233)
@@ -15,89 +15,99 @@
## You should have received a copy of the GNU General Public License
## along with ZooImage. If not, see <http://www.gnu.org/licenses/>.
-processSample <- function (ZidFile, ZidbFile = NULL, ZIClass, ZIMan, ZIDesc,
-abd.taxa = NULL, abd.groups = NULL, abd.type = "absolute", bio.taxa = NULL,
-bio.groups = NULL, bio.conv = c(1, 0, 1), headers = c("Abd", "Bio"),
-spec.taxa = NULL, spec.groups = NULL, spec.breaks = seq(0.25, 2, by = 0.1),
-spec.use.Dil = TRUE, exportdir = NULL, SemiTab = NULL, Semi = FALSE)
+processSample <- function (zidbfile, ZIClass = NULL, use = "both",
+ZIDesc, abd.taxa = NULL, abd.groups = NULL,
+abd.type = "absolute", bio.taxa = NULL, bio.groups = NULL, bio.conv = c(1, 0, 1),
+headers = c("Abd", "Bio"), spec.taxa = NULL, spec.groups = NULL,
+spec.breaks = seq(0.25, 2, by = 0.1), spec.use.Dil = TRUE, exportdir = NULL,
+SemiTab = NULL, Semi = FALSE)
{
- ## Check if the ZidFile exists
- if (!checkFileExists(ZidFile, message = "'ZidFile' not found"))
- return(invisible(FALSE))
-
+ zidbfile <- as.character(zidbfile)[1]
+ if (hasExtension(zidbfile, "zidb")) dbext <- "zidb" else dbext <- "zid"
+ if (!checkFileExists(zidbfile, dbext)) return(invisible(FALSE))
+
## Check if ZIClass is of the right class
- if (!inherits(ZIClass, "ZIClass")) {
+ if (!is.null(ZIClass) && !inherits(ZIClass, "ZIClass")) {
warning("'ZIClass' must be a 'ZIClass' object")
return(invisible(FALSE))
}
- if (!length(ZidbFile)) {
- ## Check if the ZidFile exists
- if (!checkFileExists(ZidFile, force.file = TRUE))
- return(invisible(FALSE))
-
- ## Get ZIDat from the ZidFile
- ZIDat <- zidDatRead(ZidFile)
- Sample <- sampleInfo(ZidFile, type = "sample",
+ if (dbext == "zidb") { # This is a ZIDB file
+ ZIDat <- zidbDatRead(zidbfile)
+ Sample <- sampleInfo(zidbfile, type = "sample",
+ ext = extensionPattern(".zidb"))
+ RES <- zidbSampleRead(zidbfile)
+ } else { # This is an old ZID file
+ ZIDat <- zidDatRead(zidbfile)
+ Sample <- sampleInfo(zidbfile, type = "sample",
ext = extensionPattern(".zid"))
-
- } else { # There is a ZIDB file
- ## Check if the ZidbFile exists
- if (!checkFileExists(ZidbFile, force.file = TRUE))
- return(invisible(FALSE))
-
- ## Get ZIDat from the ZidbFile
- ZIDat <- zidbDatRead(ZidbFile)
- ## Get ZIDat from the ZidFile
- Sample <- sampleInfo(ZidbFile, type = "sample",
- ext = extensionPattern(".zidb"))
+ ZIDesc <- zisRead(ZIDesc)
+ RES <- ZIDesc[ZIDesc$Label == Sample, ]
+ if (nrow(RES) == 0)
+ stop("'ZIDesc' has no data for that sample!")
}
- ## By default, we have to predict ZidFile with a classifier
- MakePredictions <- TRUE
+# ## By default, we have to predict zidbfile with a classifier
+# MakePredictions <- TRUE
+#
+# ## Modified by Kevin 2010-08-03
+# if (!is.null(ZIMan)) {
+# ## We want to use a ZIMan table
+# if (!inherits(ZIMan, "ZIMan"))
+# stop("'ZIMan' must be a data.frame of class 'ZIMan'")
+#
+# ## List of samples allready manually validated
+# AllSamples <- attr(ZIMan, "Samples")
+#
+# ## Check if manual validation exists for this zid file
+# if (noExtension(ZidFile) %in% AllSamples) {
+# ## The ZidFile was manually validated
+# ## --> use Class column for identification
+# ## Subtable of ZidFile vignettes
+# Vignettes <- makeId(ZIDat)
+# ZIDat <- ZIMan[ZIMan$Id %in% Vignettes, ]
+# ## Sort the table
+# ZIDat <- ZIDat[order(ZIDat$Item), ]
+# ## We don't have to predict this sample anymore!
+# MakePredictions <- FALSE
+# }
+# }
- ## Modified by Kevin 2010-08-03
- if (!is.null(ZIMan)) {
- ## We want to use a ZIMan table
- if (!inherits(ZIMan, "ZIMan"))
- stop("'ZIMan' must be a data.frame of class 'ZIMan'")
-
- ## List of samples allready manually validated
- AllSamples <- attr(ZIMan, "Samples")
-
- ## Check if manual validation exists for this zid file
- if (noExtension(ZidFile) %in% AllSamples) {
- ## The ZidFile was manually validated
- ## --> use Class column for identification
- ## Subtable of ZidFile vignettes
- Vignettes <- makeId(ZIDat)
- ZIDat <- ZIMan[ZIMan$Id %in% Vignettes, ]
- ## Sort the table
- ZIDat <- ZIDat[order(ZIDat$Item), ]
- ## We don't have to predict this sample anymore!
- MakePredictions <- FALSE
+# if (isTRUE(MakePredictions)) {
+# ## We have to recognize the zid file with a classifier
+# ZIDat <- predict(ZIClass, ZIDat)
+# }
+
+ ## Depending on 'us', rework ZIDat$Ident...
+ if (use == "Class") {
+ ZIDat$Ident <- ZIDat$Class
+ } else {
+ if (!is.null(ZIClass)) {
+ ## If a ZIClass object is provided, (re)perform the prediction
+ ZIDat <- predict(ZIClass, ZIDat, class.only = FALSE)
}
+ if (use == "both") { # If Class available, use it, otherwise, use Ident
+ if ("Class" %in% names(ZIDat)) {
+ Ident <- ZIDat$Class
+ missIdent <- is.na(Ident)
+ Ident[missIdent] <- ZIDat$Ident[missIdent]
+ ZIDat$Ident <- Ident
+ }
+ } else if (use != "Ident")
+ stop("Unknown 'use', must be 'Class', 'Ident', or 'both'")
}
-
- if (isTRUE(MakePredictions)) {
- ## We have to recognize the zid file with a classifier
- ZIDat <- predict(ZIClass, ZIDat)
- }
-
- ## Check if one can get sample metadata from ZIDesc
- RES <- ZIDesc[ZIDesc$Label == Sample, ]
- if (nrow(RES) == 0)
- stop("'ZIDesc' has no data for that sample!")
-
- ## Use manual validation if it is present
- if (isTRUE(MakePredictions)) {
- ## Use Automatic prediction
- Grp <- levels(ZIDat$Ident)
- } else {
- ## Use manual validation as identification
- Grp <- levels(ZIDat$Class)
- }
-
+
+# ## Use manual validation if it is present
+# if (isTRUE(MakePredictions)) {
+# ## Use Automatic prediction
+# Grp <- levels(ZIDat$Ident)
+# } else {
+# ## Use manual validation as identification
+# Grp <- levels(ZIDat$Class)
+# }
+
+ Grp <- levels(ZIDat$Ident)
+
if (is.null(abd.groups)) {
## Calculate groups (list with levels to consider)
abd.groups <- as.list(c("", Grp))
@@ -134,7 +144,6 @@
SPClist[[Sample]] <- SPC
attr(RES, "spectrum") <- SPClist
}
- attr(RES, "metadata") <- attr(ZIDesc, "metadata")
class(RES) <- c("ZI3Res", "ZIRes", "data.frame")
RES
}
Modified: pkg/zooimage/R/ZITrain.R
===================================================================
--- pkg/zooimage/R/ZITrain.R 2012-08-10 08:25:11 UTC (rev 232)
+++ pkg/zooimage/R/ZITrain.R 2012-08-10 14:09:22 UTC (rev 233)
@@ -15,10 +15,22 @@
## You should have received a copy of the GNU General Public License
## along with ZooImage. If not, see <http://www.gnu.org/licenses/>.
+template <- function (object, ...)
+ UseMethod("template")
+
+template.default <- function (object, add.others = TRUE, ...)
+{
+ res <- attr(object, "path")
+ if (isTRUE(as.logical(add.others)) &&
+ !"+others+" %in% unlist(strsplit(res, "/", fixed = TRUE)))
+ res <- c(res, "+others+")
+
+ res
+}
+
## Prepare 'dir\subdir' for a manual classification by expanding all vignettes
## from a given number of zidfiles to the '_' subdir, and making
## a template for subdirs
-## TODO: eliminate zidfiles and detect if it is zidfiles or zidbfiles like in addToTrain()
prepareTrain <- function (traindir, zidbfiles,
template = c("[Basic]", "[Detailed]", "[Very detailed]"), ident = NULL)
{
@@ -108,7 +120,7 @@
basename(zidbfiles[i])), "_dat1.RData", sep = "")))
}
}
- progress(zmax + 1) # Clear progression indicator
+ progress(101) # Clear progression indicator
### TODO: relocate vignettes in subdirectories, if ident is not NULL
if (length(ident)) {
@@ -119,6 +131,21 @@
invisible(TRUE)
}
+prepareTest <- function (testdir, zidbfiles, template, ident = NULL)
+{
+ if (!is.null(attr(template, "path"))) template <- attr(template, "path")
+ if (!"+others+" %in% unlist(strsplit(template, "/", fixed = TRUE)))
+ template <- c(template, "+others+")
+ tpl <- structure(1, path = template)
+ res <- prepareTrain(testdir, zidbfiles = zidbfiles,
+ template = tpl, ident = ident)
+ ## Add a .zic file there to make sure to respect training set classes
+ cat("ZI3\n[path]\n", paste(template, collapse = "\n"), "\n", sep = "",
+ file = file.path(testdir, "_template.zic"))
+
+ res
+}
+
## Function to add new vignettes in a training set
addToTrain <- function (traindir, zidbfiles, ident = NULL)
{
@@ -151,7 +178,7 @@
## Extract RData in the root directory
zmax <- length(zidbfiles)
- message("Adding data and vignettes to the training set...")
+ message("Adding data and vignettes to the training set...\n")
for (i in 1:zmax) {
progress(i, zmax)
## treatment depends if it is a .zid or .zidb file
@@ -192,11 +219,14 @@
unlink(td, recursive = TRUE)
}
}
- progress(zmax + 1) # Clear progression indicator
+ progress(101) # Clear progression indicator
message("-- Done --\n")
invisible(TRUE)
}
+addToTest <- function (testdir, zidbfiles, ident = NULL)
+ addToTrain(traindir = testdir, zidbfiles = zidbfiles, ident = ident)
+
## Retrieve information from a manual training set in a 'ZITrain' object
getTrain <- function (traindir, creator = NULL, desc = NULL, keep_ = FALSE,
na.rm = FALSE)
@@ -286,8 +316,8 @@
} else message("NAs found in the table of measurements and left there")
## Add attributes
- attr(df, "basedir") <- dir
- attr(df, "path") <- sort(unique(Path))
+ attr(df, "traindir") <- dir
+ attr(df, "path") <- unique(Path)
if (length(creator)) attr(df, "creator") <- creator
if (length(desc)) attr(df, "desc") <- desc
Classes <- c("ZI3Train", "ZITrain", Classes)
@@ -326,6 +356,31 @@
df
}
+getTest <- function (testdir, creator = NULL, desc = NULL, keep_ = FALSE,
+na.rm = FALSE)
+{
+ ## Same as getTrain() but returns a ZITest object... and read _template.zic
+ ## to make sure that path and classes do match!
+ if (!file.exists(zicfile) || !zicCheck(zicfile))
+ stop("testdir does not seem to contain a valid test set (may be use getTrain()?)")
+
+ res <- getTrain(traindir = testdir, creator = creator, desc = desc,
+ keep_ = keep_, na.rm = na.rm)
+ class(res) <- c("ZI3Test", "ZITest", class(res)[-(1:2)])
+
+ ## Read the _template.zic file and change res$Class factors and path accordingly
+ path <- scan(zicfile, character(), sep = "\n", skip = 2, quiet = TRUE)
+ if (!length(path))
+ stop(sprintf("'%s' is empty or corrupted!", zicfile))
+ attr(res, "path") <- path
+
+ ## Now, make sure to recode res$Class factor in the correct order!
+ lev <- sort(basename(path))
+ res$Class <- factor(as.character(res$Class), levels = lev)
+
+ res
+}
+
.recodeLevels <- function (object, depth = 1)
{
if (!inherits(object, "ZITrain"))
@@ -352,9 +407,6 @@
recode.ZITrain <- function (object, new.levels, depth, ...)
{
- if (!inherits(object, "ZITrain"))
- stop("'ZITrain' must be a 'ZITrain' object")
-
if (!missing(depth)) {
if (!missing(new.levels))
warning("depth is provided, so, new.levels is ignored and recomputed")
@@ -388,3 +440,5 @@
if (!is.null(path)) attr(object, "path") <- path
object
}
+
+recode.ZITest <- recode.ZITrain
Modified: pkg/zooimage/man/ZITrain.Rd
===================================================================
--- pkg/zooimage/man/ZITrain.Rd 2012-08-10 08:25:11 UTC (rev 232)
+++ pkg/zooimage/man/ZITrain.Rd 2012-08-10 14:09:22 UTC (rev 233)
@@ -1,16 +1,23 @@
\name{ZITrain}
-\alias{getTrain}
\alias{prepareTrain}
\alias{addToTrain}
+\alias{getTrain}
+\alias{prepareTest}
+\alias{addToTest}
+\alias{getTest}
+\alias{template}
+\alias{template.default}
\alias{recode}
\alias{recode.ZITrain}
+\alias{recode.ZITest}
-\title{ Manipulate training sets and 'ZITrain' objects }
+\title{ Manipulate training and testsets 'ZITrain'/'ZITest' objects }
\description{
'ZITrain' contain a hierarchy of classes (taxonomic or not) and a link to a
series of items belonging to these classes. It can be obtained after manual or
- automatic classification of various objects from .zid or .zidb files.
+ automatic classification of various objects from .zid or .zidb files. 'ZITest'
+ objects are almost identical, but with a '+others+' class added.
}
\usage{
prepareTrain(traindir, zidbfiles, template = c("[Basic]", "[Detailed]",
@@ -18,19 +25,30 @@
addToTrain(traindir, zidbfiles, ident = NULL)
getTrain(traindir, creator = NULL, desc = NULL, keep_ = FALSE, na.rm = FALSE)
+prepareTest(testdir, zidbfiles, template, ident = NULL)
+addToTest(testdir, zidbfiles, ident = NULL)
+getTest(testdir, creator = NULL, desc = NULL, keep_ = FALSE, na.rm = FALSE)
+
+template(object, \dots)
+\method{template}{default}(object, add.others = TRUE, \dots)
+
recode(object, \dots)
\method{recode}{ZITrain}(object, new.levels, depth, \dots)
+\method{recode}{ZITest}(object, new.levels, depth, \dots)
}
\arguments{
\item{traindir}{ the root directory of the training set. }
+ \item{testdir}{ the root directory of the test set. }
\item{zidbfiles}{ .zidb files or .zid files to use for data and vignettes in
the training set. }
\item{template}{ file containing subdirectories template to use for creating
- classes in the training set. Either a defaut template between [], or the
- name of a .zic file. }
+ classes in the training or test set. Either a defaut template between [], or
+ the name of a .zic file, or a template extracted from a 'ZITrain' or 'ZIClass'
+ object using \code{template(object)} (with the \code{add.others} argument
+ to TRUE for \code{prepareTest()} and to \code{FALSE} for \code{prepareTrain()}) }
\item{ident}{ if vignettes are already identified in the zid(b) files, should
- they be sorted that way in the created training set? If not \code{NULL},
+ they be sorted that way in the created training or test set? If not \code{NULL},
indicate the name of the classification column (usually, \code{Class} for
manual classification or \code{Ident} for automatic predictions). Not uset yet! }
\item{creator}{ name of the author of this classification (or the method,
@@ -39,7 +57,12 @@
\item{keep_}{ do we keep items in the '\_' subdirectory (corresponding to
unclassified ones)? Usually not! }
\item{na.rm}{ do we remove item with missing data? By default, not. }
- \item{object}{ a 'ZITrain' object. }
+ \item{object}{ a 'ZITrain' or 'ZITest' object. For \code{prepareTest()}, a
+ 'ZITrain' or 'ZIClass' object to use as reference to determine the
+ classes to make. }
+ \item{add.others}{ do we add the '+others+' group if not already in the
+ template? \code{TRUE} (by default) is suitable to create a template for
+ 'ZITest' objects, otherwise, use \code{FALSE} for 'ZITrain' objects. }
\item{new.levels}{ a character string of same length as the levels of
\code{object$Class} with the labels of the new levels. }
\item{depth}{ the depth in the hierachy of the classes as in the \code{"path"}
More information about the Zooimage-commits
mailing list