[Zooimage-commits] r232 - in pkg: mlearning mlearning/R mlearning/man zooimage zooimage/R zooimage/man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Aug 10 10:25:12 CEST 2012
Author: phgrosjean
Date: 2012-08-10 10:25:11 +0200 (Fri, 10 Aug 2012)
New Revision: 232
Modified:
pkg/mlearning/NAMESPACE
pkg/mlearning/R/confusion.R
pkg/mlearning/R/mlearning.R
pkg/mlearning/man/confusion.Rd
pkg/zooimage/NAMESPACE
pkg/zooimage/R/ZIClass.R
pkg/zooimage/R/ZITrain.R
pkg/zooimage/R/gui.R
pkg/zooimage/R/utilities.R
pkg/zooimage/man/ZIClass.Rd
pkg/zooimage/man/ZITrain.Rd
pkg/zooimage/man/guiutils.Rd
pkg/zooimage/man/utilities.Rd
Log:
Changes in mlearning (weights -> prior) + refactoring of ZIClass and associated functions
Modified: pkg/mlearning/NAMESPACE
===================================================================
--- pkg/mlearning/NAMESPACE 2012-08-06 14:04:45 UTC (rev 231)
+++ pkg/mlearning/NAMESPACE 2012-08-10 08:25:11 UTC (rev 232)
@@ -25,7 +25,8 @@
export(response)
export(train)
-export("weights<-")
+export(prior)
+export("prior<-")
export(confusion)
export(confusionImage)
@@ -50,8 +51,8 @@
S3method(response, default)
S3method(train, default)
-S3method(weights, confusion)
-S3method("weights<-", confusion)
+S3method(prior, confusion)
+S3method("prior<-", confusion)
S3method(summary, lvq)
S3method(print, summary.lvq)
Modified: pkg/mlearning/R/confusion.R
===================================================================
--- pkg/mlearning/R/confusion.R 2012-08-06 14:04:45 UTC (rev 231)
+++ pkg/mlearning/R/confusion.R 2012-08-10 08:25:11 UTC (rev 232)
@@ -1,7 +1,8 @@
confusion <- function (x, ...)
UseMethod("confusion")
-.confusion <- function (classes, labels, weights, ...)
+## TODO: implement weights
+.confusion <- function (classes, labels, prior, ...)
{
res <- table(classes, dnn = labels)
total <- sum(res)
@@ -13,28 +14,28 @@
attr(res, "col.freqs") <- colSums(res)
attr(res, "levels") <- levels(classes[1, ]) # These are *initial* levels!
## Final levels may differ if there are empty levels, or NAs!
- attr(res, "weights") <- row.freqs # Initial weights are row.freqs
+ attr(res, "prior") <- row.freqs # Initial prior are row.freqs
attr(res, "stats") <- c(total = total, truepos = truePos,
error = 1 - (truePos / total))
## This is a confusion object, inheriting from table
class(res) <- c("confusion", "table")
- ## Do we reweight the confusion matrix?
- if (!missing(weights)) weights(res) <- weights
+ ## Do we rescale the confusion matrix?
+ if (!missing(prior)) prior(res) <- prior
res
}
confusion.default <- function (x, y = NULL, vars = c("Actual", "Predicted"),
-labels = vars, merge.by = "Id", weights, ...)
+labels = vars, merge.by = "Id", prior, ...)
{
## If the object is already a 'confusion' object, return it
if (inherits(x, "confusion")) {
if (!missing(y))
warning("you cannot provide 'y' when 'x' is a 'confusion' object")
- ## Possibly reweight it
- if (!missing(weights)) weights(x) <- weights
+ ## Possibly rescale it
+ if (!missing(prior)) prior(x) <- prior
return(x)
}
@@ -42,7 +43,7 @@
conf <- attr(x, "confusion")
if (!is.null(conf) && missing(y)) {
## Possibly reweight it
- if (!missing(weights)) weights(conf) <- weights
+ if (!missing(prior)) prior(conf) <- prior
return(conf)
}
@@ -120,15 +121,15 @@
}
## Construct the confusion object
- if (missing(weights)) {
+ if (missing(prior)) {
.confusion(classes = clCompa, labels = labels, ...)
} else {
- .confusion(classes = clCompa, labels = labels, weights = weights, ...)
+ .confusion(classes = clCompa, labels = labels, prior = prior, ...)
}
}
confusion.mlearning <- function (x, y = response(x),
-labels = c("Actual", "Predicted"), weights, ...) {
+labels = c("Actual", "Predicted"), prior, ...) {
## Check labels
labels <- as.character(labels)
if (length(labels) != 2)
@@ -150,35 +151,38 @@
}
## Construct the confusion object
- if (missing(weights)) {
+ if (missing(prior)) {
.confusion(data.frame(class1 = y, class2 = class2),
labels = labels, ...)
} else {
.confusion(data.frame(class1 = y, class2 = class2),
- labels = labels, weights = weights, ...)
+ labels = labels, prior = prior, ...)
}
}
-weights.confusion <- function (object, ...)
- attr(object, "weights")
+prior <- function (object, ...)
+ UseMethod("prior")
-`weights<-`<- function (object, ..., value)
- UseMethod("weights<-")
+prior.confusion <- function (object, ...)
+ attr(object, "prior")
-`weights<-.confusion`<- function (object, ..., value)
+`prior<-`<- function (object, ..., value)
+ UseMethod("prior<-")
+
+`prior<-.confusion`<- function (object, ..., value)
{
+ rsums <- rowSums(object)
if (!length(value)) { # value is NULL or of zero length
- ## Reset weights to original frequencies
+ ## Reset prior to original frequencies
value <- attr(object, "row.freqs")
- attr(object, "weights") <- value
- round(object / apply(object, 1, sum) * value)
+ res <- round(object / rsums * value)
} else if (is.numeric(value)) { # value is numeric
if (length(value) == 1) { # value is a single number
if (is.na(value) || !is.finite(value) || value <= 0)
stop("value must be a finite positive number")
- res <- object / apply(object, 1, sum) * as.numeric(value)
+ res <- object / rsums * as.numeric(value)
} else { # value is a vector of numerics
## It must be either of the same length as nrow(object) or of
@@ -207,12 +211,15 @@
} else stop("length of 'value' do not match the number of levels in the confusion matrix")
- res <- object / apply(object, 1, sum) * as.numeric(value)
+ res <- object / rsums * as.numeric(value)
}
- attr(res, "weights") <- rowSums(res)
- res
} else stop("value must be a numeric vector, a single number or NULL")
+
+ attr(res, "prior") <- value
+ ## Take care to rows with no items! => put back zeros!
+ res[rsums == 0] <- 0
+ res
}
print.confusion <- function (x, sums = TRUE, error.col = sums, digits = 0,
@@ -225,10 +232,10 @@
" true positives (error rate = ", Error, "%)\n",
sep = "")
row.freqs <- attr(x, "row.freqs")
- if (!all(attr(x, "weights") == row.freqs)) {
- cat("with initial row weights (frequencies):\n")
+ if (!all(attr(x, "prior") == row.freqs)) {
+ cat("with initial row frequencies:\n")
print(row.freqs)
- cat("Reweighted to:\n")
+ cat("Rescaled to:\n")
}
## Print the confusion matrix itself
Modified: pkg/mlearning/R/mlearning.R
===================================================================
--- pkg/mlearning/R/mlearning.R 2012-08-06 14:04:45 UTC (rev 231)
+++ pkg/mlearning/R/mlearning.R 2012-08-10 08:25:11 UTC (rev 232)
@@ -413,8 +413,8 @@
classes <- classes[notDup]
classes <- classes[pos]
- ## Check that both classes are the same!
- if (any(classes != est$predictions))
+ ## Check that both classes levels are the same!
+ if (any(levels(classes) != levels(est$predictions)))
warning("cross-validated classes do not match")
res <- list(class = classes, membership = membership)
Modified: pkg/mlearning/man/confusion.Rd
===================================================================
--- pkg/mlearning/man/confusion.Rd 2012-08-06 14:04:45 UTC (rev 231)
+++ pkg/mlearning/man/confusion.Rd 2012-08-10 08:25:11 UTC (rev 232)
@@ -10,9 +10,10 @@
\alias{confusionBarplot}
\alias{confusionStars}
\alias{confusionDendrogram}
-\alias{weights.confusion}
-\alias{weights<-}
-\alias{weights<-.confusion}
+\alias{prior}
+\alias{prior.confusion}
+\alias{prior<-}
+\alias{prior<-.confusion}
\title{ Construct and analyze confusion matrices }
\description{
@@ -25,9 +26,9 @@
\usage{
confusion(x, \dots)
\method{confusion}{default}(x, y = NULL, vars = c("Actual", "Predicted"),
- labels = vars, merge.by = "Id", weights, \dots)
+ labels = vars, merge.by = "Id", prior, \dots)
\method{confusion}{mlearning}(x, y = response(x),
- labels = c("Actual", "Predicted"), weights, \dots)
+ labels = c("Actual", "Predicted"), prior, \dots)
\method{print}{confusion}(x, sums = TRUE, error.col = sums, digits = 0,
sort = "ward", \dots)
@@ -49,9 +50,10 @@
confusionDendrogram(x, y = NULL, labels = rownames(x), sort = "ward",
main = "Groups clustering", \dots)
-\method{weights}{confusion}(object, \dots)
-weights(object, \dots) <- value
-\method{weights}{confusion}(object, \dots) <- value
+prior(object, \dots)
+\method{prior}{confusion}(object, \dots)
+prior(object, \dots) <- value
+\method{prior}{confusion}(object, \dots) <- value
}
\arguments{
@@ -66,7 +68,7 @@
the same as \code{vars} or the one in the confusion matrix. }
\item{merge.by}{ a character string with the name of variables to use to merge
the two data frames, or \code{NULL}. }
- \item{weights}{ weights (class frequencies) to use for first classifier that
+ \item{prior}{ class frequencies to use for first classifier that
is tabulated in the rows of the confusion matrix. For its value, see here
under, the \code{value} argument. }
\item{sums}{ is the confusion matrix printed with rows and columns sums? }
@@ -111,19 +113,17 @@
\item{names}{ names of the two classifiers to compare. }
\item{main}{ main title of the graph. }
\item{min.width}{ minimum bar width required to add numbers. }
- \item{value}{ a single positive numeric to set all class frequencies to this
+ \item{value}{ a single positive numeric to set all class frequencies to this
value (use 1 for relative frequencies and 100 for relative freqs in percent),
or a vector of positive numbers of the same length as the levels in the
object. If the vector is named, names must match levels. Alternatively,
- providing \code{NULL} or an object of null length resets weights into their
- initial values (here, weights are class frequencies according to the first
- classifier, and depending on the context, they correspond to actual values,
- priors, optimal posteriors, etc.). }
+ providing \code{NULL} or an object of null length resets row class
+ frequencies into their initial values. }
}
\value{
- A confusion matrix in a 'confusion' object. \code{weights()} returns the
- current class weights associated with first classification tabulated, i.e.,
+ A confusion matrix in a 'confusion' object. \code{prior()} returns the
+ current class frequencies associated with first classification tabulated, i.e.,
for rows in the confusion matrix.
}
@@ -168,31 +168,31 @@
## the real proportions (so-called, priors), one should first reweight the
## confusion matrix before calculating statistics, for instance:
prior1 <- c(10, 10, 10, 100, 100, 100) # Glass types 1-3 are rare
-weights(glassConf) <- prior1
+prior(glassConf) <- prior1
glassConf
summary(glassConf, type = c("Fscore", "Recall", "Precision"))
plot(glassConf)
## This is very different than if glass types 1-3 are abundants!
prior2 <- c(100, 100, 100, 10, 10, 10) # Glass types 1-3 are abundants
-weights(glassConf) <- prior2
+prior(glassConf) <- prior2
glassConf
summary(glassConf, type = c("Fscore", "Recall", "Precision"))
plot(glassConf)
## Weight can also be used to construct a matrix of relative frequencies
## In this case, all rows sum to one
-weights(glassConf) <- 1
+prior(glassConf) <- 1
print(glassConf, digits = 2)
## However, it is easier to work with relative frequencies in percent
## and one gets a more compact presentation
-weights(glassConf) <- 100
+prior(glassConf) <- 100
glassConf
-## To reset weights to original propotions, just assign NULL
-weights(glassConf) <- NULL
+## To reset row class frequencies to original propotions, just assign NULL
+prior(glassConf) <- NULL
glassConf
-weights(glassConf)
+prior(glassConf)
}
\keyword{ tree }
Modified: pkg/zooimage/NAMESPACE
===================================================================
--- pkg/zooimage/NAMESPACE 2012-08-06 14:04:45 UTC (rev 231)
+++ pkg/zooimage/NAMESPACE 2012-08-10 08:25:11 UTC (rev 232)
@@ -89,15 +89,13 @@
export(prepareTrain)
export(addToTrain)
export(getTrain)
-export(recode.ZITrain)
-export(ZIRecodeLevels)
+export(recode)
# Utilities
export(calcVars)
export(calibrate)
export(ecd)
export(getDec)
-export(keepVars)
export(listSamples)
export(makeId)
export(parseIni)
@@ -168,6 +166,8 @@
export(modalAssistant)
# S3 methods
+S3method(recode, ZITrain)
+
S3method(print, ZIClass)
S3method(predict, ZIClass)
S3method(summary, ZIClass)
Modified: pkg/zooimage/R/ZIClass.R
===================================================================
--- pkg/zooimage/R/ZIClass.R 2012-08-06 14:04:45 UTC (rev 231)
+++ pkg/zooimage/R/ZIClass.R 2012-08-10 08:25:11 UTC (rev 232)
@@ -15,35 +15,30 @@
## You should have received a copy of the GNU General Public License
## along with ZooImage. If not, see <http://www.gnu.org/licenses/>.
-ZIClass <- function (formula, data, mlearning = getOption("ZI.mlearning",
-mlRforest), calc.vars = getOption("ZI.calcVars", calcVars), cv.k = 10, cv.strat,
-..., subset, na.action = getOption("ZI.naAction", na.omit))
+## Create basically a mlearning object, but with predicted and cvpredicted added
+## to it, and the '+other+' level added at the end of all levels
+ZIClass <- function (formula, data, method = getOption("ZI.mlearning",
+"mlRforest"), calc.vars = getOption("ZI.calcVars", calcVars), cv.k = 10,
+cv.strat = TRUE, ..., subset, na.action = na.omit)
{
## Check calc.vars and use it on data
if (length(calc.vars))
if (!is.function(calc.vars)) {
stop("'calc.vars' must be a function or NULL")
} else data <- calc.vars(data)
-
- ## Machine learning function
- mlearning <- match.fun(mlearning)
- if (!is.function(mlearning))
- stop("'mlearning' must be a function that produce a 'mlearning' object or a compatible one")
## Train the machine learning algorithm
- if (missing(subset) || !length(subset)) {
- ZI.class <- mlearning(formula, data = data, ..., na.action = na.action)
- } else {
- ZI.class <- mlearning(formula, data = data, ..., subset,
- na.action = na.action)
- }
-
+ ZI.class <- mlearning(formula, data = data, method = method,
+ model.args = list(formula = formula, data = substitute(data),
+ subset = substitute(subset)), call = match.call(), ...,
+ subset = subset, na.action = substitute(na.action))
+
## Add ZIClass as class of the object
class(ZI.class) <- c("ZIClass", class(ZI.class))
attr(ZI.class, "calc.vars") <- calc.vars
## Calculate predictions with full training set
- attr(ZI.class, "predict") <- predict(ZI.class, data, calc.vars = FALSE)
+ attr(ZI.class, "predict") <- predict(ZI.class, data, calc = FALSE)
## Possibly make a k-fold cross-validation and check results
if (length(cv.k)) {
@@ -52,33 +47,43 @@
attr(ZI.class, "k") <- cv.k
attr(ZI.class, "strat") <- cv.strat
}
+
+ ## Make sure the '+other+' group exists
+ lev <- levels(ZI.class)
+ if (!"+other+" %in% lev) attr(ZI.class, "levels") <- c(lev, "+other+")
+
ZI.class
}
print.ZIClass <- function (x, ...)
{
algorithm <- attr(x, "algorithm")
- classes <- attr(x, "classes")
+ classes <- attr(x, "response")
lclasses <- levels(classes)
predicted <- attr(x, "predict")
+ if (is.list(predicted)) predicted <- predicted$class
k <- attr(x, "k")
+ strat <- attr(x, "strat")
cat("A 'ZIClass' object predicting for", length(lclasses), "classes:\n")
print(lclasses)
- Confu <- confusion(classes, predicted)
- mism <- 100 * (1 - (sum(diag(Confu)) / sum(Confu)))
+ Confu <- table(classes, predicted)
+ SelfConsist <- 100 * (sum(diag(Confu)) / sum(Confu))
## Change the number of digits to display
oldDigits <- options(digits = 4)
on.exit(options(oldDigits))
cat("\nAlgorithm used:", algorithm, "\n")
- cat("Mismatch in classification: ", mism, "%\n", sep = "")
+ cat("Self-consistency: ", SelfConsist, "%\n", sep = "")
if (!is.null(k)) {
- cat("k-fold cross validation error estimation (k = ", k, "):\n")
- kfold.predict <- attr(x, "kfold.predict")
+ if (isTRUE(strat)) msg <- ", stratified" else msg <- ""
+ cat("K-fold cross validation error estimation (k = ", k, msg, "):\n",
+ sep = "")
+ cvpredicted <- attr(x, "cvpredict")
+ if (is.list(cvpredicted)) cvpredicted <- cvpredicted$class
prior <- table(classes)
- ok <- diag(table(classes, kfold.predict))
+ ok <- diag(table(classes, cvpredicted))
err <- 100 * (1 - (sum(ok) / sum(prior)))
- cat(err, "%\n", sep = "")
+ cat("Error rate: ", err, "%\n", sep = "")
cat("\nError per class:\n")
`Error (%)` <- sort(1 - (ok / prior)) * 100
print(as.data.frame(`Error (%)`))
@@ -90,12 +95,12 @@
na.rm = FALSE, ...)
{
## Get the confusion object out of a ZIClass object and calc stats from there
- summary(confusion(object), sort.by = sort.by, decreasing = decreasing,
- na.rm = na.rm)
+ summary(confusion(object, response(object)), sort.by = sort.by, decreasing = decreasing,
+ na.rm = na.rm, ...)
}
-predict.ZIClass <- function (object, ZIDat, calc.vars = TRUE,
-class.only = FALSE, type = "class", na.rm = FALSE, ...)
+predict.ZIClass <- function (object, ZIDat, calc = TRUE, class.only = TRUE,
+type = "class", ...)
{
## Make sure we have correct objects
if (!inherits(object, "ZIClass"))
@@ -106,72 +111,64 @@
class(object) <- class(object)[-1]
data <- as.data.frame(ZIDat)
- if (isTRUE(as.logical(calc.vars)))
+ if (isTRUE(as.logical(calc)))
data <- attr(object, "calc.vars")(data)
- if (isTRUE(as.logical(na.rm))) na.omit(data)
- algorithm <- attr(object, "algorithm")
- if (type != "prob") {
- # modification to accept algoritms from party package
- if (algorithm %in% c("ctree", "cforest")) {
- Ident <- predict(object, newdata = data, type = "response",
- OOB = FALSE)
- } else {
- Ident <- predict(object, newdata = data, type = type)
- }
- } else {
- if (inherits(object, "randomForest")) {
- Ident <- predict(object, newdata = data, type = type)
- } else if (inherits(object, "lda")) {
- Ident <- predict(object, newdata = data)$posterior
- } else stop("Cannot calculate yet for other algorithms than Random Forest or LDA")
+ class.only <- isTRUE(as.logical(class.only))
+ type <- as.character(type)[1]
+ if (class.only && type != "class") {
+ warning("with class.only == TRUE, tyep can only be 'class' and is force to it")
+ type <- "class"
}
-
- ## Special case for prediction from an LDA (list with $class item)
- if (inherits(Ident, "list") && "class" %in% names(Ident))
- Ident <- Ident$class
- if (!isTRUE(as.logical(class.only))) {
- res <- cbind(ZIDat, Ident)
- class(res) <- class(ZIDat)
- } else res <- Ident
- ## New metadata attribute
- attr(res, "metadata") <- attr(ZIDat, "metadata")
- res
+ ## Perform the prediction
+ res <- predict(object, newdata = data, ...)
+
+ ## Return either the prediction, or the ZIDat object with Ident column append/replaced
+ if (class.only) res else {
+ ZIDat$Ident <- res
+ ZIDat
+ }
}
-#confusion.ZIClass <- function (x, ...)
-#{
-# ## If the object is ZIClass, calculate 'confusion'
-# ## from attributes 'classes' and 'kfold.predict'
-# if (!inherits(x, "ZIClass"))
-# stop("'x' must be a 'ZIClass' object")
-#
-# x <- attr(x, "classes")
-# y <- attr(x, "kfold.predict")
-# labels <- c("Class", "Predict")
-# clCompa <- data.frame(Class = x, Predict = y)
-# ## How many common objects by level?
-# NbrPerClass1 <- table(clCompa[, 1])
-# ## How many predicted objects
-# NbrPerClass2 <- table(clCompa[, 2])
-# ## Confusion matrix
-# Conf <- table(clCompa)
-# ## Further stats: total, true positives, accuracy
-# Total <- sum(Conf)
-# TruePos <- sum(diag(Conf))
-# Stats <- c(total = Total, truepos = TruePos, accuracy = TruePos / Total * 100)
-#
-# ## Change labels to get a more compact presentation
-# colnames(Conf) <- formatC(1:ncol(Conf), digits = 1, flag = "0")
-# rownames(Conf) <- paste(colnames(Conf), rownames(Conf))
-#
-# ## Additional data as attributes
-# attr(Conf, "stats") <- Stats
-# attr(Conf, "nbr.rows") <- NbrPerClass1
-# attr(Conf, "nbr.cols") <- NbrPerClass2
-#
-# ## This is a confusion object
-# class(Conf) <- c("confusion", "table")
-# Conf
-#}
+confusion.ZIClass <- function (x, y = response(x),
+labels = c("Actual", "Predicted"), prior, use.cv = TRUE, ...) {
+ ## Check labels
+ labels <- as.character(labels)
+ if (length(labels) != 2)
+ stop("You must provide exactly 2 character strings for 'labels'")
+
+ ## Extract class2: cvpredict or predict from the object
+ if (isTRUE(as.logical(use.cv))) {
+ class2 <- attr(x, "cvpredict")
+ if (is.list(class2)) class2 <- class2$class
+ if (is.null(class2))
+ stop("No or wrong cross-validated predictions in this ZIClass object")
+ } else { # Use predict
+ class2 <- attr(x, "predict")
+ if (is.list(class2)) class2 <- class2$class
+ }
+
+ ## Check that both variables are of same length and same levels
+ if (length(y) != length(class2))
+ stop("lengths of 'x' and 'y' are not the same")
+
+ ## Full list of levels is in (cv)predict in class2...
+ ## Response in y may have dropped levels!
+ lev1 <- levels(y)
+ lev2 <- levels(class2)
+ if (!all(lev1 %in% lev2))
+ stop("levels of 'x' and 'y' do not match")
+
+ ## Rework levels in y to make sure they match perfectly thos in class2
+ y <- factor(as.character(y), levels = lev2)
+
+ ## Construct the confusion object
+ if (missing(prior)) {
+ mlearning:::.confusion(data.frame(class1 = y, class2 = class2),
+ labels = labels, ...)
+ } else {
+ mlearning:::.confusion(data.frame(class1 = y, class2 = class2),
+ labels = labels, prior = prior, ...)
+ }
+}
Modified: pkg/zooimage/R/ZITrain.R
===================================================================
--- pkg/zooimage/R/ZITrain.R 2012-08-06 14:04:45 UTC (rev 231)
+++ pkg/zooimage/R/ZITrain.R 2012-08-10 08:25:11 UTC (rev 232)
@@ -19,62 +19,73 @@
## from a given number of zidfiles to the '_' subdir, and making
## a template for subdirs
## TODO: eliminate zidfiles and detect if it is zidfiles or zidbfiles like in addToTrain()
-prepareTrain <- function (rootdir, subdir = "_train", zidfiles, zidbfiles = NULL,
-groups.template = c("[Basic]", "[Detailed]", "[Very detailed]"),
-ident = NULL, start.viewer = FALSE)
+prepareTrain <- function (traindir, zidbfiles,
+template = c("[Basic]", "[Detailed]", "[Very detailed]"), ident = NULL)
{
- ## First, check that rootdir is valid
- if (!checkDirExists(rootdir)) return(invisible(FALSE))
+ ## First, check that dirname of traindir is valid
+ if (!checkDirExists(dirname(traindir))) return(invisible(FALSE))
- ## New dir is rootdir + subdir
- dir <- file.path(rootdir, as.character(subdir)[1])
- if (!checkEmptyDir(dir,
- message = 'dir "%s" must be empty. Clean it first!'))
+ if (!checkEmptyDir(traindir,
+ message = 'dir "%s" is not empty. Use AddToTrain() instead!'))
return(invisible(FALSE))
## Then, check that all zidfiles or zidbfiles exist
- if (is.null(zidbfiles)) {
- if (!checkFileExists(zidfiles, "zid")) return(invisible(FALSE))
- zmax <- length(zidfiles)
- } else {
- if (!checkFileExists(zidbfiles, "zidb")) return(invisible(FALSE))
- zmax <- length(zidbfiles)
- }
+ if (hasExtension(zidbfiles[1], "zidb")) dbext <- "zidb" else dbext <- "zid"
+ if (!checkFileExists(zidbfiles, dbext)) return(invisible(FALSE))
+ zmax <- length(zidbfiles)
- ## Finally, look for the groups.template
- groups.template <- as.character(groups.template)[1]
- rx <- "^[[](.+)[]]$"
- if (grepl(rx, groups.template)) {
- ## This should be a template file in the default directory
- groups.template <- paste(sub(rx, "\\1", groups.template), ".zic",
- sep = "")
- groups.template <- file.path(getTemp("ZIetc"), groups.template)
- if (!file.exists(groups.template)) {
- warning("The file '", groups.template, "' is not found")
- return(invisible(FALSE))
+ ## Also look for the template
+ ## If the object has a path template, use it...
+ path <- attr(template, "path")
+ if (!length(path)) { # Look for a .zic file with classes
+ template <- as.character(template)[1]
+ rx <- "^[[](.+)[]]$"
+ if (grepl(rx, template)) {
+ ## This should be a template file in the default directory
+ template <- paste(sub(rx, "\\1", template), ".zic",
+ sep = "")
+ template <- file.path(getTemp("ZIetc"), template)
+ if (!file.exists(template)) {
+ warning("The file '", template, "' is not found")
+ return(invisible(FALSE))
+ }
}
+ ## Check that this is a .zic file
+ if (!zicCheck(template)) return(invisible(FALSE))
+
+ ## Create the other directories
+ path <- scan(template, character(), sep = "\n", skip = 2,
+ quiet = TRUE)
+ if (!length(path)) {
+ warning(sprintf("'%s' is empty or corrupted!", template))
+ return(invisible(FALSE))
+ }
}
- ## Check that this is a .zic file
- if (!zicCheck(groups.template)) return(invisible(FALSE))
- ## Do the job...
- message("Extracting data and vignettes ...")
-
## Create '_' subdir and unzip all vignettes there
- dir_ <- file.path(dir, "_")
+ dir_ <- file.path(traindir, "_")
if (!forceDirCreate(dir_)) return(invisible(FALSE))
+ ## Create subdirectories representing classes hierarchy
+ message("Making directories...")
+ path <- file.path(traindir, path)
+ for (i in 1:length(path)) {
+ #message(path[i])
+ dir.create(path[i], recursive = TRUE)
+ }
+
+ ## Place the vignettes...
+ message("Extracting data and vignettes ...")
for (i in 1:zmax) {
progress(i, zmax)
- if (is.null(zidbfiles)) {
- message("data", zidfiles[i])
+ if (dbext != "zidb") {
## Using a temporary directory to unzip all files and then copy
## the RData files to the train directory
td <- tempfile()
- unzip(zipfile = zidfiles[i], exdir = td)
+ unzip(zipfile = zidbfiles[i], exdir = td)
datafiles <- file.path(td, list.files(td,
pattern = extensionPattern(".RData"), recursive = TRUE))
- if (length(datafiles)) file.copy(datafiles, dir)
+ if (length(datafiles)) file.copy(datafiles, traindir)
vignettes <- file.path(td, list.files(td,
pattern = extensionPattern(".jpg"), recursive = TRUE))
if (length(vignettes)) file.copy(vignettes, dir_)
@@ -85,43 +96,31 @@
AllItems <- ls(Zidb)
Vigns <- AllItems[-grep("_dat1", AllItems)]
## Copy all vignettes in the "_" directory
- ext <- Zidb[[".ImageType"]]
+ imgext <- Zidb[[".ImageType"]]
for (j in 1:length(Vigns)){
From <- Vigns[j]
- To <- file.path(dir_, paste(From, ext, sep = "."))
+ To <- file.path(dir_, paste(From, imgext, sep = "."))
writeBin(Zidb[[From]], To)
}
## Save vignettes
ZI.sample <- Zidb$.Data
- save(ZI.sample, file = file.path(dir, paste(sub(".zidb", "",
+ save(ZI.sample, file = file.path(traindir, paste(sub(".zidb", "",
basename(zidbfiles[i])), "_dat1.RData", sep = "")))
}
}
- progress(101) # Clear progression indicator
-
- ## Create the other directories
- Lines <- scan(groups.template, character(), sep = "\n", skip = 2,
- quiet = TRUE)
- if (!length(Lines)) {
- warning(sprintf("'%s' is empty or corrupted!", groups.template))
- return(invisible(FALSE))
+ progress(zmax + 1) # Clear progression indicator
+
+ ### TODO: relocate vignettes in subdirectories, if ident is not NULL
+ if (length(ident)) {
+
}
- Lines <- file.path(dir, Lines)
- message("Making directories...")
- for (i in 1:length(Lines)) {
- message(Lines[i])
- dir.create(Lines[i], recursive = TRUE)
- }
- ### TODO: relocate vignettes in subdirectories, if ident is not NULL
- ## Finish and possibly start the image viewer
message(" -- Done! --")
- if (isTRUE(as.logical(start.viewer))) imageViewer(dir_)
invisible(TRUE)
}
## Function to add new vignettes in a training set
-addToTrain <- function (traindir, zidbfiles)
+addToTrain <- function (traindir, zidbfiles, ident = NULL)
{
## Check if selected zid(b) files are already classified in the training set
Rdata <- list.files(traindir, pattern = "[.]RData$")
@@ -158,20 +157,27 @@
## treatment depends if it is a .zid or .zidb file
zidbfile <- zidbfiles[i]
if (grepl("[.]zidb$", zidbfile)) { # .zidb file
- ## TODO: extract data from .zidb files...
+ ## Link .zidb database to R objects in memory
+ Zidb <- zidbLink(zidbfile)
+ AllItems <- ls(Zidb)
+ Vigns <- AllItems[-grep("_dat1", AllItems)]
+ ## Copy all vignettes in the TopPath directory
+ imgext <- Zidb[[".ImageType"]]
+ for (j in 1:length(Vigns)){
+ From <- Vigns[j]
+ To <- file.path(ToPath, paste(From, imgext, sep = "."))
+ writeBin(Zidb[[From]], To)
+ }
+ ## Save RData file
+ ZI.sample <- Zidb$.Data
+ save(ZI.sample, file = file.path(traindir, paste(sub(".zidb", "",
+ basename(zidbfile)), "_dat1.RData", sep = "")))
-
-
-
-
-
-
-
} else { # .zid file
## Using a temporary directory to unzip all files and then copy
## the RData files to the train directory
td <- tempfile()
- unzip(zipfile = zidbfiles[i], exdir = td)
+ unzip(zipfile = zidbfile, exdir = td)
datafiles <- file.path(td, list.files(td,
pattern = extensionPattern(".RData"), recursive = TRUE))
if (length(datafiles))
@@ -186,14 +192,14 @@
unlink(td, recursive = TRUE)
}
}
- progress(101) # Clear progression indicator
+ progress(zmax + 1) # Clear progression indicator
message("-- Done --\n")
invisible(TRUE)
}
## Retrieve information from a manual training set in a 'ZITrain' object
getTrain <- function (traindir, creator = NULL, desc = NULL, keep_ = FALSE,
-na.rm = FALSE, numvars = NULL)
+na.rm = FALSE)
{
## 'traindir' must be the base directory of the manual classification
if (!checkDirExists(traindir)) return(invisible(FALSE))
@@ -239,11 +245,7 @@
nitems <- nrow(df)
## Read in all the .RData files from the root directory and merge them
- ### TODO: also collect metadata and merge them => make a merge function for
- ## ZIDat!!!
## Get measurement infos
- #### TODO: Kevin, you cannot use this! You must refer to ZI.sample directly
- ## in the arguments of the function!
ZI.sample <- NULL
load(Dats[1])
Dat <- ZI.sample
@@ -263,12 +265,6 @@
}
rownames(Dat) <- 1:nrow(Dat)
- ## Create the Id column
-# Done in the loop!
-# Dat <- cbind(Id = makeId(Dat), Dat)
-
- ## Merge Dat & df by "Id"
-# df <- merge(Dat, df, by = "Id")
## Rename Dat in df
df <- Dat
## Problem if there is no remaining row in the data frame
@@ -283,7 +279,6 @@
" vignettes without measurement data are eliminated (",
nrow(df), " items remain in the object)")
- ## Delete lines which contain NA values v1.2-2
if (any(is.na(df)))
if (isTRUE(as.logical(na.rm))) {
message("NAs found in the table of measurements and deleted")
@@ -299,84 +294,97 @@
class(df) <- Classes
## Be sure that variables are numeric (sometimes, wrong importation)
- as.numeric.Vars <- function (ZIDat, numvars) {
- if (is.null(numvars)) # Default values
- numvars <- c("ECD",
- "FIT_Area_ABD", "FIT_Diameter_ABD", "FIT_Volume_ABD",
- "FIT_Diameter_ESD", "FIT_Volume_ESD", "FIT_Length", "FIT_Width",
- "FIT_Aspect_Ratio", "FIT_Transparency", "FIT_Intensity",
- "FIT_Sigma_Intensity", "FIT_Sum_Intensity", "FIT_Compactness",
- "FIT_Elongation", "FIT_Perimeter", "FIT_Convex_Perimeter",
- "FIT_Roughness", "FIT_Feret_Max_Angle", "FIT_PPC", "FIT_Ch1_Peak",
- "FIT_Ch1_TOF", "FIT_Ch2_Peak", "FIT_Ch2_TOF", "FIT_Ch3_Peak",
- "FIT_Ch3_TOF", "FIT_Avg_Red", "FIT_Avg_Green", "FIT_Avg_Blue",
- "FIT_Red_Green_Ratio", "FIT_Blue_Green", "FIT_Red_Blue_Ratio",
- "FIT_CaptureX", "FIT_CaptureY", "FIT_SaveX", "FIT_SaveY",
- "FIT_PixelW", "FIT_PixelH", "FIT_Cal_Const",
- "Area", "Mean", "StdDev", "Mode", "Min", "Max", "X", "Y", "XM",
- "YM", "Perim.", "BX", "BY", "Width", "Height", "Major", "Minor",
- "Angle", "Circ.", "Feret", "IntDen", "Median", "Skew", "Kurt",
- "XStart", "YStart", "Dil")
+# as.numeric.Vars <- function (ZIDat, numvars) {
+# if (is.null(numvars)) # Default values
+# numvars <- c("ECD",
+# "FIT_Area_ABD", "FIT_Diameter_ABD", "FIT_Volume_ABD",
+# "FIT_Diameter_ESD", "FIT_Volume_ESD", "FIT_Length", "FIT_Width",
+# "FIT_Aspect_Ratio", "FIT_Transparency", "FIT_Intensity",
+# "FIT_Sigma_Intensity", "FIT_Sum_Intensity", "FIT_Compactness",
+# "FIT_Elongation", "FIT_Perimeter", "FIT_Convex_Perimeter",
+# "FIT_Roughness", "FIT_Feret_Max_Angle", "FIT_PPC", "FIT_Ch1_Peak",
+# "FIT_Ch1_TOF", "FIT_Ch2_Peak", "FIT_Ch2_TOF", "FIT_Ch3_Peak",
+# "FIT_Ch3_TOF", "FIT_Avg_Red", "FIT_Avg_Green", "FIT_Avg_Blue",
+# "FIT_Red_Green_Ratio", "FIT_Blue_Green", "FIT_Red_Blue_Ratio",
+# "FIT_CaptureX", "FIT_CaptureY", "FIT_SaveX", "FIT_SaveY",
+# "FIT_PixelW", "FIT_PixelH", "FIT_Cal_Const",
+# "Area", "Mean", "StdDev", "Mode", "Min", "Max", "X", "Y", "XM",
+# "YM", "Perim.", "BX", "BY", "Width", "Height", "Major", "Minor",
+# "Angle", "Circ.", "Feret", "IntDen", "Median", "Skew", "Kurt",
+# "XStart", "YStart", "Dil")
+#
+# ## Make sure numvars are numeric
+# Names <- names(ZIDat)
+# for (numvar in numvars) {
+# if (numvar %in% Names && !is.numeric(ZIDat[, numvar]))
+# ZIDat[, numvar] <- as.numeric(ZIDat[, numvar])
+# }
+# ZIDat
+# }
+# as.numeric.Vars(df, numvars = numvars)
- ## Make sure numvars are numeric
- Names <- names(ZIDat)
- for (numvar in numvars) {
- if (numvar %in% Names && !is.numeric(ZIDat[, numvar]))
- ZIDat[, numvar] <- as.numeric(ZIDat[, numvar])
- }
- ZIDat
- }
- as.numeric.Vars(df, numvars = numvars)
+ df
}
-recode.ZITrain <- function (ZITrain, ZIRecode, warn.only = FALSE)
-{
- if (!inherits(ZITrain, "ZITrain"))
+.recodeLevels <- function (object, depth = 1)
+{
+ if (!inherits(object, "ZITrain"))
stop("'ZITrain' must be a 'ZITrain' object")
- if (!inherits(ZIRecode, "ZIRecode"))
- stop("'ZIRecode' must be a 'ZIRecode' object")
- ## Check that all levels in ZITrain$Class are represented in ZIRecode
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/zooimage -r 232
More information about the Zooimage-commits
mailing list