[Zooimage-commits] r230 - in pkg: mlearning/R mlearning/man zooimage/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Aug 6 15:47:19 CEST 2012
Author: phgrosjean
Date: 2012-08-06 15:47:19 +0200 (Mon, 06 Aug 2012)
New Revision: 230
Modified:
pkg/mlearning/R/mlearning.R
pkg/mlearning/man/mlearning.Rd
pkg/zooimage/R/ZIClass.R
Log:
mlearning: bug correction in cvpredict.mlearning() and changed member to membership everywhere
Modified: pkg/mlearning/R/mlearning.R
===================================================================
--- pkg/mlearning/R/mlearning.R 2012-08-04 23:42:45 UTC (rev 229)
+++ pkg/mlearning/R/mlearning.R 2012-08-06 13:47:19 UTC (rev 230)
@@ -112,17 +112,6 @@
## Construct the mlearning object
match.fun(method)(train = train, response = response, .args. = args, ...)
-
- ## Call the corresponding workhorse function
- #res <- match.fun(paste(".", method, sep = ""))(train = train,
- # response = response, formula = formula, data = data, args, ...)
-
- ## Return a mlearning object
- #structure(res$object, formula = formula, train = train, response = response,
- # levels = lev, n = nobs, optim = optim, numeric.only = res$numeric.only,
- # type = type, pred.type = res$pred.type, summary = res$summary,
- # na.action = substitute(na.action), mlearning.call = call,
- # method = method, algorithm = res$algorithm, class = res$class)
}
print.mlearning <- function (x, ...)
@@ -252,7 +241,7 @@
}
predict.mlearning <- function(object, newdata,
-type = c("class", "member", "both"), method = c("direct", "cv"),
+type = c("class", "membership", "both"), method = c("direct", "cv"),
na.action = na.exclude, ...)
{
## Not usable for unsupervised type
@@ -296,7 +285,7 @@
## Otherwise, this is a supervised classification
type <- as.character(type)[1]
## Special case for both
- if (type == "both") type <- c("class", "member")
+ if (type == "both") type <- c("class", "membership")
## Check that type is supported and look for corresponding type name
## in original predict() method
pred.type <- attr(object, "pred.type")
@@ -304,11 +293,11 @@
stop("unsupported predict type")
if (length(type) == 2) {
- ## Special case where we predict both class and member
+ ## Special case where we predict both class and membership
classes <- predict(object, newdata = newdata,
type = pred.type["class"], ...)
members <- predict(object, newdata = newdata,
- type = pred.type["member"], ...)
+ type = pred.type["membership"], ...)
## Create a list with both res
levels <- levels(object)
return(list(class = .expandFactor(factor(as.character(classes),
@@ -335,7 +324,7 @@
cvpredict <- function (object, ...)
UseMethod("cvpredict")
-cvpredict.mlearning <- function(object, type = c("class", "member", "both"),
+cvpredict.mlearning <- function(object, type = c("class", "membership", "both"),
cv.k = 10, cv.strat = TRUE, ...)
{
type <- switch(attr(object, "type"),
@@ -346,13 +335,13 @@
if (type == "class") {
predictions <- TRUE
getmodels <- FALSE
- } else if (type == "member") {
+ } else if (type == "membership") {
predictions <- FALSE
getmodels <- TRUE
} else if (type == "both") {
predictions <- TRUE
getmodels <- TRUE
- } else stop("type must be 'class', 'member' or 'both'")
+ } else stop("type must be 'class', 'membership' or 'both'")
## Create data, using numbers are rownames
data <- data.frame(.response. = response(object), train(object))
@@ -368,7 +357,7 @@
}
Predict <- constructPredict(...)
- ## Perform cross-validation or bootstrap for prediction
+ ## Perform cross-validation for prediction
args <- attr(object, "args")
if (!is.list(args)) args <- list()
args$formula <- substitute(.response. ~ .)
@@ -385,13 +374,16 @@
if (type == "class") {
res <- est$predictions
} else {
- ## Need to calculate member
- predMember <- function (x, object, ...)
+ ## Need to calculate membership
+ predCV <- function (x, object, ...) {
+ Train <- train(object)
+ rownames(Train) <- 1:NROW(Train)
suppressWarnings(predict(x, newdata =
- train(object)[-as.numeric(rownames(train(x))), ], ...))
+ Train[-as.numeric(rownames(train(x))), ], ...))
+ }
## Apply predict on all model and collect results together
- member <- lapply(est$models, predMember, object = object, type = "member",
+ member <- lapply(est$models, predCV, object = object, type = "membership",
na.action = na.exclude, ...)
## Concatenate results
@@ -399,11 +391,33 @@
## Sort in correct order and replace initial rownames
ord <- as.numeric(rownames(member))
+ ## Sometimes, errorest() duplicates one or two items in two models
+ ## (rounding errors?) => eliminate them here
+ notDup <- !duplicated(ord)
+ member <- member[notDup, ]
+ ord <- ord[notDup]
+
+ # Restore order of the items
rownames(member) <- rn[ord]
- member <- member[order(ord), ]
+ pos <- order(ord)
+ member <- member[pos, ]
- if (type == "member") res <- member else
- res <- list(class = est$predictions, member = member)
+ if (type == "membership") {
+ res <- member
+ } else { # Need both class and membership
+ ## Because we don't know who is who in est$predictions in case of
+ ## duplicated items in est$models, we prefer to recalculate classes
+ classes <- unlist(lapply(est$models, predCV, object = object,
+ type = "class", na.action = na.exclude, ...))
+ classes <- classes[notDup]
+ classes <- classes[pos]
+
+ ## Check that both classes are the same!
+ if (any(classes != est$predictions))
+ warning("cross-validated classes do not match")
+
+ res <- list(class = classes, membership = member)
+ }
}
## Add est object as "method" attribute, without predictions or models
@@ -449,7 +463,7 @@
grouping = response, ...), formula = .args.$formula, train = train,
response = response, levels = .args.$levels, n = .args.$n, args = dots,
optim = .args.$optim, numeric.only = TRUE, type = .args.$type,
- pred.type = c(class = "class", member = "posterior", projection = "x"),
+ pred.type = c(class = "class", membership = "posterior", projection = "x"),
summary = NULL, na.action = .args.$na.action,
mlearning.call = .args.$mlearning.call, method = .args.$method,
algorithm = "linear discriminant analysis",
@@ -457,7 +471,7 @@
}
predict.mlLda <- function(object, newdata,
-type = c("class", "member", "both", "projection"), prior = object$prior,
+type = c("class", "membership", "both", "projection"), prior = object$prior,
dimension, method = c("plug-in", "predictive", "debiased", "cv"), ...)
{
if (!inherits(object, "mlLda"))
@@ -508,12 +522,12 @@
## Rework results according to what we want
switch(as.character(type)[1],
class = factor(as.character(res$class), levels = levels(object)),
- member = .membership(res$posterior, levels = levels(object)),
+ membership = .membership(res$posterior, levels = levels(object)),
both = list(class = factor(as.character(res$class),
- levels = levels(object)), member = .membership(res$posterior,
+ levels = levels(object)), membership = .membership(res$posterior,
levels = levels(object))),
projection = res$x,
- stop("unrecognized 'type' (must be 'class', 'member', 'both' or 'projection')"))
+ stop("unrecognized 'type' (must be 'class', 'membership', 'both' or 'projection')"))
}
mlQda <- function (...)
@@ -547,14 +561,14 @@
grouping = response, ...), formula = .args.$formula, train = train,
response = response, levels = .args.$levels, n = .args.$n, args = dots,
optim = .args.$optim, numeric.only = TRUE, type = .args.$type,
- pred.type = c(class = "class", member = "posterior"),
+ pred.type = c(class = "class", membership = "posterior"),
summary = NULL, na.action = .args.$na.action,
mlearning.call = .args.$mlearning.call, method = .args.$method,
algorithm = "quadratic discriminant analysis",
class = c("mlQda", "mlearning", "qda"))
}
-predict.mlQda <- function(object, newdata, type = c("class", "member", "both"),
+predict.mlQda <- function(object, newdata, type = c("class", "membership", "both"),
prior = object$prior, method = c("plug-in", "predictive", "debiased", "looCV",
"cv"), ...)
{
@@ -594,11 +608,11 @@
## Rework results according to what we want
switch(as.character(type)[1],
class = factor(as.character(res$class), levels = levels(object)),
- member = .membership(res$posterior, levels = levels(object)),
+ membership = .membership(res$posterior, levels = levels(object)),
both = list(class = factor(as.character(res$class),
- levels = levels(object)), member = .membership(res$posterior,
+ levels = levels(object)), membership = .membership(res$posterior,
levels = levels(object))),
- stop("unrecognized 'type' (must be 'class', 'member' or 'both')"))
+ stop("unrecognized 'type' (must be 'class', 'membership' or 'both')"))
}
mlRforest <- function (...)
@@ -658,7 +672,7 @@
structure(res, formula = .args.$formula, train = train,
response = response, levels = .args.$levels, n = .args.$n, args = dots,
optim = .args.$optim, numeric.only = FALSE, type = .args.$type,
- pred.type = c(class = "response", member = "prob", vote ="vote"),
+ pred.type = c(class = "response", membership = "prob", vote ="vote"),
summary = NULL, na.action = .args.$na.action,
mlearning.call = .args.$mlearning.call, method = .args.$method,
algorithm = "random forest",
@@ -666,7 +680,7 @@
}
predict.mlRforest <- function(object, newdata,
-type = c("class", "member", "both", "vote"), method = c("direct", "oob", "cv"),
+type = c("class", "membership", "both", "vote"), method = c("direct", "oob", "cv"),
...)
{
type <- as.character(type)[1]
@@ -700,15 +714,15 @@
res <- switch(type,
class = factor(as.character(object$predicted),
levels = levels(object)),
- member = .membership(toProps(object$votes, object$ntree),
+ membership = .membership(toProps(object$votes, object$ntree),
levels = levels(object)),
both = list(class = factor(as.character(object$predicted),
levels = levels(object)),
- member = .membership(toProps(object$votes, object$ntree),
+ membership = .membership(toProps(object$votes, object$ntree),
levels = levels(object))),
vote = .membership(toVotes(object$votes, object$ntree),
levels = levels(object)),
- stop("unknown type, must be 'class', 'member', 'both' or 'vote'"))
+ stop("unknown type, must be 'class', 'membership', 'both' or 'vote'"))
attr(res, "method") <- list(name = "out-of-bag")
res
@@ -801,7 +815,7 @@
structure(res, formula = .args.$formula, train = train,
response = response, levels = .args.$levels, n = .args.$n, args = dots,
optim = .args.$optim, numeric.only = TRUE, type = .args.$type,
- pred.type = c(class = "class", member = "raw"),
+ pred.type = c(class = "class", membership = "raw"),
summary = "summary", na.action = .args.$na.action,
mlearning.call = .args.$mlearning.call, method = .args.$method,
algorithm = "single-hidden-layer neural network",
@@ -990,7 +1004,7 @@
laplace = laplace, ...), formula = .args.$formula, train = train,
response = response, levels = .args.$levels, n = .args.$n, args = dots,
optim = .args.$optim, numeric.only = FALSE, type = .args.$type,
- pred.type = c(class = "class", member = "raw"),
+ pred.type = c(class = "class", membership = "raw"),
summary = NULL, na.action = .args.$na.action,
mlearning.call = .args.$mlearning.call, method = .args.$method,
algorithm = "naive Bayes classifier",
@@ -1041,7 +1055,7 @@
# structure(do.call(WekaClassifier, wekaArgs), formula = .args.$formula,
# train = train, response = response, levels = .args.$levels, n = .args.$n,
# args = dots, optim = .args.$optim, numeric.only = FALSE,
-# type = .args.$type, pred.type = c(class = "class", member = "probability"),
+# type = .args.$type, pred.type = c(class = "class", membership = "probability"),
# summary = "summary", na.action = .args.$na.action,
# mlearning.call = .args.$mlearning.call, method = .args.$method,
# algorithm = "Weka naive Bayes classifier",
Modified: pkg/mlearning/man/mlearning.Rd
===================================================================
--- pkg/mlearning/man/mlearning.Rd 2012-08-04 23:42:45 UTC (rev 229)
+++ pkg/mlearning/man/mlearning.Rd 2012-08-06 13:47:19 UTC (rev 230)
@@ -50,24 +50,24 @@
\method{summary}{mlearning}(object, ...)
\method{print}{summary.mlearning}(x, ...)
\method{plot}{mlearning}(x, y, ...)
-\method{predict}{mlearning}(object, newdata, type = c("class", "member", "both"),
+\method{predict}{mlearning}(object, newdata, type = c("class", "membership", "both"),
method = c("direct", "cv"), na.action = na.exclude, ...)
cvpredict(object, ...)
-\method{cvpredict}{mlearning}(object, type = c("class", "member", "both"),
+\method{cvpredict}{mlearning}(object, type = c("class", "membership", "both"),
cv.k = 10, cv.strat = TRUE, ...)
mlLda(...)
\method{mlLda}{default}(train, response, ...)
\method{mlLda}{formula}(formula, data, ..., subset, na.action)
-\method{predict}{mlLda}(object, newdata, type = c("class", "member", "both",
+\method{predict}{mlLda}(object, newdata, type = c("class", "membership", "both",
"projection"), prior = object$prior, dimension,
method = c("plug-in", "predictive", "debiased", "cv"), ...)
mlQda(...)
\method{mlQda}{default}(train, response, ...)
\method{mlQda}{formula}(formula, data, ..., subset, na.action)
-\method{predict}{mlQda}(object, newdata, type = c("class", "member", "both"),
+\method{predict}{mlQda}(object, newdata, type = c("class", "membership", "both"),
prior = object$prior, method = c("plug-in", "predictive", "debiased",
"looCV", "cv"), ...)
@@ -75,7 +75,7 @@
\method{mlRforest}{default}(train, response, ntree = 500, mtry, replace = TRUE, classwt = NULL, ...)
\method{mlRforest}{formula}(formula, data, ntree = 500, mtry, replace = TRUE, classwt = NULL, ...,
subset, na.action)
-\method{predict}{mlRforest}(object, newdata, type = c("class", "member", "both",
+\method{predict}{mlRforest}(object, newdata, type = c("class", "membership", "both",
"vote"), method = c("direct", "oob", "cv"), ...)
mlNnet(...)
@@ -147,10 +147,10 @@
predictions. }
\item{type}{ the type of result to get. Usually, \code{"class"}, which is the
default. Depending on the algorithm, other types are also available.
- \code{member} and \code{both} are almost always available too. \code{member}
- corresponds to posterior probability, raw results, normalized votes, etc.,
- depending on the machine learning algorithm. With \code{both}, class and
- member are both returned at once in a list. }
+ \code{membership} and \code{both} are almost always available too.
+ \code{membership} corresponds to posterior probability, raw results,
+ normalized votes, etc., depending on the machine learning algorithm. With
+ \code{both}, class and membership are both returned at once in a list. }
\item{train}{ a matrix or data frame with predictors. }
\item{response}{ a vector of factor (classification) or numeric (regression),
or \code{NULL} (unsupervised classification). }
@@ -224,8 +224,8 @@
summary(irLda)
plot(irLda, col = as.numeric(response(irLda)) + 1)
predict(irLda, newdata = irisTest) # class (default type)
-predict(irLda, type = "member") # posterior probability
-predict(irLda, type = "both") # both class and member in a list
+predict(irLda, type = "membership") # posterior probability
+predict(irLda, type = "both") # both class and membership in a list
## Sometimes, other types are allowed, like for lda:
predict(irLda, type = "projection") # Projection on the LD axes
## Add test set items to the previous plot
@@ -251,7 +251,7 @@
## Factor levels with missing items are allowed
ir2 <- iris[-(51:100), ] # No Iris versicolor in the training set
summary(res <- mlLda(Species ~ ., data = ir2)) # virginica is NOT there
-## Missing levels are reinjected in class or member by predict()
+## Missing levels are reinjected in class or membership by predict()
predict(res, type = "both")
## ... but, of course, the classifier is wrong for Iris versicolor
confusion(predict(res, newdata = iris), iris$Species)
@@ -276,7 +276,7 @@
## For such a relatively simple case, 50 trees are enough
summary(res <- mlRforest(Species ~ ., data = irisTrain, ntree = 50))
predict(res) # Default type is class
-predict(res, type = "member")
+predict(res, type = "membership")
predict(res, type = "both")
predict(res, type = "vote")
## Out-of-bag prediction
@@ -310,7 +310,7 @@
set.seed(689)
summary(res <- mlNnet(Species ~ ., data = irisTrain))
predict(res) # Default type is class
-predict(res, type = "member")
+predict(res, type = "membership")
predict(res, type = "both")
confusion(res) # Self-consistency
confusion(predict(res, newdata = irisTest), irisTest$Species) # Test set perfs
@@ -338,7 +338,7 @@
## Supervised classification using naive Bayes
summary(res <- mlNaiveBayes(Species ~ ., data = irisTrain))
predict(res) # Default type is class
-predict(res, type = "member")
+predict(res, type = "membership")
predict(res, type = "both")
confusion(res) # Self-consistency
confusion(predict(res, newdata = irisTest), irisTest$Species) # Test set perfs
Modified: pkg/zooimage/R/ZIClass.R
===================================================================
--- pkg/zooimage/R/ZIClass.R 2012-08-04 23:42:45 UTC (rev 229)
+++ pkg/zooimage/R/ZIClass.R 2012-08-06 13:47:19 UTC (rev 230)
@@ -16,13 +16,9 @@
## along with ZooImage. If not, see <http://www.gnu.org/licenses/>.
ZIClass <- function (formula, data, mlearning = getOption("ZI.mlearning",
-mlRforest), calc.vars = getOption("ZI.calcVars", calcVars), k.xval = 10, ...,
-subset, na.action = getOption("ZI.naAction", na.omit))
-{
- ## Added by Kev... should not be necessary!
- # calcVars removes attributes of x --> extract path here before calcVars application
-# Path <- attr(data, "path")
-
+mlRforest), calc.vars = getOption("ZI.calcVars", calcVars), cv.k = 10, cv.strat,
+..., subset, na.action = getOption("ZI.naAction", na.omit))
+{
## Check calc.vars and use it on data
if (length(calc.vars))
if (!is.function(calc.vars)) {
@@ -30,10 +26,11 @@
} else data <- calc.vars(data)
## Machine learning function
+ mlearning <- match.fun(mlearning)
if (!is.function(mlearning))
- stop("'mlearning' must be a function that produce a 'mlearning' object ()or a compatible one")
+ stop("'mlearning' must be a function that produce a 'mlearning' object or a compatible one")
- ## train the machine learning algorithm
+ ## Train the machine learning algorithm
if (missing(subset) || !length(subset)) {
ZI.class <- mlearning(formula, data = data, ..., na.action = na.action)
} else {
@@ -43,50 +40,17 @@
## Add ZIClass as class of the object
class(ZI.class) <- c("ZIClass", class(ZI.class))
-
-# structure(naiveBayes(x = train, y = cl, laplace = laplace, ...),
-# data = subdata$data, vars = subdata$vars, classes = subdata$classes,
-# levels = subdata$levels, call = match.call(),
-# algorithm = "naive Bayes classifier",
-# class = c("mlNaiveBayes", "mlearning", "naiveBayes"))
-#
-# ZI.class <- structure(ZI.class,
-# class = c("ZIClass", class(ZI.class)),
-# algorithm = algorithm, calc.vars = calc.vars,
-# classes = data[[as.character(formula)[2]]]
-# )
+ attr(ZI.class, "calc.vars") <- calc.vars
## Calculate predictions with full training set
attr(ZI.class, "predict") <- predict(ZI.class, data, calc.vars = FALSE)
- ## Calculation of probabilities
-# if (algorithm == "randomForest") {
-# ## Use Formula for the probabilities v1.2-2
-# rf <- randomForest(formula = formula, data = data)
-# attr(ZI.class, "proba") <- predict(object = rf, newdata = data,
-# type = "prob")
-# }
-
## Possibly make a k-fold cross-validation and check results
- if (length(k.xval)) {
- # Modification to accept classifier from party package : ctree and cforest
- if (algorithm == "lda") {
- mypredict <- function (object, newdata)
- predict(object, newdata = newdata)$class
- } else if (algorithm %in% c("ctree", "cforest")){
- mypredict <- function(object, newdata)
- predict(object, newdata = newdata, type = "response", OOB = FALSE)
- } else {
- mypredict <- function (object, newdata)
- predict(object, newdata = newdata, type = "class")
- }
- res <- cv(attr(ZI.class, "classes"), formula, data = df,
- model = get(algorithm), predict = mypredict, k = k.xval,
- predictions = TRUE, ...)$predictions
- attr(ZI.class, "kfold.predict") <- res
- attr(ZI.class, "k") <- k.xval
- attr(ZI.class, "formula") <- formula
- attr(ZI.class, "path") <- attr(data, "path")
+ if (length(cv.k)) {
+ attr(ZI.class, "cvpredict") <- cvpredict(ZI.class, type = "both",
+ cv.k = cv.k, cv.strat = cv.strat)
+ attr(ZI.class, "k") <- cv.k
+ attr(ZI.class, "strat") <- cv.strat
}
ZI.class
}
@@ -122,7 +86,7 @@
invisible(x)
}
-summary.ZIClass <- function(object, sort.by = NULL, decreasing = FALSE,
+summary.ZIClass <- function(object, sort.by = "Fscore", decreasing = TRUE,
na.rm = FALSE, ...)
{
## Get the confusion object out of a ZIClass object and calc stats from there
@@ -176,38 +140,38 @@
res
}
-confusion.ZIClass <- function (x, ...)
-{
- ## If the object is ZIClass, calculate 'confusion'
- ## from attributes 'classes' and 'kfold.predict'
- if (!inherits(x, "ZIClass"))
- stop("'x' must be a 'ZIClass' object")
-
- x <- attr(x, "classes")
- y <- attr(x, "kfold.predict")
- labels <- c("Class", "Predict")
- clCompa <- data.frame(Class = x, Predict = y)
- ## How many common objects by level?
- NbrPerClass1 <- table(clCompa[, 1])
- ## How many predicted objects
- NbrPerClass2 <- table(clCompa[, 2])
- ## Confusion matrix
- Conf <- table(clCompa)
- ## Further stats: total, true positives, accuracy
- Total <- sum(Conf)
- TruePos <- sum(diag(Conf))
- Stats <- c(total = Total, truepos = TruePos, accuracy = TruePos / Total * 100)
-
- ## Change labels to get a more compact presentation
- colnames(Conf) <- formatC(1:ncol(Conf), digits = 1, flag = "0")
- rownames(Conf) <- paste(colnames(Conf), rownames(Conf))
-
- ## Additional data as attributes
- attr(Conf, "stats") <- Stats
- attr(Conf, "nbr.rows") <- NbrPerClass1
- attr(Conf, "nbr.cols") <- NbrPerClass2
-
- ## This is a confusion object
- class(Conf) <- c("confusion", "table")
- Conf
-}
+#confusion.ZIClass <- function (x, ...)
+#{
+# ## If the object is ZIClass, calculate 'confusion'
+# ## from attributes 'classes' and 'kfold.predict'
+# if (!inherits(x, "ZIClass"))
+# stop("'x' must be a 'ZIClass' object")
+#
+# x <- attr(x, "classes")
+# y <- attr(x, "kfold.predict")
+# labels <- c("Class", "Predict")
+# clCompa <- data.frame(Class = x, Predict = y)
+# ## How many common objects by level?
+# NbrPerClass1 <- table(clCompa[, 1])
+# ## How many predicted objects
+# NbrPerClass2 <- table(clCompa[, 2])
+# ## Confusion matrix
+# Conf <- table(clCompa)
+# ## Further stats: total, true positives, accuracy
+# Total <- sum(Conf)
+# TruePos <- sum(diag(Conf))
+# Stats <- c(total = Total, truepos = TruePos, accuracy = TruePos / Total * 100)
+#
+# ## Change labels to get a more compact presentation
+# colnames(Conf) <- formatC(1:ncol(Conf), digits = 1, flag = "0")
+# rownames(Conf) <- paste(colnames(Conf), rownames(Conf))
+#
+# ## Additional data as attributes
+# attr(Conf, "stats") <- Stats
+# attr(Conf, "nbr.rows") <- NbrPerClass1
+# attr(Conf, "nbr.cols") <- NbrPerClass2
+#
+# ## This is a confusion object
+# class(Conf) <- c("confusion", "table")
+# Conf
+#}
More information about the Zooimage-commits
mailing list