[Zooimage-commits] r232 - in pkg: mlearning mlearning/R mlearning/man zooimage zooimage/R zooimage/man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Aug 10 10:25:12 CEST 2012


Author: phgrosjean
Date: 2012-08-10 10:25:11 +0200 (Fri, 10 Aug 2012)
New Revision: 232

Modified:
   pkg/mlearning/NAMESPACE
   pkg/mlearning/R/confusion.R
   pkg/mlearning/R/mlearning.R
   pkg/mlearning/man/confusion.Rd
   pkg/zooimage/NAMESPACE
   pkg/zooimage/R/ZIClass.R
   pkg/zooimage/R/ZITrain.R
   pkg/zooimage/R/gui.R
   pkg/zooimage/R/utilities.R
   pkg/zooimage/man/ZIClass.Rd
   pkg/zooimage/man/ZITrain.Rd
   pkg/zooimage/man/guiutils.Rd
   pkg/zooimage/man/utilities.Rd
Log:
Changes in mlearning (weights -> prior) + refactoring of ZIClass and associated functions

Modified: pkg/mlearning/NAMESPACE
===================================================================
--- pkg/mlearning/NAMESPACE	2012-08-06 14:04:45 UTC (rev 231)
+++ pkg/mlearning/NAMESPACE	2012-08-10 08:25:11 UTC (rev 232)
@@ -25,7 +25,8 @@
 export(response)
 export(train)
 
-export("weights<-")
+export(prior)
+export("prior<-")
 
 export(confusion)
 export(confusionImage)
@@ -50,8 +51,8 @@
 S3method(response, default)
 S3method(train, default)
 
-S3method(weights, confusion)
-S3method("weights<-", confusion)
+S3method(prior, confusion)
+S3method("prior<-", confusion)
 
 S3method(summary, lvq)
 S3method(print, summary.lvq)

Modified: pkg/mlearning/R/confusion.R
===================================================================
--- pkg/mlearning/R/confusion.R	2012-08-06 14:04:45 UTC (rev 231)
+++ pkg/mlearning/R/confusion.R	2012-08-10 08:25:11 UTC (rev 232)
@@ -1,7 +1,8 @@
 confusion <- function (x, ...)
 	UseMethod("confusion")
 
-.confusion <- function (classes, labels, weights, ...)
+## TODO: implement weights
+.confusion <- function (classes, labels, prior, ...)
 {
 	res <- table(classes, dnn = labels)
 	total <- sum(res)
@@ -13,28 +14,28 @@
 	attr(res, "col.freqs") <- colSums(res)
 	attr(res, "levels") <- levels(classes[1, ]) # These are *initial* levels!
 	## Final levels may differ if there are empty levels, or NAs!
-	attr(res, "weights") <- row.freqs # Initial weights are row.freqs
+	attr(res, "prior") <- row.freqs # Initial prior are row.freqs
 	attr(res, "stats") <- c(total = total, truepos = truePos,
 		error = 1 - (truePos / total))
 	
 	## This is a confusion object, inheriting from table
 	class(res) <- c("confusion", "table")
 	
-	## Do we reweight the confusion matrix?
-	if (!missing(weights)) weights(res) <- weights
+	## Do we rescale the confusion matrix?
+	if (!missing(prior)) prior(res) <- prior
 	
 	res
 }
 	
 confusion.default <- function (x, y = NULL, vars = c("Actual", "Predicted"),
-labels = vars, merge.by = "Id", weights, ...)
+labels = vars, merge.by = "Id", prior, ...)
 {	
 	## If the object is already a 'confusion' object, return it
 	if (inherits(x, "confusion")) {
 		if (!missing(y))
 			warning("you cannot provide 'y' when 'x' is a 'confusion' object")
-		## Possibly reweight it
-		if (!missing(weights)) weights(x) <- weights		
+		## Possibly rescale it
+		if (!missing(prior)) prior(x) <- prior		
 		return(x)
 	}
 	
@@ -42,7 +43,7 @@
 	conf <- attr(x, "confusion")
 	if (!is.null(conf) && missing(y)) {
 		## Possibly reweight it
-		if (!missing(weights)) weights(conf) <- weights
+		if (!missing(prior)) prior(conf) <- prior
 		return(conf)
 	}	
 
@@ -120,15 +121,15 @@
 	}
 	
 	## Construct the confusion object
-	if (missing(weights)) {
+	if (missing(prior)) {
 		.confusion(classes = clCompa, labels = labels, ...)
 	} else {
-		.confusion(classes = clCompa, labels = labels, weights = weights, ...)
+		.confusion(classes = clCompa, labels = labels, prior = prior, ...)
 	}
 }
 
 confusion.mlearning <- function (x, y = response(x),
-labels = c("Actual", "Predicted"), weights, ...) {
+labels = c("Actual", "Predicted"), prior, ...) {
 	## Check labels
 	labels <- as.character(labels)
 	if (length(labels) != 2)
@@ -150,35 +151,38 @@
 	}
 	
 	## Construct the confusion object
-	if (missing(weights)) {
+	if (missing(prior)) {
 		.confusion(data.frame(class1 = y, class2 = class2),
 			labels = labels, ...)
 	} else {
 		.confusion(data.frame(class1 = y, class2 = class2),
-			labels = labels, weights = weights, ...)
+			labels = labels, prior = prior, ...)
 	}
 }
 
-weights.confusion <- function (object, ...)
-	attr(object, "weights")
+prior <- function (object, ...)
+	UseMethod("prior")
 
-`weights<-`<- function (object, ..., value)
-	UseMethod("weights<-")
+prior.confusion <- function (object, ...)
+	attr(object, "prior")
 
-`weights<-.confusion`<- function (object, ..., value)
+`prior<-`<- function (object, ..., value)
+	UseMethod("prior<-")
+
+`prior<-.confusion`<- function (object, ..., value)
 {
+	rsums <- rowSums(object)
 	if (!length(value)) { # value is NULL or of zero length
-		## Reset weights to original frequencies
+		## Reset prior to original frequencies
 		value <- attr(object, "row.freqs")
-		attr(object, "weights") <- value
-		round(object / apply(object, 1, sum) * value)
+		res <- round(object / rsums * value)
 	
 	} else if (is.numeric(value)) { # value is numeric
 		
 		if (length(value) == 1) { # value is a single number
 			if (is.na(value) || !is.finite(value) || value <= 0)
 				stop("value must be a finite positive number")
-			res <- object / apply(object, 1, sum) * as.numeric(value)
+			res <- object / rsums * as.numeric(value)
 		
 		} else { # value is a vector of numerics
 			## It must be either of the same length as nrow(object) or of
@@ -207,12 +211,15 @@
 
 			} else stop("length of 'value' do not match the number of levels in the confusion matrix")	
 			
-			res <- object / apply(object, 1, sum) * as.numeric(value)
+			res <- object / rsums * as.numeric(value)
 		}
-		attr(res, "weights") <- rowSums(res)
-		res
 		
 	} else stop("value must be a numeric vector, a single number or NULL")
+	
+	attr(res, "prior") <- value
+	## Take care to rows with no items! => put back zeros!
+	res[rsums == 0] <- 0
+	res
 }
 
 print.confusion <- function (x, sums = TRUE, error.col = sums, digits = 0,
@@ -225,10 +232,10 @@
 		" true positives (error rate = ", Error, "%)\n",
 		sep = "")
 	row.freqs <- attr(x, "row.freqs")
-	if (!all(attr(x, "weights") == row.freqs)) {
-		cat("with initial row weights (frequencies):\n")
+	if (!all(attr(x, "prior") == row.freqs)) {
+		cat("with initial row frequencies:\n")
 		print(row.freqs)
-		cat("Reweighted to:\n")
+		cat("Rescaled to:\n")
 	}
 	
 	## Print the confusion matrix itself

Modified: pkg/mlearning/R/mlearning.R
===================================================================
--- pkg/mlearning/R/mlearning.R	2012-08-06 14:04:45 UTC (rev 231)
+++ pkg/mlearning/R/mlearning.R	2012-08-10 08:25:11 UTC (rev 232)
@@ -413,8 +413,8 @@
 			classes <- classes[notDup]
 			classes <- classes[pos]
 			
-			## Check that both classes are the same!
-			if (any(classes != est$predictions))
+			## Check that both classes levels are the same!
+			if (any(levels(classes) != levels(est$predictions)))
 				warning("cross-validated classes do not match")
 
 			res <- list(class = classes, membership = membership)

Modified: pkg/mlearning/man/confusion.Rd
===================================================================
--- pkg/mlearning/man/confusion.Rd	2012-08-06 14:04:45 UTC (rev 231)
+++ pkg/mlearning/man/confusion.Rd	2012-08-10 08:25:11 UTC (rev 232)
@@ -10,9 +10,10 @@
 \alias{confusionBarplot}
 \alias{confusionStars}
 \alias{confusionDendrogram}
-\alias{weights.confusion}
-\alias{weights<-}
-\alias{weights<-.confusion}
+\alias{prior}
+\alias{prior.confusion}
+\alias{prior<-}
+\alias{prior<-.confusion}
 
 \title{ Construct and analyze confusion matrices }
 \description{
@@ -25,9 +26,9 @@
 \usage{
 confusion(x, \dots)
 \method{confusion}{default}(x, y = NULL, vars = c("Actual", "Predicted"),
-    labels = vars, merge.by = "Id", weights, \dots)
+    labels = vars, merge.by = "Id", prior, \dots)
 \method{confusion}{mlearning}(x, y = response(x),
-    labels = c("Actual", "Predicted"), weights, \dots)
+    labels = c("Actual", "Predicted"), prior, \dots)
 
 \method{print}{confusion}(x, sums = TRUE, error.col = sums, digits = 0,
     sort = "ward", \dots)
@@ -49,9 +50,10 @@
 confusionDendrogram(x, y = NULL, labels = rownames(x), sort = "ward",
     main = "Groups clustering", \dots)
 
-\method{weights}{confusion}(object, \dots)
-weights(object, \dots) <- value
-\method{weights}{confusion}(object, \dots) <- value
+prior(object, \dots)
+\method{prior}{confusion}(object, \dots)
+prior(object, \dots) <- value
+\method{prior}{confusion}(object, \dots) <- value
 }
 
 \arguments{
@@ -66,7 +68,7 @@
     the same as \code{vars} or the one in the confusion matrix. }
   \item{merge.by}{ a character string with the name of variables to use to merge
     the two data frames, or \code{NULL}. }
-  \item{weights}{ weights (class frequencies) to use for first classifier that
+  \item{prior}{ class frequencies to use for first classifier that
     is tabulated in the rows of the confusion matrix. For its value, see here
     under, the \code{value} argument. }
   \item{sums}{ is the confusion matrix printed with rows and columns sums? }
@@ -111,19 +113,17 @@
   \item{names}{ names of the two classifiers to compare. }
   \item{main}{ main title of the graph. }
   \item{min.width}{ minimum bar width required to add numbers. }
-  \item{value}{ a single positive  numeric to set all class frequencies to this
+  \item{value}{ a single positive numeric to set all class frequencies to this
     value (use 1 for relative frequencies and 100 for relative freqs in percent),
     or a vector of positive numbers of the same length as the levels in the
     object. If the vector is named, names must match levels. Alternatively,
-    providing \code{NULL} or an object of null length resets weights into their
-    initial values (here, weights are class frequencies according to the first
-    classifier, and depending on the context, they correspond to actual values,
-    priors, optimal posteriors, etc.). }
+    providing \code{NULL} or an object of null length resets row class
+    frequencies into their initial values. }
 }
 
 \value{
-  A confusion matrix in a 'confusion' object. \code{weights()} returns the
-  current class weights associated with first classification tabulated, i.e.,
+  A confusion matrix in a 'confusion' object. \code{prior()} returns the
+  current class frequencies associated with first classification tabulated, i.e.,
   for rows in the confusion matrix.
 }
 
@@ -168,31 +168,31 @@
 ## the real proportions (so-called, priors), one should first reweight the
 ## confusion matrix before calculating statistics, for instance:
 prior1 <- c(10, 10, 10, 100, 100, 100) # Glass types 1-3 are rare
-weights(glassConf) <- prior1
+prior(glassConf) <- prior1
 glassConf
 summary(glassConf, type = c("Fscore", "Recall", "Precision"))
 plot(glassConf)
 
 ## This is very different than if glass types 1-3 are abundants!
 prior2 <- c(100, 100, 100, 10, 10, 10) # Glass types 1-3 are abundants
-weights(glassConf) <- prior2
+prior(glassConf) <- prior2
 glassConf
 summary(glassConf, type = c("Fscore", "Recall", "Precision"))
 plot(glassConf)
 
 ## Weight can also be used to construct a matrix of relative frequencies
 ## In this case, all rows sum to one
-weights(glassConf) <- 1
+prior(glassConf) <- 1
 print(glassConf, digits = 2)
 ## However, it is easier to work with relative frequencies in percent
 ## and one gets a more compact presentation
-weights(glassConf) <- 100
+prior(glassConf) <- 100
 glassConf
 
-## To reset weights to original propotions, just assign NULL
-weights(glassConf) <- NULL
+## To reset row class frequencies to original propotions, just assign NULL
+prior(glassConf) <- NULL
 glassConf
-weights(glassConf)
+prior(glassConf)
 }
 
 \keyword{ tree }

Modified: pkg/zooimage/NAMESPACE
===================================================================
--- pkg/zooimage/NAMESPACE	2012-08-06 14:04:45 UTC (rev 231)
+++ pkg/zooimage/NAMESPACE	2012-08-10 08:25:11 UTC (rev 232)
@@ -89,15 +89,13 @@
 export(prepareTrain)
 export(addToTrain)
 export(getTrain)
-export(recode.ZITrain)
-export(ZIRecodeLevels)
+export(recode)
 
 # Utilities
 export(calcVars)
 export(calibrate)
 export(ecd)
 export(getDec)
-export(keepVars)
 export(listSamples)
 export(makeId)
 export(parseIni)
@@ -168,6 +166,8 @@
 export(modalAssistant)
 
 # S3 methods
+S3method(recode, ZITrain)
+
 S3method(print, ZIClass)
 S3method(predict, ZIClass)
 S3method(summary, ZIClass)

Modified: pkg/zooimage/R/ZIClass.R
===================================================================
--- pkg/zooimage/R/ZIClass.R	2012-08-06 14:04:45 UTC (rev 231)
+++ pkg/zooimage/R/ZIClass.R	2012-08-10 08:25:11 UTC (rev 232)
@@ -15,35 +15,30 @@
 ## You should have received a copy of the GNU General Public License
 ## along with ZooImage.  If not, see <http://www.gnu.org/licenses/>.
 
-ZIClass <- function (formula, data, mlearning = getOption("ZI.mlearning",
-mlRforest), calc.vars = getOption("ZI.calcVars", calcVars), cv.k = 10, cv.strat,
-..., subset, na.action = getOption("ZI.naAction", na.omit))
+## Create basically a mlearning object, but with predicted and cvpredicted added
+## to it, and the '+other+' level added at the end of all levels
+ZIClass <- function (formula, data, method = getOption("ZI.mlearning",
+"mlRforest"), calc.vars = getOption("ZI.calcVars", calcVars), cv.k = 10,
+cv.strat = TRUE, ..., subset, na.action = na.omit)
 {	
 	## Check calc.vars and use it on data
 	if (length(calc.vars))
 		if (!is.function(calc.vars)) {
 			stop("'calc.vars' must be a function or NULL")
 		} else data <- calc.vars(data)
-
-	## Machine learning function
-	mlearning <- match.fun(mlearning)
-	if (!is.function(mlearning))
-		stop("'mlearning' must be a function that produce a 'mlearning' object or a compatible one")
 	
 	## Train the machine learning algorithm
-	if (missing(subset) || !length(subset)) {
-		ZI.class <- mlearning(formula, data = data, ..., na.action = na.action)
-	} else {
-		ZI.class <- mlearning(formula, data = data, ..., subset,
-			na.action = na.action)
-	}
-	
+	ZI.class <- mlearning(formula, data = data, method = method,
+		model.args = list(formula  = formula, data = substitute(data),
+		subset = substitute(subset)), call = match.call(), ...,
+		subset = subset, na.action = substitute(na.action))
+		
 	## Add ZIClass as class of the object
 	class(ZI.class) <- c("ZIClass", class(ZI.class))
 	attr(ZI.class, "calc.vars") <- calc.vars
 
 	## Calculate predictions with full training set
-    attr(ZI.class, "predict") <- predict(ZI.class, data, calc.vars = FALSE)
+    attr(ZI.class, "predict") <- predict(ZI.class, data, calc = FALSE)
 
 	## Possibly make a k-fold cross-validation and check results
 	if (length(cv.k)) {
@@ -52,33 +47,43 @@
 		attr(ZI.class, "k") <- cv.k
 		attr(ZI.class, "strat") <- cv.strat
 	}
+	
+	## Make sure the '+other+' group exists
+	lev <- levels(ZI.class)
+	if (!"+other+" %in% lev) attr(ZI.class, "levels") <- c(lev, "+other+")
+	
 	ZI.class
 }
 
 print.ZIClass <- function (x, ...)
 {
 	algorithm <- attr(x, "algorithm")
-	classes <- attr(x, "classes")
+	classes <- attr(x, "response")
 	lclasses <- levels(classes)
     predicted <- attr(x, "predict")
+	if (is.list(predicted)) predicted <- predicted$class
 	k <- attr(x, "k")
+	strat <- attr(x, "strat")
 	cat("A 'ZIClass' object predicting for", length(lclasses), "classes:\n")
 	print(lclasses)
-	Confu <- confusion(classes, predicted)
-	mism <- 100 * (1 - (sum(diag(Confu)) / sum(Confu)))
+	Confu <- table(classes, predicted)
+	SelfConsist <- 100 * (sum(diag(Confu)) / sum(Confu))
 
 	## Change the number of digits to display
 	oldDigits <- options(digits = 4)
 	on.exit(options(oldDigits))
 	cat("\nAlgorithm used:", algorithm, "\n")
-	cat("Mismatch in classification: ", mism, "%\n", sep = "")
+	cat("Self-consistency: ", SelfConsist, "%\n", sep = "")
 	if (!is.null(k)) {
-    	cat("k-fold cross validation error estimation (k = ", k, "):\n")
-		kfold.predict <- attr(x, "kfold.predict")
+		if (isTRUE(strat)) msg <- ", stratified" else msg <- ""
+    	cat("K-fold cross validation error estimation (k = ", k, msg, "):\n",
+			sep = "")
+		cvpredicted <- attr(x, "cvpredict")
+		if (is.list(cvpredicted)) cvpredicted <- cvpredicted$class
 		prior <- table(classes)
-		ok <- diag(table(classes, kfold.predict))
+		ok <- diag(table(classes, cvpredicted))
 		err <- 100 * (1 - (sum(ok) / sum(prior)))
-		cat(err, "%\n", sep = "")
+		cat("Error rate: ", err, "%\n", sep = "")
 		cat("\nError per class:\n")
 		`Error (%)` <- sort(1 - (ok / prior)) * 100
 		print(as.data.frame(`Error (%)`))
@@ -90,12 +95,12 @@
 na.rm = FALSE, ...)
 {
 	## Get the confusion object out of a ZIClass object and calc stats from there
-	summary(confusion(object), sort.by = sort.by, decreasing = decreasing,
-		na.rm = na.rm)
+	summary(confusion(object, response(object)), sort.by = sort.by, decreasing = decreasing,
+		na.rm = na.rm, ...)
 }
 
-predict.ZIClass <- function (object, ZIDat, calc.vars = TRUE,
-class.only = FALSE, type = "class", na.rm = FALSE, ...)
+predict.ZIClass <- function (object, ZIDat, calc = TRUE, class.only = TRUE,
+type = "class", ...)
 {
 	## Make sure we have correct objects
 	if (!inherits(object, "ZIClass"))
@@ -106,72 +111,64 @@
     class(object) <- class(object)[-1]
 	data <- as.data.frame(ZIDat)
 	
-	if (isTRUE(as.logical(calc.vars)))
+	if (isTRUE(as.logical(calc)))
 		data <- attr(object, "calc.vars")(data)
-	if (isTRUE(as.logical(na.rm))) na.omit(data)
 	
-	algorithm <- attr(object, "algorithm")
-	if (type != "prob") {
-	   # modification to accept algoritms from party package
-	   if (algorithm %in% c("ctree", "cforest")) {
-            Ident <- predict(object, newdata = data, type = "response",
-				OOB = FALSE)
-		} else {
-            Ident <- predict(object, newdata = data, type = type)
-		}
-	} else {
-		if (inherits(object, "randomForest")) {
-			Ident <- predict(object, newdata = data, type = type)
-		} else if (inherits(object, "lda")) {
-			Ident <- predict(object, newdata = data)$posterior
-		} else stop("Cannot calculate yet for other algorithms than Random Forest or LDA")
+	class.only <- isTRUE(as.logical(class.only))
+	type <- as.character(type)[1]
+	if (class.only && type != "class") {
+		warning("with class.only == TRUE, tyep can only be 'class' and is force to it")
+		type <- "class"
 	}
-
-	## Special case for prediction from an LDA (list with $class item)
-	if (inherits(Ident, "list") && "class" %in% names(Ident))
-		Ident <- Ident$class
-	if (!isTRUE(as.logical(class.only))) {
-		res <- cbind(ZIDat, Ident)
-		class(res) <- class(ZIDat)
-	} else res <- Ident
 	
-	## New metadata attribute
-	attr(res, "metadata") <- attr(ZIDat, "metadata")
-	res
+	## Perform the prediction
+	res <- predict(object, newdata = data, ...)
+	
+	## Return either the prediction, or the ZIDat object with Ident column append/replaced
+	if (class.only) res else {
+		ZIDat$Ident <- res
+		ZIDat
+	}
 }
 
-#confusion.ZIClass <- function (x, ...)
-#{
-#	## If the object is ZIClass, calculate 'confusion'
-#	## from attributes 'classes' and 'kfold.predict' 
-#	if (!inherits(x, "ZIClass"))
-#		stop("'x' must be a 'ZIClass' object")
-#	
-#	x <- attr(x, "classes")
-#	y <- attr(x, "kfold.predict")
-#	labels <- c("Class", "Predict")
-#	clCompa <- data.frame(Class = x, Predict = y)
-#	## How many common objects by level?
-#	NbrPerClass1 <- table(clCompa[, 1])
-#	## How many predicted objects
-#	NbrPerClass2 <- table(clCompa[, 2])
-#	## Confusion matrix
-#	Conf <- table(clCompa)
-#	## Further stats: total, true positives, accuracy
-#	Total <- sum(Conf)
-#	TruePos <- sum(diag(Conf))
-#	Stats <- c(total = Total, truepos = TruePos, accuracy = TruePos / Total * 100)
-#
-#	## Change labels to get a more compact presentation
-#	colnames(Conf) <- formatC(1:ncol(Conf), digits = 1, flag = "0")
-#	rownames(Conf) <- paste(colnames(Conf), rownames(Conf))
-#
-#	## Additional data as attributes
-#	attr(Conf, "stats") <- Stats
-#	attr(Conf, "nbr.rows") <- NbrPerClass1
-#	attr(Conf, "nbr.cols") <- NbrPerClass2
-#	
-#	## This is a confusion object
-#	class(Conf) <- c("confusion", "table")
-#	Conf
-#}
+confusion.ZIClass <- function (x, y = response(x),
+labels = c("Actual", "Predicted"), prior, use.cv = TRUE, ...) {
+	## Check labels
+	labels <- as.character(labels)
+	if (length(labels) != 2)
+		stop("You must provide exactly 2 character strings for 'labels'")
+	
+	## Extract class2: cvpredict or predict from the object
+	if (isTRUE(as.logical(use.cv))) {
+		class2 <- attr(x, "cvpredict")
+		if (is.list(class2)) class2 <- class2$class
+		if (is.null(class2))
+			stop("No or wrong cross-validated predictions in this ZIClass object")
+	} else { # Use predict
+		class2 <- attr(x, "predict")
+		if (is.list(class2)) class2 <- class2$class
+	}
+	
+	## Check that both variables are of same length and same levels
+	if (length(y) != length(class2))
+		stop("lengths of 'x' and 'y' are not the same")
+	
+	## Full list of levels is in (cv)predict in class2...
+	## Response in y may have dropped levels! 
+	lev1 <- levels(y)
+	lev2 <- levels(class2)
+	if (!all(lev1  %in% lev2))
+		stop("levels of 'x' and 'y' do not match")
+	
+	## Rework levels in y to make sure they match perfectly thos in class2
+	y <- factor(as.character(y), levels = lev2)
+	
+	## Construct the confusion object
+	if (missing(prior)) {
+		mlearning:::.confusion(data.frame(class1 = y, class2 = class2),
+			labels = labels, ...)
+	} else {
+		mlearning:::.confusion(data.frame(class1 = y, class2 = class2),
+			labels = labels, prior = prior, ...)
+	}
+}

Modified: pkg/zooimage/R/ZITrain.R
===================================================================
--- pkg/zooimage/R/ZITrain.R	2012-08-06 14:04:45 UTC (rev 231)
+++ pkg/zooimage/R/ZITrain.R	2012-08-10 08:25:11 UTC (rev 232)
@@ -19,62 +19,73 @@
 ## from a given number of zidfiles to the '_' subdir, and making
 ## a template for subdirs
 ## TODO: eliminate zidfiles and detect if it is zidfiles or zidbfiles like in addToTrain()
-prepareTrain <- function (rootdir, subdir = "_train", zidfiles, zidbfiles = NULL,
-groups.template = c("[Basic]", "[Detailed]", "[Very detailed]"),
-ident = NULL, start.viewer = FALSE)
+prepareTrain <- function (traindir, zidbfiles,
+template = c("[Basic]", "[Detailed]", "[Very detailed]"), ident = NULL)
 {
-	## First, check that rootdir is valid
-	if (!checkDirExists(rootdir)) return(invisible(FALSE))
+	## First, check that dirname of traindir is valid
+	if (!checkDirExists(dirname(traindir))) return(invisible(FALSE))
 
-	## New dir is rootdir + subdir
-	dir <- file.path(rootdir, as.character(subdir)[1])
-	if (!checkEmptyDir(dir,
-		message = 'dir "%s" must be empty. Clean it first!'))
+	if (!checkEmptyDir(traindir,
+		message = 'dir "%s" is not empty. Use AddToTrain() instead!'))
 		return(invisible(FALSE))
 
 	## Then, check that all zidfiles or zidbfiles exist
-	if (is.null(zidbfiles)) {
-        if (!checkFileExists(zidfiles, "zid")) return(invisible(FALSE))
-        zmax <- length(zidfiles)
-    } else {
-        if (!checkFileExists(zidbfiles, "zidb")) return(invisible(FALSE))
-        zmax <- length(zidbfiles)
-    }
+	if (hasExtension(zidbfiles[1], "zidb")) dbext <- "zidb" else dbext <- "zid"
+    if (!checkFileExists(zidbfiles, dbext)) return(invisible(FALSE))
+    zmax <- length(zidbfiles)
 
-	## Finally, look for the groups.template
-	groups.template <- as.character(groups.template)[1]
-	rx <- "^[[](.+)[]]$"
-	if (grepl(rx, groups.template)) {
-		## This should be a template file in the default directory
-		groups.template <- paste(sub(rx, "\\1", groups.template), ".zic",
-			sep = "")
-		groups.template <- file.path(getTemp("ZIetc"), groups.template)
-		if (!file.exists(groups.template)) {
-			warning("The file '", groups.template, "' is not found")
-			return(invisible(FALSE))
+	## Also look for the template
+	## If the object has a path template, use it...
+	path <- attr(template, "path")
+	if (!length(path)) { # Look for a .zic file with classes
+		template <- as.character(template)[1]
+		rx <- "^[[](.+)[]]$"
+		if (grepl(rx, template)) {
+			## This should be a template file in the default directory
+			template <- paste(sub(rx, "\\1", template), ".zic",
+				sep = "")
+			template <- file.path(getTemp("ZIetc"), template)
+			if (!file.exists(template)) {
+				warning("The file '", template, "' is not found")
+				return(invisible(FALSE))
+			}
 		}
+		## Check that this is a .zic file
+		if (!zicCheck(template)) return(invisible(FALSE))
+	
+		## Create the other directories
+		path <- scan(template, character(), sep = "\n", skip = 2,
+			quiet = TRUE)
+		if (!length(path)) {
+			warning(sprintf("'%s' is empty or corrupted!", template))
+			return(invisible(FALSE))	
+		}
 	}
-	## Check that this is a .zic file
-	if (!zicCheck(groups.template)) return(invisible(FALSE))
 
-	## Do the job...
-	message("Extracting data and vignettes ...")
-
 	## Create '_' subdir and unzip all vignettes there
-	dir_ <- file.path(dir, "_")
+	dir_ <- file.path(traindir, "_")
 	if (!forceDirCreate(dir_)) return(invisible(FALSE))
 
+	## Create subdirectories representing classes hierarchy
+	message("Making directories...")
+	path <- file.path(traindir, path)
+	for (i in 1:length(path)) {
+		#message(path[i])
+		dir.create(path[i], recursive = TRUE)
+	}
+	
+	## Place the vignettes...
+	message("Extracting data and vignettes ...")
 	for (i in 1:zmax) {
 		progress(i, zmax)
-        if (is.null(zidbfiles)) {
-    		message("data", zidfiles[i])
+        if (dbext != "zidb") {
             ## Using a temporary directory to unzip all files and then copy
     		## the RData files to the train directory
     		td <- tempfile()
-    		unzip(zipfile = zidfiles[i], exdir = td)
+    		unzip(zipfile = zidbfiles[i], exdir = td)
     		datafiles <- file.path(td, list.files(td,
     			pattern = extensionPattern(".RData"), recursive = TRUE))
-    		if (length(datafiles)) file.copy(datafiles, dir)
+    		if (length(datafiles)) file.copy(datafiles, traindir)
     		vignettes <- file.path(td, list.files(td,
     			pattern = extensionPattern(".jpg"), recursive = TRUE))
     		if (length(vignettes)) file.copy(vignettes, dir_)
@@ -85,43 +96,31 @@
             AllItems <- ls(Zidb)
             Vigns <- AllItems[-grep("_dat1", AllItems)]
             ## Copy all vignettes in the "_" directory
-            ext <- Zidb[[".ImageType"]]
+            imgext <- Zidb[[".ImageType"]]
 			for (j in 1:length(Vigns)){
                 From <- Vigns[j]
-                To <- file.path(dir_, paste(From, ext, sep = "."))
+                To <- file.path(dir_, paste(From, imgext, sep = "."))
                 writeBin(Zidb[[From]], To)
             }
             ## Save vignettes
             ZI.sample <- Zidb$.Data
-            save(ZI.sample, file = file.path(dir, paste(sub(".zidb", "",
+            save(ZI.sample, file = file.path(traindir, paste(sub(".zidb", "",
 				basename(zidbfiles[i])), "_dat1.RData", sep = "")))
 		}
 	}
-	progress(101) # Clear progression indicator
-
-	## Create the other directories
-    Lines <- scan(groups.template, character(), sep = "\n", skip = 2,
-		quiet = TRUE)
-	if (!length(Lines)) {
- 		warning(sprintf("'%s' is empty or corrupted!", groups.template))
-		return(invisible(FALSE))	
+	progress(zmax + 1) # Clear progression indicator
+	
+	### TODO: relocate vignettes in subdirectories, if ident is not NULL
+	if (length(ident)) {
+		
 	}
-	Lines <- file.path(dir, Lines)
-	message("Making directories...")
-	for (i in 1:length(Lines)) {
-		message(Lines[i])
-		dir.create(Lines[i], recursive = TRUE)
-	}
-	### TODO: relocate vignettes in subdirectories, if ident is not NULL
 
-	## Finish and possibly start the image viewer
 	message(" -- Done! --")
-	if (isTRUE(as.logical(start.viewer))) imageViewer(dir_)
 	invisible(TRUE)
 }
 
 ## Function to add new vignettes in a training set
-addToTrain <- function (traindir, zidbfiles)
+addToTrain <- function (traindir, zidbfiles, ident = NULL)
 {
 	## Check if selected zid(b) files are already classified in the training set
 	Rdata <- list.files(traindir, pattern = "[.]RData$")
@@ -158,20 +157,27 @@
 		## treatment depends if it is a .zid or .zidb file
 		zidbfile <- zidbfiles[i]
 		if (grepl("[.]zidb$", zidbfile)) { # .zidb file
-			## TODO: extract data from .zidb files...
+			## Link .zidb database to R objects in memory
+            Zidb <- zidbLink(zidbfile)
+            AllItems <- ls(Zidb)
+            Vigns <- AllItems[-grep("_dat1", AllItems)]
+            ## Copy all vignettes in the TopPath directory
+            imgext <- Zidb[[".ImageType"]]
+			for (j in 1:length(Vigns)){
+                From <- Vigns[j]
+                To <- file.path(ToPath, paste(From, imgext, sep = "."))
+                writeBin(Zidb[[From]], To)
+            }
+            ## Save RData file
+            ZI.sample <- Zidb$.Data
+            save(ZI.sample, file = file.path(traindir, paste(sub(".zidb", "",
+				basename(zidbfile)), "_dat1.RData", sep = "")))
 
-
-
-
-
-
-
-
 		} else { # .zid file
 			## Using a temporary directory to unzip all files and then copy
 			## the RData files to the train directory
 			td <- tempfile()
-			unzip(zipfile = zidbfiles[i], exdir = td)
+			unzip(zipfile = zidbfile, exdir = td)
 			datafiles <- file.path(td, list.files(td,
 				pattern = extensionPattern(".RData"), recursive = TRUE))
 			if (length(datafiles))
@@ -186,14 +192,14 @@
 			unlink(td, recursive = TRUE)	
 		}
 	}
-	progress(101) # Clear progression indicator
+	progress(zmax + 1) # Clear progression indicator
 	message("-- Done --\n")
 	invisible(TRUE)
 }
 
 ## Retrieve information from a manual training set in a 'ZITrain' object	
 getTrain <- function (traindir, creator = NULL, desc = NULL, keep_ = FALSE,
-na.rm = FALSE, numvars = NULL)
+na.rm = FALSE)
 {
 	## 'traindir' must be the base directory of the manual classification
 	if (!checkDirExists(traindir)) return(invisible(FALSE))
@@ -239,11 +245,7 @@
 	nitems <- nrow(df)
 
 	## Read in all the .RData files from the root directory and merge them
-    ### TODO: also collect metadata and merge them => make a merge function for
-	## ZIDat!!!
 	## Get measurement infos
-    #### TODO: Kevin, you cannot use this! You must refer to ZI.sample directly
-	## in the arguments of the function!
 	ZI.sample <- NULL
 	load(Dats[1])
 	Dat <- ZI.sample
@@ -263,12 +265,6 @@
 	}
 	rownames(Dat) <- 1:nrow(Dat)
 
-	## Create the Id column
-# Done in the loop!
-#	Dat <- cbind(Id = makeId(Dat), Dat)
-
-	## Merge Dat & df by "Id"
-#	df <- merge(Dat, df, by = "Id")
 	## Rename Dat in df
 	df <- Dat
 	## Problem if there is no remaining row in the data frame
@@ -283,7 +279,6 @@
 			" vignettes without measurement data are eliminated (",
 			nrow(df), " items remain in the object)")
 
-	## Delete lines which contain NA values v1.2-2
 	if (any(is.na(df)))
 		if (isTRUE(as.logical(na.rm))) {
   	  		message("NAs found in the table of measurements and deleted")
@@ -299,84 +294,97 @@
 	class(df) <- Classes
 	
 	## Be sure that variables are numeric (sometimes, wrong importation)
-	as.numeric.Vars <- function (ZIDat, numvars) {
-	    if (is.null(numvars)) # Default values
-	        numvars <- c("ECD",
-	            "FIT_Area_ABD", "FIT_Diameter_ABD", "FIT_Volume_ABD",
-				"FIT_Diameter_ESD", "FIT_Volume_ESD", "FIT_Length", "FIT_Width",
-				"FIT_Aspect_Ratio", "FIT_Transparency", "FIT_Intensity",
-				"FIT_Sigma_Intensity", "FIT_Sum_Intensity", "FIT_Compactness",
-				"FIT_Elongation", "FIT_Perimeter", "FIT_Convex_Perimeter",
-				"FIT_Roughness", "FIT_Feret_Max_Angle", "FIT_PPC", "FIT_Ch1_Peak",
-				"FIT_Ch1_TOF", "FIT_Ch2_Peak", "FIT_Ch2_TOF", "FIT_Ch3_Peak",
-				"FIT_Ch3_TOF", "FIT_Avg_Red", "FIT_Avg_Green", "FIT_Avg_Blue",
-				"FIT_Red_Green_Ratio", "FIT_Blue_Green", "FIT_Red_Blue_Ratio",
-				"FIT_CaptureX", "FIT_CaptureY", "FIT_SaveX", "FIT_SaveY",
-				"FIT_PixelW", "FIT_PixelH", "FIT_Cal_Const",
-	            "Area", "Mean", "StdDev", "Mode", "Min", "Max", "X", "Y", "XM",
-	            "YM", "Perim.", "BX", "BY", "Width", "Height", "Major", "Minor",
-				"Angle", "Circ.", "Feret", "IntDen", "Median", "Skew", "Kurt",
-				"XStart", "YStart", "Dil")
+#	as.numeric.Vars <- function (ZIDat, numvars) {
+#	    if (is.null(numvars)) # Default values
+#	        numvars <- c("ECD",
+#	            "FIT_Area_ABD", "FIT_Diameter_ABD", "FIT_Volume_ABD",
+#				"FIT_Diameter_ESD", "FIT_Volume_ESD", "FIT_Length", "FIT_Width",
+#				"FIT_Aspect_Ratio", "FIT_Transparency", "FIT_Intensity",
+#				"FIT_Sigma_Intensity", "FIT_Sum_Intensity", "FIT_Compactness",
+#				"FIT_Elongation", "FIT_Perimeter", "FIT_Convex_Perimeter",
+#				"FIT_Roughness", "FIT_Feret_Max_Angle", "FIT_PPC", "FIT_Ch1_Peak",
+#				"FIT_Ch1_TOF", "FIT_Ch2_Peak", "FIT_Ch2_TOF", "FIT_Ch3_Peak",
+#				"FIT_Ch3_TOF", "FIT_Avg_Red", "FIT_Avg_Green", "FIT_Avg_Blue",
+#				"FIT_Red_Green_Ratio", "FIT_Blue_Green", "FIT_Red_Blue_Ratio",
+#				"FIT_CaptureX", "FIT_CaptureY", "FIT_SaveX", "FIT_SaveY",
+#				"FIT_PixelW", "FIT_PixelH", "FIT_Cal_Const",
+#	            "Area", "Mean", "StdDev", "Mode", "Min", "Max", "X", "Y", "XM",
+#	            "YM", "Perim.", "BX", "BY", "Width", "Height", "Major", "Minor",
+#				"Angle", "Circ.", "Feret", "IntDen", "Median", "Skew", "Kurt",
+#				"XStart", "YStart", "Dil")
+#
+#	    ## Make sure numvars are numeric
+#		Names <- names(ZIDat)
+#	    for (numvar in numvars) {
+#	        if (numvar %in% Names && !is.numeric(ZIDat[, numvar]))
+#	            ZIDat[, numvar] <- as.numeric(ZIDat[, numvar])
+#	    }
+#	    ZIDat
+#	}
+#	as.numeric.Vars(df, numvars = numvars)
 
-	    ## Make sure numvars are numeric
-		Names <- names(ZIDat)
-	    for (numvar in numvars) {
-	        if (numvar %in% Names && !is.numeric(ZIDat[, numvar]))
-	            ZIDat[, numvar] <- as.numeric(ZIDat[, numvar])
-	    }
-	    ZIDat
-	}
-	as.numeric.Vars(df, numvars = numvars)
+	df
 }
 
-recode.ZITrain <- function (ZITrain, ZIRecode, warn.only = FALSE)
-{	
-	if (!inherits(ZITrain, "ZITrain"))
+.recodeLevels <- function (object, depth = 1)
+{
+	if (!inherits(object, "ZITrain"))
 		stop("'ZITrain' must be a 'ZITrain' object")
-	if (!inherits(ZIRecode, "ZIRecode"))
-		stop("'ZIRecode' must be a 'ZIRecode' object")
 	
-	## Check that all levels in ZITrain$Class are represented in ZIRecode
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/zooimage -r 232


More information about the Zooimage-commits mailing list