[Zooimage-commits] r233 - in pkg/zooimage: . R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Aug 10 16:09:22 CEST 2012


Author: phgrosjean
Date: 2012-08-10 16:09:22 +0200 (Fri, 10 Aug 2012)
New Revision: 233

Modified:
   pkg/zooimage/NAMESPACE
   pkg/zooimage/R/ZIClass.R
   pkg/zooimage/R/ZIRes.R
   pkg/zooimage/R/ZITrain.R
   pkg/zooimage/man/ZITrain.Rd
Log:
Correction of bugs in ZIClass and ZITrain + refactoring of ZIRes initiated

Modified: pkg/zooimage/NAMESPACE
===================================================================
--- pkg/zooimage/NAMESPACE	2012-08-10 08:25:11 UTC (rev 232)
+++ pkg/zooimage/NAMESPACE	2012-08-10 14:09:22 UTC (rev 233)
@@ -85,11 +85,15 @@
 # ZIClass
 export(ZIClass)
 
-# ZITrain
+# ZITrain/ZITest
 export(prepareTrain)
 export(addToTrain)
 export(getTrain)
+export(prepareTest)
+export(addToTest)
+export(getTest)
 export(recode)
+export(template)
 
 # Utilities
 export(calcVars)
@@ -166,7 +170,9 @@
 export(modalAssistant)
 
 # S3 methods
+S3method(template, default)
 S3method(recode, ZITrain)
+S3method(recode, ZITest)
 
 S3method(print, ZIClass)
 S3method(predict, ZIClass)

Modified: pkg/zooimage/R/ZIClass.R
===================================================================
--- pkg/zooimage/R/ZIClass.R	2012-08-10 08:25:11 UTC (rev 232)
+++ pkg/zooimage/R/ZIClass.R	2012-08-10 14:09:22 UTC (rev 233)
@@ -37,20 +37,24 @@
 	class(ZI.class) <- c("ZIClass", class(ZI.class))
 	attr(ZI.class, "calc.vars") <- calc.vars
 
+	## Get useful attrobutes from ZITrain
+	attr(ZI.class, "traindir") <- attr(data, "traindir")
+	attr(ZI.class, "path") <- attr(data, "path")
+
 	## Calculate predictions with full training set
     attr(ZI.class, "predict") <- predict(ZI.class, data, calc = FALSE)
 
 	## Possibly make a k-fold cross-validation and check results
 	if (length(cv.k)) {
-		attr(ZI.class, "cvpredict") <- cvpredict(ZI.class, type = "both",
+		attr(ZI.class, "cvpredict") <- cvpredict(ZI.class, type = "class",
 			cv.k = cv.k, cv.strat = cv.strat)
 		attr(ZI.class, "k") <- cv.k
 		attr(ZI.class, "strat") <- cv.strat
 	}
 	
-	## Make sure the '+other+' group exists
+	## Make sure the '+others+' group exists
 	lev <- levels(ZI.class)
-	if (!"+other+" %in% lev) attr(ZI.class, "levels") <- c(lev, "+other+")
+	if (!"+others+" %in% lev) attr(ZI.class, "levels") <- c(lev, "+others+")
 	
 	ZI.class
 }

Modified: pkg/zooimage/R/ZIRes.R
===================================================================
--- pkg/zooimage/R/ZIRes.R	2012-08-10 08:25:11 UTC (rev 232)
+++ pkg/zooimage/R/ZIRes.R	2012-08-10 14:09:22 UTC (rev 233)
@@ -15,89 +15,99 @@
 ## You should have received a copy of the GNU General Public License
 ## along with ZooImage.  If not, see <http://www.gnu.org/licenses/>.
 
-processSample <- function (ZidFile, ZidbFile = NULL, ZIClass, ZIMan, ZIDesc,
-abd.taxa = NULL, abd.groups = NULL, abd.type = "absolute", bio.taxa = NULL,
-bio.groups = NULL, bio.conv = c(1, 0, 1), headers = c("Abd", "Bio"),
-spec.taxa = NULL, spec.groups = NULL, spec.breaks = seq(0.25, 2, by = 0.1),
-spec.use.Dil = TRUE, exportdir = NULL, SemiTab = NULL, Semi = FALSE)
+processSample <- function (zidbfile, ZIClass = NULL, use = "both",
+ZIDesc, abd.taxa = NULL, abd.groups = NULL,
+abd.type = "absolute", bio.taxa = NULL, bio.groups = NULL, bio.conv = c(1, 0, 1),
+headers = c("Abd", "Bio"), spec.taxa = NULL, spec.groups = NULL,
+spec.breaks = seq(0.25, 2, by = 0.1), spec.use.Dil = TRUE, exportdir = NULL,
+SemiTab = NULL, Semi = FALSE)
 {    
-	## Check if the ZidFile exists
-	if (!checkFileExists(ZidFile, message = "'ZidFile' not found"))
-		return(invisible(FALSE))
-	
+	zidbfile <- as.character(zidbfile)[1]
+	if (hasExtension(zidbfile, "zidb")) dbext <- "zidb" else dbext <- "zid"
+    if (!checkFileExists(zidbfile, dbext)) return(invisible(FALSE))
+		
 	## Check if ZIClass is of the right class
-	if (!inherits(ZIClass, "ZIClass")) {
+	if (!is.null(ZIClass) && !inherits(ZIClass, "ZIClass")) {
 		warning("'ZIClass' must be a 'ZIClass' object")
 		return(invisible(FALSE))
 	}
     
-	if (!length(ZidbFile)) {
-        ## Check if the ZidFile exists
-        if (!checkFileExists(ZidFile, force.file = TRUE))
-			return(invisible(FALSE))
-		
-        ## Get ZIDat from the ZidFile
-        ZIDat <- zidDatRead(ZidFile)
-        Sample <- sampleInfo(ZidFile, type = "sample",
+	if (dbext == "zidb") { # This is a ZIDB file
+		ZIDat <- zidbDatRead(zidbfile)
+        Sample <- sampleInfo(zidbfile, type = "sample",
+            ext = extensionPattern(".zidb"))
+		RES <- zidbSampleRead(zidbfile)
+	} else { # This is an old ZID file
+		 ZIDat <- zidDatRead(zidbfile)
+        Sample <- sampleInfo(zidbfile, type = "sample",
             ext = extensionPattern(".zid"))
-	
-	} else { # There is a ZIDB file
-        ## Check if the ZidbFile exists
-        if (!checkFileExists(ZidbFile, force.file = TRUE))
-			return(invisible(FALSE))
-		
-        ## Get ZIDat from the ZidbFile
-        ZIDat <- zidbDatRead(ZidbFile)
-        ## Get ZIDat from the ZidFile
-        Sample <- sampleInfo(ZidbFile, type = "sample",
-            ext = extensionPattern(".zidb"))
+		ZIDesc <- zisRead(ZIDesc)
+		RES <- ZIDesc[ZIDesc$Label == Sample, ] 
+		if (nrow(RES) == 0)
+			stop("'ZIDesc' has no data for that sample!")
 	}
 	
-	## By default, we have to predict ZidFile with a classifier
-	MakePredictions <- TRUE
+#	## By default, we have to predict zidbfile with a classifier
+#	MakePredictions <- TRUE
+#	
+#	## Modified by Kevin 2010-08-03
+#	if (!is.null(ZIMan)) {
+#		## We want to use a ZIMan table
+#		if (!inherits(ZIMan, "ZIMan"))
+#			stop("'ZIMan' must be a data.frame of class 'ZIMan'")
+#		
+#		## List of samples allready manually validated
+#		AllSamples <- attr(ZIMan, "Samples")
+#		
+#		## Check if manual validation exists for this zid file
+#		if (noExtension(ZidFile) %in% AllSamples) {
+#			## The ZidFile was manually validated
+#			## --> use Class column for identification
+#			## Subtable of ZidFile vignettes
+#			Vignettes <- makeId(ZIDat)
+#			ZIDat <- ZIMan[ZIMan$Id %in% Vignettes, ]
+#			## Sort the table
+#			ZIDat <- ZIDat[order(ZIDat$Item), ]
+#			## We don't have to predict this sample anymore!
+#			MakePredictions <- FALSE
+#		}
+#	}
 	
-	## Modified by Kevin 2010-08-03
-	if (!is.null(ZIMan)) {
-		## We want to use a ZIMan table
-		if (!inherits(ZIMan, "ZIMan"))
-			stop("'ZIMan' must be a data.frame of class 'ZIMan'")
-		
-		## List of samples allready manually validated
-		AllSamples <- attr(ZIMan, "Samples")
-		
-		## Check if manual validation exists for this zid file
-		if (noExtension(ZidFile) %in% AllSamples) {
-			## The ZidFile was manually validated
-			## --> use Class column for identification
-			## Subtable of ZidFile vignettes
-			Vignettes <- makeId(ZIDat)
-			ZIDat <- ZIMan[ZIMan$Id %in% Vignettes, ]
-			## Sort the table
-			ZIDat <- ZIDat[order(ZIDat$Item), ]
-			## We don't have to predict this sample anymore!
-			MakePredictions <- FALSE
+#	if (isTRUE(MakePredictions)) {
+#		## We have to recognize the zid file with a classifier
+#		ZIDat <- predict(ZIClass, ZIDat)
+#	}
+
+	## Depending on 'us', rework ZIDat$Ident...
+	if (use == "Class") {
+		ZIDat$Ident <- ZIDat$Class
+	} else {
+		if (!is.null(ZIClass)) {
+			## If a ZIClass object is provided, (re)perform the prediction
+			ZIDat <- predict(ZIClass, ZIDat, class.only = FALSE)
 		}
+		if (use == "both") { # If Class available, use it, otherwise, use Ident
+			if ("Class" %in% names(ZIDat)) {
+				Ident <- ZIDat$Class
+				missIdent <- is.na(Ident)
+				Ident[missIdent] <- ZIDat$Ident[missIdent]
+				ZIDat$Ident <- Ident
+			}
+		} else if (use != "Ident")
+			stop("Unknown 'use', must be 'Class', 'Ident', or 'both'")
 	}
-	
-	if (isTRUE(MakePredictions)) {
-		## We have to recognize the zid file with a classifier
-		ZIDat <- predict(ZIClass, ZIDat)
-	}
-	
-	## Check if one can get sample metadata from ZIDesc
-	RES <- ZIDesc[ZIDesc$Label == Sample, ] 
-	if (nrow(RES) == 0)
-		stop("'ZIDesc' has no data for that sample!")
-	
-	## Use manual validation if it is present
-	if (isTRUE(MakePredictions)) {
-		## Use Automatic prediction
-		Grp <- levels(ZIDat$Ident)	
-	} else {
-		## Use manual validation as identification
-		Grp <- levels(ZIDat$Class)
-	}
-	
+		
+#	## Use manual validation if it is present
+#	if (isTRUE(MakePredictions)) {
+#		## Use Automatic prediction
+#		Grp <- levels(ZIDat$Ident)	
+#	} else {
+#		## Use manual validation as identification
+#		Grp <- levels(ZIDat$Class)
+#	}
+
+	Grp <- levels(ZIDat$Ident)
+
 	if (is.null(abd.groups)) {
 		## Calculate groups (list with levels to consider)
 		abd.groups <- as.list(c("", Grp))
@@ -134,7 +144,6 @@
 		SPClist[[Sample]] <- SPC
 		attr(RES, "spectrum") <- SPClist
 	}
-	attr(RES, "metadata") <- attr(ZIDesc, "metadata")
 	class(RES) <- c("ZI3Res", "ZIRes", "data.frame")
 	RES
 }

Modified: pkg/zooimage/R/ZITrain.R
===================================================================
--- pkg/zooimage/R/ZITrain.R	2012-08-10 08:25:11 UTC (rev 232)
+++ pkg/zooimage/R/ZITrain.R	2012-08-10 14:09:22 UTC (rev 233)
@@ -15,10 +15,22 @@
 ## You should have received a copy of the GNU General Public License
 ## along with ZooImage.  If not, see <http://www.gnu.org/licenses/>.
 
+template <- function (object, ...)
+	UseMethod("template")
+	
+template.default <- function (object, add.others = TRUE, ...)
+{
+	res <- attr(object, "path")
+	if (isTRUE(as.logical(add.others)) &&
+		!"+others+" %in% unlist(strsplit(res, "/", fixed = TRUE)))
+		res <- c(res, "+others+")
+	
+	res
+}
+
 ## Prepare 'dir\subdir' for a manual classification by expanding all vignettes
 ## from a given number of zidfiles to the '_' subdir, and making
 ## a template for subdirs
-## TODO: eliminate zidfiles and detect if it is zidfiles or zidbfiles like in addToTrain()
 prepareTrain <- function (traindir, zidbfiles,
 template = c("[Basic]", "[Detailed]", "[Very detailed]"), ident = NULL)
 {
@@ -108,7 +120,7 @@
 				basename(zidbfiles[i])), "_dat1.RData", sep = "")))
 		}
 	}
-	progress(zmax + 1) # Clear progression indicator
+	progress(101) # Clear progression indicator
 	
 	### TODO: relocate vignettes in subdirectories, if ident is not NULL
 	if (length(ident)) {
@@ -119,6 +131,21 @@
 	invisible(TRUE)
 }
 
+prepareTest <- function (testdir, zidbfiles, template, ident = NULL)
+{
+	if (!is.null(attr(template, "path"))) template <- attr(template, "path")
+	if (!"+others+" %in% unlist(strsplit(template, "/", fixed = TRUE)))
+		template <- c(template, "+others+")
+	tpl <- structure(1, path = template)
+	res <- prepareTrain(testdir, zidbfiles = zidbfiles,
+		template = tpl, ident = ident)
+	## Add a .zic file there to make sure to respect training set classes
+	cat("ZI3\n[path]\n", paste(template, collapse = "\n"), "\n", sep = "",
+		file = file.path(testdir, "_template.zic"))
+	
+	res
+}
+
 ## Function to add new vignettes in a training set
 addToTrain <- function (traindir, zidbfiles, ident = NULL)
 {
@@ -151,7 +178,7 @@
 	
 	## Extract RData in the root directory
 	zmax <- length(zidbfiles)
-	message("Adding data and vignettes to the training set...")
+	message("Adding data and vignettes to the training set...\n")
 	for (i in 1:zmax) {
 		progress(i, zmax)
 		## treatment depends if it is a .zid or .zidb file
@@ -192,11 +219,14 @@
 			unlink(td, recursive = TRUE)	
 		}
 	}
-	progress(zmax + 1) # Clear progression indicator
+	progress(101) # Clear progression indicator
 	message("-- Done --\n")
 	invisible(TRUE)
 }
 
+addToTest <- function (testdir, zidbfiles, ident = NULL)
+	addToTrain(traindir = testdir, zidbfiles = zidbfiles, ident = ident)
+
 ## Retrieve information from a manual training set in a 'ZITrain' object	
 getTrain <- function (traindir, creator = NULL, desc = NULL, keep_ = FALSE,
 na.rm = FALSE)
@@ -286,8 +316,8 @@
 		} else message("NAs found in the table of measurements and left there")
 	
 	## Add attributes
-	attr(df, "basedir") <- dir
-	attr(df, "path") <- sort(unique(Path))
+	attr(df, "traindir") <- dir
+	attr(df, "path") <- unique(Path)
 	if (length(creator)) attr(df, "creator") <- creator
 	if (length(desc)) attr(df, "desc") <- desc
 	Classes <- c("ZI3Train", "ZITrain", Classes)
@@ -326,6 +356,31 @@
 	df
 }
 
+getTest <- function (testdir, creator = NULL, desc = NULL, keep_ = FALSE,
+na.rm = FALSE)
+{
+	## Same as getTrain() but returns a ZITest object... and read _template.zic
+	## to make sure that path and classes do match!
+	if (!file.exists(zicfile) || !zicCheck(zicfile))
+		stop("testdir does not seem to contain a valid test set (may be use getTrain()?)")
+	
+	res <- getTrain(traindir = testdir, creator = creator, desc = desc,
+		keep_ = keep_, na.rm = na.rm)
+	class(res) <- c("ZI3Test", "ZITest", class(res)[-(1:2)])
+	
+	## Read the _template.zic file and change res$Class factors and path accordingly
+	path <- scan(zicfile, character(), sep = "\n", skip = 2, quiet = TRUE)
+	if (!length(path))
+		stop(sprintf("'%s' is empty or corrupted!", zicfile))
+	attr(res, "path") <- path
+	
+	## Now, make sure to recode res$Class factor in the correct order!
+	lev <- sort(basename(path))
+	res$Class <- factor(as.character(res$Class), levels = lev)
+
+	res
+}
+
 .recodeLevels <- function (object, depth = 1)
 {
 	if (!inherits(object, "ZITrain"))
@@ -352,9 +407,6 @@
 
 recode.ZITrain <- function (object, new.levels, depth, ...)
 {	
-	if (!inherits(object, "ZITrain"))
-		stop("'ZITrain' must be a 'ZITrain' object")
-	
 	if (!missing(depth)) {
 		if (!missing(new.levels))
 			warning("depth is provided, so, new.levels is ignored and recomputed")
@@ -388,3 +440,5 @@
 	if (!is.null(path)) attr(object, "path") <- path
 	object
 }
+
+recode.ZITest <- recode.ZITrain

Modified: pkg/zooimage/man/ZITrain.Rd
===================================================================
--- pkg/zooimage/man/ZITrain.Rd	2012-08-10 08:25:11 UTC (rev 232)
+++ pkg/zooimage/man/ZITrain.Rd	2012-08-10 14:09:22 UTC (rev 233)
@@ -1,16 +1,23 @@
 \name{ZITrain}
-\alias{getTrain}
 \alias{prepareTrain}
 \alias{addToTrain}
+\alias{getTrain}
+\alias{prepareTest}
+\alias{addToTest}
+\alias{getTest}
+\alias{template}
+\alias{template.default}
 \alias{recode}
 \alias{recode.ZITrain}
+\alias{recode.ZITest}
 
-\title{ Manipulate training sets and 'ZITrain' objects }
+\title{ Manipulate training and testsets 'ZITrain'/'ZITest' objects }
 
 \description{
   'ZITrain' contain a hierarchy of classes (taxonomic or not) and a link to a
   series of items belonging to these classes. It can be obtained after manual or
-  automatic classification of various objects from .zid or .zidb files.
+  automatic classification of various objects from .zid or .zidb files. 'ZITest'
+  objects are almost identical, but with a '+others+' class added.
 }
 \usage{
 prepareTrain(traindir, zidbfiles, template = c("[Basic]", "[Detailed]",
@@ -18,19 +25,30 @@
 addToTrain(traindir, zidbfiles, ident = NULL)
 getTrain(traindir, creator = NULL, desc = NULL, keep_ = FALSE, na.rm = FALSE)
 
+prepareTest(testdir, zidbfiles, template, ident = NULL)
+addToTest(testdir, zidbfiles, ident = NULL)
+getTest(testdir, creator = NULL, desc = NULL, keep_ = FALSE, na.rm = FALSE)
+
+template(object, \dots)
+\method{template}{default}(object, add.others = TRUE, \dots)
+
 recode(object, \dots)
 \method{recode}{ZITrain}(object, new.levels, depth, \dots)
+\method{recode}{ZITest}(object, new.levels, depth, \dots)
 }
 
 \arguments{
   \item{traindir}{ the root directory of the training set. }
+  \item{testdir}{ the root directory of the test set. }
   \item{zidbfiles}{ .zidb files or .zid files to use for data and vignettes in
     the training set. }
   \item{template}{ file containing subdirectories template to use for creating
-    classes in the training set. Either a defaut template between [], or the
-    name of a .zic file. }
+    classes in the training or test set. Either a defaut template between [], or
+    the name of a .zic file, or a template extracted from a 'ZITrain' or 'ZIClass'
+    object using \code{template(object)} (with the \code{add.others} argument
+    to TRUE for \code{prepareTest()} and to \code{FALSE} for \code{prepareTrain()}) }
   \item{ident}{ if vignettes are already identified in the zid(b) files, should
-    they be sorted that way in the created training set? If not \code{NULL},
+    they be sorted that way in the created training or test set? If not \code{NULL},
     indicate the name of the classification column (usually, \code{Class} for
     manual classification or \code{Ident} for automatic predictions). Not uset yet! }  
   \item{creator}{ name of the author of this classification (or the method,
@@ -39,7 +57,12 @@
   \item{keep_}{ do we keep items in the '\_' subdirectory (corresponding to
     unclassified ones)? Usually not! }
   \item{na.rm}{ do we remove item with missing data? By default, not. }
-  \item{object}{ a 'ZITrain' object. }
+  \item{object}{ a 'ZITrain' or 'ZITest' object. For \code{prepareTest()}, a
+    'ZITrain' or 'ZIClass' object to use as reference to determine the
+    classes to make. }
+  \item{add.others}{ do we add the '+others+' group if not already in the
+    template? \code{TRUE} (by default) is suitable to create a template for
+    'ZITest' objects, otherwise, use \code{FALSE} for 'ZITrain' objects. }
   \item{new.levels}{ a character string of same length as the levels of
     \code{object$Class} with the labels of the new levels. }
   \item{depth}{ the depth in the hierachy of the classes as in the \code{"path"}



More information about the Zooimage-commits mailing list