[Subgroup-commits] r59 - in pkg/rsubgroup: . R inst/java man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Jun 9 15:21:55 CEST 2015


Author: atzmueller
Date: 2015-06-09 15:21:54 +0200 (Tue, 09 Jun 2015)
New Revision: 59

Added:
   pkg/rsubgroup/man/is.matching.Rd
Modified:
   pkg/rsubgroup/DESCRIPTION
   pkg/rsubgroup/NEWS
   pkg/rsubgroup/R/classes.R
   pkg/rsubgroup/R/subgroup.R
   pkg/rsubgroup/inst/java/subgroup.jar
   pkg/rsubgroup/man/Pattern-class.Rd
   pkg/rsubgroup/man/subgroup-package.Rd
Log:
* Improve error handling (exception signaling) when running subgroup discovery
  using an ARFF file directly.
* The Pattern class now contains a list of selection expressions (selectors)
  for the subgroup, not only the description. Using the is.matching function,
  a match of a pattern and a data instance can be checked now.

Modified: pkg/rsubgroup/DESCRIPTION
===================================================================
--- pkg/rsubgroup/DESCRIPTION	2015-06-08 05:53:05 UTC (rev 58)
+++ pkg/rsubgroup/DESCRIPTION	2015-06-09 13:21:54 UTC (rev 59)
@@ -2,7 +2,7 @@
 Type: Package
 Title: Subgroup Discovery and Analytics
 Version: 0.7
-Date: 2015-06-08
+Date: 2015-06-09
 Author: Martin Atzmueller
 Maintainer: Martin Atzmueller <martin at atzmueller.net>
 Description: A collection of efficient and effective tools and

Modified: pkg/rsubgroup/NEWS
===================================================================
--- pkg/rsubgroup/NEWS	2015-06-08 05:53:05 UTC (rev 58)
+++ pkg/rsubgroup/NEWS	2015-06-09 13:21:54 UTC (rev 59)
@@ -3,10 +3,15 @@
 For a detailed list of changes, see the svn commit history at
 svn://r-forge.r-project.org/svnroot/subgroup/
 
-Changes in version 0.7 (2015-05-xx)
+Changes in version 0.7 (2015-06-xx)
 ===================================
 * Improvements
   * document setting Java heap space before loading the rsubgroup library.
+  * Improve error handling (exception signaling) when running subgroup discovery
+  using an ARFF file directly.
+  * The Pattern class now contains a list of selection expressions (selectors)
+  for the subgroup, not only the description. Using the is.matching function,
+  a match of a pattern and a data instance can be checked now. 
 
 * Bug fixes:
   * fix providing attributes=NULL (i.e., automatically include all attributes)

Modified: pkg/rsubgroup/R/classes.R
===================================================================
--- pkg/rsubgroup/R/classes.R	2015-06-08 05:53:05 UTC (rev 58)
+++ pkg/rsubgroup/R/classes.R	2015-06-09 13:21:54 UTC (rev 59)
@@ -54,6 +54,7 @@
 setClass("Pattern",
     representation(
         description="character",
+        selectors="list",
         quality="numeric",
         size="numeric",
         parameters="list"

Modified: pkg/rsubgroup/R/subgroup.R
===================================================================
--- pkg/rsubgroup/R/subgroup.R	2015-06-08 05:53:05 UTC (rev 58)
+++ pkg/rsubgroup/R/subgroup.R	2015-06-09 13:21:54 UTC (rev 59)
@@ -50,7 +50,7 @@
 setMethod(".CreateARFFProvider", signature(source = "character", name = "character"),
     function(source, name, ...) {
       # Creates a dataset provider given a file name
-      provider <- .jnew("org/vikamine/kernel/xpdl/FileDatasetProvider", source)
+      provider <- .jnew("org/vikamine/kernel/xpdl/FileDatasetProvider", source, name)
       return(provider)
     }
 )
@@ -160,6 +160,30 @@
   return(as.character(sgSelectorArray))
 }
 
+.ExtractSelectors <- function(sgDescription) {
+  # Internal function for converting a (Java) SGDescription consisting
+  # of a set of selection expressions into a list of those expressions
+  # where the 'key' is the attribute and the 'value' is the selector value
+  # Args:
+  #   sgDescription: A (Java) SGDescription object
+  #
+  # Returns:
+  #   A list of characters
+  result <- list()
+  sgSelectorList <- J(J(sgDescription, "getSelectors"), "toArray")
+  sgSelectorArray <- .jevalArray(
+      sgSelectorList,
+      simplify=TRUE)
+  for (selector in sgSelectorArray) {
+    attribute <- .jcall("org/vikamine/kernel/subgroup/search/SDSimpleTask", "Ljava/lang/String;", method = "getAttributeIDOfSelector", .jcast(selector, "org/vikamine/kernel/subgroup/selectors/SGSelector"))
+    value <- .jcall("org/vikamine/kernel/subgroup/search/SDSimpleTask", "Ljava/lang/String;", method="getSingleValueIDOfSelector", .jcast(selector, "org/vikamine/kernel/subgroup/selectors/SGSelector"))
+    tmp <- list()
+    tmp[[attribute]] <- value
+    result <- append(result, tmp)
+  }
+  return(result)
+}
+
 DiscoverSubgroupsByTask <- function(task, as.df = FALSE) {
   # Internal function for setting up and performing subgroup discovery
   # Args:
@@ -171,16 +195,17 @@
   sgList <- J(sgSet, "toSortedList", FALSE)
   sgArray <- .jevalArray(J(sgList, "toArray"))
   
-  patterns = list()
+  patterns <- list()
   for (sg in sgArray) {
     #description <- as.character(J(J(sg, "getSGDescription"), "getDescription"))
     sgDescription <- J(sg, "getSGDescription")
     description <- .ConvertDescription(sgDescription)
+    selectors <- .ExtractSelectors(sgDescription)
     quality <- J(sg, "getQuality")
     size <- J(J(sg, "getStatistics"), "getSubgroupSize")
-    parameters = .GetParameters(task, sg)    
-    pattern <- new("Pattern", description=description, quality=quality, size=size, parameters=parameters)
-    patterns = append(patterns, pattern)
+    parameters <- .GetParameters(task, sg)    
+    pattern <- new("Pattern", description=description, quality=quality, size=size, parameters=parameters, selectors=selectors)
+    patterns <- append(patterns, pattern)
   }
   
   if (as.df) {
@@ -227,28 +252,28 @@
   #
   # Returns:
   #   The dataframe containing the pattern information
-  isNumeric = FALSE  
+  isNumeric <- FALSE  
   descriptions <- list()
-  length(descriptions) = length(patterns)
+  length(descriptions) <- length(patterns)
   qualities <-list()
-  length(qualities) = length(patterns)
+  length(qualities) <- length(patterns)
   sizes <- list()
   length(sizes) <- length(patterns)
   ps <- list()
   
-  i = 1
+  i <- 1
   for (pattern in patterns) {
-    descriptions[i] = paste(pattern@description, collapse=", ")
-    qualities[i] = .FormatDoubleSignificantDigits(pattern@quality, ndigits)
-    sizes[i] = pattern@size
+    descriptions[i] <- paste(pattern@description, collapse=", ")
+    qualities[i] <- .FormatDoubleSignificantDigits(pattern@quality, ndigits)
+    sizes[i] <- pattern@size
     if (!is.null(pattern@parameters$mean)) {
-      ps[i] = .FormatDoubleSignificantDigits(pattern@parameters$mean, ndigits)
-      isNumeric = TRUE
+      ps[i] <- .FormatDoubleSignificantDigits(pattern@parameters$mean, ndigits)
+      isNumeric <- TRUE
     } else {
-      ps[i] = .FormatDoubleSignificantDigits(pattern@parameters$p, ndigits)
-      isNumeric = FALSE
+      ps[i] <- .FormatDoubleSignificantDigits(pattern@parameters$p, ndigits)
+      isNumeric <- FALSE
     }
-    i = i + 1
+    i <- i + 1
   }
   if (isNumeric) {
     dataframe <- data.frame(
@@ -272,4 +297,16 @@
   gc(...)
   .jcall("java/lang/System", method = "gc")
   invisible()
+}
+
+is.matching <- function(pattern, data.list) {
+  selectors <- pattern@selectors
+  matching <- FALSE
+  for (sel in names(selectors)) {
+    if (data.list[[sel]] == selectors[[sel]]) {
+      matching <- TRUE
+      break
+    }
+  }
+  return(matching)
 }
\ No newline at end of file

Modified: pkg/rsubgroup/inst/java/subgroup.jar
===================================================================
(Binary files differ)

Modified: pkg/rsubgroup/man/Pattern-class.Rd
===================================================================
--- pkg/rsubgroup/man/Pattern-class.Rd	2015-06-08 05:53:05 UTC (rev 58)
+++ pkg/rsubgroup/man/Pattern-class.Rd	2015-06-09 13:21:54 UTC (rev 59)
@@ -17,6 +17,10 @@
     \item{\code{description}:}{The subgroup description,
     as a character vector.
     }
+    \item{\code{selectors}:}{The subgroup description,
+    given as a list of (simple) selection expressions, where the
+    'key' is the attribute and the 'value' is the value.
+    }
     \item{\code{quality}:}{The numeric value denoting the
     quality of the subgroup pattern as determined by the applied
     quality function.

Added: pkg/rsubgroup/man/is.matching.Rd
===================================================================
--- pkg/rsubgroup/man/is.matching.Rd	                        (rev 0)
+++ pkg/rsubgroup/man/is.matching.Rd	2015-06-09 13:21:54 UTC (rev 59)
@@ -0,0 +1,20 @@
+\name{is.matching}
+\alias{is.matching}
+\title{Tests whether a pattern and a data list (row of a data frame) match}
+\description{
+Tests whether a pattern and a data list (row of a data frame) match, e.g.,
+for implementing classification methods.
+}
+\usage{
+is.matching(pattern, data.list)
+}
+\arguments{
+\item{pattern}{An instance of class Pattern, e.g., returned by DiscoverSubgroups.}
+\item{data.list}{A list having the attributes as 'keys', and the values as
+respective values of the list. This corresponds, for example, to a row of a
+data frame.}
+}
+\seealso{
+\code{\link{Pattern-class}}.
+}
+\keyword{test pattern}
\ No newline at end of file

Modified: pkg/rsubgroup/man/subgroup-package.Rd
===================================================================
--- pkg/rsubgroup/man/subgroup-package.Rd	2015-06-08 05:53:05 UTC (rev 58)
+++ pkg/rsubgroup/man/subgroup-package.Rd	2015-06-09 13:21:54 UTC (rev 59)
@@ -29,7 +29,7 @@
 Package: \tab rsubgroup\cr
 Type: \tab Package\cr
 Version: \tab 0.7\cr
-Date: \tab 2015-05-xx\cr
+Date: \tab 2015-06-xx\cr
 License: \tab GPL (>= 3)\cr
 LazyLoad: \tab yes\cr
 }



More information about the Subgroup-commits mailing list