[Subgroup-commits] r71 - in pkg/rsubgroup: . R inst/java java man tests

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Feb 22 23:38:28 CET 2021


Author: atzmueller
Date: 2021-02-22 23:38:27 +0100 (Mon, 22 Feb 2021)
New Revision: 71

Modified:
   pkg/rsubgroup/DESCRIPTION
   pkg/rsubgroup/NEWS
   pkg/rsubgroup/R/classes.R
   pkg/rsubgroup/R/subgroup.R
   pkg/rsubgroup/inst/java/subgroup.jar
   pkg/rsubgroup/java/readme-subgroup-jar.txt
   pkg/rsubgroup/man/SDTaskConfig-class.Rd
   pkg/rsubgroup/tests/test.R
Log:
Changes in version 1.1 (2021-02-22)
===================================
* Provide the automatic discretization options of the VIKAMINE kernel for
  numeric attributes, which are discretized internally.
  SDTaskConfig provides the new options discretize and nbins
  (defaults: discretize = TRUE, nbins = 3).
* Improved/extended tests for this.

Modified: pkg/rsubgroup/DESCRIPTION
===================================================================
--- pkg/rsubgroup/DESCRIPTION	2020-04-20 07:41:18 UTC (rev 70)
+++ pkg/rsubgroup/DESCRIPTION	2021-02-22 22:38:27 UTC (rev 71)
@@ -1,8 +1,8 @@
 Package: rsubgroup
 Type: Package
 Title: Subgroup Discovery and Analytics
-Version: 1.0
-Date: 2020-04-20
+Version: 1.1
+Date: 2021-02-22
 Author: Martin Atzmueller
 Maintainer: Martin Atzmueller <martin at atzmueller.net>
 Description: A collection of efficient and effective tools and
@@ -15,4 +15,4 @@
 Depends: R (>= 2.6.0), methods, rJava (>= 0.6-3), foreign (>= 0.8-40)
 SystemRequirements: Java (>= 8)
 Collate: 'AAAonLoad.R' 'randomSeed.R' 'classes.R' 'subgroup.R'
-URL: http://www.rsubgroup.org
\ No newline at end of file
+URL: https://rsubgroup.org
\ No newline at end of file

Modified: pkg/rsubgroup/NEWS
===================================================================
--- pkg/rsubgroup/NEWS	2020-04-20 07:41:18 UTC (rev 70)
+++ pkg/rsubgroup/NEWS	2021-02-22 22:38:27 UTC (rev 71)
@@ -3,6 +3,15 @@
 For a detailed list of changes, see the svn commit history at
 svn://r-forge.r-project.org/svnroot/subgroup/
 
+Changes in version 1.1 (2021-02-22)
+===================================
+* Provide the automatic discretization options of the VIKAMINE kernel for
+  numeric attributes, which are discretized internally.
+  SDTaskConfig provides the new options discretize and nbins
+  (defaults: discretize = TRUE, nbins = 3).
+* Improved/extended tests for this.
+
+
 Changes in version 1.0 (2020-04-20)
 ===================================
 * internal enhancements in subgroup.jar, i.e., the VIKAMINE kernel library,
@@ -9,11 +18,13 @@
   e.g. according to better error messages relating to the R connection.
 * Improved documentation and examples.
 
+
 Changes in version 0.9 (2020-03-04)
 ===================================
 * internal enhancements in subgroup.jar, i.e., the VIKAMINE kernel library
 * rsubgroup requires >= Java 8 (i.e., >= java.version 1.8)   
 
+
 Changes in version 0.8 (2019-05-23)
 ===================================
 * Improvements
@@ -26,6 +37,7 @@
 * Bug fixes.
   * fixed bug in automatic discretization used in rsubgroup VIKAMINE kernel
 
+
 Changes in version 0.7 (2015-07-01)
 ===================================
 * Improvements
@@ -55,7 +67,6 @@
   of two few selectors in subgroup discovery methods
 
 
-
 Changes in version 0.6 (2014-09-10)
 ===================================
 * Improvements:

Modified: pkg/rsubgroup/R/classes.R
===================================================================
--- pkg/rsubgroup/R/classes.R	2020-04-20 07:41:18 UTC (rev 70)
+++ pkg/rsubgroup/R/classes.R	2021-02-22 22:38:27 UTC (rev 71)
@@ -44,12 +44,15 @@
         relfilter   = "logical",
         postfilter  = ".vectorOrCharacter",
         attributes  = ".vectorOrNull",
-        parfilter  = "numeric"
+        parfilter   = "numeric",
+		discretize  = "logical",
+		nbins       = "numeric"		
     ),
     prototype(qf="ps", method="sdmap", k = as.integer(20),
         minqual = as.integer(0), minsize = as.integer(0), mintp = as.integer(0),
         maxlen = as.integer(7), nodefaults = FALSE, relfilter = FALSE,
-        postfilter = "", parfilter = 0.05, attributes = NULL)
+        postfilter = "", parfilter = 0.05, discretize = TRUE, nbins = 3,
+		attributes = NULL)
 )
 
 SDTaskConfig <- function(...){

Modified: pkg/rsubgroup/R/subgroup.R
===================================================================
--- pkg/rsubgroup/R/subgroup.R	2020-04-20 07:41:18 UTC (rev 70)
+++ pkg/rsubgroup/R/subgroup.R	2021-02-22 22:38:27 UTC (rev 71)
@@ -107,14 +107,23 @@
   if (!is.na(config@parfilter)) {
     J(task, "setPostFilterParameter", config@parfilter)
   }
+  
+  doDiscretize = config@discretize
+  if (is.na(config@nbins)) {
+	  doDiscretize = FALSE
+  }
+  intBins = as.integer(config@nbins)
+  
+  
   if (is.null(config@attributes)) {
     attributesArrayObject <- .GetAllAttributesAsJArray(ontology = ontology)
-    J(task, "setAttributes", attributesArrayObject)
+    J(task, "setAttributes", attributesArrayObject, doDiscretize, intBins)
   } else if ((!is.null(config@attributes)) && (length(config@attributes) > 0)) {
-    J(task, "setAttributes", .jarray(config@attributes))  
+    J(task, "setAttributes", .jarray(config@attributes), doDiscretize, intBins)  
   } else {
-    J(task, "setAttributes", .jarray(character(0))) 
+    J(task, "setAttributes", .jarray(character(0)), doDiscretize, intBins) 
   }
+    
   return(task)
 }
 

Modified: pkg/rsubgroup/inst/java/subgroup.jar
===================================================================
(Binary files differ)

Modified: pkg/rsubgroup/java/readme-subgroup-jar.txt
===================================================================
--- pkg/rsubgroup/java/readme-subgroup-jar.txt	2020-04-20 07:41:18 UTC (rev 70)
+++ pkg/rsubgroup/java/readme-subgroup-jar.txt	2021-02-22 22:38:27 UTC (rev 71)
@@ -4,7 +4,7 @@
 
 VIKAMINE is licensed under LGPL >= 2.1.
 Copyright (C) 2003-2008 by Martin Atzmueller, and contributors.
-Copyright (C) 2008-2020 by Martin Atzmueller, Florian Lemmerich, and contributors.
+Copyright (C) 2008-2021 by Martin Atzmueller, Florian Lemmerich, and contributors.
 
 The source code can be obtained at www.vikamine.org, and the SVN repository
 sourceforge.net/p/vikamine/code/HEAD/tree/trunk/ => org.vikamine.kernel

Modified: pkg/rsubgroup/man/SDTaskConfig-class.Rd
===================================================================
--- pkg/rsubgroup/man/SDTaskConfig-class.Rd	2020-04-20 07:41:18 UTC (rev 70)
+++ pkg/rsubgroup/man/SDTaskConfig-class.Rd	2021-02-22 22:38:27 UTC (rev 71)
@@ -17,6 +17,11 @@
     \item{\code{attributes}:}{The list of attributes to consider for mining.
     Either a vector of attribute names, or NULL (the default),
     which includes all attributes.} 
+    \item{\code{discretize}:}{Boolean, indicating whether to (automatically)
+    discretize numeric attributes (default \code{discretize=TRUE}). Depends on
+    parameter nbins. Either creates distinct values, if their number in the
+    dataset is <= nbins, or applies equal-frequency discretization for the
+    respective numeric attribute.}
     \item{\code{method}:}{A mining method; one of
     Beam-Search \code{beam},
     BSD \code{bsd},
@@ -24,6 +29,8 @@
     SD-Map enabling internal disjunctions \code{sdmap-dis}.
     The default is \code{method = "sdmap"}.
     }
+    \item{\code{nbins}:}{Specifies the number of bins to be used when
+    discretizing numeric attributes (see \code{discretize} above).}
     \item{\code{qf}:}{A quality function; one of:
     Adjusted Residuals \code{ares},
 	Binomial Test \code{bin},

Modified: pkg/rsubgroup/tests/test.R
===================================================================
--- pkg/rsubgroup/tests/test.R	2020-04-20 07:41:18 UTC (rev 70)
+++ pkg/rsubgroup/tests/test.R	2021-02-22 22:38:27 UTC (rev 71)
@@ -23,5 +23,15 @@
 
 data(credit.data)
 
-patterns.from.data.table <- DiscoverSubgroups(credit.data, as.target("class", "bad"))
+patterns.from.data.table <- DiscoverSubgroups(
+		credit.data,
+		as.target("class", "bad"),
+		new("SDTaskConfig", discretize = FALSE))
+result.data.frame <- ToDataFrame(patterns.from.data.table)
+
+patterns.from.data.table <- DiscoverSubgroups(
+		credit.data,
+		as.target("class", "bad"),
+		new("SDTaskConfig", discretize = TRUE, nbins = 3,
+				attributes = c("checking_status", "duration", "credit_amount")))
 result.data.frame <- ToDataFrame(patterns.from.data.table)
\ No newline at end of file



More information about the Subgroup-commits mailing list