From noreply at r-forge.r-project.org Mon Feb 22 23:38:28 2021 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 22 Feb 2021 23:38:28 +0100 (CET) Subject: [Subgroup-commits] r71 - in pkg/rsubgroup: . R inst/java java man tests Message-ID: <20210222223828.58DD8180A8D@r-forge.r-project.org> Author: atzmueller Date: 2021-02-22 23:38:27 +0100 (Mon, 22 Feb 2021) New Revision: 71 Modified: pkg/rsubgroup/DESCRIPTION pkg/rsubgroup/NEWS pkg/rsubgroup/R/classes.R pkg/rsubgroup/R/subgroup.R pkg/rsubgroup/inst/java/subgroup.jar pkg/rsubgroup/java/readme-subgroup-jar.txt pkg/rsubgroup/man/SDTaskConfig-class.Rd pkg/rsubgroup/tests/test.R Log: Changes in version 1.1 (2021-02-22) =================================== * provide automatic discretization options of VIKAMINE kernel for numeric attributes, they are internally discretized SDTaskConfig provides new options. defaults: discretize = TRUE, nbins = 3 * Improved/extended tests for this. Modified: pkg/rsubgroup/DESCRIPTION =================================================================== --- pkg/rsubgroup/DESCRIPTION 2020-04-20 07:41:18 UTC (rev 70) +++ pkg/rsubgroup/DESCRIPTION 2021-02-22 22:38:27 UTC (rev 71) @@ -1,8 +1,8 @@ Package: rsubgroup Type: Package Title: Subgroup Discovery and Analytics -Version: 1.0 -Date: 2020-04-20 +Version: 1.1 +Date: 2021-02-22 Author: Martin Atzmueller Maintainer: Martin Atzmueller Description: A collection of efficient and effective tools and @@ -15,4 +15,4 @@ Depends: R (>= 2.6.0), methods, rJava (>= 0.6-3), foreign (>= 0.8-40) SystemRequirements: Java (>= 8) Collate: 'AAAonLoad.R' 'randomSeed.R' 'classes.R' 'subgroup.R' -URL: http://www.rsubgroup.org \ No newline at end of file +URL: https://rsubgroup.org \ No newline at end of file Modified: pkg/rsubgroup/NEWS =================================================================== --- pkg/rsubgroup/NEWS 2020-04-20 07:41:18 UTC (rev 70) +++ pkg/rsubgroup/NEWS 2021-02-22 22:38:27 UTC (rev 71) @@ -3,6 +3,15 @@ 
For a detailed list of changes, see the svn commit history at svn://r-forge.r-project.org/svnroot/subgroup/ +Changes in version 1.1 (2021-02-22) +=================================== +* provide automatic discretization options of VIKAMINE kernel for + numeric attributes, they are internally discretized + SDTaskConfig provides new options. + defaults: discretize = TRUE, nbins = 3 +* Improved/extended tests for this. + + Changes in version 1.0 (2020-04-20) =================================== * internal enhancements in subgroup.jar, i.e., the VIKAMINE kernel library, @@ -9,11 +18,13 @@ e.g. according to better error messages relating to the R connection. * Improved documentation and examples. + Changes in version 0.9 (2020-03-04) =================================== * internal enhancements in subgroup.jar, i.e., the VIKAMINE kernel library * rsubgroup requires >= Java 8 (i.e., >= java.version 1.8) + Changes in version 0.8 (2019-05-23) =================================== * Improvements @@ -26,6 +37,7 @@ * Bug fixes. 
* fixed bug in automatic discretization used in rsubgroup VIKAMINE kernel + Changes in version 0.7 (2015-07-01) =================================== * Improvements @@ -55,7 +67,6 @@ of two few selectors in subgroup discovery methods - Changes in version 0.6 (2014-09-10) =================================== * Improvements: Modified: pkg/rsubgroup/R/classes.R =================================================================== --- pkg/rsubgroup/R/classes.R 2020-04-20 07:41:18 UTC (rev 70) +++ pkg/rsubgroup/R/classes.R 2021-02-22 22:38:27 UTC (rev 71) @@ -44,12 +44,15 @@ relfilter = "logical", postfilter = ".vectorOrCharacter", attributes = ".vectorOrNull", - parfilter = "numeric" + parfilter = "numeric", + discretize = "logical", + nbins = "numeric" ), prototype(qf="ps", method="sdmap", k = as.integer(20), minqual = as.integer(0), minsize = as.integer(0), mintp = as.integer(0), maxlen = as.integer(7), nodefaults = FALSE, relfilter = FALSE, - postfilter = "", parfilter = 0.05, attributes = NULL) + postfilter = "", parfilter = 0.05, discretize = TRUE, nbins = 3, + attributes = NULL) ) SDTaskConfig <- function(...){ Modified: pkg/rsubgroup/R/subgroup.R =================================================================== --- pkg/rsubgroup/R/subgroup.R 2020-04-20 07:41:18 UTC (rev 70) +++ pkg/rsubgroup/R/subgroup.R 2021-02-22 22:38:27 UTC (rev 71) @@ -107,14 +107,23 @@ if (!is.na(config@parfilter)) { J(task, "setPostFilterParameter", config@parfilter) } + + doDiscretize = config@discretize + if (is.na(config@nbins)) { + doDiscretize = FALSE + } + intBins = as.integer(config@nbins) + + if (is.null(config@attributes)) { attributesArrayObject <- .GetAllAttributesAsJArray(ontology = ontology) - J(task, "setAttributes", attributesArrayObject) + J(task, "setAttributes", attributesArrayObject, doDiscretize, intBins) } else if ((!is.null(config@attributes)) && (length(config@attributes) > 0)) { - J(task, "setAttributes", .jarray(config@attributes)) + 
J(task, "setAttributes", .jarray(config@attributes), doDiscretize, intBins) } else { - J(task, "setAttributes", .jarray(character(0))) + J(task, "setAttributes", .jarray(character(0)), doDiscretize, intBins) } + return(task) } Modified: pkg/rsubgroup/inst/java/subgroup.jar =================================================================== (Binary files differ) Modified: pkg/rsubgroup/java/readme-subgroup-jar.txt =================================================================== --- pkg/rsubgroup/java/readme-subgroup-jar.txt 2020-04-20 07:41:18 UTC (rev 70) +++ pkg/rsubgroup/java/readme-subgroup-jar.txt 2021-02-22 22:38:27 UTC (rev 71) @@ -4,7 +4,7 @@ VIKAMINE is licensed under LGPL >= 2.1. Copyright (C) 2003-2008 by Martin Atzmueller, and contributors. -Copyright (C) 2008-2020 by Martin Atzmueller, Florian Lemmerich, and contributors. +Copyright (C) 2008-2021 by Martin Atzmueller, Florian Lemmerich, and contributors. The source code can be obtained at www.vikamine.org, and the SVN repository sourceforge.net/p/vikamine/code/HEAD/tree/trunk/ => org.vikamine.kernel Modified: pkg/rsubgroup/man/SDTaskConfig-class.Rd =================================================================== --- pkg/rsubgroup/man/SDTaskConfig-class.Rd 2020-04-20 07:41:18 UTC (rev 70) +++ pkg/rsubgroup/man/SDTaskConfig-class.Rd 2021-02-22 22:38:27 UTC (rev 71) @@ -17,6 +17,11 @@ \item{\code{attributes}:}{The list of attributes to consider for mining. Either a vector of attribute names, or NULL (the default), which includes all attributes.} + \item{\code{discretize}:}{Boolean, indicating whether to (automatically) + discretize numeric attributes (default \code{discretize=TRUE}. Depends on + parameter nbins. 
Either creates distinct values, if their number in the + dataset is <= nbins, or applies equal-frequency discretization for the + respective numeric attribute.} \item{\code{method}:}{A mining method; one of Beam-Search \code{beam}, BSD \code{bsd}, @@ -24,6 +29,8 @@ SD-Map enabling internal disjunctions \code{sdmap-dis}. The default is \code{method = "sdmap"}. } + \item{\code{nbins}:}{Specifies the number of bins to be used when + discretizing numeric attributes (see \code{discretize} above).} \item{\code{qf}:}{A quality function; one of: Adjusted Residuals \code{ares}, Binomial Test \code{bin}, Modified: pkg/rsubgroup/tests/test.R =================================================================== --- pkg/rsubgroup/tests/test.R 2020-04-20 07:41:18 UTC (rev 70) +++ pkg/rsubgroup/tests/test.R 2021-02-22 22:38:27 UTC (rev 71) @@ -23,5 +23,15 @@ data(credit.data) -patterns.from.data.table <- DiscoverSubgroups(credit.data, as.target("class", "bad")) +patterns.from.data.table <- DiscoverSubgroups( + credit.data, + as.target("class", "bad"), + new("SDTaskConfig", discretize = FALSE)) +result.data.frame <- ToDataFrame(patterns.from.data.table) + +patterns.from.data.table <- DiscoverSubgroups( + credit.data, + as.target("class", "bad"), + new("SDTaskConfig", discretize = TRUE, nbins = 3, + attributes = c("checking_status", "duration", "credit_amount"))) result.data.frame <- ToDataFrame(patterns.from.data.table) \ No newline at end of file