From noreply at r-forge.r-project.org Wed Jul 1 21:51:32 2015 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Wed, 1 Jul 2015 21:51:32 +0200 (CEST) Subject: [Subgroup-commits] r63 - in pkg/rsubgroup: . R inst/java man Message-ID: <20150701195132.E2AC7186DDC@r-forge.r-project.org> Author: atzmueller Date: 2015-07-01 21:51:32 +0200 (Wed, 01 Jul 2015) New Revision: 63 Modified: pkg/rsubgroup/DESCRIPTION pkg/rsubgroup/NEWS pkg/rsubgroup/R/subgroup.R pkg/rsubgroup/inst/java/subgroup.jar pkg/rsubgroup/man/SDTaskConfig-class.Rd pkg/rsubgroup/man/subgroup-package.Rd Log: * Implement/enable new quality function (Adjusted residuals, cf. Agresti 2007) ==> qf="ares" * For a binary target variable, the resulting patterns now also store the chi-squared value comparing subgroup and population w.r.t. the target in the parameters field. * ToDataFrame shows the chi-squared value for a binary target. Modified: pkg/rsubgroup/DESCRIPTION =================================================================== --- pkg/rsubgroup/DESCRIPTION 2015-06-19 07:12:54 UTC (rev 62) +++ pkg/rsubgroup/DESCRIPTION 2015-07-01 19:51:32 UTC (rev 63) @@ -2,7 +2,7 @@ Type: Package Title: Subgroup Discovery and Analytics Version: 0.7 -Date: 2015-06-19 +Date: 2015-07-01 Author: Martin Atzmueller Maintainer: Martin Atzmueller Description: A collection of efficient and effective tools and Modified: pkg/rsubgroup/NEWS =================================================================== --- pkg/rsubgroup/NEWS 2015-06-19 07:12:54 UTC (rev 62) +++ pkg/rsubgroup/NEWS 2015-07-01 19:51:32 UTC (rev 63) @@ -3,7 +3,7 @@ For a detailed list of changes, see the svn commit history at svn://r-forge.r-project.org/svnroot/subgroup/ -Changes in version 0.7 (2015-06-xx) +Changes in version 0.7 (2015-07-xx) =================================== * Improvements * document setting Java heap space before loading the rsubgroup library. 
@@ -17,7 +17,13 @@ function, a match of a pattern and a data instance can be checked now. * In SDTaskConfig, postfilter can be a single filter or a vector of filters, that are then applied in order on the results. This allows e.g., the combination - of minimal improvement filtering with weighted covering post-processing. + of minimal improvement filtering with weighted covering post-processing. + * Implement/enable new quality function (Adjusted residuals, cf. Agresti 2007) + ==> qf="ares" + * For a binary target variable, the resulting patterns now also store the + chi-squared value comparing subgroup and population w.r.t. the target in the + parameters field. + * ToDataFrame shows the chi-squared value for a binary target. * Bug fixes: * fix providing attributes=NULL (i.e., automatically include all attributes) Modified: pkg/rsubgroup/R/subgroup.R =================================================================== --- pkg/rsubgroup/R/subgroup.R 2015-06-19 07:12:54 UTC (rev 62) +++ pkg/rsubgroup/R/subgroup.R 2015-07-01 19:51:32 UTC (rev 63) @@ -138,7 +138,8 @@ size <- J(J(sg, "getStatistics"), "getSubgroupSize") p <- J(J(sg, "getStatistics"), "getP") p0 <- J(J(sg, "getStatistics"), "getP0") - return(list(p = p, p0 = p0, size = size)) + chi2 <- J("org.vikamine.kernel.subgroup.SGUtils")$calculateChi2OfSubgroup(J(sg, "getStatistics")) + return(list(p = p, p0 = p0, chi2=chi2, size = size)) } else if (J(target, "isNumeric")) { size <- J(J(sg, "getStatistics"), "getSubgroupSize") mean <- J(J(sg, "getStatistics"), "getSGMean") @@ -263,6 +264,9 @@ sizes <- list() length(sizes) <- length(patterns) ps <- list() + length(ps) <- length(patterns) + chi2 <- list() + length(chi2) <- length(patterns) i <- 1 for (pattern in patterns) { @@ -274,6 +278,7 @@ isNumeric <- TRUE } else { ps[i] <- .FormatDoubleSignificantDigits(pattern@parameters$p, ndigits) + chi2[i] <- .FormatDoubleSignificantDigits(pattern@parameters$chi2, ndigits) isNumeric <- FALSE } i <- i + 1 @@ -289,6 
+294,7 @@ quality=as.vector(qualities, "numeric"), p=as.vector(ps, "numeric"), size=as.vector(sizes, "numeric"), + chi2=as.vector(chi2, "numeric"), description=as.vector(descriptions, "character")) } return(dataframe) Modified: pkg/rsubgroup/inst/java/subgroup.jar =================================================================== (Binary files differ) Modified: pkg/rsubgroup/man/SDTaskConfig-class.Rd =================================================================== --- pkg/rsubgroup/man/SDTaskConfig-class.Rd 2015-06-19 07:12:54 UTC (rev 62) +++ pkg/rsubgroup/man/SDTaskConfig-class.Rd 2015-07-01 19:51:32 UTC (rev 63) @@ -15,11 +15,12 @@ \section{Slots}{ \describe{ \item{\code{qf}:}{A quality function; one of: - Binomial-Test \code{bin}, - Chi-Square-Test \code{chi2}, + Adjusted Residuals \code{ares}, + Binomial Test \code{bin}, + Chi-Square Test \code{chi2}, + Gain \code{gain}, Lift \code{lift}, Piatetsky-Shapiro \code{ps}, - Gain \code{gain}, Relative Gain \code{relgain}, Weighted Relative Accuracy \code{wracc}. The default is \code{qf = "ps"}. Modified: pkg/rsubgroup/man/subgroup-package.Rd =================================================================== --- pkg/rsubgroup/man/subgroup-package.Rd 2015-06-19 07:12:54 UTC (rev 62) +++ pkg/rsubgroup/man/subgroup-package.Rd 2015-07-01 19:51:32 UTC (rev 63) @@ -29,7 +29,7 @@ Package: \tab rsubgroup\cr Type: \tab Package\cr Version: \tab 0.7\cr -Date: \tab 2015-06-xx\cr +Date: \tab 2015-07-xx\cr License: \tab GPL (>= 3)\cr LazyLoad: \tab yes\cr }