From noreply at r-forge.r-project.org Sun Dec 15 19:26:07 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Sun, 15 Dec 2013 19:26:07 +0100 (CET) Subject: [Archetypes-commits] r66 - in pkg: . R inst/doc man vignettes Message-ID: <20131215182607.EA76A186AE7@r-forge.r-project.org> Author: manuel Date: 2013-12-15 19:26:07 +0100 (Sun, 15 Dec 2013) New Revision: 66 Added: pkg/vignettes/ pkg/vignettes/archetypes.Rnw pkg/vignettes/archetypes.bib pkg/vignettes/rt-002.pdf pkg/vignettes/rt-003.pdf Removed: pkg/inst/doc/archetypes.Rnw pkg/inst/doc/archetypes.bib pkg/inst/doc/rt-002.pdf pkg/inst/doc/rt-003.pdf Modified: pkg/ pkg/.Rbuildignore pkg/DESCRIPTION pkg/NAMESPACE pkg/NEWS pkg/R/archetypes-kit-blocks.R pkg/R/archetypes-kit.R pkg/man/skeletonplot.Rd Log: check updates Property changes on: pkg ___________________________________________________________________ Added: svn:ignore + .Rproj.user .Rhistory .RData Modified: pkg/.Rbuildignore =================================================================== --- pkg/.Rbuildignore 2012-10-05 14:23:48 UTC (rev 65) +++ pkg/.Rbuildignore 2013-12-15 18:26:07 UTC (rev 66) @@ -5,3 +5,5 @@ inst/doc/Rplots.pdf inst/nnls R/deploy.R +^.*\.Rproj$ +^\.Rproj\.user$ Modified: pkg/DESCRIPTION =================================================================== --- pkg/DESCRIPTION 2012-10-05 14:23:48 UTC (rev 65) +++ pkg/DESCRIPTION 2013-12-15 18:26:07 UTC (rev 66) @@ -1,8 +1,8 @@ Package: archetypes Type: Package Title: Archetypal Analysis -Version: 2.1-1 -Date: 2011-10-25 +Version: 2.1-2 +Date: 2013-12-15 Depends: methods, stats, @@ -22,7 +22,6 @@ problem solving mechanisms for the different conceputal parts of the algorithm. License: GPL (>= 2) -Revision: 44 Collate: 'archetypes-barplot.R' 'generics.R' Modified: pkg/NAMESPACE =================================================================== --- pkg/NAMESPACE 2012-10-05 14:23:48 UTC (rev 65) +++ pkg/NAMESPACE 2013-12-15 18:26:07 UTC (rev 66) @@ -1,3 +1,5 @@ +import(nnls) +import(methods) export(archetypes) export(archetypesFamily) export(archmap) Modified: pkg/NEWS =================================================================== --- pkg/NEWS 2012-10-05 14:23:48 UTC (rev 65) +++ pkg/NEWS 2013-12-15 18:26:07 UTC (rev 66) @@ -1,4 +1,11 @@ +Changes in archetypes version 2.1-2 + + o Moved vignette to 'vignettes' directory. + + o Fixed R CMD --as-cran warnings/notes. + + Changes in archetypes version 2.1-1 o Fixed bug in movieplot2. Modified: pkg/R/archetypes-kit-blocks.R =================================================================== --- pkg/R/archetypes-kit-blocks.R 2012-10-05 14:23:48 UTC (rev 65) +++ pkg/R/archetypes-kit-blocks.R 2013-12-15 18:26:07 UTC (rev 66) @@ -1,447 +1,447 @@ - - -### Scaling and rescaling functions: ################################# - -#' Scaling block: standardize to mean 0 and standard deviation 1. -#' @param x Data matrix. -#' @return Standardized data matrix with some attribues. -#' @noRd -std.scalefn <- function(x, ...) { - m = rowMeans(x) - x = x - m - - s = apply(x, 1, sd) - x = x / s - - attr(x, '.Meta') = list(mean=m, sd=s) - - return(x) -} - -#' Rescaling block: counterpart of std.scalefn. -#' @param x Standardized data matrix. -#' @param zs Archetypes matrix -#' @return Rescaled archetypes. -#' @noRd -std.rescalefn <- function(x, zs, ...) { - - m = attr(x, '.Meta')$mean - s = attr(x, '.Meta')$sd - - zs = zs * s - zs = zs + m - - return(zs) -} - - - -#' Scaling block: no scaling. -#' @param x Data matrix. -#' @return Data matrix. -#' @noRd -no.scalefn <- function(x, ...) { - return(x) -} - -#' Rescaling block: counterpart of no.scalefn. -#' @param x Data matrix. -#' @param zs Archetypes matrix. -#' @return Archetypes zs. -#' @noRd -no.rescalefn <- function(x, zs, ...) { - if ( is.null(zs) ) - return(matrix(NA, nrow = 0, ncol = 0)) - - return(zs) -} - - - -### Dummy and undummy functions: ##################################### - -#' Dummy block: generator for a dummy function which adds a row -#' containing a huge value. -#' @param huge The value. -#' @return A function which takes a data matrix and returns the -#' data matrix with an additonal row containing \code{huge} values. -#' @noRd -make.dummyfn <- function(huge=200) { - - bp.dummyfn <- function(x, ...) { - y = rbind(x, rep(huge, ncol(x))) - - attr(y, '.Meta') = attr(x, '.Meta') - attr(y, '.Meta')$dummyrow = nrow(y) - - return(y) - } - - return(bp.dummyfn) -} - - -#' Undummy block: remove dummy row. -#' @param x Data matrix. -#' @param zs Archetypes matrix. -#' @return Archetypes zs. -#' @noRd -rm.undummyfn <- function(x, zs, ...) { - dr = attr(x, '.Meta')$dummyrow - - return(zs[-dr,]) -} - - -#' Dummy block: no dummy row. -#' @param x Data matrix. -#' @return Data matrix x. -#' @noRd -no.dummyfn <- function(x, ...) { - return(x) -} - -#' Undummy block: return archetypes.. -#' @param x Data matrix. -#' @param zs Archetypes matrix. -#' @return Archetypes zs. -#' @noRd -no.undummyfn <- function(x, zs, ...) { - return(zs) -} - - - -### `From X and alpha to archetypes` functions: ###################### - - -#' X to alpha block: QR approach. -#' @param alphas The coefficients. -#' @param x Data matrix. -#' @return The solved linear system. -#' @noRd -qrsolve.zalphasfn <- function(alphas, x, ...) { - return(t(qr.solve(alphas %*% t(alphas)) %*% alphas %*% t(x))) -} - - - -#' X to alpha block: pseudo-inverse approach. -#' @param alphas The coefficients. -#' @param x Data matrix. -#' @return The solved linear system. -#' @noRd -ginv.zalphasfn <- function(alphas, x, ...) { - require(MASS) - - return(t(ginv(alphas %*% t(alphas)) %*% alphas %*% t(x))) -} - - - -#' X to alpha block: optim approach. -#' @param alphas The coefficients. -#' @param x Data matrix. -#' @return The solved linear system. -#' @noRd -opt.zalphasfn <- function(alphas, x, ...) { - z <- rnorm(nrow(x)*nrow(alphas)) - - fun <- function(z){ - z <- matrix(z, ncol=nrow(alphas)) - sum( (x - z %*% alphas)^2) - } - - z <- optim(z, fun, method="BFGS") - z <- matrix(z$par, ncol=nrow(alphas)) - - return(z) -} - - - -### Alpha calculation functions: ##################################### - - -#' Alpha block: plain nnls. -#' @param coefs The coefficients alpha. -#' @param C The archetypes matrix. -#' @param d The data matrix. -#' @return Recalculated alpha. -#' @noRd -nnls.alphasfn <- function(coefs, C, d, ...) { - require(nnls) - - n = ncol(d) - - for ( j in 1:n ) - coefs[,j] = coef(nnls(C, d[,j])) - - return(coefs) -} - -#' Alpha block: nnls with singular value decomposition. -#' @param coefs The coefficients alpha. -#' @param C The archetypes matrix. -#' @param d The data matrix. -#' @return Recalculated alpha. -#' @noRd -snnls.alphasfn <- function(coefs, C, d, ...) { - require(nnls) - - n = ncol(d) - - nc = ncol(C) - nr = nrow(C) - - - s = svd(C, nv=nc) - yint = t(s$u) %*% d - - for ( j in 1:n ) - coefs[,j] = coef(nnls(diag(s$d, nrow=nr, ncol=nc) %*% t(s$v), yint[,j])) - - return(coefs) -} - - - -### Beta calculation functions: ###################################### - - -#' Beta block: plain nnls. -#' @param coefs The coefficients beta. -#' @param C The data matrix. -#' @param d The archetypes matrix. -#' @return Recalculated beta. -#' @noRd -nnls.betasfn <- nnls.alphasfn - - - -#' Beta block: nnls with singular value decomposition. -#' @param coefs The coefficients beta. -#' @param C The data matrix. -#' @param d The archetypes matrix. -#' @return Recalculated beta. -#' @noRd -snnls.betasfn <- snnls.alphasfn - - - -### Norm functions: ################################################## - - -#' Norm block: standard matrix norm (spectral norm). -#' @param m Matrix. -#' @return The norm. -#' @noRd -norm2.normfn <- function(m, ...) { - return(max(svd(m)$d)) -} - - -#' Norm block: euclidian norm. -#' @param m Matrix. -#' @return The norm. -#' @noRd -euc.normfn <- function(m, ...) { - return(sum(apply(m, 2, function(x){sqrt(sum(x^2))}))) -} - - - -### Archetypes initialization functions: ############################# - - -#' Init block: generator for random initializtion. -#' @param k The proportion of beta for each archetype. -#' @return A function which returns a list with alpha and beta. -#' @noRd -make.random.initfn <- function(k) { - - bp.initfn <- function(x, p, ...) { - - n <- ncol(x) - b <- matrix(0, nrow=n, ncol=p) - - for ( i in 1:p ) - b[sample(n, k, replace=FALSE),i] <- 1 / k - - a <- matrix(1, nrow = p, ncol = n) / p - - return(list(betas = b, alphas = a)) - } - - return(bp.initfn) -} - -#' Init block: generator for fix initializtion. -#' @param indizes The indizies of data points to use as archetypes. -#' @return A function which returns a list with alpha and beta. -#' @noRd -make.fix.initfn <- function(indizes) { - - fix.initfn <- function(x, p, ...) { - n <- ncol(x) - - b <- matrix(0, nrow = n, ncol = p) - b[indizes, ] <- diag(p) - - a <- matrix(1, nrow = p, ncol = n) / p - - return(list(betas = b, alphas = a)) - } - - return(fix.initfn) -} - - - -### Weight functions: ################################################ - - -#' Weight function: move data closer to global center -#' @param data A numeric \eqn{m \times n} data matrix. -#' @param weights Vector of data weights within \eqn{[0, 1]}. -#' @return Weighted data matrix. -#' @noRd -center.weightfn <- function(data, weights, ...) { - if ( is.null(weights) ) - return(data) - - weights <- as.numeric(1 - weights) - - dr <- attr(data, '.Meta')$dummyrow - - if ( is.null(dr) ) { - center <- rowMeans(data) - data <- data + t(weights * t(center - data)) - } - else { - center <- rowMeans(data[-dr, ]) - data[-dr, ] <- data[-dr, ] + t(weights * t(center - data[-dr, ])) - } - - data -} - -#' Global weight function: move data closer to global center -#' @param data A numeric \eqn{m \times n} data matrix. -#' @param weights Vector or matrix of data weights within \eqn{[0, 1]}. -#' @return Weighted data matrix. -#' @noRd -center.globweightfn <- function(data, weights, ...) { - if ( is.null(weights) ) - return(data) - - if ( is.vector(weights) ) - weights <- diag(weights) - - dr <- attr(data, '.Meta')$dummyrow - - if ( is.null(dr) ) { - data <- data %*% weights - } - else { - data[-dr, ] <- data[-dr, ] %*% weights - } - - data -} - - - -### Reweights functions: ############################################# - - -#' Reweights function: calculate Bisquare reweights. -#' @param resid A numeric \eqn{m \times n} data matrix. -#' @param reweights Vector of data reweights within \eqn{[0, 1]}. -#' @return Reweights vector. -#' @noRd -bisquare0.reweightsfn <- function(resid, reweights, ...) { - resid <- apply(resid, 2, function(x) sum(abs(x))) - resid0 <- resid < sqrt(.Machine$double.eps) - - s <- 6 * median(resid[!resid0]) - v <- resid / s - - ifelse(v < 1, (1 - v^2)^2, 0) -} - - - -#' Reweights function: calculate binary Bisquare reweights. -#' @param resid A numeric \eqn{m \times n} data matrix. -#' @param reweights Vector of data reweights within \eqn{[0, 1]}. -#' @param threshold Threshold for binarization. -#' @return Reweights vector. -#' @noRd -binary.bisquare0.reweightsfn <- function(resid, reweights, - threshold = 0.1, ...) { - rw <- bisquare0.reweightsfn(resid, reweights, ...) - ifelse(rw < threshold, 0, 1) -} - - - -### Archetypes family: ############################################### - - -#' Archetypes family constructor -#' -#' This function returns a problem solving block for each of the -#' different conceptual parts of the algorithm. -#' -#' @param which The kind of archetypes family. -#' @param ... Exchange predefined family blocks with self-defined -#' functions. -#' -#' @return A list containing a function for each of the different parts. -#' -#' @family archetypes -#' -#' @export -archetypesFamily <- function(which = c('original', 'weighted', 'robust'), ...) { - - which <- match.arg(which) - blocks <- list(...) - - family <- do.call(sprintf('.%s.archetypesFamily', which), list()) - family$which <- which - family$which.exchanged <- NULL - - if ( length(blocks) > 0 ) { - family$which <- sprintf('%s*', family$which) - family$which.exchanged <- names(blocks) - - for ( n in names(blocks) ) - family[[n]] <- blocks[[n]] - } - - - family -} - - - -#' Original family constructor helper. -#' @return A list of blocks. -#' @noRd -.original.archetypesFamily <- function() { - list(normfn = norm2.normfn, - scalefn = std.scalefn, - rescalefn = std.rescalefn, - dummyfn = make.dummyfn(200), - undummyfn = rm.undummyfn, - initfn = make.random.initfn(1), - alphasfn = nnls.alphasfn, - betasfn = nnls.betasfn, - zalphasfn = qrsolve.zalphasfn, - globweightfn = function(x, weights) x, - weightfn = function(x, weights) x, - reweightsfn = function(x, weights) weights, - class = NULL) -} - + + +### Scaling and rescaling functions: ################################# + +#' Scaling block: standardize to mean 0 and standard deviation 1. +#' @param x Data matrix. +#' @return Standardized data matrix with some attribues. +#' @noRd +std.scalefn <- function(x, ...) { + m = rowMeans(x) + x = x - m + + s = apply(x, 1, sd) + x = x / s + + attr(x, '.Meta') = list(mean=m, sd=s) + + return(x) +} + +#' Rescaling block: counterpart of std.scalefn. +#' @param x Standardized data matrix. +#' @param zs Archetypes matrix +#' @return Rescaled archetypes. +#' @noRd +std.rescalefn <- function(x, zs, ...) { + + m = attr(x, '.Meta')$mean + s = attr(x, '.Meta')$sd + + zs = zs * s + zs = zs + m + + return(zs) +} + + + +#' Scaling block: no scaling. +#' @param x Data matrix. +#' @return Data matrix. +#' @noRd +no.scalefn <- function(x, ...) { + return(x) +} + +#' Rescaling block: counterpart of no.scalefn. +#' @param x Data matrix. +#' @param zs Archetypes matrix. +#' @return Archetypes zs. +#' @noRd +no.rescalefn <- function(x, zs, ...) { + if ( is.null(zs) ) + return(matrix(NA, nrow = 0, ncol = 0)) + + return(zs) +} + + + +### Dummy and undummy functions: ##################################### + +#' Dummy block: generator for a dummy function which adds a row +#' containing a huge value. +#' @param huge The value. +#' @return A function which takes a data matrix and returns the +#' data matrix with an additonal row containing \code{huge} values. +#' @noRd +make.dummyfn <- function(huge=200) { + + bp.dummyfn <- function(x, ...) { + y = rbind(x, rep(huge, ncol(x))) + + attr(y, '.Meta') = attr(x, '.Meta') + attr(y, '.Meta')$dummyrow = nrow(y) + + return(y) + } + + return(bp.dummyfn) +} + + +#' Undummy block: remove dummy row. +#' @param x Data matrix. +#' @param zs Archetypes matrix. +#' @return Archetypes zs. +#' @noRd +rm.undummyfn <- function(x, zs, ...) { + dr = attr(x, '.Meta')$dummyrow + + return(zs[-dr,]) +} + + +#' Dummy block: no dummy row. +#' @param x Data matrix. +#' @return Data matrix x. +#' @noRd +no.dummyfn <- function(x, ...) { + return(x) +} + +#' Undummy block: return archetypes.. +#' @param x Data matrix. +#' @param zs Archetypes matrix. +#' @return Archetypes zs. +#' @noRd +no.undummyfn <- function(x, zs, ...) { + return(zs) +} + + + +### `From X and alpha to archetypes` functions: ###################### + + +#' X to alpha block: QR approach. +#' @param alphas The coefficients. +#' @param x Data matrix. +#' @return The solved linear system. +#' @noRd +qrsolve.zalphasfn <- function(alphas, x, ...) { + return(t(qr.solve(alphas %*% t(alphas)) %*% alphas %*% t(x))) +} + + + +#' X to alpha block: pseudo-inverse approach. +#' @param alphas The coefficients. +#' @param x Data matrix. +#' @return The solved linear system. +#' @noRd +ginv.zalphasfn <- function(alphas, x, ...) { + require(MASS) + + return(t(ginv(alphas %*% t(alphas)) %*% alphas %*% t(x))) +} + + + +#' X to alpha block: optim approach. +#' @param alphas The coefficients. +#' @param x Data matrix. +#' @return The solved linear system. +#' @noRd +opt.zalphasfn <- function(alphas, x, ...) { + z <- rnorm(nrow(x)*nrow(alphas)) + + fun <- function(z){ + z <- matrix(z, ncol=nrow(alphas)) + sum( (x - z %*% alphas)^2) + } + + z <- optim(z, fun, method="BFGS") + z <- matrix(z$par, ncol=nrow(alphas)) + + return(z) +} + + + +### Alpha calculation functions: ##################################### + + +#' Alpha block: plain nnls. +#' @param coefs The coefficients alpha. +#' @param C The archetypes matrix. +#' @param d The data matrix. +#' @return Recalculated alpha. +#' @noRd +nnls.alphasfn <- function(coefs, C, d, ...) { + #require(nnls) + + n = ncol(d) + + for ( j in 1:n ) + coefs[,j] = coef(nnls(C, d[,j])) + + return(coefs) +} + +#' Alpha block: nnls with singular value decomposition. +#' @param coefs The coefficients alpha. +#' @param C The archetypes matrix. +#' @param d The data matrix. +#' @return Recalculated alpha. +#' @noRd +snnls.alphasfn <- function(coefs, C, d, ...) { + #require(nnls) + + n = ncol(d) + + nc = ncol(C) + nr = nrow(C) + + + s = svd(C, nv=nc) + yint = t(s$u) %*% d + + for ( j in 1:n ) + coefs[,j] = coef(nnls(diag(s$d, nrow=nr, ncol=nc) %*% t(s$v), yint[,j])) + + return(coefs) +} + + + +### Beta calculation functions: ###################################### + + +#' Beta block: plain nnls. +#' @param coefs The coefficients beta. +#' @param C The data matrix. +#' @param d The archetypes matrix. +#' @return Recalculated beta. +#' @noRd +nnls.betasfn <- nnls.alphasfn + + + +#' Beta block: nnls with singular value decomposition. +#' @param coefs The coefficients beta. +#' @param C The data matrix. +#' @param d The archetypes matrix. +#' @return Recalculated beta. +#' @noRd +snnls.betasfn <- snnls.alphasfn + + + +### Norm functions: ################################################## + + +#' Norm block: standard matrix norm (spectral norm). +#' @param m Matrix. +#' @return The norm. +#' @noRd +norm2.normfn <- function(m, ...) { + return(max(svd(m)$d)) +} + + +#' Norm block: euclidian norm. +#' @param m Matrix. +#' @return The norm. +#' @noRd +euc.normfn <- function(m, ...) { + return(sum(apply(m, 2, function(x){sqrt(sum(x^2))}))) +} + + + +### Archetypes initialization functions: ############################# + + +#' Init block: generator for random initializtion. +#' @param k The proportion of beta for each archetype. +#' @return A function which returns a list with alpha and beta. +#' @noRd +make.random.initfn <- function(k) { + + bp.initfn <- function(x, p, ...) { + + n <- ncol(x) + b <- matrix(0, nrow=n, ncol=p) + + for ( i in 1:p ) + b[sample(n, k, replace=FALSE),i] <- 1 / k + + a <- matrix(1, nrow = p, ncol = n) / p + + return(list(betas = b, alphas = a)) + } + + return(bp.initfn) +} + +#' Init block: generator for fix initializtion. +#' @param indizes The indizies of data points to use as archetypes. +#' @return A function which returns a list with alpha and beta. +#' @noRd +make.fix.initfn <- function(indizes) { + + fix.initfn <- function(x, p, ...) { + n <- ncol(x) + + b <- matrix(0, nrow = n, ncol = p) + b[indizes, ] <- diag(p) + + a <- matrix(1, nrow = p, ncol = n) / p + + return(list(betas = b, alphas = a)) + } + + return(fix.initfn) +} + + + +### Weight functions: ################################################ + + +#' Weight function: move data closer to global center +#' @param data A numeric \eqn{m \times n} data matrix. +#' @param weights Vector of data weights within \eqn{[0, 1]}. +#' @return Weighted data matrix. +#' @noRd +center.weightfn <- function(data, weights, ...) { + if ( is.null(weights) ) + return(data) + + weights <- as.numeric(1 - weights) + + dr <- attr(data, '.Meta')$dummyrow + + if ( is.null(dr) ) { + center <- rowMeans(data) + data <- data + t(weights * t(center - data)) + } + else { + center <- rowMeans(data[-dr, ]) + data[-dr, ] <- data[-dr, ] + t(weights * t(center - data[-dr, ])) + } + + data +} + +#' Global weight function: move data closer to global center +#' @param data A numeric \eqn{m \times n} data matrix. +#' @param weights Vector or matrix of data weights within \eqn{[0, 1]}. +#' @return Weighted data matrix. +#' @noRd +center.globweightfn <- function(data, weights, ...) { + if ( is.null(weights) ) + return(data) + + if ( is.vector(weights) ) + weights <- diag(weights) + + dr <- attr(data, '.Meta')$dummyrow + + if ( is.null(dr) ) { + data <- data %*% weights + } + else { + data[-dr, ] <- data[-dr, ] %*% weights + } + + data +} + + + +### Reweights functions: ############################################# + + +#' Reweights function: calculate Bisquare reweights. +#' @param resid A numeric \eqn{m \times n} data matrix. +#' @param reweights Vector of data reweights within \eqn{[0, 1]}. +#' @return Reweights vector. +#' @noRd +bisquare0.reweightsfn <- function(resid, reweights, ...) { + resid <- apply(resid, 2, function(x) sum(abs(x))) + resid0 <- resid < sqrt(.Machine$double.eps) + + s <- 6 * median(resid[!resid0]) + v <- resid / s + + ifelse(v < 1, (1 - v^2)^2, 0) +} + + + +#' Reweights function: calculate binary Bisquare reweights. +#' @param resid A numeric \eqn{m \times n} data matrix. +#' @param reweights Vector of data reweights within \eqn{[0, 1]}. +#' @param threshold Threshold for binarization. +#' @return Reweights vector. +#' @noRd +binary.bisquare0.reweightsfn <- function(resid, reweights, + threshold = 0.1, ...) { + rw <- bisquare0.reweightsfn(resid, reweights, ...) + ifelse(rw < threshold, 0, 1) +} + + + +### Archetypes family: ############################################### + + +#' Archetypes family constructor +#' +#' This function returns a problem solving block for each of the +#' different conceptual parts of the algorithm. +#' +#' @param which The kind of archetypes family. +#' @param ... Exchange predefined family blocks with self-defined +#' functions. +#' +#' @return A list containing a function for each of the different parts. +#' +#' @family archetypes +#' +#' @export +archetypesFamily <- function(which = c('original', 'weighted', 'robust'), ...) { + + which <- match.arg(which) + blocks <- list(...) + + family <- do.call(sprintf('.%s.archetypesFamily', which), list()) + family$which <- which + family$which.exchanged <- NULL + + if ( length(blocks) > 0 ) { + family$which <- sprintf('%s*', family$which) + family$which.exchanged <- names(blocks) + + for ( n in names(blocks) ) + family[[n]] <- blocks[[n]] + } + + + family +} + + + +#' Original family constructor helper. +#' @return A list of blocks. +#' @noRd +.original.archetypesFamily <- function() { + list(normfn = norm2.normfn, + scalefn = std.scalefn, + rescalefn = std.rescalefn, + dummyfn = make.dummyfn(200), + undummyfn = rm.undummyfn, + initfn = make.random.initfn(1), + alphasfn = nnls.alphasfn, + betasfn = nnls.betasfn, + zalphasfn = qrsolve.zalphasfn, + globweightfn = function(x, weights) x, + weightfn = function(x, weights) x, + reweightsfn = function(x, weights) weights, + class = NULL) +} + Modified: pkg/R/archetypes-kit.R =================================================================== --- pkg/R/archetypes-kit.R 2012-10-05 14:23:48 UTC (rev 65) +++ pkg/R/archetypes-kit.R 2013-12-15 18:26:07 UTC (rev 66) @@ -170,7 +170,7 @@ zs <- family$undummyfn(x1, zs) zs <- family$rescalefn(x1, zs) - resid <- zs %*% alphas - t(data) + #resid <- zs %*% alphas - t(data) return(as.archetypes(t(zs), k, t(alphas), rss, iters = (i-1), Deleted: pkg/inst/doc/archetypes.Rnw =================================================================== --- pkg/inst/doc/archetypes.Rnw 2012-10-05 14:23:48 UTC (rev 65) +++ pkg/inst/doc/archetypes.Rnw 2013-12-15 18:26:07 UTC (rev 66) @@ -1,1076 +0,0 @@ -\documentclass[nojss]{jss} - -\usepackage{enumitem} -\usepackage{amsmath} -\usepackage{float} - -\clubpenalty = 10000 -\widowpenalty = 10000 -\displaywidowpenalty = 10000 - -\author{Manuel J. A. Eugster\\{\small - Ludwig-Maximilians-Universit{\"a}t M{\"u}nchen} \And - Friedrich Leisch\\{\small Ludwig-Maximilians-Universit{\"a}t - M{\"u}nchen}} - -\title{From Spider-Man to Hero --\\ - Archetypal Analysis in \proglang{R}} - -\Plainauthor{Manuel J. A. Eugster, Friedrich Leisch} -\Plaintitle{From Spider-Man to Hero -- Archetypal Analysis in R} -\Shorttitle{Archetypal Analysis in \proglang{R}} -\Keywords{archetypal analysis, convex hull, \proglang{R}} -\Plainkeywords{archetypal analysis, convex hull, R} - -\Abstract{ -This introduction to the \proglang{R} package \pkg{archetypes} is a -(slightly) modified version of \citet{Eugster+Leisch at 2009}, published -in the {\it Journal of Statistical Software}. - -\bigskip - -Archetypal analysis has the aim to represent observations in a -multivariate data set as convex combinations of extremal points. This -approach was introduced by \citet{Cutler+Breiman at 1994}; they defined -the concrete problem, laid out the theoretical foundations and -presented an algorithm written in \proglang{Fortran}. In this paper we -present the \proglang{R} package \pkg{archetypes} which is available -on the Comprehensive \proglang{R} Archive Network. The package -provides an implementation of the archetypal analysis algorithm within -\proglang{R} and different exploratory tools to analyze the algorithm -during its execution and its final result. The application of the -package is demonstrated on two examples. -} - -\Address{ - Manuel J. A. Eugster\\ - Department of Statistics\\ - Ludwig-Maximilians-Universit{\"a}t M{\"u}nchen\\ - 80539 Munich, Germany\\ - E-mail: \email{Manuel.Eugster at stat.uni-muenchen.de}\\ - URL: \url{http://www.statistik.lmu.de/~eugster/} - - \medskip - Friedrich Leisch\\ - Department of Statistics\\ - Ludwig-Maximilians-Universit{\"a}t M{\"u}nchen\\ - 80539 Munich, Germany\\ - E-mail: \email{Friedrich.Leisch at R-project.org}\\ - URL: \url{http://www.statistik.lmu.de/~leisch/} - -} - -%%\usepackage{Sweave} %% already provided by jss.cls -%%\VignetteIndexEntry{From Spider-Man to Hero -- Archetypal Analysis in R} -%%\VignetteDepends{archetypes} -%%\VignetteKeywords{archetypal analysis, convex hull, R} -%%\VignettePackage{archetypes} - -\SweaveOpts{eps=FALSE, keep.source=TRUE} -<>= -options(width=80, prompt='R> ') -@ - -\begin{document} - -\section[Introduction]{Introduction\label{intro}} - -The \citet{merriam-webster:archetype} defines an archetype as {\it the - original pattern or model of which all things of the same type are - representations or copies}. The aim of archetypal analysis is to -find ``pure types'', the archetypes, within a set defined in a -specific context. The concept of archetypes is used in many different -areas, the set can be defined in terms of literature, philosophy, -psychology and also statistics. Here, the concrete problem is to find -a few, not necessarily observed, points (archetypes) in a set of -multivariate observations such that all the data can be well -represented as convex combinations of the archetypes. The title of -this article illustrates the concept of archetypes on the basis of -archetypes in literature: the {\it Spider-Man} personality belongs to -the generic {\it Hero} archetype, and archetypal analysis tries to -find this coherence. - -In statistics archetypal analysis was first introduced by -\citet{Cutler+Breiman at 1994}. In their paper they laid out the -theoretical foundations, defined the concrete problem as a nonlinear -least squares problem and presented an alternating minimizing -algorithm to solve it. It has found applications in different areas, -with recently grown popularity in economics, e.g., -\citet{Li+Wang+Louviere+Carson at 2003} and -\citet{Porzio+Ragozini+Vistocco at 2008}. In spite of the rising interest -in this computer-intensive but numerically sensitive method, no -``easy-to-use'' and freely available software package has been -developed yet. In this paper we present the software package -\pkg{archetypes} within the \proglang{R} statistical environment -\citep{R} which provides an implementation of the archetypal analysis -algorithm. Additionally, the package provides exploratory tools to -visualize the algorithm during the minimization steps and its final -result. The newest released version of \pkg{archetypes} is always -available from the Comprehensive R Archive Network at -\url{http://CRAN.R-project.org/package=archetypes}. - -The paper is organized as follows: In Section~\ref{algorithm} we -outline the archetypal analysis with its different conceptual -parts. We present the theoretical background as far as we need it for -a sound introduction of our implementation; for a complete explanation -we refer to the original paper. Section~\ref{pkg} demonstrates how to use -\pkg{archetypes} based on a simple artificial data set, with details about -numerical problems and the behavior of the algorithm. Section~\ref{comp} - presents a simulation study to show how the implementation scales -with numbers of observations, attributes and archetypes. In -Section~\ref{body} we show a real word example -- the archetypes of -human skeletal diameter measurements. Section~\ref{outlook} concludes -the article with future investigations. - - - -\section[Archetypal analysis]{Archetypal analysis\label{algorithm}} - -Given is an $n \times m$ matrix $X$ representing a multivariate data -set with $n$ observations and $m$ attributes. For a given $k$ the -archetypal analysis finds the matrix $Z$ of $k$ $m$-dimensional -archetypes according to the two fundamentals: -\begin{enumerate} - [label=(\arabic{enumi})] - - \item The data are best approximated by convex combinations of the - archetypes, i.e., they minimize - \[ - \mbox{RSS} = \|X - \alpha Z^{\top}\|_2 - \] - with $\alpha$, the coefficients of the archetypes, an $n \times k$ - matrix; the elements are required to be greater equal $0$ and - their sum must be $1$, i.e., $\sum_{j=1}^{k} \alpha_{ij} = 1$ - with $\alpha_{ij} \geq 0$ and $i = 1, \ldots, n$. $\|\cdot\|_2$ - denotes an appropriate matrix norm. - - \item The archetypes are convex combinations of the data points: - \[ - Z = X^{\top} \beta - \] - with $\beta$, the coefficients of the data set, a $n \times k$ - matrix where the elements are required to be greater equal $0$ and - their sum must be $1$, i.e., $\sum_{i=1}^{n} \beta_{ji} = 1$ - with $\beta_{ji} \geq 0$ and $j = 1, \ldots, k$. -\end{enumerate} -These two fundamentals also define the basic principles of the -estimation algorithm: -it alternates between finding the best $\alpha$ for given -archetypes $Z$ and finding the best archetypes $Z$ for given -$\alpha$; at each step several convex least squares problems are -solved, the overall $\mbox{RSS}$ is reduced successively. - -With a view to the implementation, the algorithm consists of the -following steps: -\begin{enumerate} - [label=\arabic{enumi}.,ref=\arabic{enumi},itemsep=6pt] - - \item[] Given the number of archetypes $k$: - - \item\label{alg:pre} Data preparation and initialization: scale - data, add a dummy row (see below) and initialize $\beta$ - in a way that the the constraints are fulfilled to calculate the - starting archetypes $Z$. - - \item\label{alg:loop} Loop until $\mbox{RSS}$ reduction is - sufficiently small or the number of maximum iterations is reached: - - \begin{enumerate} - [label=\arabic{enumi}.\arabic{enumii}., - ref=\arabic{enumi}.\arabic{enumii}, itemsep=6pt] - - \item\label{alg:loop-alpha} Find best $\alpha$ for - the given set of archetypes $Z$: solve $n$ convex least - squares problems ($i = 1, \ldots, n$) - \[ - \min_{\alpha_i} \frac{1}{2} \|X_i - Z \alpha_i\|_2 - \mbox{ subject to} \; \alpha_i \geq 0 \; - \mbox{ and } \; \sum_{j=1}^{k} \alpha_{ij} = 1\mbox{.} - \] - - \item\label{alg:loop-zt} Recalculate archetypes $\tilde{Z}$: - solve system of linear equations $X = \alpha \tilde{Z}^{\top}$. - - \item\label{alg:loop-beta} Find best $\beta$ for the - given set of archetypes $\tilde{Z}$: solve $k$ convex least - squares problems ($j = 1, \ldots, k$) - \[ - \min_{\beta_j} \frac{1}{2} \|\tilde{Z}_j - X \beta_j\|_2 - \mbox{ subject to} \; \beta_j \ge 0 \; - \mbox{ and } \; \sum_{i=1}^{n} \beta_{ji} = 1\mbox{.} - \] - - \item\label{alg:loop-z} Recalculate archetypes $Z$: $Z = X \beta$. - - \item\label{alg:loop-rss} Calculate residual sum of squares - $\mbox{RSS}$. - \end{enumerate} - - \item\label{alg:post} Post-processing: remove dummy row and rescale - archetypes. -\end{enumerate} -The algorithm has to deal with several numerical problems, -i.e. systems of linear equations and convex least squares -problems. In the following we explain each step in detail. - -\paragraph{Solving the convex least squares problems:} In Step -\ref{alg:loop-alpha} and \ref{alg:loop-beta} several convex -least squares problems have to be solved. \citet{Cutler+Breiman at 1994} -use a penalized version of the non-negative least squares algorithm by -\citet{Lawson+Hanson at 1974} \citep[as general reference -see,e.g.,][]{Luenberger at 1984}. In detail, the problems to solve are of -the form $\|u - Tw\|_2$ with $u, w$ vectors and $T$ a matrix, all of -appropriate dimensions, and the non-negativity and equality -constraints. The penalized version adds an extra element $M$ to $u$ -and to each observation of $T$; then -\[ - \|u - Tw\|_2 + M^2 \|1 - w\|_2 -\] -is minimized under non-negativity restrictions. For large -$M$, the second term dominates and forces the equality constraint to be -approximately satisfied while maintaining the non-negativity -constraint. The hugeness of the value $M$ varies from problem to -problem and thus can be seen as a hyperparameter of the -algorithm. Default value in the package is $200$. - -\paragraph{Solving the system of linear equations:} In Step -\ref{alg:loop-zt} the system of linear equations -\[ - \tilde{Z} = \alpha^{-1} X -\] -has to be solved. A lot of methods exist, one approach is the -Moore-Penrose pseudoinverse which provides an approximated unique -solution by a least squares approach: given the pseudoinverse -$\alpha^{+}$ of $\alpha$, -\[ - \tilde{Z} = \alpha^{+} X\mbox{,} -\] -is solved. Another approach is the usage of $QR$ decomposition: -$\alpha = QR$, where $Q$ is an orthogonal and $R$ an upper triangular -matrix, then -\[ - \tilde{Z} = Q^{\top} X R^{-1}\mbox{,} -\] -is solved. Default approach in the package is the $QR$ decomposition -using the \code{solve()} function. - [TRUNCATED] To get the complete diff run: svnlook diff /svnroot/archetypes -r 66