[Analogue-commits] r313 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Mar 21 23:04:39 CET 2013


Author: gsimpson
Date: 2013-03-21 23:04:39 +0100 (Thu, 21 Mar 2013)
New Revision: 313

Modified:
   pkg/R/distance2.R
Log:
squash distance2()

Modified: pkg/R/distance2.R
===================================================================
--- pkg/R/distance2.R	2013-03-21 22:04:00 UTC (rev 312)
+++ pkg/R/distance2.R	2013-03-21 22:04:39 UTC (rev 313)
@@ -1,210 +1,210 @@
-`distance2` <- function(x, ...)
-    UseMethod("distance2")
+## `distance2` <- function(x, ...)
+##     UseMethod("distance2")
 
-`distance2.default` <- function(x, y,
-                                method = c("euclidean",
-                                "SQeuclidean", "chord",
-                                "SQchord", "bray", "chi.square",
-                                "SQchi.square", "information",
-                                "chi.distance", "manhattan",
-                                "kendall", "gower", "alt.gower",
-                                "mixed"),
-                                weights = NULL, R = NULL,
-                                type = list(),
-                                ordinal = c("gower","rank","metric"), ...) {
-    pColl <- function(n) paste(n, collapse = ", ")
-    ## Euclid?an could be spelled variously
-    if(!is.na(pmatch(method, "euclidian")))
-	method <- "euclidean"
-    if (missing(method))
-        method <- "euclidean"
-    METHODS <- c("euclidean", "SQeuclidean", "chord", "SQchord",
-                 "bray", "chi.square", "SQchi.square",
-                 "information","chi.distance", "manhattan",
-                 "kendall", "gower", "alt.gower", "mixed")
-    method <- match.arg(method)
-    DCOEF <- pmatch(method, METHODS)
-    ordinal <- match.arg(ordinal)
-    ORDTYPES <- c("gower","rank","metric")
-    if(missing(y)) { ## only a single matrix
-        ## TODO
-    } else { ## two matrices
-        ## check x and y have same columns
-        if(!isTRUE(all.equal(names(x), names(y))))
-            stop("'x' and 'y' appear to have different variables.")
-        if(!isTRUE(all.equal((n.vars <- ncol(x)), ncol(y))))
-            stop("'x' and 'y' have different numbers of columns.")
-        ## variables
-        nrx <- nrow(x)
-        nry <- nrow(y)
-        d <- numeric(length = nrx * nry)
-        ## object names (row names)
-        x.names <- rownames(x)
-        y.names <- rownames(y)
-        ## some preprocessing steps required for some coefs
-        ## so dealt with separately
-        if(method %in% c("chi.distance", "gower", "alt.gower",
-                         "mixed", "kendall")) {
-            if(method == "chi.distance") {
-                x <- data.matrix(x)
-                y <- data.matrix(y)
-                csum <- colSums(rbind(x, y))
-                y <- y / rowSums(y)
-                x <- x / rowSums(x)
-                d <- .C("xy_chisq_dist", x = as.double(x), y = as.double(y),
-                        nr1 = as.integer(nrx), nr2 = as.integer(nry),
-                        nc = as.integer(n.vars), d = as.double(d),
-                        csum = as.double(csum), NAOK = as.integer(FALSE),
-                        PACKAGE = "analogue")$d
-            }
-            if(method %in% c("gower", "alt.gower", "mixed")) {
-                if(method == "mixed") {
-                    if(is.null(weights))
-                        weights <- rep(1, n.vars)
-                    else {
-                        if(length(weights) != n.vars)
-                            stop("'weights' must be of length 'ncol(x)'")
-                    }
-                    ## process vtypes
-                    if(length(type)) {
-                        ## if 'type's supplied, validate
-                    }
-                    ## TODO
-                    if(is.data.frame(x)) {
-                        type2x <- sapply(x, data.class, USE.NAMES = FALSE)
-                        ##x <- data.matrix(x)
-                    } else {
-                        type2x <- rep("numeric", n.vars)
-                        names(type2x) <- colnames(x)
-                    }
-                    if(is.data.frame(y)) {
-                        type2y <- sapply(y, data.class, USE.NAMES = FALSE)
-                        ##y <- data.matrix(y)
-                    } else {
-                        type2y <- rep("numeric", n.vars)
-                        names(type2y) <- colnames(y)
-                    }
-                    ## x and y should have same column types
-                    if(!isTRUE(all.equal(type2x, type2y)))
-                        stop("Variable types in 'x' and 'y' differ.
-Did you forget  to 'join' 'x' and 'y' before calling 'distance'?")
-                    type2x[tI <- type2x %in% c("numeric", "integer")] <- "Q"
-                    ## save which are ordinal for rank conversion below
-                    type2x[(ordinal <- type2x == "ordered")] <- "O"
-                    type2x[type2x == "factor"] <- "N"
-                    type2x[type2x == "logical"] <- "A"
-                    typeCodes <- c("A", "S", "N", "O", "Q", "I", "T")
-                    type3 <- match(type2x, typeCodes)
-                    if (any(ina <- is.na(type3)))
-                        stop("invalid type ", type2x[ina], " for column numbers ",
-                             pColl(which(ina)))
+## `distance2.default` <- function(x, y,
+##                                 method = c("euclidean",
+##                                 "SQeuclidean", "chord",
+##                                 "SQchord", "bray", "chi.square",
+##                                 "SQchi.square", "information",
+##                                 "chi.distance", "manhattan",
+##                                 "kendall", "gower", "alt.gower",
+##                                 "mixed"),
+##                                 weights = NULL, R = NULL,
+##                                 type = list(),
+##                                 ordinal = c("gower","rank","metric"), ...) {
+##     pColl <- function(n) paste(n, collapse = ", ")
+##     ## Euclid?an could be spelled variously
+##     if(!is.na(pmatch(method, "euclidian")))
+## 	method <- "euclidean"
+##     if (missing(method))
+##         method <- "euclidean"
+##     METHODS <- c("euclidean", "SQeuclidean", "chord", "SQchord",
+##                  "bray", "chi.square", "SQchi.square",
+##                  "information","chi.distance", "manhattan",
+##                  "kendall", "gower", "alt.gower", "mixed")
+##     method <- match.arg(method)
+##     DCOEF <- pmatch(method, METHODS)
+##     ordinal <- match.arg(ordinal)
+##     ORDTYPES <- c("gower","rank","metric")
+##     if(missing(y)) { ## only a single matrix
+##         ## TODO
+##     } else { ## two matrices
+##         ## check x and y have same columns
+##         if(!isTRUE(all.equal(names(x), names(y))))
+##             stop("'x' and 'y' appear to have different variables.")
+##         if(!isTRUE(all.equal((n.vars <- ncol(x)), ncol(y))))
+##             stop("'x' and 'y' have different numbers of columns.")
+##         ## variables
+##         nrx <- nrow(x)
+##         nry <- nrow(y)
+##         d <- numeric(length = nrx * nry)
+##         ## object names (row names)
+##         x.names <- rownames(x)
+##         y.names <- rownames(y)
+##         ## some preprocessing steps required for some coefs
+##         ## so dealt with separately
+##         if(method %in% c("chi.distance", "gower", "alt.gower",
+##                          "mixed", "kendall")) {
+##             if(method == "chi.distance") {
+##                 x <- data.matrix(x)
+##                 y <- data.matrix(y)
+##                 csum <- colSums(rbind(x, y))
+##                 y <- y / rowSums(y)
+##                 x <- x / rowSums(x)
+##                 d <- .C("xy_chisq_dist", x = as.double(x), y = as.double(y),
+##                         nr1 = as.integer(nrx), nr2 = as.integer(nry),
+##                         nc = as.integer(n.vars), d = as.double(d),
+##                         csum = as.double(csum), NAOK = as.integer(FALSE),
+##                         PACKAGE = "analogue")$d
+##             }
+##             if(method %in% c("gower", "alt.gower", "mixed")) {
+##                 if(method == "mixed") {
+##                     if(is.null(weights))
+##                         weights <- rep(1, n.vars)
+##                     else {
+##                         if(length(weights) != n.vars)
+##                             stop("'weights' must be of length 'ncol(x)'")
+##                     }
+##                     ## process vtypes
+##                     if(length(type)) {
+##                         ## if 'type's supplied, validate
+##                     }
+##                     ## TODO
+##                     if(is.data.frame(x)) {
+##                         type2x <- sapply(x, data.class, USE.NAMES = FALSE)
+##                         ##x <- data.matrix(x)
+##                     } else {
+##                         type2x <- rep("numeric", n.vars)
+##                         names(type2x) <- colnames(x)
+##                     }
+##                     if(is.data.frame(y)) {
+##                         type2y <- sapply(y, data.class, USE.NAMES = FALSE)
+##                         ##y <- data.matrix(y)
+##                     } else {
+##                         type2y <- rep("numeric", n.vars)
+##                         names(type2y) <- colnames(y)
+##                     }
+##                     ## x and y should have same column types
+##                     if(!isTRUE(all.equal(type2x, type2y)))
+##                         stop("Variable types in 'x' and 'y' differ.
+## Did you forget  to 'join' 'x' and 'y' before calling 'distance'?")
+##                     type2x[tI <- type2x %in% c("numeric", "integer")] <- "Q"
+##                     ## save which are ordinal for rank conversion below
+##                     type2x[(ordinal <- type2x == "ordered")] <- "O"
+##                     type2x[type2x == "factor"] <- "N"
+##                     type2x[type2x == "logical"] <- "A"
+##                     typeCodes <- c("A", "S", "N", "O", "Q", "I", "T")
+##                     type3 <- match(type2x, typeCodes)
+##                     if (any(ina <- is.na(type3)))
+##                         stop("invalid type ", type2x[ina], " for column numbers ",
+##                              pColl(which(ina)))
 
-                    ## Convert to matrices from now on
-                    ## also takes care of ordinal == metric as all factors
-                    ## are converted to internal numeric codes
-                    x <- data.matrix(x)
-                    y <- data.matrix(y)
+##                     ## Convert to matrices from now on
+##                     ## also takes care of ordinal == metric as all factors
+##                     ## are converted to internal numeric codes
+##                     x <- data.matrix(x)
+##                     y <- data.matrix(y)
 
-                    ## Convert ordinal variables to ranks or numerics
-                    ## implemented as per Podani 1999. Only do ranks here as
-                    ## conversion to matrices above handled the standard case
-                    x[, ordinal] <- apply(x[, ordinal], 2, rank, na.last = "keep")
-                    y[, ordinal] <- apply(y[, ordinal], 2, rank, na.last = "keep")
+##                     ## Convert ordinal variables to ranks or numerics
+##                     ## implemented as per Podani 1999. Only do ranks here as
+##                     ## conversion to matrices above handled the standard case
+##                     x[, ordinal] <- apply(x[, ordinal], 2, rank, na.last = "keep")
+##                     y[, ordinal] <- apply(y[, ordinal], 2, rank, na.last = "keep")
 
-                    ## Compute range Rj
-                    XY <- rbind(x, y)
-                    if(is.null(R)) {
-                        maxi <- apply(XY, 2, max, na.rm = TRUE)
-                        mini <- apply(XY, 2, min, na.rm = TRUE)
-                        R <- maxi - mini
-                    } else {
-                        if(length(R) != n.vars)
-                            stop("'R' must be of length 'ncol(x)'")
-                    }
+##                     ## Compute range Rj
+##                     XY <- rbind(x, y)
+##                     if(is.null(R)) {
+##                         maxi <- apply(XY, 2, max, na.rm = TRUE)
+##                         mini <- apply(XY, 2, min, na.rm = TRUE)
+##                         R <- maxi - mini
+##                     } else {
+##                         if(length(R) != n.vars)
+##                             stop("'R' must be of length 'ncol(x)'")
+##                     }
 
-                    ## For Ordinal we need TiMin and TiMax
-                    ## compute over all variables so they have same length as
-                    ## everything else
-                    doT <- function(X, which) {
-                        val <- if(which == "min") {
-                            min(X, na.rm = TRUE)
-                        } else {
-                            max(X, na.rm = TRUE)
-                        }
-                        nas <- is.na(X)
-                        length(which(X[!nas] == val))
-                    }
-                    tmin <- apply(XY, 2, doT, which = "min")
-                    tmax <- apply(XY, 2, doT, which = "max")
+##                     ## For Ordinal we need TiMin and TiMax
+##                     ## compute over all variables so they have same length as
+##                     ## everything else
+##                     doT <- function(X, which) {
+##                         val <- if(which == "min") {
+##                             min(X, na.rm = TRUE)
+##                         } else {
+##                             max(X, na.rm = TRUE)
+##                         }
+##                         nas <- is.na(X)
+##                         length(which(X[!nas] == val))
+##                     }
+##                     tmin <- apply(XY, 2, doT, which = "min")
+##                     tmax <- apply(XY, 2, doT, which = "max")
 
-                    ## How do we want to handle ordinals - convert to interger code
-                    ## for use in C
-                    podani <- match(ordinal, ORDTYPES)
+##                     ## How do we want to handle ordinals - convert to interger code
+##                     ## for use in C
+##                     podani <- match(ordinal, ORDTYPES)
 
-                    ## call the C code
-                    d <- .C("xy_mixed", x = as.double(x), y = as.double(y),
-                            nr1 = as.integer(nrx), nr2 = as.integer(nry),
-                            nc = as.integer(n.vars), d = as.double(d),
-                            vtype = as.integer(type3),
-                            weights = as.double(weights), R = as.double(R),
-                            tmin = as.integer(tmin), tmax = as.integer(tmax),
-                            podani = as.integer(podani),
-                            NAOK = as.integer(TRUE),
-                            PACKAGE = "analogue")$d
-                } else {
-                    if(is.null(R)) {
-                        XY <- rbind(x, y)
-                        maxi <- apply(XY, 2, max, na.rm = TRUE)
-                        mini <- apply(XY, 2, min, na.rm = TRUE)
-                        R <- maxi - mini
-                    } else {
-                        if(length(R) != n.vars)
-                            stop("'R' must be of length 'ncol(x)'")
-                    }
-                    x <- data.matrix(x)
-                    y <- data.matrix(y)
+##                     ## call the C code
+##                     d <- .C("xy_mixed", x = as.double(x), y = as.double(y),
+##                             nr1 = as.integer(nrx), nr2 = as.integer(nry),
+##                             nc = as.integer(n.vars), d = as.double(d),
+##                             vtype = as.integer(type3),
+##                             weights = as.double(weights), R = as.double(R),
+##                             tmin = as.integer(tmin), tmax = as.integer(tmax),
+##                             podani = as.integer(podani),
+##                             NAOK = as.integer(TRUE),
+##                             PACKAGE = "analogue")$d
+##                 } else {
+##                     if(is.null(R)) {
+##                         XY <- rbind(x, y)
+##                         maxi <- apply(XY, 2, max, na.rm = TRUE)
+##                         mini <- apply(XY, 2, min, na.rm = TRUE)
+##                         R <- maxi - mini
+##                     } else {
+##                         if(length(R) != n.vars)
+##                             stop("'R' must be of length 'ncol(x)'")
+##                     }
+##                     x <- data.matrix(x)
+##                     y <- data.matrix(y)
 
-                    ## pre-process for gower and alt gower
-                    ## but these handled by xy_distance below
-                    x <- sweep(x, 2, R, "/")
-                    y <- sweep(y, 2, R, "/")
-                    d <- .C("xy_distance", x = as.double(x), y = as.double(y),
-                            nr1 = as.integer(nrx), nr2 = as.integer(nry),
-                            nc = as.integer(n.vars), d = as.double(d),
-                            method = as.integer(DCOEF), NAOK = as.integer(FALSE),
-                            PACKAGE = "analogue")$d
-                }
-            }
-            if(method == "kendall") {
-                x <- data.matrix(x)
-                y <- data.matrix(y)
-                XY <- rbind(x, y)
-                maxi <- apply(XY, 2, max)
-                d <- .C("xy_kendall", x = as.double(x), y = as.double(y),
-                        nr1 = as.integer(nrx), nr2 = as.integer(nry),
-                        nc = as.integer(n.vars), d = as.double(d),
-                        maxi = as.double(maxi), NAOK = as.integer(FALSE),
-                        PACKAGE = "analogue")$d
-            }
-        } else {
-            ## must be one of the DC's handled by xy_distance
-            x <- data.matrix(x)
-            y <- data.matrix(y)
-            d <- .C("xy_distance", x = as.double(x), y = as.double(y),
-                    nr1 = as.integer(nrx), nr2 = as.integer(nry),
-                    nc = as.integer(n.vars), d = as.double(d),
-                    method = as.integer(DCOEF), NAOK = as.integer(FALSE),
-                    PACKAGE = "analogue")$d
-        }
-        ## convert d to a matrix
-        d <- matrix(d, ncol = n.vars, byrow = TRUE)
-        colnames(d) <- y.names
-        rownames(d) <- x.names
-        attr(d, "method") <- method
-        class(d) <- c("distance","matrix")
-    }
-    return(d)
-}
+##                     ## pre-process for gower and alt gower
+##                     ## but these handled by xy_distance below
+##                     x <- sweep(x, 2, R, "/")
+##                     y <- sweep(y, 2, R, "/")
+##                     d <- .C("xy_distance", x = as.double(x), y = as.double(y),
+##                             nr1 = as.integer(nrx), nr2 = as.integer(nry),
+##                             nc = as.integer(n.vars), d = as.double(d),
+##                             method = as.integer(DCOEF), NAOK = as.integer(FALSE),
+##                             PACKAGE = "analogue")$d
+##                 }
+##             }
+##             if(method == "kendall") {
+##                 x <- data.matrix(x)
+##                 y <- data.matrix(y)
+##                 XY <- rbind(x, y)
+##                 maxi <- apply(XY, 2, max)
+##                 d <- .C("xy_kendall", x = as.double(x), y = as.double(y),
+##                         nr1 = as.integer(nrx), nr2 = as.integer(nry),
+##                         nc = as.integer(n.vars), d = as.double(d),
+##                         maxi = as.double(maxi), NAOK = as.integer(FALSE),
+##                         PACKAGE = "analogue")$d
+##             }
+##         } else {
+##             ## must be one of the DC's handled by xy_distance
+##             x <- data.matrix(x)
+##             y <- data.matrix(y)
+##             d <- .C("xy_distance", x = as.double(x), y = as.double(y),
+##                     nr1 = as.integer(nrx), nr2 = as.integer(nry),
+##                     nc = as.integer(n.vars), d = as.double(d),
+##                     method = as.integer(DCOEF), NAOK = as.integer(FALSE),
+##                     PACKAGE = "analogue")$d
+##         }
+##         ## convert d to a matrix
+##         d <- matrix(d, ncol = n.vars, byrow = TRUE)
+##         colnames(d) <- y.names
+##         rownames(d) <- x.names
+##         attr(d, "method") <- method
+##         class(d) <- c("distance","matrix")
+##     }
+##     return(d)
+## }
 
 ## set.seed(1)
 ## bar <- matrix(sample(3, 9, replace = TRUE), ncol = 3)



More information about the Analogue-commits mailing list