[Analogue-commits] r368 - in pkg: . R inst man src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sat Oct 5 23:11:53 CEST 2013


Author: gsimpson
Date: 2013-10-05 23:11:52 +0200 (Sat, 05 Oct 2013)
New Revision: 368

Added:
   pkg/R/new-distance.R
Removed:
   pkg/R/distance2.R
   pkg/R/distance_c.R
   pkg/man/distance3.Rd
   pkg/src/c_distx.c
   pkg/src/c_distxy.c
Modified:
   pkg/DESCRIPTION
   pkg/NAMESPACE
   pkg/R/distance.R
   pkg/inst/ChangeLog
   pkg/man/distance.Rd
   pkg/src/distx.c
   pkg/src/distxy.c
Log:
bump to 0.11-6; new distance function using compiled C code; cleanup old attempts to interface the C code

Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION	2013-10-05 21:10:47 UTC (rev 367)
+++ pkg/DESCRIPTION	2013-10-05 21:11:52 UTC (rev 368)
@@ -1,17 +1,18 @@
 Package: analogue
 Type: Package
 Title: Analogue and weighted averaging methods for palaeoecology
-Version: 0.11-4
+Version: 0.11-6
 Date: $Date$
 Depends: R (>= 2.15.0), vegan (>= 1.17-12), lattice, rgl
 Imports: mgcv, MASS, stats, graphics, grid, brglm, princurve
+Suggests: testthat
 Author: Gavin L. Simpson, Jari Oksanen
 Authors at R: c(person(given = c("Gavin", "L."), family = "Simpson",
                     role = c("aut", "cre"),
-                    email = "gavin.simpson at uregina.ca"),
+                    email = "ucfagls at gmail.com"),
              person(given = "Jari", family = "Oksanen",
                     role = "aut"))
-Maintainer: Gavin L. Simpson <gavin.simpson at uregina.ca>
+Maintainer: Gavin L. Simpson <ucfagls at gmail.com>
 BugReports: http://r-forge.r-project.org/tracker/?func=browse&group_id=69&atid=338
 NeedsCompilation: yes
 Description: Fits Modern Analogue Technique and Weighted Averaging transfer 

Modified: pkg/NAMESPACE
===================================================================
--- pkg/NAMESPACE	2013-10-05 21:10:47 UTC (rev 367)
+++ pkg/NAMESPACE	2013-10-05 21:11:52 UTC (rev 368)
@@ -57,9 +57,9 @@
        deshrink,
        dissim,
        dissimilarities,
-       distance,
-       distance3,
-       distanceX,
+       distance, oldDistance, ## oldDistance is the original R-only code
+       #distance3,
+       #distanceX,
        fuse,
        getK,
        gradientDist,
@@ -114,7 +114,9 @@
 S3method(dissimilarities, mat)
 S3method(distance, default)
 S3method(distance, join)
-S3method(distance3, default)
+S3method(oldDistance, default)
+S3method(oldDistance, join)
+#S3method(distance3, default)
 S3method(eigenvals, pcr)
 S3method(fuse, dist)
 S3method(fuse, matrix)

Modified: pkg/R/distance.R
===================================================================
--- pkg/R/distance.R	2013-10-05 21:10:47 UTC (rev 367)
+++ pkg/R/distance.R	2013-10-05 21:11:52 UTC (rev 368)
@@ -11,9 +11,9 @@
 ##                                                                       ##
 ###########################################################################
 ## x = training data, y = fossil data
-distance <- function(x, ...) UseMethod("distance")
+oldDistance <- function(x, ...) UseMethod("distance")
 
-distance.join <- function(x, ...)
+oldDistance.join <- function(x, ...)
   {
     if(!inherits(x, "join"))
       stop("This method should only be used on objects of class 'join'")
@@ -26,7 +26,7 @@
     }
   }
 
-distance.default <- function(x, y,
+oldDistance.default <- function(x, y,
                              method = c("euclidean", "SQeuclidean", "chord",
                                "SQchord", "bray", "chi.square", "SQchi.square",
                                "information", "chi.distance", "manhattan",

Deleted: pkg/R/distance2.R
===================================================================
--- pkg/R/distance2.R	2013-10-05 21:10:47 UTC (rev 367)
+++ pkg/R/distance2.R	2013-10-05 21:11:52 UTC (rev 368)
@@ -1,418 +0,0 @@
-## `distance2` <- function(x, ...)
-##     UseMethod("distance2")
-
-## `distance2.default` <- function(x, y,
-##                                 method = c("euclidean",
-##                                 "SQeuclidean", "chord",
-##                                 "SQchord", "bray", "chi.square",
-##                                 "SQchi.square", "information",
-##                                 "chi.distance", "manhattan",
-##                                 "kendall", "gower", "alt.gower",
-##                                 "mixed"),
-##                                 weights = NULL, R = NULL,
-##                                 type = list(),
-##                                 ordinal = c("gower","rank","metric"), ...) {
-##     pColl <- function(n) paste(n, collapse = ", ")
-##     ## Euclid?an could be spelled variously
-##     if(!is.na(pmatch(method, "euclidian")))
-## 	method <- "euclidean"
-##     if (missing(method))
-##         method <- "euclidean"
-##     METHODS <- c("euclidean", "SQeuclidean", "chord", "SQchord",
-##                  "bray", "chi.square", "SQchi.square",
-##                  "information","chi.distance", "manhattan",
-##                  "kendall", "gower", "alt.gower", "mixed")
-##     method <- match.arg(method)
-##     DCOEF <- pmatch(method, METHODS)
-##     ordinal <- match.arg(ordinal)
-##     ORDTYPES <- c("gower","rank","metric")
-##     if(missing(y)) { ## only a single matrix
-##         ## TODO
-##     } else { ## two matrices
-##         ## check x and y have same columns
-##         if(!isTRUE(all.equal(names(x), names(y))))
-##             stop("'x' and 'y' appear to have different variables.")
-##         if(!isTRUE(all.equal((n.vars <- ncol(x)), ncol(y))))
-##             stop("'x' and 'y' have different numbers of columns.")
-##         ## variables
-##         nrx <- nrow(x)
-##         nry <- nrow(y)
-##         d <- numeric(length = nrx * nry)
-##         ## object names (row names)
-##         x.names <- rownames(x)
-##         y.names <- rownames(y)
-##         ## some preprocessing steps required for some coefs
-##         ## so dealt with separately
-##         if(method %in% c("chi.distance", "gower", "alt.gower",
-##                          "mixed", "kendall")) {
-##             if(method == "chi.distance") {
-##                 x <- data.matrix(x)
-##                 y <- data.matrix(y)
-##                 csum <- colSums(rbind(x, y))
-##                 y <- y / rowSums(y)
-##                 x <- x / rowSums(x)
-##                 d <- .C("xy_chisq_dist", x = as.double(x), y = as.double(y),
-##                         nr1 = as.integer(nrx), nr2 = as.integer(nry),
-##                         nc = as.integer(n.vars), d = as.double(d),
-##                         csum = as.double(csum), NAOK = as.integer(FALSE),
-##                         PACKAGE = "analogue")$d
-##             }
-##             if(method %in% c("gower", "alt.gower", "mixed")) {
-##                 if(method == "mixed") {
-##                     if(is.null(weights))
-##                         weights <- rep(1, n.vars)
-##                     else {
-##                         if(length(weights) != n.vars)
-##                             stop("'weights' must be of length 'ncol(x)'")
-##                     }
-##                     ## process vtypes
-##                     if(length(type)) {
-##                         ## if 'type's supplied, validate
-##                     }
-##                     ## TODO
-##                     if(is.data.frame(x)) {
-##                         type2x <- sapply(x, data.class, USE.NAMES = FALSE)
-##                         ##x <- data.matrix(x)
-##                     } else {
-##                         type2x <- rep("numeric", n.vars)
-##                         names(type2x) <- colnames(x)
-##                     }
-##                     if(is.data.frame(y)) {
-##                         type2y <- sapply(y, data.class, USE.NAMES = FALSE)
-##                         ##y <- data.matrix(y)
-##                     } else {
-##                         type2y <- rep("numeric", n.vars)
-##                         names(type2y) <- colnames(y)
-##                     }
-##                     ## x and y should have same column types
-##                     if(!isTRUE(all.equal(type2x, type2y)))
-##                         stop("Variable types in 'x' and 'y' differ.
-## Did you forget  to 'join' 'x' and 'y' before calling 'distance'?")
-##                     type2x[tI <- type2x %in% c("numeric", "integer")] <- "Q"
-##                     ## save which are ordinal for rank conversion below
-##                     type2x[(ordinal <- type2x == "ordered")] <- "O"
-##                     type2x[type2x == "factor"] <- "N"
-##                     type2x[type2x == "logical"] <- "A"
-##                     typeCodes <- c("A", "S", "N", "O", "Q", "I", "T")
-##                     type3 <- match(type2x, typeCodes)
-##                     if (any(ina <- is.na(type3)))
-##                         stop("invalid type ", type2x[ina], " for column numbers ",
-##                              pColl(which(ina)))
-
-##                     ## Convert to matrices from now on
-##                     ## also takes care of ordinal == metric as all factors
-##                     ## are converted to internal numeric codes
-##                     x <- data.matrix(x)
-##                     y <- data.matrix(y)
-
-##                     ## Convert ordinal variables to ranks or numerics
-##                     ## implemented as per Podani 1999. Only do ranks here as
-##                     ## conversion to matrices above handled the standard case
-##                     x[, ordinal] <- apply(x[, ordinal], 2, rank, na.last = "keep")
-##                     y[, ordinal] <- apply(y[, ordinal], 2, rank, na.last = "keep")
-
-##                     ## Compute range Rj
-##                     XY <- rbind(x, y)
-##                     if(is.null(R)) {
-##                         maxi <- apply(XY, 2, max, na.rm = TRUE)
-##                         mini <- apply(XY, 2, min, na.rm = TRUE)
-##                         R <- maxi - mini
-##                     } else {
-##                         if(length(R) != n.vars)
-##                             stop("'R' must be of length 'ncol(x)'")
-##                     }
-
-##                     ## For Ordinal we need TiMin and TiMax
-##                     ## compute over all variables so they have same length as
-##                     ## everything else
-##                     doT <- function(X, which) {
-##                         val <- if(which == "min") {
-##                             min(X, na.rm = TRUE)
-##                         } else {
-##                             max(X, na.rm = TRUE)
-##                         }
-##                         nas <- is.na(X)
-##                         length(which(X[!nas] == val))
-##                     }
-##                     tmin <- apply(XY, 2, doT, which = "min")
-##                     tmax <- apply(XY, 2, doT, which = "max")
-
-##                     ## How do we want to handle ordinals - convert to interger code
-##                     ## for use in C
-##                     podani <- match(ordinal, ORDTYPES)
-
-##                     ## call the C code
-##                     d <- .C("xy_mixed", x = as.double(x), y = as.double(y),
-##                             nr1 = as.integer(nrx), nr2 = as.integer(nry),
-##                             nc = as.integer(n.vars), d = as.double(d),
-##                             vtype = as.integer(type3),
-##                             weights = as.double(weights), R = as.double(R),
-##                             tmin = as.integer(tmin), tmax = as.integer(tmax),
-##                             podani = as.integer(podani),
-##                             NAOK = as.integer(TRUE),
-##                             PACKAGE = "analogue")$d
-##                 } else {
-##                     if(is.null(R)) {
-##                         XY <- rbind(x, y)
-##                         maxi <- apply(XY, 2, max, na.rm = TRUE)
-##                         mini <- apply(XY, 2, min, na.rm = TRUE)
-##                         R <- maxi - mini
-##                     } else {
-##                         if(length(R) != n.vars)
-##                             stop("'R' must be of length 'ncol(x)'")
-##                     }
-##                     x <- data.matrix(x)
-##                     y <- data.matrix(y)
-
-##                     ## pre-process for gower and alt gower
-##                     ## but these handled by xy_distance below
-##                     x <- sweep(x, 2, R, "/")
-##                     y <- sweep(y, 2, R, "/")
-##                     d <- .C("xy_distance", x = as.double(x), y = as.double(y),
-##                             nr1 = as.integer(nrx), nr2 = as.integer(nry),
-##                             nc = as.integer(n.vars), d = as.double(d),
-##                             method = as.integer(DCOEF), NAOK = as.integer(FALSE),
-##                             PACKAGE = "analogue")$d
-##                 }
-##             }
-##             if(method == "kendall") {
-##                 x <- data.matrix(x)
-##                 y <- data.matrix(y)
-##                 XY <- rbind(x, y)
-##                 maxi <- apply(XY, 2, max)
-##                 d <- .C("xy_kendall", x = as.double(x), y = as.double(y),
-##                         nr1 = as.integer(nrx), nr2 = as.integer(nry),
-##                         nc = as.integer(n.vars), d = as.double(d),
-##                         maxi = as.double(maxi), NAOK = as.integer(FALSE),
-##                         PACKAGE = "analogue")$d
-##             }
-##         } else {
-##             ## must be one of the DC's handled by xy_distance
-##             x <- data.matrix(x)
-##             y <- data.matrix(y)
-##             d <- .C("xy_distance", x = as.double(x), y = as.double(y),
-##                     nr1 = as.integer(nrx), nr2 = as.integer(nry),
-##                     nc = as.integer(n.vars), d = as.double(d),
-##                     method = as.integer(DCOEF), NAOK = as.integer(FALSE),
-##                     PACKAGE = "analogue")$d
-##         }
-##         ## convert d to a matrix
-##         d <- matrix(d, ncol = n.vars, byrow = TRUE)
-##         colnames(d) <- y.names
-##         rownames(d) <- x.names
-##         attr(d, "method") <- method
-##         class(d) <- c("distance","matrix")
-##     }
-##     return(d)
-## }
-
-## set.seed(1)
-## bar <- matrix(sample(3, 9, replace = TRUE), ncol = 3)
-## foo <- matrix(sample(3, 9, replace = TRUE), ncol = 3)
-## foobar <- rbind(bar, foo)
-## out <- matrix(ncol = ncol(bar), nrow = nrow(foobar))
-## res <- numeric(length = nrow(foobar))
-## for(i in seq_len(nrow(foobar))) {
-##     for(j in seq_len(ncol(foobar))) {
-##         for(k in seq_along(foobar[,j])) {
-##             res[k] <- foobar[k,j] == foobar[i,j]
-##         }
-##         out[i, j] <- sum(res)
-##     }
-## }
-
-## set.seed(1)
-## bar <- matrix(sample(3, 9, replace = TRUE), ncol = 3)
-## foo <- matrix(sample(3, 9, replace = TRUE), ncol = 3)
-## outbar <- matrix(0, ncol = ncol(bar), nrow = nrow(bar))
-## outfoo <- matrix(0, ncol = ncol(foo), nrow = nrow(foo))
-## resbar <- numeric(length = nrow(bar))# + nrow(foo))
-## resfoo <- numeric(length = nrow(bar))# + nrow(foo))
-
-## for(i in seq_len(ncol(bar))) {
-##     for(j in seq_len(nrow(bar))) {
-##         for(k in seq_len(nrow(bar))) {
-##             resbar[k] <- bar[k, i] == bar[j, i]
-##         }
-##         outbar[j, i] <- sum(resbar)
-##     }
-##     for(j in seq_len(nrow(foo))) {
-##         for(k in seq_len(nrow(foo))) {
-##             resfoo[k] <- foo[k, i] == bar[j, i]
-##         }
-##         outfoo[j, i] <- sum(resfoo)
-##     }
-## }
-
-`distance3` <- function(x, ...)
-    UseMethod("distance3")
-
-`distance3.default` <-
-    function(x, y, method = "euclidean",
-             weights = NULL, R = NULL,
-             ...)
-{
-    METHODS <- c("euclidean", "SQeuclidean", "chord", "SQchord",
-                 "bray", "chi.square", "SQchi.square", "information",
-                 "chi.distance", "manhattan", "kendall", "gower", "alt.gower",
-                 "mixed")
-    pColl <- function(n) paste(n, collapse = ", ")
-    ## Euclid?an could be spelled variously
-    if(!is.na(pmatch(method, "euclidian")))
-	method <- "euclidean"
-    METHODS <- c("euclidean", "SQeuclidean", "chord", "SQchord",
-                 "bray", "chi.square", "SQchi.square",
-                 "information","chi.distance", "manhattan",
-                 "kendall", "gower", "alt.gower", "mixed")
-    DCOEF <- pmatch(method, METHODS)
-    if(missing(y)) { ## only a single matrix
-        ## TODO
-    } else { ## two matrices
-        ## check x and y have same columns
-        if(!isTRUE(all.equal(names(x), names(y))))
-            stop("'x' and 'y' appear to have different variables.")
-        if(!isTRUE(all.equal((n.vars <- ncol(x)), ncol(y))))
-            stop("'x' and 'y' have different numbers of columns.")
-        ## variables
-        nrx <- nrow(x)
-        nry <- nrow(y)
-        d <- numeric(length = nrx * nry)
-        ## object names (row names)
-        x.names <- rownames(x)
-        y.names <- rownames(y)
-        ## some preprocessing steps required for some coefs
-        ## so dealt with separately
-        if(method %in% c("chi.distance", "gower", "alt.gower",
-                         "mixed", "kendall")) {
-            if(method == "chi.distance") {
-                x <- data.matrix(x)
-                y <- data.matrix(y)
-                csum <- colSums(rbind(x, y))
-                y <- y / rowSums(y)
-                x <- x / rowSums(x)
-                d <- .C("xy_chisq_dist", x = as.double(x), y = as.double(y),
-                        nr1 = as.integer(nrx), nr2 = as.integer(nry),
-                        nc = as.integer(n.vars), d = as.double(d),
-                        csum = as.double(csum), NAOK = as.integer(FALSE),
-                        PACKAGE = "analogue")$d
-            }
-            if(method %in% c("gower", "alt.gower", "mixed")) {
-                if(method == "mixed") {
-                    if(is.null(weights))
-                        weights <- rep(1, n.vars)
-                    else {
-                        if(length(weights) != n.vars)
-                            stop("'weights' must be of length 'ncol(x)'")
-                    }
-                    ## process vtypes
-                    if(is.data.frame(x)) {
-                        type2x <- sapply(x, data.class, USE.NAMES = FALSE)
-                        ##x <- data.matrix(x)
-                    } else {
-                        type2x <- rep("numeric", n.vars)
-                        names(type2x) <- colnames(x)
-                    }
-                    if(is.data.frame(y)) {
-                        type2y <- sapply(y, data.class, USE.NAMES = FALSE)
-                        ##y <- data.matrix(y)
-                    } else {
-                        type2y <- rep("numeric", n.vars)
-                        names(type2y) <- colnames(y)
-                    }
-                    ## x and y should have same column types
-                    if(!isTRUE(all.equal(type2x, type2y)))
-                        stop("Variable types in 'x' and 'y' differ.
-Did you forget  to 'join' 'x' and 'y' before calling 'distance'?")
-
-                    ## Record the variable types
-                    type2x[tI <- type2x %in% c("numeric", "integer")] <- "Q"
-                    ## save which are ordinal for rank conversion below - TODO
-                    type2x[(ordinal <- type2x == "ordered")] <- "O"
-                    type2x[type2x == "factor"] <- "N"
-                    type2x[type2x == "logical"] <- "A"
-                    typeCodes <- c("A", "S", "N", "O", "Q", "I", "T")
-                    type3 <- match(type2x, typeCodes)
-                    if (any(ina <- is.na(type3)))
-                        stop("invalid type ", type2x[ina], " for column numbers ",
-                             pColl(which(ina)))
-
-                    ## Convert to matrices from now on
-                    ## also takes care of ordinal == metric as all factors
-                    ## are converted to internal numeric codes
-                    x <- data.matrix(x)
-                    y <- data.matrix(y)
-
-                    ## Compute range Rj
-                    XY <- rbind(x, y)
-                    if(is.null(R)) {
-                        maxi <- apply(XY, 2, max, na.rm = TRUE)
-                        mini <- apply(XY, 2, min, na.rm = TRUE)
-                        R <- maxi - mini
-                    } else {
-                        if(length(R) != n.vars)
-                            stop("'R' must be of length 'ncol(x)'")
-                    }
-
-                    ## call the C code
-                    d <- .C("xy_mixed", x = as.double(x), y = as.double(y),
-                            nr1 = as.integer(nrx), nr2 = as.integer(nry),
-                            nc = as.integer(n.vars), d = as.double(d),
-                            vtype = as.integer(type3),
-                            weights = as.double(weights), R = as.double(R),
-                            NAOK = as.integer(TRUE),
-                            PACKAGE = "analogue")$d
-                } else {
-                    if(is.null(R)) {
-                        XY <- rbind(x, y)
-                        maxi <- apply(XY, 2, max, na.rm = TRUE)
-                        mini <- apply(XY, 2, min, na.rm = TRUE)
-                        R <- maxi - mini
-                    } else {
-                        if(length(R) != n.vars)
-                            stop("'R' must be of length 'ncol(x)'")
-                    }
-                    x <- data.matrix(x)
-                    y <- data.matrix(y)
-
-                    ## pre-process for gower and alt gower
-                    ## but these handled by xy_distance below
-                    x <- sweep(x, 2, R, "/")
-                    y <- sweep(y, 2, R, "/")
-                    d <- .C("xy_distance", x = as.double(x), y = as.double(y),
-                            nr1 = as.integer(nrx), nr2 = as.integer(nry),
-                            nc = as.integer(n.vars), d = as.double(d),
-                            method = as.integer(DCOEF), NAOK = as.integer(FALSE),
-                            PACKAGE = "analogue")$d
-                }
-            }
-            if(method == "kendall") {
-                x <- data.matrix(x)
-                y <- data.matrix(y)
-                XY <- rbind(x, y)
-                maxi <- apply(XY, 2, max)
-                d <- .C("xy_kendall", x = as.double(x), y = as.double(y),
-                        nr1 = as.integer(nrx), nr2 = as.integer(nry),
-                        nc = as.integer(n.vars), d = as.double(d),
-                        maxi = as.double(maxi), NAOK = as.integer(FALSE),
-                        PACKAGE = "analogue")$d
-            }
-        } else {
-            ## must be one of the DC's handled by xy_distance
-            x <- data.matrix(x)
-            y <- data.matrix(y)
-            d <- .C("xy_distance", x = as.double(x), y = as.double(y),
-                    nr1 = as.integer(nrx), nr2 = as.integer(nry),
-                    nc = as.integer(n.vars), d = as.double(d),
-                    method = as.integer(DCOEF), NAOK = as.integer(FALSE),
-                    PACKAGE = "analogue")$d
-        }
-        ## convert d to a matrix
-        d <- matrix(d, ncol = nry, byrow = TRUE)
-        colnames(d) <- y.names
-        rownames(d) <- x.names
-        attr(d, "method") <- method
-        attr(d, "type") <- "asymmetric"
-        class(d) <- c("distance","matrix")
-    }
-    return(d)
-}

Deleted: pkg/R/distance_c.R
===================================================================
--- pkg/R/distance_c.R	2013-10-05 21:10:47 UTC (rev 367)
+++ pkg/R/distance_c.R	2013-10-05 21:11:52 UTC (rev 368)
@@ -1,110 +0,0 @@
-distanceX <- function(x, y, method = "euclidean", weights = NULL, R = NULL,
-                      as.dist = FALSE, ...) {
-    ## Euclid?an could be spelled variously
-    if(!is.na(pmatch(method, "euclidian")))
-	method <- "euclidean"
-    METHODS <- c("euclidean", "SQeuclidean", "chord", "SQchord",
-                 "bray", "chi.square", "SQchi.square",
-                 "information","chi.distance", "manhattan",
-                 "kendall", "gower", "alt.gower", "mixed")
-    DCOEF <- pmatch(method, METHODS)
-    if(missing(y)) { ## only a single matrix
-        ## variables
-        nr <- nrow(x)
-        nc <- ncol(x)
-        ## object names (row names)
-        x.names <- rownames(x)
-        ## some preprocessing steps required for some coefs
-        ## so dealt with separately
-        if(method %in% c("chi.distance", "gower", "alt.gower",
-                         "mixed", "kendall")) {
-          if(method == "chi.distance") {
-            x <- data.matrix(x)
-            csum <- colSums(x)
-            x <- x / rowSums(x)
-            d <- .Call("Cchisqdistxx", x, csum, PACKAGE = "analogue")
-          }
-          if(method == "kendall") {
-            x <- data.matrix(x)
-            maxi <- apply(x, 2, max)
-            d <- .Call("Ckendallxx", x, maxi, PACKAGE = "analogue")
-          }
-          if(method %in% c("gower", "alt.gower")) {
-            if(is.null(R)) {
-              x <- data.matrix(x)
-              maxi <- apply(x, 2, max, na.rm = TRUE)
-              mini <- apply(x, 2, min, na.rm = TRUE)
-              R <- maxi - mini
-            } else {
-              if(length(R) != nc)
-                stop("'R' must be of length 'ncol(x)'")
-            }
-            ## pre-process here for gower and alt.gower
-            ## but note we call the main driver Cdistxx
-            x <- sweep(x, 2, R, "/")
-            d <- .Call("Cdistxx", x, DCOEF, PACKAGE = "analogue")
-          }
-        } else {
-            ## must be one of the DC's handled by xy_distance
-            x <- data.matrix(x)
-            d <- .Call("Cdistxx", x, DCOEF, PACKAGE = "analogue")
-        }
-        attr(d, "Size") <- nr
-        attr(d, "Labels") <- x.names
-        attr(d, "Diag") <- FALSE
-        attr(d, "Upper") <- FALSE
-        attr(d, "method") <- method
-        attr(d, "call") <- match.call()
-        class(d) <- "dist"
-        if(!as.dist) {
-            d <- as.matrix(d)
-            attr(d, "method") <- method
-            attr(d, "type") <- "symmetric"
-            class(d) <- c("distance","matrix")
-        }
-    } else { ## two matrices
-        ## check x and y have same columns
-        if(!isTRUE(all.equal(names(x), names(y))))
-            stop("'x' and 'y' appear to have different variables.")
-        if(!isTRUE(all.equal((n.vars <- ncol(x)), ncol(y))))
-            stop("'x' and 'y' have different numbers of columns.")
-        ## variables
-        nrx <- nrow(x)
-        nry <- nrow(y)
-        ## object names (row names)
-        x.names <- rownames(x)
-        y.names <- rownames(y)
-        ## some preprocessing steps required for some coefs
-        ## so dealt with separately
-        if(method %in% c("chi.distance", "gower", "alt.gower",
-                         "mixed", "kendall")) {
-          if(method == "chi.distance") {
-            x <- data.matrix(x)
-            y <- data.matrix(y)
-            csum <- colSums(rbind(x, y))
-            y <- y / rowSums(y)
-            x <- x / rowSums(x)
-            d <- .C("xy_chisq_dist", x = as.double(x), y = as.double(y),
-                    nr1 = as.integer(nrx), nr2 = as.integer(nry),
-                    nc = as.integer(n.vars), d = as.double(d),
-                    csum = as.double(csum), NAOK = as.integer(FALSE),
-                    PACKAGE = "analogue")$d
-            ##d <- .Call("Cchisqdistxy", x, y, )
-          }
-        } else {
-          ## must be one of the DC's handled by xy_distance
-          x <- data.matrix(x)
-          y <- data.matrix(y)
-          d <- .Call("Cdistxy", x, y, DCOEF, PACKAGE = "analogue")
-        }
-
-        ## convert d to a matrix
-        d <- matrix(d, ncol = nry, byrow = TRUE)
-        colnames(d) <- y.names
-        rownames(d) <- x.names
-        attr(d, "method") <- method
-        attr(d, "type") <- "asymmetric"
-        class(d) <- c("distance","matrix")
-    }
-    d
-}

Added: pkg/R/new-distance.R
===================================================================
--- pkg/R/new-distance.R	                        (rev 0)
+++ pkg/R/new-distance.R	2013-10-05 21:11:52 UTC (rev 368)
@@ -0,0 +1,227 @@
+## New distance() generic and methods
+
+`distance` <- function(x, ...) {
+    UseMethod("distance")
+}
+
+distance.join <- function(x, ...) {
+    if(!inherits(x, "join"))
+        stop("This method should only be used on objects of class 'join'")
+    if(inherits(x, "data.frame")) {
+        distance.default(x, ...)
+    } else {
+        if(length(x) != 2)
+            warning("Object contains more than 2 data sets.\n  Only the first 2 data sets used")
+        distance.default(x[[1]], x[[2]], ...)
+    }
+}
+
+`distance.default` <- function(x, y, method = "euclidean", weights = NULL,
+                               R = NULL, dist = FALSE, ...){
+    ## Euclid?an could be spelled variously
+    if(!is.na(pmatch(method, "euclidian")))
+	method <- "euclidean"
+    METHODS <- c("euclidean", "SQeuclidean", "chord", "SQchord",
+                 "bray", "chi.square", "SQchi.square",
+                 "information","chi.distance", "manhattan",
+                 "kendall", "gower", "alt.gower", "mixed")
+    DCOEF <- pmatch(method, METHODS)
+    if(miss.y <- missing(y)) {
+        dmat <- dxx(x = x, DCOEF = DCOEF, weights = weights,
+                    R = R, dist = dist, ...)
+    } else {
+        dmat <- dxy(x = x, y = y, DCOEF = DCOEF, weights = weights,
+                    R = R, ...)
+    }
+
+    ## add attributes, classes, and return
+    attr(dmat, "method") <- method
+    if(!dist) {
+        class(dmat) <- c("distance", "matrix")
+        attr(dmat, "type") <- if(miss.y) "symmetric" else "asymmetric"
+    }
+    dmat
+}
+
+## Internal, not exported, function for computing distances when
+## only `x` is available
+`dxx` <- function(x, DCOEF, weights, R, dist = FALSE, ...) {
+    ## variables
+    nr <- nrow(x)
+    nc <- ncol(x)
+    ## object names (row names)
+    x.names <- rownames(x)
+
+    ## allocate storage
+    d <- double(nr * (nr - 1)/2)
+
+    ## some preprocessing steps required for some coefs
+    ## so dealt with separately
+    if(DCOEF %in% c(9L, 11L, 12L, 13L, 14L)) {
+        ## "chi.distance", "gower", "alt.gower","mixed", "kendall"
+        if(DCOEF == 9L) { ## "chi.distance"
+            x <- data.matrix(x)
+            csum <- colSums(x)
+            x <- x / rowSums(x)
+            d <- .C("xx_chisq_dist", x = as.double(x), nr = as.integer(nr),
+                    nc = as.integer(nc), d = as.double(d),
+                    diag = as.integer(FALSE),
+                    csum = as.double(csum), NAOK = as.integer(FALSE),
+                    PACKAGE = "analogue")$d
+        }
+        if(DCOEF == 11L) { ## "kendall"
+            x <- data.matrix(x)
+            maxi <- apply(x, 2, max)
+            d <- .C("xx_kendall", x = as.double(x), nr = as.integer(nr),
+                    nc = as.integer(nc), d = as.double(d),
+                    diag = as.integer(FALSE),
+                    maxi = as.double(maxi), NAOK = as.integer(FALSE),
+                    PACKAGE = "analogue")$d
+        }
+        if(DCOEF == 14L) { ## "mixed"
+            ## TODO
+        }
+        if(DCOEF %in% c(12L, 13L)) { ## "gower", "alt.gower"
+            if(is.null(R)) {
+                x <- data.matrix(x)
+                maxi <- apply(x, 2, max, na.rm = TRUE)
+                mini <- apply(x, 2, min, na.rm = TRUE)
+                R <- maxi - mini
+            } else {
+                if(length(R) != nc)
+                    stop("'R' must be of length 'ncol(x)'")
+            }
+            ## pre-process here for gower and alt.gower
+            ## but note we call the main driver Cdistxx
+            x <- sweep(x, 2, R, "/")
+            d <- .C("xx_distance", x = as.double(x),
+                    nr = as.integer(nr), nc = as.integer(nc),
+                    d = as.double(d), diag = as.integer(FALSE),
+                    method = as.integer(DCOEF),
+                    NAOK = as.integer(FALSE),
+                    PACKAGE = "analogue")$d
+        }
+    } else {
+        ## must be one of the DC's handled by xy_distance
+        x <- data.matrix(x)
+        d <- .C("xx_distance", x = as.double(x),
+                nr = as.integer(nr), nc = as.integer(nc),
+                d = as.double(d), diag = as.integer(FALSE),
+                method = as.integer(DCOEF),
+                NAOK = as.integer(FALSE), PACKAGE = "analogue")$d
+    }
+
+    ## convert d to a matrix
+    ZAP <- 1e-15
+    d[d < ZAP] <- 0
+    if (any(is.na(d)))
+        warning("missing values in results")
+    attr(d, "Size") <- nr
+    attr(d, "Labels") <- x.names #dimnames(x)[[1]]
+    attr(d, "Diag") <- FALSE
+    attr(d, "Upper") <- FALSE
+    attr(d, "method") <- DCOEF
+    attr(d, "call") <- match.call()
+    class(d) <- "dist"
+
+    ## convert to matrix? Only if dist == FALSE
+    if(!dist) {
+        d <- as.matrix(d)
+    }
+
+    ## return
+    d
+}
+
+## Internal, not exported, function for computing distances when
+## both `x` and `y` are available
+`dxy` <- function(x, y, DCOEF, weights, R, ...) {
+    ## check x and y have same columns
+    if(!isTRUE(all.equal(colnames(x), colnames(y))))
+        stop("'x' and 'y' appear to have different variables.")
+    if(!isTRUE(all.equal((n.vars <- ncol(x)), ncol(y))))
+        stop("'x' and 'y' have different numbers of columns.")
+    ## variables
+    nrx <- nrow(x)
+    nry <- nrow(y)
+    nc <- ncol(x)
+
+    ## object names (row names)
+    x.names <- rownames(x)
+    y.names <- rownames(y)
+
+    ## allocate storage
+    d <- numeric(length = nrx * nry)
+
+    ## some preprocessing steps required for some coefs
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/analogue -r 368


More information about the Analogue-commits mailing list