[Analogue-commits] r368 - in pkg: . R inst man src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sat Oct 5 23:11:53 CEST 2013
Author: gsimpson
Date: 2013-10-05 23:11:52 +0200 (Sat, 05 Oct 2013)
New Revision: 368
Added:
pkg/R/new-distance.R
Removed:
pkg/R/distance2.R
pkg/R/distance_c.R
pkg/man/distance3.Rd
pkg/src/c_distx.c
pkg/src/c_distxy.c
Modified:
pkg/DESCRIPTION
pkg/NAMESPACE
pkg/R/distance.R
pkg/inst/ChangeLog
pkg/man/distance.Rd
pkg/src/distx.c
pkg/src/distxy.c
Log:
bump to 0.11-6; new distance function using compiled C code; cleanup old attempts to interface the C code
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2013-10-05 21:10:47 UTC (rev 367)
+++ pkg/DESCRIPTION 2013-10-05 21:11:52 UTC (rev 368)
@@ -1,17 +1,18 @@
Package: analogue
Type: Package
Title: Analogue and weighted averaging methods for palaeoecology
-Version: 0.11-4
+Version: 0.11-6
Date: $Date$
Depends: R (>= 2.15.0), vegan (>= 1.17-12), lattice, rgl
Imports: mgcv, MASS, stats, graphics, grid, brglm, princurve
+Suggests: testthat
Author: Gavin L. Simpson, Jari Oksanen
Authors at R: c(person(given = c("Gavin", "L."), family = "Simpson",
role = c("aut", "cre"),
- email = "gavin.simpson at uregina.ca"),
+ email = "ucfagls at gmail.com"),
person(given = "Jari", family = "Oksanen",
role = "aut"))
-Maintainer: Gavin L. Simpson <gavin.simpson at uregina.ca>
+Maintainer: Gavin L. Simpson <ucfagls at gmail.com>
BugReports: http://r-forge.r-project.org/tracker/?func=browse&group_id=69&atid=338
NeedsCompilation: yes
Description: Fits Modern Analogue Technique and Weighted Averaging transfer
Modified: pkg/NAMESPACE
===================================================================
--- pkg/NAMESPACE 2013-10-05 21:10:47 UTC (rev 367)
+++ pkg/NAMESPACE 2013-10-05 21:11:52 UTC (rev 368)
@@ -57,9 +57,9 @@
deshrink,
dissim,
dissimilarities,
- distance,
- distance3,
- distanceX,
+ distance, oldDistance, ## oldDistance is the original R-only code
+ #distance3,
+ #distanceX,
fuse,
getK,
gradientDist,
@@ -114,7 +114,9 @@
S3method(dissimilarities, mat)
S3method(distance, default)
S3method(distance, join)
-S3method(distance3, default)
+S3method(oldDistance, default)
+S3method(oldDistance, join)
+#S3method(distance3, default)
S3method(eigenvals, pcr)
S3method(fuse, dist)
S3method(fuse, matrix)
Modified: pkg/R/distance.R
===================================================================
--- pkg/R/distance.R 2013-10-05 21:10:47 UTC (rev 367)
+++ pkg/R/distance.R 2013-10-05 21:11:52 UTC (rev 368)
@@ -11,9 +11,9 @@
## ##
###########################################################################
## x = training data, y = fossil data
-distance <- function(x, ...) UseMethod("distance")
+oldDistance <- function(x, ...) UseMethod("distance")
-distance.join <- function(x, ...)
+oldDistance.join <- function(x, ...)
{
if(!inherits(x, "join"))
stop("This method should only be used on objects of class 'join'")
@@ -26,7 +26,7 @@
}
}
-distance.default <- function(x, y,
+oldDistance.default <- function(x, y,
method = c("euclidean", "SQeuclidean", "chord",
"SQchord", "bray", "chi.square", "SQchi.square",
"information", "chi.distance", "manhattan",
Deleted: pkg/R/distance2.R
===================================================================
--- pkg/R/distance2.R 2013-10-05 21:10:47 UTC (rev 367)
+++ pkg/R/distance2.R 2013-10-05 21:11:52 UTC (rev 368)
@@ -1,418 +0,0 @@
-## `distance2` <- function(x, ...)
-## UseMethod("distance2")
-
-## `distance2.default` <- function(x, y,
-## method = c("euclidean",
-## "SQeuclidean", "chord",
-## "SQchord", "bray", "chi.square",
-## "SQchi.square", "information",
-## "chi.distance", "manhattan",
-## "kendall", "gower", "alt.gower",
-## "mixed"),
-## weights = NULL, R = NULL,
-## type = list(),
-## ordinal = c("gower","rank","metric"), ...) {
-## pColl <- function(n) paste(n, collapse = ", ")
-## ## Euclid?an could be spelled variously
-## if(!is.na(pmatch(method, "euclidian")))
-## method <- "euclidean"
-## if (missing(method))
-## method <- "euclidean"
-## METHODS <- c("euclidean", "SQeuclidean", "chord", "SQchord",
-## "bray", "chi.square", "SQchi.square",
-## "information","chi.distance", "manhattan",
-## "kendall", "gower", "alt.gower", "mixed")
-## method <- match.arg(method)
-## DCOEF <- pmatch(method, METHODS)
-## ordinal <- match.arg(ordinal)
-## ORDTYPES <- c("gower","rank","metric")
-## if(missing(y)) { ## only a single matrix
-## ## TODO
-## } else { ## two matrices
-## ## check x and y have same columns
-## if(!isTRUE(all.equal(names(x), names(y))))
-## stop("'x' and 'y' appear to have different variables.")
-## if(!isTRUE(all.equal((n.vars <- ncol(x)), ncol(y))))
-## stop("'x' and 'y' have different numbers of columns.")
-## ## variables
-## nrx <- nrow(x)
-## nry <- nrow(y)
-## d <- numeric(length = nrx * nry)
-## ## object names (row names)
-## x.names <- rownames(x)
-## y.names <- rownames(y)
-## ## some preprocessing steps required for some coefs
-## ## so dealt with separately
-## if(method %in% c("chi.distance", "gower", "alt.gower",
-## "mixed", "kendall")) {
-## if(method == "chi.distance") {
-## x <- data.matrix(x)
-## y <- data.matrix(y)
-## csum <- colSums(rbind(x, y))
-## y <- y / rowSums(y)
-## x <- x / rowSums(x)
-## d <- .C("xy_chisq_dist", x = as.double(x), y = as.double(y),
-## nr1 = as.integer(nrx), nr2 = as.integer(nry),
-## nc = as.integer(n.vars), d = as.double(d),
-## csum = as.double(csum), NAOK = as.integer(FALSE),
-## PACKAGE = "analogue")$d
-## }
-## if(method %in% c("gower", "alt.gower", "mixed")) {
-## if(method == "mixed") {
-## if(is.null(weights))
-## weights <- rep(1, n.vars)
-## else {
-## if(length(weights) != n.vars)
-## stop("'weights' must be of length 'ncol(x)'")
-## }
-## ## process vtypes
-## if(length(type)) {
-## ## if 'type's supplied, validate
-## }
-## ## TODO
-## if(is.data.frame(x)) {
-## type2x <- sapply(x, data.class, USE.NAMES = FALSE)
-## ##x <- data.matrix(x)
-## } else {
-## type2x <- rep("numeric", n.vars)
-## names(type2x) <- colnames(x)
-## }
-## if(is.data.frame(y)) {
-## type2y <- sapply(y, data.class, USE.NAMES = FALSE)
-## ##y <- data.matrix(y)
-## } else {
-## type2y <- rep("numeric", n.vars)
-## names(type2y) <- colnames(y)
-## }
-## ## x and y should have same column types
-## if(!isTRUE(all.equal(type2x, type2y)))
-## stop("Variable types in 'x' and 'y' differ.
-## Did you forget to 'join' 'x' and 'y' before calling 'distance'?")
-## type2x[tI <- type2x %in% c("numeric", "integer")] <- "Q"
-## ## save which are ordinal for rank conversion below
-## type2x[(ordinal <- type2x == "ordered")] <- "O"
-## type2x[type2x == "factor"] <- "N"
-## type2x[type2x == "logical"] <- "A"
-## typeCodes <- c("A", "S", "N", "O", "Q", "I", "T")
-## type3 <- match(type2x, typeCodes)
-## if (any(ina <- is.na(type3)))
-## stop("invalid type ", type2x[ina], " for column numbers ",
-## pColl(which(ina)))
-
-## ## Convert to matrices from now on
-## ## also takes care of ordinal == metric as all factors
-## ## are converted to internal numeric codes
-## x <- data.matrix(x)
-## y <- data.matrix(y)
-
-## ## Convert ordinal variables to ranks or numerics
-## ## implemented as per Podani 1999. Only do ranks here as
-## ## conversion to matrices above handled the standard case
-## x[, ordinal] <- apply(x[, ordinal], 2, rank, na.last = "keep")
-## y[, ordinal] <- apply(y[, ordinal], 2, rank, na.last = "keep")
-
-## ## Compute range Rj
-## XY <- rbind(x, y)
-## if(is.null(R)) {
-## maxi <- apply(XY, 2, max, na.rm = TRUE)
-## mini <- apply(XY, 2, min, na.rm = TRUE)
-## R <- maxi - mini
-## } else {
-## if(length(R) != n.vars)
-## stop("'R' must be of length 'ncol(x)'")
-## }
-
-## ## For Ordinal we need TiMin and TiMax
-## ## compute over all variables so they have same length as
-## ## everything else
-## doT <- function(X, which) {
-## val <- if(which == "min") {
-## min(X, na.rm = TRUE)
-## } else {
-## max(X, na.rm = TRUE)
-## }
-## nas <- is.na(X)
-## length(which(X[!nas] == val))
-## }
-## tmin <- apply(XY, 2, doT, which = "min")
-## tmax <- apply(XY, 2, doT, which = "max")
-
-## ## How do we want to handle ordinals - convert to interger code
-## ## for use in C
-## podani <- match(ordinal, ORDTYPES)
-
-## ## call the C code
-## d <- .C("xy_mixed", x = as.double(x), y = as.double(y),
-## nr1 = as.integer(nrx), nr2 = as.integer(nry),
-## nc = as.integer(n.vars), d = as.double(d),
-## vtype = as.integer(type3),
-## weights = as.double(weights), R = as.double(R),
-## tmin = as.integer(tmin), tmax = as.integer(tmax),
-## podani = as.integer(podani),
-## NAOK = as.integer(TRUE),
-## PACKAGE = "analogue")$d
-## } else {
-## if(is.null(R)) {
-## XY <- rbind(x, y)
-## maxi <- apply(XY, 2, max, na.rm = TRUE)
-## mini <- apply(XY, 2, min, na.rm = TRUE)
-## R <- maxi - mini
-## } else {
-## if(length(R) != n.vars)
-## stop("'R' must be of length 'ncol(x)'")
-## }
-## x <- data.matrix(x)
-## y <- data.matrix(y)
-
-## ## pre-process for gower and alt gower
-## ## but these handled by xy_distance below
-## x <- sweep(x, 2, R, "/")
-## y <- sweep(y, 2, R, "/")
-## d <- .C("xy_distance", x = as.double(x), y = as.double(y),
-## nr1 = as.integer(nrx), nr2 = as.integer(nry),
-## nc = as.integer(n.vars), d = as.double(d),
-## method = as.integer(DCOEF), NAOK = as.integer(FALSE),
-## PACKAGE = "analogue")$d
-## }
-## }
-## if(method == "kendall") {
-## x <- data.matrix(x)
-## y <- data.matrix(y)
-## XY <- rbind(x, y)
-## maxi <- apply(XY, 2, max)
-## d <- .C("xy_kendall", x = as.double(x), y = as.double(y),
-## nr1 = as.integer(nrx), nr2 = as.integer(nry),
-## nc = as.integer(n.vars), d = as.double(d),
-## maxi = as.double(maxi), NAOK = as.integer(FALSE),
-## PACKAGE = "analogue")$d
-## }
-## } else {
-## ## must be one of the DC's handled by xy_distance
-## x <- data.matrix(x)
-## y <- data.matrix(y)
-## d <- .C("xy_distance", x = as.double(x), y = as.double(y),
-## nr1 = as.integer(nrx), nr2 = as.integer(nry),
-## nc = as.integer(n.vars), d = as.double(d),
-## method = as.integer(DCOEF), NAOK = as.integer(FALSE),
-## PACKAGE = "analogue")$d
-## }
-## ## convert d to a matrix
-## d <- matrix(d, ncol = n.vars, byrow = TRUE)
-## colnames(d) <- y.names
-## rownames(d) <- x.names
-## attr(d, "method") <- method
-## class(d) <- c("distance","matrix")
-## }
-## return(d)
-## }
-
-## set.seed(1)
-## bar <- matrix(sample(3, 9, replace = TRUE), ncol = 3)
-## foo <- matrix(sample(3, 9, replace = TRUE), ncol = 3)
-## foobar <- rbind(bar, foo)
-## out <- matrix(ncol = ncol(bar), nrow = nrow(foobar))
-## res <- numeric(length = nrow(foobar))
-## for(i in seq_len(nrow(foobar))) {
-## for(j in seq_len(ncol(foobar))) {
-## for(k in seq_along(foobar[,j])) {
-## res[k] <- foobar[k,j] == foobar[i,j]
-## }
-## out[i, j] <- sum(res)
-## }
-## }
-
-## set.seed(1)
-## bar <- matrix(sample(3, 9, replace = TRUE), ncol = 3)
-## foo <- matrix(sample(3, 9, replace = TRUE), ncol = 3)
-## outbar <- matrix(0, ncol = ncol(bar), nrow = nrow(bar))
-## outfoo <- matrix(0, ncol = ncol(foo), nrow = nrow(foo))
-## resbar <- numeric(length = nrow(bar))# + nrow(foo))
-## resfoo <- numeric(length = nrow(bar))# + nrow(foo))
-
-## for(i in seq_len(ncol(bar))) {
-## for(j in seq_len(nrow(bar))) {
-## for(k in seq_len(nrow(bar))) {
-## resbar[k] <- bar[k, i] == bar[j, i]
-## }
-## outbar[j, i] <- sum(resbar)
-## }
-## for(j in seq_len(nrow(foo))) {
-## for(k in seq_len(nrow(foo))) {
-## resfoo[k] <- foo[k, i] == bar[j, i]
-## }
-## outfoo[j, i] <- sum(resfoo)
-## }
-## }
-
-`distance3` <- function(x, ...)
- UseMethod("distance3")
-
-`distance3.default` <-
- function(x, y, method = "euclidean",
- weights = NULL, R = NULL,
- ...)
-{
- METHODS <- c("euclidean", "SQeuclidean", "chord", "SQchord",
- "bray", "chi.square", "SQchi.square", "information",
- "chi.distance", "manhattan", "kendall", "gower", "alt.gower",
- "mixed")
- pColl <- function(n) paste(n, collapse = ", ")
- ## Euclid?an could be spelled variously
- if(!is.na(pmatch(method, "euclidian")))
- method <- "euclidean"
- METHODS <- c("euclidean", "SQeuclidean", "chord", "SQchord",
- "bray", "chi.square", "SQchi.square",
- "information","chi.distance", "manhattan",
- "kendall", "gower", "alt.gower", "mixed")
- DCOEF <- pmatch(method, METHODS)
- if(missing(y)) { ## only a single matrix
- ## TODO
- } else { ## two matrices
- ## check x and y have same columns
- if(!isTRUE(all.equal(names(x), names(y))))
- stop("'x' and 'y' appear to have different variables.")
- if(!isTRUE(all.equal((n.vars <- ncol(x)), ncol(y))))
- stop("'x' and 'y' have different numbers of columns.")
- ## variables
- nrx <- nrow(x)
- nry <- nrow(y)
- d <- numeric(length = nrx * nry)
- ## object names (row names)
- x.names <- rownames(x)
- y.names <- rownames(y)
- ## some preprocessing steps required for some coefs
- ## so dealt with separately
- if(method %in% c("chi.distance", "gower", "alt.gower",
- "mixed", "kendall")) {
- if(method == "chi.distance") {
- x <- data.matrix(x)
- y <- data.matrix(y)
- csum <- colSums(rbind(x, y))
- y <- y / rowSums(y)
- x <- x / rowSums(x)
- d <- .C("xy_chisq_dist", x = as.double(x), y = as.double(y),
- nr1 = as.integer(nrx), nr2 = as.integer(nry),
- nc = as.integer(n.vars), d = as.double(d),
- csum = as.double(csum), NAOK = as.integer(FALSE),
- PACKAGE = "analogue")$d
- }
- if(method %in% c("gower", "alt.gower", "mixed")) {
- if(method == "mixed") {
- if(is.null(weights))
- weights <- rep(1, n.vars)
- else {
- if(length(weights) != n.vars)
- stop("'weights' must be of length 'ncol(x)'")
- }
- ## process vtypes
- if(is.data.frame(x)) {
- type2x <- sapply(x, data.class, USE.NAMES = FALSE)
- ##x <- data.matrix(x)
- } else {
- type2x <- rep("numeric", n.vars)
- names(type2x) <- colnames(x)
- }
- if(is.data.frame(y)) {
- type2y <- sapply(y, data.class, USE.NAMES = FALSE)
- ##y <- data.matrix(y)
- } else {
- type2y <- rep("numeric", n.vars)
- names(type2y) <- colnames(y)
- }
- ## x and y should have same column types
- if(!isTRUE(all.equal(type2x, type2y)))
- stop("Variable types in 'x' and 'y' differ.
-Did you forget to 'join' 'x' and 'y' before calling 'distance'?")
-
- ## Record the variable types
- type2x[tI <- type2x %in% c("numeric", "integer")] <- "Q"
- ## save which are ordinal for rank conversion below - TODO
- type2x[(ordinal <- type2x == "ordered")] <- "O"
- type2x[type2x == "factor"] <- "N"
- type2x[type2x == "logical"] <- "A"
- typeCodes <- c("A", "S", "N", "O", "Q", "I", "T")
- type3 <- match(type2x, typeCodes)
- if (any(ina <- is.na(type3)))
- stop("invalid type ", type2x[ina], " for column numbers ",
- pColl(which(ina)))
-
- ## Convert to matrices from now on
- ## also takes care of ordinal == metric as all factors
- ## are converted to internal numeric codes
- x <- data.matrix(x)
- y <- data.matrix(y)
-
- ## Compute range Rj
- XY <- rbind(x, y)
- if(is.null(R)) {
- maxi <- apply(XY, 2, max, na.rm = TRUE)
- mini <- apply(XY, 2, min, na.rm = TRUE)
- R <- maxi - mini
- } else {
- if(length(R) != n.vars)
- stop("'R' must be of length 'ncol(x)'")
- }
-
- ## call the C code
- d <- .C("xy_mixed", x = as.double(x), y = as.double(y),
- nr1 = as.integer(nrx), nr2 = as.integer(nry),
- nc = as.integer(n.vars), d = as.double(d),
- vtype = as.integer(type3),
- weights = as.double(weights), R = as.double(R),
- NAOK = as.integer(TRUE),
- PACKAGE = "analogue")$d
- } else {
- if(is.null(R)) {
- XY <- rbind(x, y)
- maxi <- apply(XY, 2, max, na.rm = TRUE)
- mini <- apply(XY, 2, min, na.rm = TRUE)
- R <- maxi - mini
- } else {
- if(length(R) != n.vars)
- stop("'R' must be of length 'ncol(x)'")
- }
- x <- data.matrix(x)
- y <- data.matrix(y)
-
- ## pre-process for gower and alt gower
- ## but these handled by xy_distance below
- x <- sweep(x, 2, R, "/")
- y <- sweep(y, 2, R, "/")
- d <- .C("xy_distance", x = as.double(x), y = as.double(y),
- nr1 = as.integer(nrx), nr2 = as.integer(nry),
- nc = as.integer(n.vars), d = as.double(d),
- method = as.integer(DCOEF), NAOK = as.integer(FALSE),
- PACKAGE = "analogue")$d
- }
- }
- if(method == "kendall") {
- x <- data.matrix(x)
- y <- data.matrix(y)
- XY <- rbind(x, y)
- maxi <- apply(XY, 2, max)
- d <- .C("xy_kendall", x = as.double(x), y = as.double(y),
- nr1 = as.integer(nrx), nr2 = as.integer(nry),
- nc = as.integer(n.vars), d = as.double(d),
- maxi = as.double(maxi), NAOK = as.integer(FALSE),
- PACKAGE = "analogue")$d
- }
- } else {
- ## must be one of the DC's handled by xy_distance
- x <- data.matrix(x)
- y <- data.matrix(y)
- d <- .C("xy_distance", x = as.double(x), y = as.double(y),
- nr1 = as.integer(nrx), nr2 = as.integer(nry),
- nc = as.integer(n.vars), d = as.double(d),
- method = as.integer(DCOEF), NAOK = as.integer(FALSE),
- PACKAGE = "analogue")$d
- }
- ## convert d to a matrix
- d <- matrix(d, ncol = nry, byrow = TRUE)
- colnames(d) <- y.names
- rownames(d) <- x.names
- attr(d, "method") <- method
- attr(d, "type") <- "asymmetric"
- class(d) <- c("distance","matrix")
- }
- return(d)
-}
Deleted: pkg/R/distance_c.R
===================================================================
--- pkg/R/distance_c.R 2013-10-05 21:10:47 UTC (rev 367)
+++ pkg/R/distance_c.R 2013-10-05 21:11:52 UTC (rev 368)
@@ -1,110 +0,0 @@
-distanceX <- function(x, y, method = "euclidean", weights = NULL, R = NULL,
- as.dist = FALSE, ...) {
- ## Euclid?an could be spelled variously
- if(!is.na(pmatch(method, "euclidian")))
- method <- "euclidean"
- METHODS <- c("euclidean", "SQeuclidean", "chord", "SQchord",
- "bray", "chi.square", "SQchi.square",
- "information","chi.distance", "manhattan",
- "kendall", "gower", "alt.gower", "mixed")
- DCOEF <- pmatch(method, METHODS)
- if(missing(y)) { ## only a single matrix
- ## variables
- nr <- nrow(x)
- nc <- ncol(x)
- ## object names (row names)
- x.names <- rownames(x)
- ## some preprocessing steps required for some coefs
- ## so dealt with separately
- if(method %in% c("chi.distance", "gower", "alt.gower",
- "mixed", "kendall")) {
- if(method == "chi.distance") {
- x <- data.matrix(x)
- csum <- colSums(x)
- x <- x / rowSums(x)
- d <- .Call("Cchisqdistxx", x, csum, PACKAGE = "analogue")
- }
- if(method == "kendall") {
- x <- data.matrix(x)
- maxi <- apply(x, 2, max)
- d <- .Call("Ckendallxx", x, maxi, PACKAGE = "analogue")
- }
- if(method %in% c("gower", "alt.gower")) {
- if(is.null(R)) {
- x <- data.matrix(x)
- maxi <- apply(x, 2, max, na.rm = TRUE)
- mini <- apply(x, 2, min, na.rm = TRUE)
- R <- maxi - mini
- } else {
- if(length(R) != nc)
- stop("'R' must be of length 'ncol(x)'")
- }
- ## pre-process here for gower and alt.gower
- ## but note we call the main driver Cdistxx
- x <- sweep(x, 2, R, "/")
- d <- .Call("Cdistxx", x, DCOEF, PACKAGE = "analogue")
- }
- } else {
- ## must be one of the DC's handled by xy_distance
- x <- data.matrix(x)
- d <- .Call("Cdistxx", x, DCOEF, PACKAGE = "analogue")
- }
- attr(d, "Size") <- nr
- attr(d, "Labels") <- x.names
- attr(d, "Diag") <- FALSE
- attr(d, "Upper") <- FALSE
- attr(d, "method") <- method
- attr(d, "call") <- match.call()
- class(d) <- "dist"
- if(!as.dist) {
- d <- as.matrix(d)
- attr(d, "method") <- method
- attr(d, "type") <- "symmetric"
- class(d) <- c("distance","matrix")
- }
- } else { ## two matrices
- ## check x and y have same columns
- if(!isTRUE(all.equal(names(x), names(y))))
- stop("'x' and 'y' appear to have different variables.")
- if(!isTRUE(all.equal((n.vars <- ncol(x)), ncol(y))))
- stop("'x' and 'y' have different numbers of columns.")
- ## variables
- nrx <- nrow(x)
- nry <- nrow(y)
- ## object names (row names)
- x.names <- rownames(x)
- y.names <- rownames(y)
- ## some preprocessing steps required for some coefs
- ## so dealt with separately
- if(method %in% c("chi.distance", "gower", "alt.gower",
- "mixed", "kendall")) {
- if(method == "chi.distance") {
- x <- data.matrix(x)
- y <- data.matrix(y)
- csum <- colSums(rbind(x, y))
- y <- y / rowSums(y)
- x <- x / rowSums(x)
- d <- .C("xy_chisq_dist", x = as.double(x), y = as.double(y),
- nr1 = as.integer(nrx), nr2 = as.integer(nry),
- nc = as.integer(n.vars), d = as.double(d),
- csum = as.double(csum), NAOK = as.integer(FALSE),
- PACKAGE = "analogue")$d
- ##d <- .Call("Cchisqdistxy", x, y, )
- }
- } else {
- ## must be one of the DC's handled by xy_distance
- x <- data.matrix(x)
- y <- data.matrix(y)
- d <- .Call("Cdistxy", x, y, DCOEF, PACKAGE = "analogue")
- }
-
- ## convert d to a matrix
- d <- matrix(d, ncol = nry, byrow = TRUE)
- colnames(d) <- y.names
- rownames(d) <- x.names
- attr(d, "method") <- method
- attr(d, "type") <- "asymmetric"
- class(d) <- c("distance","matrix")
- }
- d
-}
Added: pkg/R/new-distance.R
===================================================================
--- pkg/R/new-distance.R (rev 0)
+++ pkg/R/new-distance.R 2013-10-05 21:11:52 UTC (rev 368)
@@ -0,0 +1,227 @@
+## New distance() generic and methods
+
+`distance` <- function(x, ...) {
+ UseMethod("distance")
+}
+
+distance.join <- function(x, ...) {
+ if(!inherits(x, "join"))
+ stop("This method should only be used on objects of class 'join'")
+ if(inherits(x, "data.frame")) {
+ distance.default(x, ...)
+ } else {
+ if(length(x) != 2)
+ warning("Object contains more than 2 data sets.\n Only the first 2 data sets used")
+ distance.default(x[[1]], x[[2]], ...)
+ }
+}
+
+`distance.default` <- function(x, y, method = "euclidean", weights = NULL,
+ R = NULL, dist = FALSE, ...){
+ ## Euclid?an could be spelled variously
+ if(!is.na(pmatch(method, "euclidian")))
+ method <- "euclidean"
+ METHODS <- c("euclidean", "SQeuclidean", "chord", "SQchord",
+ "bray", "chi.square", "SQchi.square",
+ "information","chi.distance", "manhattan",
+ "kendall", "gower", "alt.gower", "mixed")
+ DCOEF <- pmatch(method, METHODS)
+ if(miss.y <- missing(y)) {
+ dmat <- dxx(x = x, DCOEF = DCOEF, weights = weights,
+ R = R, dist = dist, ...)
+ } else {
+ dmat <- dxy(x = x, y = y, DCOEF = DCOEF, weights = weights,
+ R = R, ...)
+ }
+
+ ## add attributes, classes, and return
+ attr(dmat, "method") <- method
+ if(!dist) {
+ class(dmat) <- c("distance", "matrix")
+ attr(dmat, "type") <- if(miss.y) "symmetric" else "asymmetric"
+ }
+ dmat
+}
+
+## Internal, not exported, function for computing distances when
+## only `x` is available
+`dxx` <- function(x, DCOEF, weights, R, dist = FALSE, ...) {
+ ## variables
+ nr <- nrow(x)
+ nc <- ncol(x)
+ ## object names (row names)
+ x.names <- rownames(x)
+
+ ## allocate storage
+ d <- double(nr * (nr - 1)/2)
+
+ ## some preprocessing steps required for some coefs
+ ## so dealt with separately
+ if(DCOEF %in% c(9L, 11L, 12L, 13L, 14L)) {
+ ## "chi.distance", "gower", "alt.gower","mixed", "kendall"
+ if(DCOEF == 9L) { ## "chi.distance"
+ x <- data.matrix(x)
+ csum <- colSums(x)
+ x <- x / rowSums(x)
+ d <- .C("xx_chisq_dist", x = as.double(x), nr = as.integer(nr),
+ nc = as.integer(nc), d = as.double(d),
+ diag = as.integer(FALSE),
+ csum = as.double(csum), NAOK = as.integer(FALSE),
+ PACKAGE = "analogue")$d
+ }
+ if(DCOEF == 11L) { ## "kendall"
+ x <- data.matrix(x)
+ maxi <- apply(x, 2, max)
+ d <- .C("xx_kendall", x = as.double(x), nr = as.integer(nr),
+ nc = as.integer(nc), d = as.double(d),
+ diag = as.integer(FALSE),
+ maxi = as.double(maxi), NAOK = as.integer(FALSE),
+ PACKAGE = "analogue")$d
+ }
+ if(DCOEF == 14L) { ## "mixed"
+ ## TODO
+ }
+ if(DCOEF %in% c(12L, 13L)) { ## "gower", "alt.gower"
+ if(is.null(R)) {
+ x <- data.matrix(x)
+ maxi <- apply(x, 2, max, na.rm = TRUE)
+ mini <- apply(x, 2, min, na.rm = TRUE)
+ R <- maxi - mini
+ } else {
+ if(length(R) != nc)
+ stop("'R' must be of length 'ncol(x)'")
+ }
+ ## pre-process here for gower and alt.gower
+ ## but note we call the main driver Cdistxx
+ x <- sweep(x, 2, R, "/")
+ d <- .C("xx_distance", x = as.double(x),
+ nr = as.integer(nr), nc = as.integer(nc),
+ d = as.double(d), diag = as.integer(FALSE),
+ method = as.integer(DCOEF),
+ NAOK = as.integer(FALSE),
+ PACKAGE = "analogue")$d
+ }
+ } else {
+ ## must be one of the DC's handled by xy_distance
+ x <- data.matrix(x)
+ d <- .C("xx_distance", x = as.double(x),
+ nr = as.integer(nr), nc = as.integer(nc),
+ d = as.double(d), diag = as.integer(FALSE),
+ method = as.integer(DCOEF),
+ NAOK = as.integer(FALSE), PACKAGE = "analogue")$d
+ }
+
+ ## convert d to a matrix
+ ZAP <- 1e-15
+ d[d < ZAP] <- 0
+ if (any(is.na(d)))
+ warning("missing values in results")
+ attr(d, "Size") <- nr
+ attr(d, "Labels") <- x.names #dimnames(x)[[1]]
+ attr(d, "Diag") <- FALSE
+ attr(d, "Upper") <- FALSE
+ attr(d, "method") <- DCOEF
+ attr(d, "call") <- match.call()
+ class(d) <- "dist"
+
+ ## convert to matrix? Only if dist == FALSE
+ if(!dist) {
+ d <- as.matrix(d)
+ }
+
+ ## return
+ d
+}
+
+## Internal, not exported, function for computing distances when
+## both `x` and `y` are available
+`dxy` <- function(x, y, DCOEF, weights, R, ...) {
+ ## check x and y have same columns
+ if(!isTRUE(all.equal(colnames(x), colnames(y))))
+ stop("'x' and 'y' appear to have different variables.")
+ if(!isTRUE(all.equal((n.vars <- ncol(x)), ncol(y))))
+ stop("'x' and 'y' have different numbers of columns.")
+ ## variables
+ nrx <- nrow(x)
+ nry <- nrow(y)
+ nc <- ncol(x)
+
+ ## object names (row names)
+ x.names <- rownames(x)
+ y.names <- rownames(y)
+
+ ## allocate storage
+ d <- numeric(length = nrx * nry)
+
+ ## some preprocessing steps required for some coefs
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/analogue -r 368
More information about the Analogue-commits
mailing list