[Analogue-commits] r371 - in pkg: R man src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Oct 7 00:48:37 CEST 2013
Author: gsimpson
Date: 2013-10-07 00:48:37 +0200 (Mon, 07 Oct 2013)
New Revision: 371
Modified:
pkg/R/distance.R
pkg/R/new-distance.R
pkg/man/distance.Rd
pkg/src/distxy.c
Log:
now Gowers mixed coef also works for case of supplied x and y
Modified: pkg/R/distance.R
===================================================================
--- pkg/R/distance.R 2013-10-06 20:40:32 UTC (rev 370)
+++ pkg/R/distance.R 2013-10-06 22:48:37 UTC (rev 371)
@@ -11,7 +11,7 @@
## ##
###########################################################################
## x = training data, y = fossil data
-oldDistance <- function(x, ...) UseMethod("distance")
+oldDistance <- function(x, ...) UseMethod("oldDistance")
oldDistance.join <- function(x, ...)
{
Modified: pkg/R/new-distance.R
===================================================================
--- pkg/R/new-distance.R 2013-10-06 20:40:32 UTC (rev 370)
+++ pkg/R/new-distance.R 2013-10-06 22:48:37 UTC (rev 371)
@@ -89,7 +89,7 @@
if(is.data.frame(x)) {
xType <- sapply(x, data.class, USE.NAMES = FALSE)
} else {
- xType <- rep("numeric", n.vars)
+ xType <- rep("numeric", nc)
names(xType) <- colnames(x)
}
## Record the variable types
@@ -102,7 +102,7 @@
xType <- match(xType, typeCodes)
if (any(ina <- is.na(xType)))
stop("invalid type ", xType[ina], " for column numbers ",
- paste(pColl(which(ina)), collapse = ", "))
+ paste(which(ina), collapse = ", "))
## convert to matrix, preserving factor info as numeric
x <- data.matrix(x)
@@ -230,7 +230,74 @@
PACKAGE = "analogue")$d
}
if(DCOEF == 14L) { ## "mixed"
- ## TODO
+ if(is.null(weights))
+ weights <- rep(1, nc)
+ else {
+ if(length(weights) != nc)
+ stop("'weights' must be of length 'ncol(x)'")
+ }
+ ## process vtypes
+ if(is.data.frame(x)) {
+ xType <- sapply(x, data.class, USE.NAMES = FALSE)
+ ##x <- data.matrix(x)
+ } else {
+ xType <- rep("numeric", nc)
+ names(xType) <- colnames(x)
+ }
+ if(is.data.frame(y)) {
+ yType <- sapply(y, data.class, USE.NAMES = FALSE)
+ ##y <- data.matrix(y)
+ } else {
+ yType <- rep("numeric", nc)
+ names(yType) <- colnames(y)
+ }
+ ## x and y should have same column types
+ if(!isTRUE(all.equal(xType, yType)))
+ stop("Variable types in 'x' and 'y' differ.
+Did you forget to 'join' 'x' and 'y' before calling 'distance'?")
+
+ ## Record the variable types
+ xType[tI <- xType %in% c("numeric", "integer")] <- "Q"
+ ## save which are ordinal for rank conversion below - TODO
+ xType[(ordinal <- xType == "ordered")] <- "O"
+ xType[xType == "factor"] <- "N"
+ xType[xType == "logical"] <- "A"
+ typeCodes <- c("A", "S", "N", "O", "Q", "I", "T")
+ xType <- match(xType, typeCodes)
+ if (any(ina <- is.na(xType)))
+ stop("invalid type ", xType[ina], " for column numbers ",
+ paste(which(ina), collapse = ", "))
+
+ ## Convert to matrices from now on
+ ## also takes care of ordinal == metric as all factors
+ ## are converted to internal numeric codes
+ x <- data.matrix(x)
+ y <- data.matrix(y)
+
+ ## Compute range Rj
+ XY <- rbind(x, y)
+ if(is.null(R)) {
+ maxi <- apply(XY, 2, max, na.rm = TRUE)
+ mini <- apply(XY, 2, min, na.rm = TRUE)
+ R <- maxi - mini
+ } else {
+ if(length(R) != nc)
+ stop("'R' must be of length 'ncol(x)'")
+ }
+
+ ## call the C code
+ d <- .C("xy_mixed",
+ x = as.double(x),
+ y = as.double(y),
+ nr1 = as.integer(nrx),
+ nr2 = as.integer(nry),
+ nc = as.integer(nc),
+ d = as.double(d),
+ vtype = as.integer(xType),
+ weights = as.double(weights),
+ R = as.double(R),
+ NAOK = as.integer(TRUE),
+ PACKAGE = "analogue")$d
}
if(DCOEF %in% c(12L, 13L)) { ## "gower", "alt.gower"
if(is.null(R)) {
Modified: pkg/man/distance.Rd
===================================================================
--- pkg/man/distance.Rd 2013-10-06 20:40:32 UTC (rev 370)
+++ pkg/man/distance.Rd 2013-10-06 22:48:37 UTC (rev 371)
@@ -238,22 +238,22 @@
## calculate Gower's general coefficient for mixed data
## first, make a couple of variables factors
+
fossil[,4] <- factor(sample(rep(1:4, length = 10), 10))
train[,4] <- factor(sample(rep(1:4, length = 20), 20))
## now fit the mixed coefficient
-#test3 <- distance(train, fossil, "mixed")
+test3 <- distance(train, fossil, "mixed")
## Example from page 260 of Legendre & Legendre (1998)
x1 <- t(c(2,2,NA,2,2,4,2,6))
x2 <- t(c(1,3,3,1,2,2,2,5))
Rj <- c(1,4,2,4,1,3,2,5) # supplied ranges
-#distance(x1, x2, method = "mixed", R = Rj)
+1 - distance(x1, x2, method = "mixed", R = Rj)
-## note this gives 1 - 0.66 (not 0.66 as the answer in
-## Legendre & Legendre) as this is expressed as a
-## distance whereas Legendre & Legendre describe the
-## coefficient as similarity coefficient
+## note this gives ~0.66 as Legendre & Legendre describe the
+## coefficient as a similarity coefficient. Hence here we do
+## 1 - Dij here to get the same answer.
}
\keyword{multivariate}% at least one, from doc/KEYWORDS
\keyword{methods}
Modified: pkg/src/distxy.c
===================================================================
--- pkg/src/distxy.c 2013-10-06 20:40:32 UTC (rev 370)
+++ pkg/src/distxy.c 2013-10-06 22:48:37 UTC (rev 371)
@@ -587,84 +587,89 @@
int nc, int i1, int i2, int *vtype,
double *weights, double *R, double wsum)
{
- double dist, dev;
- int count, j;
+ double dist, dev;
+ int count, j;
+
+ count = 0;
+ dist = 0.0;
+ wsum = 0.0;
+ //curweights = weights; /* current weights */
- count = 0;
- dist = 0.0;
- wsum = 0.0;
- //curweights = weights; /* current weights */
-
- for (j=0; j<nc; j++) {
- if (R_FINITE(x[i1]) && R_FINITE(y[i2])) {
- if(vtype[j] == 1) { // Symmetric binary
- dev = (x[i1] == y[i2]) ? 1 : 0;
- dist += dev * weights[j];
- }
- if(vtype[j] == 2) { // Asymmetric binary
- if((x[i1] != 0) || (y[i2] != 0)) {
- // both x and y not zero for this variables
- dev = (x[i1] == y[i2]) ? 1 : 0;
- dist += dev * weights[j];
- } else {
- /* set jth current weight to zero and do not
- increment dist as ignoring double zero
- We actually subtract the weight as it gets added
- later on.
- */
- wsum -= weights[j];
- }
- }
- if(vtype[j] == 3) { // Nominal
- dev = (x[i1] == y[i2]) ? 1 : 0;
- dist += dev * weights[j];
- }
- if(vtype[j] == 4) { // Ordinal
- dev = (x[i1] == y[i2]) ? 1 : 0;
- dist += dev * weights[j];
- break;
- /* ordinal data current not handled
- * so don't call this yet
- */
- /* switch(ord) {
- case 1: { // classic gower as per nominal
- dev = (x[i1] == y[i2]) ? 1 : 0;
- dist += dev * weights[j];
- break;
- }
- case 2: { // podanis rank version
- if(x[i1] == y[i2]) {
- dev = 1;
- } else {
- dev = (fabs(x[i1] - y[i2])) /
- (R[j] - (tmax - 1)/2 - (tmin - 1)/2);
- }
- break;
- }
- case 3: { // podanis metric version treat as Quantitative??
- dev = 1 - (fabs(x[i1] - y[i2]) / R[j]);
- dist += dev * weights[j];
- break;
- }
- default: {
- dist += 0;
- break;
- }
- }*/
- }
- if(vtype[j] == 5) { // Quantitative
- dev = 1 - (fabs(x[i1] - y[i2]) / R[j]);
- dist += dev * weights[j];
- }
- count++;
- // only summing weights for non-NA comparisons
- wsum += weights[j];
+ for (j=0; j<nc; j++) {
+ if (R_FINITE(x[i1]) && R_FINITE(y[i2])) {
+ // Symmetric binary
+ if(vtype[j] == 1) {
+ dev = (x[i1] == y[i2]) ? 1 : 0;
+ dist += dev * weights[j];
+ }
+ // Asymmetric binary
+ if(vtype[j] == 2) {
+ if((x[i1] != 0) || (y[i2] != 0)) {
+ // both x and y not zero for this variables
+ dev = (x[i1] == y[i2]) ? 1 : 0;
+ dist += dev * weights[j];
+ } else {
+ /* set jth current weight to zero and do not
+ increment dist as ignoring double zero
+ We actually subtract the weight as it gets added
+ later on.
+ */
+ wsum -= weights[j];
}
- i1 += nr1;
- i2 += nr2;
+ }
+ // Nominal
+ if(vtype[j] == 3) {
+ dev = (x[i1] == y[i2]) ? 1 : 0;
+ dist += dev * weights[j];
+ }
+ // Ordinal
+ if(vtype[j] == 4) {
+ dev = (x[i1] == y[i2]) ? 1 : 0;
+ dist += dev * weights[j];
+ break;
+ /* ordinal data current not handled
+ * so don't call this yet
+ */
+ /* switch(ord) {
+ case 1: { // classic gower as per nominal
+ dev = (x[i1] == y[i2]) ? 1 : 0;
+ dist += dev * weights[j];
+ break;
+ }
+ case 2: { // podanis rank version
+ if(x[i1] == y[i2]) {
+ dev = 1;
+ } else {
+ dev = (fabs(x[i1] - y[i2])) /
+ (R[j] - (tmax - 1)/2 - (tmin - 1)/2);
+ }
+ break;
+ }
+ case 3: { // podanis metric version treat as Quantitative??
+ dev = 1 - (fabs(x[i1] - y[i2]) / R[j]);
+ dist += dev * weights[j];
+ break;
+ }
+ default: {
+ dist += 0;
+ break;
+ }
+ }*/
+ }
+ // Quantitative
+ if(vtype[j] == 5) {
+ dev = 1 - (fabs(x[i1] - y[i2]) / R[j]);
+ dist += dev * weights[j];
+ }
+ count++;
+ // only summing weights for non-NA comparisons
+ wsum += weights[j];
}
- if (count == 0) return NA_REAL;
- return 1 - (dist / wsum);
+ i1 += nr1;
+ i2 += nr2;
+ }
+ if (count == 0) return NA_REAL;
+ return 1 - (dist / wsum);
}
double xy_calcTI(double *x, double *y, int nr1, int nr2, int nc, int i1, int i2)
More information about the Analogue-commits
mailing list