[Analogue-commits] r369 - in pkg: R src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sun Oct 6 22:32:05 CEST 2013
Author: gsimpson
Date: 2013-10-06 22:32:05 +0200 (Sun, 06 Oct 2013)
New Revision: 369
Modified:
pkg/R/new-distance.R
pkg/src/distx.c
Log:
Gowers mixed coef now implemented for the x-only case
Modified: pkg/R/new-distance.R
===================================================================
--- pkg/R/new-distance.R 2013-10-05 21:11:52 UTC (rev 368)
+++ pkg/R/new-distance.R 2013-10-06 20:32:05 UTC (rev 369)
@@ -79,7 +79,56 @@
PACKAGE = "analogue")$d
}
if(DCOEF == 14L) { ## "mixed"
- ## TODO
+ if(is.null(weights))
+ weights <- rep(1, nc)
+ else {
+ if(length(weights) != nc)
+ stop("'weights' must be of length 'ncol(x)'")
+ }
+ ## process vtypes
+ if(is.data.frame(x)) {
+ xType <- sapply(x, data.class, USE.NAMES = FALSE)
+ } else {
+ xType <- rep("numeric", n.vars)
+ names(xType) <- colnames(x)
+ }
+ ## Record the variable types
+ xType[tI <- xType %in% c("numeric", "integer")] <- "Q"
+ ## save which are ordinal for rank conversion below - TODO
+ xType[(ordinal <- xType == "ordered")] <- "O"
+ xType[xType == "factor"] <- "N"
+ xType[xType == "logical"] <- "A"
+ typeCodes <- c("A", "S", "N", "O", "Q", "I", "T")
+ xType <- match(xType, typeCodes)
+ if (any(ina <- is.na(xType)))
+ stop("invalid type ", xType[ina], " for column numbers ",
+ paste(pColl(which(ina)), collapse = ", "))
+
+ ## convert to matrix, preserving factor info as numeric
+ x <- data.matrix(x)
+
+ ## Compute range Rj
+ if(is.null(R)) {
+ maxi <- apply(x, 2, max, na.rm = TRUE)
+ mini <- apply(x, 2, min, na.rm = TRUE)
+ R <- maxi - mini
+ } else {
+ if(length(R) != nc)
+ stop("'R' must be of length 'ncol(x)'")
+ }
+
+ ## call the C code
+ d <- .C("xx_mixed",
+ x = as.double(x),
+ nr = as.integer(nr),
+ nc = as.integer(nc),
+ d = as.double(d),
+ diag = as.integer(FALSE),
+ vtype = as.integer(xType),
+ weights = as.double(weights),
+ R = as.double(R),
+ NAOK = as.integer(TRUE),
+ PACKAGE = "analogue")$d
}
if(DCOEF %in% c(12L, 13L)) { ## "gower", "alt.gower"
if(is.null(R)) {
Modified: pkg/src/distx.c
===================================================================
--- pkg/src/distx.c 2013-10-05 21:11:52 UTC (rev 368)
+++ pkg/src/distx.c 2013-10-06 20:32:05 UTC (rev 369)
@@ -496,74 +496,76 @@
int *vtype, double *weights, double *R,
double wsum)
{
- double dist, dev;
- int count, j;
-
- count = 0;
- dist = 0.0;
- wsum = 0.0;
-
- for (j=0; j<nc; j++) {
- if (R_FINITE(x[i1]) && R_FINITE(x[i2])) {
- if(vtype[j] == 1) {
- dev = (x[i1] == x[i2]) ? 1 : 0;
- dist += dev * weights[j];
- }
- if(vtype[j] == 2) { // Asymmetric binary
- /*dev = (x[i1] == x[i2]) ? 1 : 0;
- dist += dev * weights[j]; */
- if((x[i1] != 0) || (x[i2] != 0)) {
- // both x1 and x2 not zero for this variables
- dev = (x[i1] == x[i2]) ? 1 : 0;
- dist += dev * weights[j];
- } else {
- /* set jth current weight to zero and do not
- increment dist as ignoring double zero
- We actually subtract the weight as it gets added
- later on.
- */
- wsum -= weights[j];
- }
- }
- if(vtype[j] == 3) { // Nominal
- dev = (x[i1] == x[i2]) ? 1 : 0;
- dist += dev * weights[j];
- }
- if(vtype[j] == 4) { // Ordinal
- /* ordinal data currently handled as Nominal */
- dev = (x[i1] == x[i2]) ? 1 : 0;
- dist += dev * weights[j];
- break;
- }
- if(vtype[j] == 5) {
- dev = 1 - (fabs(x[i1] - x[i2]) / R[j]);
- dist += dev * weights[j];
- }
- count++;
- // only summing weights for non-NA comparisons
- wsum += weights[j];
+ double dist, dev;
+ int count, j;
+
+ count = 0;
+ dist = 0.0;
+ wsum = 0.0;
+
+ for (j=0; j<nc; j++) {
+ if (R_FINITE(x[i1]) && R_FINITE(x[i2])) {
+ if(vtype[j] == 1) {
+ dev = (x[i1] == x[i2]) ? 1 : 0;
+ dist += dev * weights[j];
+ }
+ if(vtype[j] == 2) { // Asymmetric binary
+ /*dev = (x[i1] == x[i2]) ? 1 : 0;
+ dist += dev * weights[j]; */
+ if((x[i1] != 0) || (x[i2] != 0)) {
+ // both x1 and x2 not zero for this variables
+ dev = (x[i1] == x[i2]) ? 1 : 0;
+ dist += dev * weights[j];
+ } else {
+ /* set jth current weight to zero and do not
+ increment dist as ignoring double zero
+ We actually subtract the weight as it gets added
+ later on.
+ */
+ wsum -= weights[j];
}
- i1 += nr;
- i2 += nr;
+ }
+ if(vtype[j] == 3) { // Nominal
+ dev = (x[i1] == x[i2]) ? 1 : 0;
+ dist += dev * weights[j];
+ }
+ if(vtype[j] == 4) { // Ordinal
+ /* ordinal data currently handled as Nominal */
+ dev = (x[i1] == x[i2]) ? 1 : 0;
+ dist += dev * weights[j];
+ break;
+ }
+ if(vtype[j] == 5) {
+ dev = 1 - (fabs(x[i1] - x[i2]) / R[j]);
+ dist += dev * weights[j];
+ }
+ count++;
+ // only summing weights for non-NA comparisons
+ wsum += weights[j];
}
- if (count == 0) return NA_REAL;
- return 1 - (dist / wsum);
+ i1 += nr;
+ i2 += nr;
+ }
+ if (count == 0) return NA_REAL;
+ return 1 - (dist / wsum);
}
+/*
double xx_calcTI(double *x, double *y, int nr1, int nr2, int nc, int i1, int i2)
{
- int k;
- double ti;
+int k;
+double ti;
- ti = 0.0;
+ti = 0.0;
- for (k=0; k<nc; k++) {
- ti += (x[i1] == y[i2]) ? 1.0 : 0.0;
- i1 += nr1;
- i2 += nr2;
- }
- return ti;
+for (k=0; k<nc; k++) {
+ti += (x[i1] == y[i2]) ? 1.0 : 0.0;
+i1 += nr1;
+i2 += nr2;
}
+return ti;
+}
+*/
void xx_mixed(double *x, int *nr, int *nc, double *d,
int *diag, int *vtype, double *weights, double *R)
More information about the Analogue-commits
mailing list