[Analogue-commits] r371 - in pkg: R man src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Oct 7 00:48:37 CEST 2013


Author: gsimpson
Date: 2013-10-07 00:48:37 +0200 (Mon, 07 Oct 2013)
New Revision: 371

Modified:
   pkg/R/distance.R
   pkg/R/new-distance.R
   pkg/man/distance.Rd
   pkg/src/distxy.c
Log:
now Gowers mixed coef also works for case of supplied x and y

Modified: pkg/R/distance.R
===================================================================
--- pkg/R/distance.R	2013-10-06 20:40:32 UTC (rev 370)
+++ pkg/R/distance.R	2013-10-06 22:48:37 UTC (rev 371)
@@ -11,7 +11,7 @@
 ##                                                                       ##
 ###########################################################################
 ## x = training data, y = fossil data
-oldDistance <- function(x, ...) UseMethod("distance")
+oldDistance <- function(x, ...) UseMethod("oldDistance")
 
 oldDistance.join <- function(x, ...)
   {

Modified: pkg/R/new-distance.R
===================================================================
--- pkg/R/new-distance.R	2013-10-06 20:40:32 UTC (rev 370)
+++ pkg/R/new-distance.R	2013-10-06 22:48:37 UTC (rev 371)
@@ -89,7 +89,7 @@
             if(is.data.frame(x)) {
                 xType <- sapply(x, data.class, USE.NAMES = FALSE)
             } else {
-                xType <- rep("numeric", n.vars)
+                xType <- rep("numeric", nc)
                 names(xType) <- colnames(x)
             }
             ## Record the variable types
@@ -102,7 +102,7 @@
             xType <- match(xType, typeCodes)
             if (any(ina <- is.na(xType)))
                 stop("invalid type ", xType[ina], " for column numbers ",
-                     paste(pColl(which(ina)), collapse = ", "))
+                     paste(which(ina), collapse = ", "))
 
             ## convert to matrix, preserving factor info as numeric
             x <- data.matrix(x)
@@ -230,7 +230,74 @@
                     PACKAGE = "analogue")$d
         }
         if(DCOEF == 14L) { ## "mixed"
-            ## TODO
+            if(is.null(weights))
+                weights <- rep(1, nc)
+            else {
+                if(length(weights) != nc)
+                    stop("'weights' must be of length 'ncol(x)'")
+            }
+            ## process vtypes
+            if(is.data.frame(x)) {
+                xType <- sapply(x, data.class, USE.NAMES = FALSE)
+                ##x <- data.matrix(x)
+            } else {
+                xType <- rep("numeric", nc)
+                names(xType) <- colnames(x)
+            }
+            if(is.data.frame(y)) {
+                yType <- sapply(y, data.class, USE.NAMES = FALSE)
+                ##y <- data.matrix(y)
+            } else {
+                yType <- rep("numeric", nc)
+                names(yType) <- colnames(y)
+            }
+            ## x and y should have same column types
+            if(!isTRUE(all.equal(xType, yType)))
+                stop("Variable types in 'x' and 'y' differ.
+Did you forget  to 'join' 'x' and 'y' before calling 'distance'?")
+
+            ## Record the variable types
+            xType[tI <- xType %in% c("numeric", "integer")] <- "Q"
+            ## save which are ordinal for rank conversion below - TODO
+            xType[(ordinal <- xType == "ordered")] <- "O"
+            xType[xType == "factor"] <- "N"
+            xType[xType == "logical"] <- "A"
+            typeCodes <- c("A", "S", "N", "O", "Q", "I", "T")
+            xType <- match(xType, typeCodes)
+            if (any(ina <- is.na(xType)))
+                stop("invalid type ", xType[ina], " for column numbers ",
+                     paste(which(ina), collapse = ", "))
+
+            ## Convert to matrices from now on
+            ## also takes care of ordinal == metric as all factors
+            ## are converted to internal numeric codes
+            x <- data.matrix(x)
+            y <- data.matrix(y)
+
+            ## Compute range Rj
+            XY <- rbind(x, y)
+            if(is.null(R)) {
+                maxi <- apply(XY, 2, max, na.rm = TRUE)
+                mini <- apply(XY, 2, min, na.rm = TRUE)
+                R <- maxi - mini
+            } else {
+                if(length(R) != nc)
+                    stop("'R' must be of length 'ncol(x)'")
+            }
+
+            ## call the C code
+            d <- .C("xy_mixed",
+                    x = as.double(x),
+                    y = as.double(y),
+                    nr1 = as.integer(nrx),
+                    nr2 = as.integer(nry),
+                    nc = as.integer(nc),
+                    d = as.double(d),
+                    vtype = as.integer(xType),
+                    weights = as.double(weights),
+                    R = as.double(R),
+                    NAOK = as.integer(TRUE),
+                    PACKAGE = "analogue")$d
         }
         if(DCOEF %in% c(12L, 13L)) { ## "gower", "alt.gower"
             if(is.null(R)) {

Modified: pkg/man/distance.Rd
===================================================================
--- pkg/man/distance.Rd	2013-10-06 20:40:32 UTC (rev 370)
+++ pkg/man/distance.Rd	2013-10-06 22:48:37 UTC (rev 371)
@@ -238,22 +238,22 @@
 
 ## calculate Gower's general coefficient for mixed data
 ## first, make a couple of variables factors
+
 fossil[,4] <- factor(sample(rep(1:4, length = 10), 10))
 train[,4] <- factor(sample(rep(1:4, length = 20), 20))
 ## now fit the mixed coefficient
-#test3 <- distance(train, fossil, "mixed")
+test3 <- distance(train, fossil, "mixed")
 
 ## Example from page 260 of Legendre & Legendre (1998)
 x1 <- t(c(2,2,NA,2,2,4,2,6))
 x2 <- t(c(1,3,3,1,2,2,2,5))
 Rj <- c(1,4,2,4,1,3,2,5) # supplied ranges
 
-#distance(x1, x2, method = "mixed", R = Rj)
+1 - distance(x1, x2, method = "mixed", R = Rj)
 
-## note this gives 1 - 0.66 (not 0.66 as the answer in
-## Legendre & Legendre) as this is expressed as a
-## distance whereas Legendre & Legendre describe the
-## coefficient as similarity coefficient
+## note this gives ~0.66 as Legendre & Legendre describe the
+## coefficient as a similarity coefficient. Hence here we do
+## 1 - Dij here to get the same answer. 
 }
 \keyword{multivariate}% at least one, from doc/KEYWORDS
 \keyword{methods}

Modified: pkg/src/distxy.c
===================================================================
--- pkg/src/distxy.c	2013-10-06 20:40:32 UTC (rev 370)
+++ pkg/src/distxy.c	2013-10-06 22:48:37 UTC (rev 371)
@@ -587,84 +587,89 @@
 		int nc, int i1, int i2, int *vtype,
 		double *weights, double *R, double wsum)
 {
-    double dist, dev;
-    int count, j;
+  double dist, dev;
+  int count, j;
+  
+  count = 0;
+  dist = 0.0;
+  wsum = 0.0;
+  //curweights = weights; /* current weights */
     
-    count = 0;
-    dist = 0.0;
-    wsum = 0.0;
-    //curweights = weights; /* current weights */
-    
-    for (j=0; j<nc; j++) {
-	if (R_FINITE(x[i1]) && R_FINITE(y[i2])) {
-	    if(vtype[j] == 1) { // Symmetric binary
-		dev = (x[i1] == y[i2]) ? 1 : 0;
-		dist += dev * weights[j];
-	    }
-	    if(vtype[j] == 2) { // Asymmetric binary
-		if((x[i1] != 0) || (y[i2] != 0)) {
-		    // both x and y not zero for this variables
-		    dev = (x[i1] == y[i2]) ? 1 : 0;
-		    dist += dev * weights[j];
-		} else {
-		    /* set jth current weight to zero and do not
-		       increment dist as ignoring double zero
-		       We actually subtract the weight as it gets added
-		       later on.
-		    */
-		    wsum -= weights[j];
-		}
-	    }
-	    if(vtype[j] == 3) { // Nominal
-		dev = (x[i1] == y[i2]) ? 1 : 0;
-		dist += dev * weights[j];
-	    }
-	    if(vtype[j] == 4) { // Ordinal
-		dev = (x[i1] == y[i2]) ? 1 : 0;
-		dist += dev * weights[j];
-		break;
-		/* ordinal data current not handled 
-		 * so don't call this yet
-		 */
-		/* switch(ord) {
-		   case 1: { // classic gower as per nominal
-		   dev = (x[i1] == y[i2]) ? 1 : 0;
-		   dist += dev * weights[j];
-		   break;
-		   }
-		   case 2: { // podanis rank version
-		   if(x[i1] == y[i2]) {
-		   dev = 1;
-		   } else {
-		   dev = (fabs(x[i1] - y[i2])) / 
-		   (R[j] - (tmax - 1)/2 - (tmin - 1)/2);
-		   }
-		   break;
-		   }
-		   case 3: { // podanis metric version treat as Quantitative??
-		   dev = 1 - (fabs(x[i1] - y[i2]) / R[j]);
-		   dist += dev * weights[j];
-		   break;
-		   }
-		   default: {
-		   dist += 0;
-		   break;
-		   }
-		   }*/
-	    }
-	    if(vtype[j] == 5) { // Quantitative
-		dev = 1 - (fabs(x[i1] - y[i2]) / R[j]);
-		dist += dev * weights[j];
-	    }
-	    count++;
-	    // only summing weights for non-NA comparisons
-	    wsum += weights[j];
+  for (j=0; j<nc; j++) {
+    if (R_FINITE(x[i1]) && R_FINITE(y[i2])) {
+      // Symmetric binary
+      if(vtype[j] == 1) {
+	dev = (x[i1] == y[i2]) ? 1 : 0;
+	dist += dev * weights[j];
+      }
+      // Asymmetric binary
+      if(vtype[j] == 2) {
+	if((x[i1] != 0) || (y[i2] != 0)) {
+	  // both x and y not zero for this variables
+	  dev = (x[i1] == y[i2]) ? 1 : 0;
+	  dist += dev * weights[j];
+	} else {
+	  /* set jth current weight to zero and do not
+	     increment dist as ignoring double zero
+	     We actually subtract the weight as it gets added
+	     later on.
+	  */
+	  wsum -= weights[j];
 	}
-	i1 += nr1;
-	i2 += nr2;
+      }
+      // Nominal
+      if(vtype[j] == 3) {
+	dev = (x[i1] == y[i2]) ? 1 : 0;
+	dist += dev * weights[j];
+      }
+      // Ordinal
+      if(vtype[j] == 4) {
+	dev = (x[i1] == y[i2]) ? 1 : 0;
+	dist += dev * weights[j];
+	break;
+	/* ordinal data current not handled 
+	 * so don't call this yet
+	 */
+	/* switch(ord) {
+	   case 1: { // classic gower as per nominal
+	   dev = (x[i1] == y[i2]) ? 1 : 0;
+	   dist += dev * weights[j];
+	   break;
+	   }
+	   case 2: { // podanis rank version
+	   if(x[i1] == y[i2]) {
+	   dev = 1;
+	   } else {
+	   dev = (fabs(x[i1] - y[i2])) / 
+	   (R[j] - (tmax - 1)/2 - (tmin - 1)/2);
+	   }
+	   break;
+	   }
+	   case 3: { // podanis metric version treat as Quantitative??
+	   dev = 1 - (fabs(x[i1] - y[i2]) / R[j]);
+	   dist += dev * weights[j];
+	   break;
+	   }
+	   default: {
+	   dist += 0;
+	   break;
+	   }
+	   }*/
+      }
+      // Quantitative
+      if(vtype[j] == 5) {
+	dev = 1 - (fabs(x[i1] - y[i2]) / R[j]);
+	dist += dev * weights[j];
+      }
+      count++;
+      // only summing weights for non-NA comparisons
+      wsum += weights[j];
     }
-    if (count == 0) return NA_REAL;
-    return 1 - (dist / wsum);
+    i1 += nr1;
+    i2 += nr2;
+  }
+  if (count == 0) return NA_REAL;
+  return 1 - (dist / wsum);
 }
 
 double xy_calcTI(double *x, double *y, int nr1, int nr2, int nc, int i1, int i2)



More information about the Analogue-commits mailing list