[Analogue-commits] r247 - pkg/src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Nov 4 17:27:17 CET 2011


Author: gsimpson
Date: 2011-11-04 17:27:17 +0100 (Fri, 04 Nov 2011)
New Revision: 247

Modified:
   pkg/src/distx.c
   pkg/src/distxy.c
Log:
separate out the code for MIXED dissimilarity into one that replicates current distance and one that i can use to add extensions for handling ordinal data better.

Modified: pkg/src/distx.c
===================================================================
--- pkg/src/distx.c	2011-11-04 16:26:15 UTC (rev 246)
+++ pkg/src/distx.c	2011-11-04 16:27:17 UTC (rev 247)
@@ -495,15 +495,29 @@
     
     count = 0;
     dist = 0.0;
+    wsum = 0.0;
+
     for (j=0; j<nc; j++) {
 	if (R_FINITE(x[i1]) && R_FINITE(x[i2])) {
 	    if(vtype[j] == 1) {
 		dev = (x[i1] == x[i2]) ? 1 : 0;
 		dist += dev * weights[j];
 	    }
-	    if(vtype[j] == 2) {
-		dev = (x[i1] == x[i2]) ? 1 : 0;
-		dist += dev * weights[j];
+	    if(vtype[j] == 2) { // Asymmetric binary
+		    /*dev = (x[i1] == x[i2]) ? 1 : 0;
+		      dist += dev * weights[j]; */
+		    if((x[i1] != 0) || (x[i2] != 0)) {
+			    // both x1 and x2 not zero for this variables
+			    dev = (x[i1] == x[i2]) ? 1 : 0;
+			    dist += dev * weights[j];
+		    } else {
+			    /* set jth current weight to zero and do not
+			       increment dist as ignoring double zero
+			       We actually subtract the weight as it gets added
+			       later on.
+			    */
+			    wsum -= weights[j];
+		    }
 	    }
 	    if(vtype[j] == 3) {
 		dev = (x[i1] == x[i2]) ? 1 : 0;
@@ -519,6 +533,8 @@
 		dist += dev * weights[j];
 	    }
 	    count++;
+	    // only summing weights for non-NA comparisons
+	    wsum += weights[j];
 	}
 	i1 += nr;
 	i2 += nr;
@@ -545,21 +561,21 @@
 void xx_mixed(double *x, int *nr, int *nc, double *d, 
 	      int *vtype, double *weights, double *R)
 {
-    int i, j, k, ij;
-    double wsum;
-
-    wsum = 0.0;
-    
-    ij = 0;
-
-    for(k=0; k <*nc; k++) {
-	wsum += weights[k];
-    }
-
-    for(j=0; j < *nr; j++) {
-	for(i=0; i < *nr; i++) {
-	    d[ij++] = xx_MIXED(x, *nr, *nc, i, j, vtype,
-			       weights, R, wsum);
+	int i, j, k, ij;
+	double wsum;
+	
+	wsum = 0.0;
+	
+	ij = 0;
+	
+	for(k=0; k <*nc; k++) {
+		wsum += weights[k];
 	}
-    }
+	
+	for(j=0; j < *nr; j++) {
+		for(i=0; i < *nr; i++) {
+			d[ij++] = xx_MIXED(x, *nr, *nc, i, j, vtype,
+					   weights, R, wsum);
+		}
+	}
 }

Modified: pkg/src/distxy.c
===================================================================
--- pkg/src/distxy.c	2011-11-04 16:26:15 UTC (rev 246)
+++ pkg/src/distxy.c	2011-11-04 16:27:17 UTC (rev 247)
@@ -481,9 +481,9 @@
  * R      : variable range (max - min)
  *
  */
-double xy_MIXED(double *x, double *y, int nr1, int nr2, 
-		int nc, int i1, int i2, int *vtype,
-		double *weights, double *R, int tmin, int tmax, int ord)
+double xy_MIXED_new(double *x, double *y, int nr1, int nr2, 
+		    int nc, int i1, int i2, int *vtype,
+		    double *weights, double *R, int tmin, int tmax, int ord)
 {
     double dist, dev, wsum; //, *curweights;
     int count, j;
@@ -561,6 +561,102 @@
     if (count == 0) return NA_REAL;
     return 1 - (dist / wsum);
 }
+/*
+ * Gower's coefficient for mixed data
+ *
+ * Should be called separately from the underlying R code,
+ * not via xy_distance.
+ *
+ * vtype  : variable type
+ *          1 == Symmetric Binary
+ *          2 == Asymmetric Binary
+ *          3 == Nominal (class/factor)
+ *          4 == Ordinal (ordered factor)
+ *          5 == Quantitative
+ * weights: variable weights
+ * R      : variable range (max - min)
+ *
+ */
+double xy_MIXED(double *x, double *y, int nr1, int nr2, 
+		int nc, int i1, int i2, int *vtype,
+		double *weights, double *R, double wsum)
+{
+	double dist, dev;
+	int count, j;
+	
+	count = 0;
+	dist = 0.0;
+	wsum = 0.0;
+	//curweights = weights; /* current weights */
+	
+	for (j=0; j<nc; j++) {
+		if (R_FINITE(x[i1]) && R_FINITE(y[i2])) {
+			if(vtype[j] == 1) { // Symmetric binary
+				dev = (x[i1] == y[i2]) ? 1 : 0;
+				dist += dev * weights[j];
+			}
+			if(vtype[j] == 2) { // Asymmetric binary
+				if((x[i1] != 0) || (y[i2] != 0)) {
+					// both x and y not zero for this variables
+					dev = (x[i1] == y[i2]) ? 1 : 0;
+					dist += dev * weights[j];
+				} else {
+					/* set jth current weight to zero and do not
+					   increment dist as ignoring double zero
+					   We actually subtract the weight as it gets added
+					   later on.
+					*/
+					wsum -= weights[j];
+				}
+			}
+			if(vtype[j] == 3) { // Nominal
+				dev = (x[i1] == y[i2]) ? 1 : 0;
+				dist += dev * weights[j];
+			}
+			if(vtype[j] == 4) { // Ordinal
+				/* ordinal data current not handled 
+				 * so don't call this yet
+				 */
+				/* switch(ord) {
+				case 1: { // classic gower as per nominal
+					dev = (x[i1] == y[i2]) ? 1 : 0;
+					dist += dev * weights[j];
+					break;
+				}
+				case 2: { // podanis rank version
+					if(x[i1] == y[i2]) {
+						dev = 1;
+					} else {
+						dev = (fabs(x[i1] - y[i2])) / 
+							(R[j] - (tmax - 1)/2 - (tmin - 1)/2);
+					}
+					break;
+				}
+				case 3: { // podanis metric version treat as Quantitative??
+					dev = 1 - (fabs(x[i1] - y[i2]) / R[j]);
+					dist += dev * weights[j];
+					break;
+				}
+				default: {
+					dist += 0;
+					break;
+				}
+				}*/
+			}
+			if(vtype[j] == 5) { // Quantitative
+				dev = 1 - (fabs(x[i1] - y[i2]) / R[j]);
+				dist += dev * weights[j];
+			}
+			count++;
+			// only summing weights for non-NA comparisons
+			wsum += weights[j];
+		}
+		i1 += nr1;
+		i2 += nr2;
+	}
+	if (count == 0) return NA_REAL;
+	return 1 - (dist / wsum);
+}
 
 double xy_calcTI(double *x, double *y, int nr1, int nr2, int nc, int i1, int i2)
 {
@@ -577,10 +673,10 @@
     return ti;
 }
 
-void xy_mixed(double *x, double *y, int *nr1, int *nr2,
-	      int *nc, double *d, int *vtype, double *weights, 
-	      double *R, int tmin, int tmax, int *podani, 
-	      double *tmatx, double *tmaty)
+void xy_mixed_new(double *x, double *y, int *nr1, int *nr2,
+		  int *nc, double *d, int *vtype, double *weights, 
+		  double *R, int tmin, int tmax, int *podani, 
+		  double *tmatx, double *tmaty)
 {
     int i, j, ij, ord = *podani;
     //double tmat = *d;
@@ -598,8 +694,31 @@
 
     for(j=0; j < *nr1; j++) {
 	for(i=0; i < *nr2; i++) {
-	    d[ij++] = xy_MIXED(x, y, *nr1, *nr2, *nc, j, i,
-			       vtype, weights, R, tmin, tmax, ord);
+	    d[ij++] = xy_MIXED_new(x, y, *nr1, *nr2, *nc, j, i,
+				   vtype, weights, R, tmin, tmax, ord);
 	}
     }
 }
+
+void xy_mixed(double *x, double *y, int *nr1, int *nr2,
+	      int *nc, double *d, int *vtype, double *weights, 
+	      double *R)
+{
+	int i, j, k, ij;
+	double wsum;
+	
+	wsum = 0.0;
+	
+	ij = 0;
+	
+	for(k=0; k <*nc; k++) {
+		wsum += weights[k];
+	}
+	
+	for(j=0; j < *nr1; j++) {
+		for(i=0; i < *nr2; i++) {
+			d[ij++] = xy_MIXED(x, y, *nr1, *nr2, *nc, j, i,
+					   vtype, weights, R, wsum);
+		}
+	}
+}



More information about the Analogue-commits mailing list