[Analogue-commits] r247 - pkg/src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Nov 4 17:27:17 CET 2011
Author: gsimpson
Date: 2011-11-04 17:27:17 +0100 (Fri, 04 Nov 2011)
New Revision: 247
Modified:
pkg/src/distx.c
pkg/src/distxy.c
Log:
separate out the code for MIXED dissimilarity into one that replicates current distance and one that i can use to add extensions for handling ordinal data better.
Modified: pkg/src/distx.c
===================================================================
--- pkg/src/distx.c 2011-11-04 16:26:15 UTC (rev 246)
+++ pkg/src/distx.c 2011-11-04 16:27:17 UTC (rev 247)
@@ -495,15 +495,29 @@
count = 0;
dist = 0.0;
+ wsum = 0.0;
+
for (j=0; j<nc; j++) {
if (R_FINITE(x[i1]) && R_FINITE(x[i2])) {
if(vtype[j] == 1) {
dev = (x[i1] == x[i2]) ? 1 : 0;
dist += dev * weights[j];
}
- if(vtype[j] == 2) {
- dev = (x[i1] == x[i2]) ? 1 : 0;
- dist += dev * weights[j];
+ if(vtype[j] == 2) { // Asymmetric binary
+ /*dev = (x[i1] == x[i2]) ? 1 : 0;
+ dist += dev * weights[j]; */
+ if((x[i1] != 0) || (x[i2] != 0)) {
+ // both x1 and x2 not zero for this variables
+ dev = (x[i1] == x[i2]) ? 1 : 0;
+ dist += dev * weights[j];
+ } else {
+ /* set jth current weight to zero and do not
+ increment dist as ignoring double zero
+ We actually subtract the weight as it gets added
+ later on.
+ */
+ wsum -= weights[j];
+ }
}
if(vtype[j] == 3) {
dev = (x[i1] == x[i2]) ? 1 : 0;
@@ -519,6 +533,8 @@
dist += dev * weights[j];
}
count++;
+ // only summing weights for non-NA comparisons
+ wsum += weights[j];
}
i1 += nr;
i2 += nr;
@@ -545,21 +561,21 @@
void xx_mixed(double *x, int *nr, int *nc, double *d,
int *vtype, double *weights, double *R)
{
- int i, j, k, ij;
- double wsum;
-
- wsum = 0.0;
-
- ij = 0;
-
- for(k=0; k <*nc; k++) {
- wsum += weights[k];
- }
-
- for(j=0; j < *nr; j++) {
- for(i=0; i < *nr; i++) {
- d[ij++] = xx_MIXED(x, *nr, *nc, i, j, vtype,
- weights, R, wsum);
+ int i, j, k, ij;
+ double wsum;
+
+ wsum = 0.0;
+
+ ij = 0;
+
+ for(k=0; k <*nc; k++) {
+ wsum += weights[k];
}
- }
+
+ for(j=0; j < *nr; j++) {
+ for(i=0; i < *nr; i++) {
+ d[ij++] = xx_MIXED(x, *nr, *nc, i, j, vtype,
+ weights, R, wsum);
+ }
+ }
}
Modified: pkg/src/distxy.c
===================================================================
--- pkg/src/distxy.c 2011-11-04 16:26:15 UTC (rev 246)
+++ pkg/src/distxy.c 2011-11-04 16:27:17 UTC (rev 247)
@@ -481,9 +481,9 @@
* R : variable range (max - min)
*
*/
-double xy_MIXED(double *x, double *y, int nr1, int nr2,
- int nc, int i1, int i2, int *vtype,
- double *weights, double *R, int tmin, int tmax, int ord)
+double xy_MIXED_new(double *x, double *y, int nr1, int nr2,
+ int nc, int i1, int i2, int *vtype,
+ double *weights, double *R, int tmin, int tmax, int ord)
{
double dist, dev, wsum; //, *curweights;
int count, j;
@@ -561,6 +561,102 @@
if (count == 0) return NA_REAL;
return 1 - (dist / wsum);
}
+/*
+ * Gower's coefficient for mixed data
+ *
+ * Should be called separately from the underlying R code,
+ * not via xy_distance.
+ *
+ * vtype : variable type
+ * 1 == Symmetric Binary
+ * 2 == Asymmetric Binary
+ * 3 == Nominal (class/factor)
+ * 4 == Ordinal (ordered factor)
+ * 5 == Quantitative
+ * weights: variable weights
+ * R : variable range (max - min)
+ *
+ */
+double xy_MIXED(double *x, double *y, int nr1, int nr2,
+ int nc, int i1, int i2, int *vtype,
+ double *weights, double *R, double wsum)
+{
+ double dist, dev;
+ int count, j;
+
+ count = 0;
+ dist = 0.0;
+ wsum = 0.0;
+ //curweights = weights; /* current weights */
+
+ for (j=0; j<nc; j++) {
+ if (R_FINITE(x[i1]) && R_FINITE(y[i2])) {
+ if(vtype[j] == 1) { // Symmetric binary
+ dev = (x[i1] == y[i2]) ? 1 : 0;
+ dist += dev * weights[j];
+ }
+ if(vtype[j] == 2) { // Asymmetric binary
+ if((x[i1] != 0) || (y[i2] != 0)) {
+ // both x and y not zero for this variables
+ dev = (x[i1] == y[i2]) ? 1 : 0;
+ dist += dev * weights[j];
+ } else {
+ /* set jth current weight to zero and do not
+ increment dist as ignoring double zero
+ We actually subtract the weight as it gets added
+ later on.
+ */
+ wsum -= weights[j];
+ }
+ }
+ if(vtype[j] == 3) { // Nominal
+ dev = (x[i1] == y[i2]) ? 1 : 0;
+ dist += dev * weights[j];
+ }
+ if(vtype[j] == 4) { // Ordinal
+ /* ordinal data current not handled
+ * so don't call this yet
+ */
+ /* switch(ord) {
+ case 1: { // classic gower as per nominal
+ dev = (x[i1] == y[i2]) ? 1 : 0;
+ dist += dev * weights[j];
+ break;
+ }
+ case 2: { // podanis rank version
+ if(x[i1] == y[i2]) {
+ dev = 1;
+ } else {
+ dev = (fabs(x[i1] - y[i2])) /
+ (R[j] - (tmax - 1)/2 - (tmin - 1)/2);
+ }
+ break;
+ }
+ case 3: { // podanis metric version treat as Quantitative??
+ dev = 1 - (fabs(x[i1] - y[i2]) / R[j]);
+ dist += dev * weights[j];
+ break;
+ }
+ default: {
+ dist += 0;
+ break;
+ }
+ }*/
+ }
+ if(vtype[j] == 5) { // Quantitative
+ dev = 1 - (fabs(x[i1] - y[i2]) / R[j]);
+ dist += dev * weights[j];
+ }
+ count++;
+ // only summing weights for non-NA comparisons
+ wsum += weights[j];
+ }
+ i1 += nr1;
+ i2 += nr2;
+ }
+ if (count == 0) return NA_REAL;
+ return 1 - (dist / wsum);
+}
double xy_calcTI(double *x, double *y, int nr1, int nr2, int nc, int i1, int i2)
{
@@ -577,10 +673,10 @@
return ti;
}
-void xy_mixed(double *x, double *y, int *nr1, int *nr2,
- int *nc, double *d, int *vtype, double *weights,
- double *R, int tmin, int tmax, int *podani,
- double *tmatx, double *tmaty)
+void xy_mixed_new(double *x, double *y, int *nr1, int *nr2,
+ int *nc, double *d, int *vtype, double *weights,
+ double *R, int tmin, int tmax, int *podani,
+ double *tmatx, double *tmaty)
{
int i, j, ij, ord = *podani;
//double tmat = *d;
@@ -598,8 +694,31 @@
for(j=0; j < *nr1; j++) {
for(i=0; i < *nr2; i++) {
- d[ij++] = xy_MIXED(x, y, *nr1, *nr2, *nc, j, i,
- vtype, weights, R, tmin, tmax, ord);
+ d[ij++] = xy_MIXED_new(x, y, *nr1, *nr2, *nc, j, i,
+ vtype, weights, R, tmin, tmax, ord);
}
}
}
+
+void xy_mixed(double *x, double *y, int *nr1, int *nr2,
+ int *nc, double *d, int *vtype, double *weights,
+ double *R)
+{
+ int i, j, k, ij;
+ double wsum;
+
+ wsum = 0.0;
+
+ ij = 0;
+
+ for(k=0; k <*nc; k++) {
+ wsum += weights[k];
+ }
+
+ for(j=0; j < *nr1; j++) {
+ for(i=0; i < *nr2; i++) {
+ d[ij++] = xy_MIXED(x, y, *nr1, *nr2, *nc, j, i,
+ vtype, weights, R, wsum);
+ }
+ }
+}
More information about the Analogue-commits
mailing list