[Vegan-commits] r1051 - in pkg/vegan: . inst src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sat Oct 24 13:07:43 CEST 2009


Author: jarioksa
Date: 2009-10-24 13:07:37 +0200 (Sat, 24 Oct 2009)
New Revision: 1051

Modified:
   pkg/vegan/DESCRIPTION
   pkg/vegan/inst/ChangeLog
   pkg/vegan/src/vegdist.c
Log:
C code for Gower index skipping double zeros (no R level call for this yet... or ever)

Modified: pkg/vegan/DESCRIPTION
===================================================================
--- pkg/vegan/DESCRIPTION	2009-10-23 14:03:44 UTC (rev 1050)
+++ pkg/vegan/DESCRIPTION	2009-10-24 11:07:37 UTC (rev 1051)
@@ -1,7 +1,7 @@
 Package: vegan
 Title: Community Ecology Package
-Version: 1.16-31
-Date: October 3, 2009
+Version: 1.16-32
+Date: October 24, 2009
 Author: Jari Oksanen, Roeland Kindt, Pierre Legendre, Bob O'Hara, Gavin L. Simpson, 
    Peter Solymos, M. Henry H. Stevens, Helene Wagner  
 Maintainer: Jari Oksanen <jari.oksanen at oulu.fi>

Modified: pkg/vegan/inst/ChangeLog
===================================================================
--- pkg/vegan/inst/ChangeLog	2009-10-23 14:03:44 UTC (rev 1050)
+++ pkg/vegan/inst/ChangeLog	2009-10-24 11:07:37 UTC (rev 1051)
@@ -2,8 +2,16 @@
 
 VEGAN DEVEL VERSIONS at http://r-forge.r-project.org/
 
-Version 1.16-31 (opened October 3, 2009)
+Version 1.16-32 (opened October 24, 2009)
 
+	* vegdist: C code for an alternative version of Gower Index that
+	skips double zeros. No R level interface yet. If this function is
+	to be used from vegdist.R, it should be added as item 14 in
+	'method' list, and the R level preprocessing should be done
+	similarly as for "gower" (method choice 6).
+	
+Version 1.16-31 (closed October 24, 2009)
+
 	* ordisurf: removes observation with NA in scores (as result of
 	na.action) or in predicted variable.
 

Modified: pkg/vegan/src/vegdist.c
===================================================================
--- pkg/vegan/src/vegdist.c	2009-10-23 14:03:44 UTC (rev 1050)
+++ pkg/vegan/src/vegdist.c	2009-10-24 11:07:37 UTC (rev 1051)
@@ -41,6 +41,7 @@
 #define RAUP 11
 #define MILLAR 12
 #define CHAO 13
+#define GOWERDZ 14
 #define MATCHING 50
 #define NOSHARED 99
 
@@ -69,10 +70,8 @@
 
 /* Gower is like Manhattan, but data were standardized to range 0..1
  * for rows before call and dist is divided by the number of non-zero
- * pairs.  There is an alternative implementation in cluster package.
- * That can handle mixed data with factors, but won't handle mere
- * numeric variables.  Some extra manipulations are needed in the
- * calling R function.
+ * pairs. There is an alternative implementation in cluster package.
+ * Some extra manipulations are needed in the calling R function.
  */
 
 double veg_gower(double *x, int nr, int nc, int i1, int i2)
@@ -95,6 +94,35 @@
      return dist;
 }
 
+/* Identical to veg_gower except that double zeros are skipped: a
+ * column contributes only when both values are finite and at least
+ * one of them is positive.  Gower 1971 proposed skipping double zeros
+ * for presence/absence data (but not for quantitative data), and many
+ * people think this should always be done in Gower distance. With
+ * presence/absence data this gives Jaccard of binary data.  x is read
+ * starting at offsets i1 and i2 in steps of nr, nc times; the result
+ * is NA_REAL when no column contributes.
+ */
+
+double veg_gowerDZ(double *x, int nr, int nc, int i1, int i2)
+{
+     double dist = 0.0;
+     int count = 0, j;
+     for (j=0; j<nc; j++) {
+	  if (R_FINITE(x[i1]) && R_FINITE(x[i2])) {
+	      if (x[i1] > 0 || x[i2] > 0) {
+		  dist += fabs( x[i1] - x[i2] );
+		  count++;
+	      }
+	  }
+	  i1 += nr;
+	  i2 += nr;
+     }
+     /* Return NA directly instead of dividing NA by a zero count. */
+     if (count == 0) return NA_REAL;
+     return dist / (double) count;
+}
+
 /* Euclidean distance: duplicates base R */
 
 double veg_euclidean(double *x, int nr, int nc, int i1, int i2)
@@ -594,9 +622,12 @@
     case CHAO:
 	distfun = veg_chao;
 	break;
+    case GOWERDZ:
+	distfun = veg_gowerDZ;
+	break;
     case MATCHING:
-	 distfun = veg_matching;
-	 break;
+	distfun = veg_matching;
+	break;
     case NOSHARED:
 	distfun = veg_noshared;
 	break;	 



More information about the Vegan-commits mailing list