[Analogue-commits] r364 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Oct 1 07:24:36 CEST 2013


Author: gsimpson
Date: 2013-10-01 07:24:36 +0200 (Tue, 01 Oct 2013)
New Revision: 364

Modified:
   pkg/R/distance.R
Log:
rearrange some of the code, simplify

Modified: pkg/R/distance.R
===================================================================
--- pkg/R/distance.R	2013-10-01 03:17:41 UTC (rev 363)
+++ pkg/R/distance.R	2013-10-01 05:24:36 UTC (rev 364)
@@ -140,60 +140,50 @@
                )
       }
     if(missing(method))
-      method <- "euclidean"
+        method <- "euclidean"
     method <- match.arg(method)
     y.miss <- FALSE
     if(missing(y)) {
-      y.miss <- TRUE
-      y <- x
+        y.miss <- TRUE
+        y <- x
     }
+    n.vars <- ncol(x)
     if(method == "mixed") {
-      ## sanity check: are same columns in x and y factors
-      facs.x <- sapply(as.data.frame(x), is.factor, USE.NAMES = FALSE)
-      facs.y <- sapply(as.data.frame(y), is.factor, USE.NAMES = FALSE)
-      if(sum(facs.x - facs.y) > 0)
-        stop("Different columns (species) are coded as factors in 'x' and 'y'")
-      ## sanity check: levels of factors also need to be the same
-      for(i in seq_along(facs.x)[facs.x]){
-        if(!identical(levels(x[,i]), levels(y[,i])))
-          stop("The levels of one or more factors in 'x' and 'y'\ndo not match.\nConsider using 'join(x, y)'. See '?join'")
-      }
+        ## are same columns in x and y factors
+        facs.x <- sapply(as.data.frame(x), is.factor, USE.NAMES = FALSE)
+        facs.y <- sapply(as.data.frame(y), is.factor, USE.NAMES = FALSE)
+        if(sum(facs.x - facs.y) > 0) {
+            stop("Different columns (species) are coded as factors in 'x' and 'y'")
+            ## levels of factors also need to be the same
+            for(i in seq_along(facs.x)[facs.x]){
+                if(!identical(levels(x[,i]), levels(y[,i])))
+                    stop("The levels of one or more factors in 'x' and 'y'\ndo not match.\nConsider using 'join(x, y)'. See '?join'")
+            }
+        }
+    } else {
+        ## we do this even if no y as it is harmless
+        facs.x <- facs.y <- rep(FALSE, n.vars)
     }
     x.names <- rownames(x)
     x <- data.matrix(x)
-    n.vars <- ncol(x)
+    y.names <- rownames(y)
+    y <- data.matrix(y)
     ## Do we want to remove NAs? Yes if gower, alt.gower and mixed,
     ## but fail for others
     NA.RM <- FALSE
     if(method %in% c("gower", "alt.gower", "mixed"))
-      NA.RM <- TRUE
-    #y.miss <- FALSE
-    if(missing(y)) {
-      #colsumx <- colSums(x, na.rm = NA.RM)
-      #if(any(colsumx <= 0)) {
-      #  x <- x[, colsumx > 0, drop = FALSE]
-      #  warning("some species contain no data and were removed from data matrix x\n")
-      #}
-      y.miss <- TRUE
-      y <- x
-      y.names <- x.names
-    } else {
-      #if(method == "mixed") {
-        ## sanity check: are same columns in x and y factors
-        #facs.y <- sapply(as.data.frame(y), is.factor)
-        #if(sum(facs.x - facs.y) > 0)
-        #  stop("Different columns (species) are coded as factors in 'x' and 'y'")
-        ## sanity check: levels of factors also need to be the same
-        #for(i in seq_along(facs.x)[facs.x]){
-        #  if(!identical(levels(x[,i]), levels(y[,i])))
-        #    stop("The levels of one or more factors in 'x' and 'y' do not match.\nConsider using 'join(x, y)'. See '?join'")
-        #}
-      #}
-      y.names <- rownames(y)
-      y <- data.matrix(y)
+        NA.RM <- TRUE
+    ## check if any empty species, drop them
+    colsumx <- colSums(x, na.rm = NA.RM)
+    colsumy <- colSums(y, na.rm = NA.RM)
+    ## NO - this causes problems if you merge data
+    if (any(DROP <- (colsumx <= 0 & colsumy <= 0) & !facs.x)) {
+        ##x <- x[, (colsumx > 0 | colsumy > 0) | facs.x, drop = FALSE]
+        ##y <- y[, (colsumx > 0 | colsumy > 0) | facs.x, drop = FALSE]
+        ##warning("Some species contain no data and were removed from data matrices.\n")
     }
     if(method == "chi.distance")
-      colsum <- colSums(join(as.data.frame(x),as.data.frame(y), split = FALSE))
+        colsum <- colSums(join(as.data.frame(x),as.data.frame(y), split = FALSE))
     if(method == "mixed") {
       ## sort out the weights used, eg the Kroneker's Deltas
       ## weights must be NULL or numeric vector of length == ncol(x)



More information about the Analogue-commits mailing list