[Vegan-commits] r998 - in pkg/vegan: R inst

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Sep 8 17:45:28 CEST 2009


Author: jarioksa
Date: 2009-09-08 17:45:28 +0200 (Tue, 08 Sep 2009)
New Revision: 998

Modified:
   pkg/vegan/R/anova.ccabyterm.R
   pkg/vegan/R/cca.formula.R
   pkg/vegan/R/ordiNAexclude.R
   pkg/vegan/R/rda.formula.R
   pkg/vegan/inst/ChangeLog
Log:
NA handling in sequential models: step() and anova(..., by = 'term')

Modified: pkg/vegan/R/anova.ccabyterm.R
===================================================================
--- pkg/vegan/R/anova.ccabyterm.R	2009-09-08 07:26:15 UTC (rev 997)
+++ pkg/vegan/R/anova.ccabyterm.R	2009-09-08 15:45:28 UTC (rev 998)
@@ -2,9 +2,9 @@
     function (object, step = 100, ...) 
 {
     ## Data set size may change during iteration if there are missing
-    ## values: bail out.
-    if (!is.null(object$na.action))
-        stop("anova by = 'term' is unavailable with missing data")
+    ## values: use length(object$residual) to check this like step,
+    ## drop1.default, add1.default do.
+    n0 <- length(object$residuals)
     trm <- terms(object)
     call <- paste("Model:", c(object$call))
     trmlab <- attr(trm, "term.labels")
@@ -31,6 +31,9 @@
         assign(".Random.seed", sim$Random.seed, envir = .GlobalEnv)
         fla <- as.formula(paste(" . ~ . -", trmlab[.ITRM]))
         object <- update(object, fla)
+        ## Change in data set due to missing values?
+        if (length(object$residuals) != n0)
+            stop("number of rows has changed: remove missing values?")
         if (is.null(object$CCA)) 
             break
         sim <- permutest.cca(object, permutations = step, ...)

Modified: pkg/vegan/R/cca.formula.R
===================================================================
--- pkg/vegan/R/cca.formula.R	2009-09-08 07:26:15 UTC (rev 997)
+++ pkg/vegan/R/cca.formula.R	2009-09-08 15:45:28 UTC (rev 998)
@@ -24,7 +24,7 @@
     sol$call <- match.call()
     sol$call[[1]] <- as.name("cca")
     sol$call$formula <- formula(d$terms, width.cutoff = 500)
-    if (!is.null(sol$na.action) && inherits(sol$na.action, "exclude"))
+    if (!is.null(sol$na.action))
         sol <- ordiNAexclude(sol, d$excluded)
     sol
 }

Modified: pkg/vegan/R/ordiNAexclude.R
===================================================================
--- pkg/vegan/R/ordiNAexclude.R	2009-09-08 07:26:15 UTC (rev 997)
+++ pkg/vegan/R/ordiNAexclude.R	2009-09-08 15:45:28 UTC (rev 998)
@@ -11,12 +11,16 @@
     nas <- x$na.action
     if (is.null(nas))
         return(x)
+    ## add a 'residuals' item, because step, add1.default and
+    ## drop1.default use this to check that number of observations
+    ## does not change in sequential fits.
+    x$residuals.zombie <- rep(TRUE, max(0, nrow(x$CA$u)))
     ## rowsums for CA (in RDA/PCA rowsum = NA)
     if (!inherits(x, "rda"))
         x$rowsum.excluded <- rowSums(excluded)/x$grand.total
     ## Estimate WA scores for NA cases with newdata of excluded
     ## observations
-    if (is.null(x$pCCA)) {
+    if (is.null(x$pCCA) && inherits(nas, "exclude")) {
         if (!is.null(x$CCA))
             x$CCA$wa.excluded <- predict(x, newdata = excluded,
                                          type = "wa", model = "CCA")

Modified: pkg/vegan/R/rda.formula.R
===================================================================
--- pkg/vegan/R/rda.formula.R	2009-09-08 07:26:15 UTC (rev 997)
+++ pkg/vegan/R/rda.formula.R	2009-09-08 15:45:28 UTC (rev 998)
@@ -23,7 +23,7 @@
     sol$call <- match.call()
     sol$call[[1]] <- as.name("rda")
     sol$call$formula <- formula(d$terms, width.cutoff = 500)
-    if (!is.null(sol$na.action) && inherits(sol$na.action, "exclude"))
+    if (!is.null(sol$na.action))
         sol <- ordiNAexclude(sol, d$excluded)
     sol
 }

Modified: pkg/vegan/inst/ChangeLog
===================================================================
--- pkg/vegan/inst/ChangeLog	2009-09-08 07:26:15 UTC (rev 997)
+++ pkg/vegan/inst/ChangeLog	2009-09-08 15:45:28 UTC (rev 998)
@@ -13,22 +13,17 @@
 	but functions accessing the result with scores will get the NA
 	data. 
 
-	* anova.cca(..., by = "term") refuses the analysis if there are
-	missing data. With current implementation, the number of rows can
-	change during analysis and different terms cannot be
-	compared. Needs FIXME.
+	* anova.cca(..., by = "term") and step() did not work correctly
+	with na.exclude or na.omit. These fitted sequential models, and
+	due to missing value removal the data set could change in the
+	sequence. The problem case was step() which relies on standard
+	stats functions add1.default and drop1.default. They detect the
+	change in data by length(object$residuals). Therefore
+	ordiNAexclude adds item 'residuals.zombie' to the ordination
+	result so that step() works. The same mechanism is also used in
+	anova.ccabyterm (not all NA deletions change the numbers of
+	observations).
 
-	* FIXME: step/add1/drop1 are unsafe with missing data in
-	cca/rda. The number of rows can change during stepping and this is
-	not detected. There is no clean way of detecting this. Standard
-	functions step, add1.default and drop1.default detect this by the
-	length of object$residuals which is not present in vegan cca/rda
-	objects (and meaningfully cannot be present), and the only safe
-	way to detect this is to make drop1.default and add1.default do
-	the job, since drop1.cca and add1.cca are only involved if test =
-	"permutation". Probably need to add residuals.dummy to the
-	objects?
-
 	* weights.cca and weights.rda know na.action.
 
 	* Fixing cca/rda functions for changes in weights(). The rule is



More information about the Vegan-commits mailing list