[Vegan-commits] r998 - in pkg/vegan: R inst
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Sep 8 17:45:28 CEST 2009
Author: jarioksa
Date: 2009-09-08 17:45:28 +0200 (Tue, 08 Sep 2009)
New Revision: 998
Modified:
pkg/vegan/R/anova.ccabyterm.R
pkg/vegan/R/cca.formula.R
pkg/vegan/R/ordiNAexclude.R
pkg/vegan/R/rda.formula.R
pkg/vegan/inst/ChangeLog
Log:
NA handling in sequential models: step() and anova(..., by = 'term')
Modified: pkg/vegan/R/anova.ccabyterm.R
===================================================================
--- pkg/vegan/R/anova.ccabyterm.R 2009-09-08 07:26:15 UTC (rev 997)
+++ pkg/vegan/R/anova.ccabyterm.R 2009-09-08 15:45:28 UTC (rev 998)
@@ -2,9 +2,9 @@
function (object, step = 100, ...)
{
## Data set size may change during iteration if there are missing
- ## values: bail out.
- if (!is.null(object$na.action))
- stop("anova by = 'term' is unavailable with missing data")
+ ## values: use length(object$residual) to check this like step,
+ ## drop1.default, add1.default do.
+ n0 <- length(object$residuals)
trm <- terms(object)
call <- paste("Model:", c(object$call))
trmlab <- attr(trm, "term.labels")
@@ -31,6 +31,9 @@
assign(".Random.seed", sim$Random.seed, envir = .GlobalEnv)
fla <- as.formula(paste(" . ~ . -", trmlab[.ITRM]))
object <- update(object, fla)
+ ## Change in data set due to missing values?
+ if (length(object$residuals) != n0)
+ stop("number of rows has changed: remove missing values?")
if (is.null(object$CCA))
break
sim <- permutest.cca(object, permutations = step, ...)
Modified: pkg/vegan/R/cca.formula.R
===================================================================
--- pkg/vegan/R/cca.formula.R 2009-09-08 07:26:15 UTC (rev 997)
+++ pkg/vegan/R/cca.formula.R 2009-09-08 15:45:28 UTC (rev 998)
@@ -24,7 +24,7 @@
sol$call <- match.call()
sol$call[[1]] <- as.name("cca")
sol$call$formula <- formula(d$terms, width.cutoff = 500)
- if (!is.null(sol$na.action) && inherits(sol$na.action, "exclude"))
+ if (!is.null(sol$na.action))
sol <- ordiNAexclude(sol, d$excluded)
sol
}
Modified: pkg/vegan/R/ordiNAexclude.R
===================================================================
--- pkg/vegan/R/ordiNAexclude.R 2009-09-08 07:26:15 UTC (rev 997)
+++ pkg/vegan/R/ordiNAexclude.R 2009-09-08 15:45:28 UTC (rev 998)
@@ -11,12 +11,16 @@
nas <- x$na.action
if (is.null(nas))
return(x)
+ ## add a 'residuals' item, because step, add1.default and
+ ## drop1.default use this to check that number of observations
+ ## does not change in sequential fits.
+ x$residuals.zombie <- rep(TRUE, max(0, nrow(x$CA$u)))
## rowsums for CA (in RDA/PCA rowsum = NA)
if (!inherits(x, "rda"))
x$rowsum.excluded <- rowSums(excluded)/x$grand.total
## Estimate WA scores for NA cases with newdata of excluded
## observations
- if (is.null(x$pCCA)) {
+ if (is.null(x$pCCA) && inherits(nas, "exclude")) {
if (!is.null(x$CCA))
x$CCA$wa.excluded <- predict(x, newdata = excluded,
type = "wa", model = "CCA")
Modified: pkg/vegan/R/rda.formula.R
===================================================================
--- pkg/vegan/R/rda.formula.R 2009-09-08 07:26:15 UTC (rev 997)
+++ pkg/vegan/R/rda.formula.R 2009-09-08 15:45:28 UTC (rev 998)
@@ -23,7 +23,7 @@
sol$call <- match.call()
sol$call[[1]] <- as.name("rda")
sol$call$formula <- formula(d$terms, width.cutoff = 500)
- if (!is.null(sol$na.action) && inherits(sol$na.action, "exclude"))
+ if (!is.null(sol$na.action))
sol <- ordiNAexclude(sol, d$excluded)
sol
}
Modified: pkg/vegan/inst/ChangeLog
===================================================================
--- pkg/vegan/inst/ChangeLog 2009-09-08 07:26:15 UTC (rev 997)
+++ pkg/vegan/inst/ChangeLog 2009-09-08 15:45:28 UTC (rev 998)
@@ -13,22 +13,17 @@
but functions accessing the result with scores will get the NA
data.
- * anova.cca(..., by = "term") refuses the analysis if there are
- missing data. With current implementation, the number of rows can
- change during analysis and different terms cannot be
- compared. Needs FIXME.
+ * anova.cca(..., by = "term") and step() did not work correctly
+ with na.exclude or na.omit. These fitted sequential models, and
+ due to missing value removal the data set could change in the
+ sequence. The problem case was step() which relies on standard
+ stats functions add1.default and drop1.default. They detect the
+ change in data by length(object$residuals). Therefore
+ ordiNAexclude adds item 'residuals.zombie' to the ordination
+ result so that step() works. The same mechanism is also used in
+ anova.ccabyterm (not all NA deletions change the numbers of
+ observations).
- * FIXME: step/add1/drop1 are unsafe with missing data in
- cca/rda. The number of rows can change during stepping and this is
- not detected. There is no clean way of detecting this. Standard
- functions step, add1.default and drop1.default detect this by the
- length of object$residuals which is not present in vegan cca/rda
- objects (and meaningfully cannot be present), and the only safe
- way to detect this is to make drop1.default and add1.default do
- the job, since drop1.cca and add1.cca are only involved if test =
- "permutation". Probably need to add residuals.dummy to the
- objects?
-
* weights.cca and weights.rda know na.action.
* Fixing cca/rda functions for changes in weights(). The rule is
More information about the Vegan-commits
mailing list