[Vegan-commits] r1127 - in branches/1.17: R inst man

Tue Feb 16 07:57:22 CET 2010

Author: jarioksa
Date: 2010-02-16 07:57:21 +0100 (Tue, 16 Feb 2010)
New Revision: 1127

Modified:
   branches/1.17/R/predict.cca.R
   branches/1.17/R/predict.rda.R
   branches/1.17/R/simulate.rda.R
   branches/1.17/inst/ChangeLog
   branches/1.17/man/predict.cca.Rd
   branches/1.17/man/simulate.rda.Rd
Log:
merged r1118-1120, 1122: predict.cca/rda & simulate.rda/cca upgrades

Modified: branches/1.17/R/predict.cca.R
===================================================================

--- branches/1.17/R/predict.cca.R	2010-02-16 06:47:38 UTC (rev 1126)
+++ branches/1.17/R/predict.cca.R	2010-02-16 06:57:21 UTC (rev 1127)
@@ -1,5 +1,5 @@
 "predict.cca" <-
-    function (object, newdata, type = c("response", "wa", "sp", "lc"), 
+    function (object, newdata, type = c("response", "wa", "sp", "lc", "working"), 
               rank = "full", model = c("CCA", "CA"), scaling = FALSE, ...) 
 {
     type <- match.arg(type)
@@ -18,14 +18,17 @@
     if (is.null(w)) 
         w <- u
     slam <- diag(sqrt(object[[model]]$eig[1:take]), nrow = take)
-    if (type == "response") {
+    if (type %in%  c("response", "working")) {
         Xbar <- 0
         if (take > 0) 
             Xbar <- u %*% slam %*% t(v)
         if (!is.null(object$pCCA)) 
             warning("Conditional ('partial') component ignored")
         rc <- outer(rs, cs)
-        out <- (Xbar + 1) * rc * gtot
+        if (type == "response") 
+            out <- (Xbar + 1) * rc * gtot
+        else                 # type == "working"
+            out <- Xbar * sqrt(rc)
     }
     else if (type == "lc") {
         if (model == "CA") 
@@ -59,10 +62,17 @@
         if (!missing(newdata)) {
             if (!is.null(object$pCCA)) 
                 stop("No 'wa' scores available (yet) in partial CCA")
+            nm <- rownames(v)
+            if (!is.null(nm)) { # Got rownames: keep only species with scores
+                if (!all(nm %in% colnames(newdata)))
+                    stop("'newdata' does not have named columns matching one or more the original columns")
+                newdata <-  newdata[, nm, drop = FALSE]
+            } else { #Rownames are NULL: still try to remove exclude.spec
+                exclude.spec <- attr(object[[model]]$v, "na.action")
+                if (!is.null(exclude.spec)) 
+                    Xbar <- Xbar[, -exclude.spec]
+            }
             Xbar <- as.matrix(newdata)
-            exclude.spec <- attr(object[[model]]$v, "na.action")
-            if (!is.null(exclude.spec)) 
-                Xbar <- Xbar[, -exclude.spec]
             rs <- rowSums(Xbar)
             Xbar <- (Xbar - outer(rs, cs))/sqrt(outer(rs, cs))
             v <- sweep(v, 1, sqrt(cs), "*")
@@ -81,6 +91,12 @@
     }
     else if (type == "sp") {
         if (!missing(newdata)) {
+            nm <- rownames(u)
+            if (!is.null(nm)) {
+                if (!all(nm %in% rownames(newdata)))
+                    stop("'newdata' does not have named rows matching one or more of the original rows")
+                newdata <- newdata[nm, , drop = FALSE]
+            }
             Xbar <- as.matrix(newdata)
             cs <- colSums(Xbar)
             Xbar <- (Xbar - outer(rs, cs))/sqrt(outer(rs, cs))

Modified: branches/1.17/R/predict.rda.R
===================================================================
--- branches/1.17/R/predict.rda.R	2010-02-16 06:47:38 UTC (rev 1126)
+++ branches/1.17/R/predict.rda.R	2010-02-16 06:57:21 UTC (rev 1127)
@@ -1,5 +1,5 @@
 `predict.rda` <-
-    function (object, newdata, type = c("response", "wa", "sp", "lc"), 
+    function (object, newdata, type = c("response", "wa", "sp", "lc", "working"), 
               rank = "full", model = c("CCA", "CA"), scaling = FALSE, ...) 
 {
     type <- match.arg(type)
@@ -22,7 +22,7 @@
     if (is.null(w)) 
         w <- u
     slam <- diag(sqrt(object[[model]]$eig[1:take] * nr), nrow = take)
-    if (type == "response") {
+    if (type %in% c("response", "working")) {
         if (!is.null(object$pCCA)) 
             warning("Conditional ('partial') component ignored")
         if (inherits(object, "capscale")) {
@@ -36,9 +36,13 @@
                 rownames(out) <- rownames(u)
                 colnames(out) <- rownames(v)
             }
-            if (!is.null(scal)) 
-                out <- sweep(out, 2, scal, "*")
-            out <- sweep(out, 2, cent, "+")
+            if (type == "response") {
+                if (!is.null(scal)) 
+                    out <- sweep(out, 2, scal, "*")
+                out <- sweep(out, 2, cent, "+")
+            } else {
+                out <- out/sqrt(nrow(out) - 1)
+            }
         }
     }
     else if (type == "lc") {
@@ -73,6 +77,12 @@
                 stop("'wa' scores not available in capscale with 'newdata'")
             if (!is.null(object$pCCA)) 
                 stop("No 'wa' scores available (yet) in partial RDA")
+            nm <- rownames(v)
+            if (!is.null(nm)) {
+                if (!all(nm %in% colnames(newdata)))
+                    stop("'newdata' does not have named columns matching one or more the original columns")
+                newdata <-  newdata[, nm, drop = FALSE]
+            }
             Xbar <- as.matrix(newdata)
             Xbar <- sweep(Xbar, 2, cent, "-")
             if (!is.null(scal)) {
@@ -93,6 +103,12 @@
         if (inherits(object, "capscale")) 
             warning("'sp' scores may be meaningless in 'capscale'")
         if (!missing(newdata)) {
+            nm <- rownames(u)
+            if (!is.null(nm)) {
+                if (!all(nm %in% rownames(newdata)))
+                    stop("'newdata' does not have named rows matching one or more of the original rows")
+                newdata <- newdata[nm, , drop = FALSE]
+            }
             Xbar <- as.matrix(newdata)
             Xbar <- scale(Xbar, center = TRUE, scale = scaled.PCA)
             if (!is.null(object$pCCA)) 

Modified: branches/1.17/R/simulate.rda.R
===================================================================
--- branches/1.17/R/simulate.rda.R	2010-02-16 06:47:38 UTC (rev 1126)
+++ branches/1.17/R/simulate.rda.R	2010-02-16 06:57:21 UTC (rev 1127)
@@ -1,5 +1,5 @@
 `simulate.rda` <-
-    function(object, nsim = 1, seed = NULL, indx = NULL, ...) 
+    function(object, nsim = 1, seed = NULL, indx = NULL, rank = "full", ...) 
 {
     ## Handle RNG: code directly from stats::simulate.lm
     if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) 
@@ -16,7 +16,7 @@
     ## response matrix.
     if (nsim > 1)
         .NotYetUsed("nsim")
-    ftd <- fitted(object)
+    ftd <- predict(object, type = "response", rank = rank)
     ## pRDA: add partial Fit to the constrained
     if (!is.null(object$pCCA))
         ftd <- ftd + object$pCCA$Fit
@@ -39,7 +39,7 @@
 ### still guarantee that all marginal totals are positive.
 
 `simulate.cca` <-
-    function(object, nsim = 1, seed = NULL, indx = NULL, ...)
+    function(object, nsim = 1, seed = NULL, indx = NULL, rank = "full", ...)
 {
     ## Handle RNG: code directly from stats::simulate.lm
     if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) 
@@ -59,7 +59,7 @@
     ## Need sqrt of rowsums for weighting
     sq.r <- sqrt(object$rowsum)
     ## Fitted value
-    ftd <- fitted(object, type = "working")
+    ftd <- predict(object, type = "working", rank = rank)
     ## pCCA: add partial Fit to the constrained
     if (!is.null(object$pCCA))
         ftd <- ftd + object$pCCA$Fit

Modified: branches/1.17/inst/ChangeLog
===================================================================
--- branches/1.17/inst/ChangeLog	2010-02-16 06:47:38 UTC (rev 1126)
+++ branches/1.17/inst/ChangeLog	2010-02-16 06:57:21 UTC (rev 1127)
@@ -8,6 +8,11 @@
 
 	* merged r1123: CCorA() output fixes plus upgrades of
 	biplot.CCorA(). 
+
+	* merged r1118, r1119: predict.cca/rda match 'newdata' by dim
+	names and gained new choice 'type = "working"'.
+
+	* merged r1120, 1122: simulate.rda/cca gained argument 'rank'. 
 	
 Version 1.17-0 (released January 11, 2010)
 
@@ -206,7 +211,7 @@
 	zero, and z became NaN. Now z = 0 for these cases. A response to
 	the query of Martin Kopecky at the vegan-help forum in R-Forge.
 
-	* densitplot.oecosimu: panels keep the order of statistics. The
+	* densityplot.oecosimu: panels keep the order of statistics. The
 	panels were ordered alphabetically, but the vertical line for the
 	observed statistic was in the original order (and thus often in
 	the wrong panel).

Modified: branches/1.17/man/predict.cca.Rd
===================================================================
--- branches/1.17/man/predict.cca.Rd	2010-02-16 06:47:38 UTC (rev 1126)
+++ branches/1.17/man/predict.cca.Rd	2010-02-16 06:57:21 UTC (rev 1127)
@@ -24,7 +24,7 @@
 \method{fitted}{capscale}(object, model = c("CCA", "CA", "Imaginary"),
     type = c("response", "working"), ...)
 \method{residuals}{cca}(object, ...)
-\method{predict}{cca}(object, newdata, type = c("response", "wa", "sp", "lc"),
+\method{predict}{cca}(object, newdata, type = c("response", "wa", "sp", "lc", "working"),
     rank = "full", model = c("CCA", "CA"), scaling = FALSE, ...)
 \method{calibrate}{cca}(object, newdata, rank = "full", ...)
 \method{coef}{cca}(object, ...)
@@ -38,25 +38,28 @@
   \item{model}{Show constrained (\code{"CCA"}) or unconstrained
     (\code{"CA"}) results. For \code{\link{capscale}} this can also be 
     \code{"Imaginary"} for imaginary components with negative eigenvalues. }
-  \item{newdata}{New data frame to be used in
-    prediction of species and site scores or for calibration.  Usually
-    this a new community data frame, but for \code{predict.cca}
-    \code{type = "lc"} it must be an environment data frame, and for
-    \code{type = "response"} this is ignored.}
-  \item{type}{The type of prediction, fitted values or residuals: In
-    \code{fitted} and \code{residuals}, \code{"response"} scales results so
-    that the same ordination gives the same results, and \code{"working"}
-    gives the values used internally, that is after Chi-square
-    standardization in \code{\link{cca}} and scaling and centring in 
-    \code{\link{rda}}. In \code{\link{capscale}} the \code{"response"} gives
-    the dissimilarities, and \code{"working"} the scaled scores that produce
-    the dissimilarities as Euclidean distances.
-    In \code{predict} \code{"response"} 
-    gives an approximation of the original data matrix or dissimilarities, 
-    \code{"wa"} the site scores as weighted averages of the community data, 
-    \code{"lc"} the site scores as linear combinations of environmental data, 
-    and \code{"sp"} the species scores. In \code{predict.decorana} the
-    alternatives are scores for \code{"sites"} or \code{"species"}.} 
+  \item{newdata}{New data frame to be used in prediction of species
+    and site scores or for calibration.  Usually this a new community
+    data frame, but for \code{predict.cca} \code{type = "lc"} it must
+    be an environment data frame, and for \code{type = "response"}
+    this is ignored.  If the original model had row or column names,
+    then new data must contain rows or columns with the same names
+    (row names for species scores, column names for \code{"wa"} scores
+    and constraint names of \code{"lc"} scores). In other cases the
+    rows or columns must match directly. }
+  \item{type}{The type of prediction, fitted values or residuals:
+    \code{"response"} scales results so that the same ordination gives
+    the same results, and \code{"working"} gives the values used
+    internally, that is after Chi-square standardization in
+    \code{\link{cca}} and scaling and centring in
+    \code{\link{rda}}. In \code{\link{capscale}} the \code{"response"}
+    gives the dissimilarities, and \code{"working"} the scaled scores
+    that produce the dissimilarities as Euclidean
+    distances. Alternative \code{"wa"} gives the site scores as
+    weighted averages of the community data, \code{"lc"} the site
+    scores as linear combinations of environmental data, and
+    \code{"sp"} the species scores. In \code{predict.decorana} the
+    alternatives are scores for \code{"sites"} or \code{"species"}.}
   \item{rank}{The rank or the number of axes used in the approximation.
     The default is to use all axes (full rank) of the \code{"model"} or
     all available four axes in \code{predict.decorana}.}

Modified: branches/1.17/man/simulate.rda.Rd
===================================================================
--- branches/1.17/man/simulate.rda.Rd	2010-02-16 06:47:38 UTC (rev 1126)
+++ branches/1.17/man/simulate.rda.Rd	2010-02-16 06:57:21 UTC (rev 1127)
@@ -11,7 +11,7 @@
 }
 
 \usage{
-\method{simulate}{rda}(object, nsim = 1, seed = NULL, indx = NULL, ...)
+\method{simulate}{rda}(object, nsim = 1, seed = NULL, indx = NULL, rank = "full", ...)
 }
 \arguments{
   \item{object}{an object representing a fitted \code{\link{rda}} model.}
@@ -26,6 +26,8 @@
     have duplicate entries so that bootstrapping is allowed. If null,
     parametric simulation is used and Gaussian error is added to the
     fitted values.}
+  \item{rank}{The rank of the constrained component: passed to
+    \code{\link{predict.rda}} or \code{\link{predict.cca}}. }
   \item{\dots}{additional optional arguments (ignored). }
 }