From noreply at r-forge.r-project.org Thu Oct 3 09:51:01 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Thu, 3 Oct 2013 09:51:01 +0200 (CEST) Subject: [Vegan-commits] r2631 - in pkg/vegan: R inst man Message-ID: <20131003075101.CECB51859D2@r-forge.r-project.org> Author: jarioksa Date: 2013-10-03 09:51:01 +0200 (Thu, 03 Oct 2013) New Revision: 2631 Removed: pkg/vegan/R/confint.fisherfit.R pkg/vegan/R/plot.profile.fisherfit.R pkg/vegan/R/profile.fisherfit.R Modified: pkg/vegan/R/fisher.alpha.R pkg/vegan/R/fisherfit.R pkg/vegan/R/print.fisherfit.R pkg/vegan/inst/ChangeLog pkg/vegan/man/diversity.Rd pkg/vegan/man/fisherfit.Rd Log: re-write fisherfit and remove its standard errors Deleted: pkg/vegan/R/confint.fisherfit.R =================================================================== --- pkg/vegan/R/confint.fisherfit.R 2013-09-30 13:33:14 UTC (rev 2630) +++ pkg/vegan/R/confint.fisherfit.R 2013-10-03 07:51:01 UTC (rev 2631) @@ -1,6 +0,0 @@ -"confint.fisherfit" <- - function (object, parm, level=0.95, ...) -{ - if (!require(MASS)) stop("Needs packages MASS .. not found") - confint(profile(object), level=level, ...) -} Modified: pkg/vegan/R/fisher.alpha.R =================================================================== --- pkg/vegan/R/fisher.alpha.R 2013-09-30 13:33:14 UTC (rev 2630) +++ pkg/vegan/R/fisher.alpha.R 2013-10-03 07:51:01 UTC (rev 2631) @@ -1,12 +1,12 @@ -"fisher.alpha" <- - function (x, MARGIN = 1, se = FALSE, ...) +`fisher.alpha` <- + function (x, MARGIN = 1, ...) { x <- as.matrix(x) if(ncol(x) == 1) x <- t(x) sol <- apply(x, MARGIN, fisherfit) out <- unlist(lapply(sol, function(x) x$estimate)) - if (se) { + if (FALSE) { out <- list(alpha = out) out$se <- unlist(lapply(sol, function(x) sqrt(diag(solve(x$hessian)))[1])) out$df.residual <- unlist(lapply(sol, df.residual)) Modified: pkg/vegan/R/fisherfit.R =================================================================== --- pkg/vegan/R/fisherfit.R 2013-09-30 13:33:14 UTC (rev 2630) +++ pkg/vegan/R/fisherfit.R 2013-10-03 07:51:01 UTC (rev 2631) @@ -1,24 +1,49 @@ +## Fisher alpha is actually based only on the number of species S and +## number of individuals. + `fisherfit` <- - function (x, ...) + function(x, ...) { - Dev.logseries <- function(n.r, p, N) { - r <- as.numeric(names(n.r)) - x <- N/(N + p) - logmu <- log(p) + log(x) * r - log(r) - lhood <- -sum(n.r * (logmu - log(n.r)) + 1) - p * log(1-x) - attr(lhood, "gradient") <- -sum(n.r)/p - log(1-x) - lhood + nr <- as.fisher(x) + S <- sum(nr) + N <- sum(x) + ## Solve 'x' (Fisher alpha). + d1fun <- function(x, S, N) x * log(1 + N/x) - S + ## We may need to bracket the interval + hi <- 50 + lo <- 1 + tries <- 0 + repeat { + sol <- try(uniroot(d1fun, c(lo, hi), S = S, N = N, ...), silent = TRUE) + if (inherits(sol, "try-error")) { + if(d1fun(hi, S, N) < 0) + hi <- 2*hi + if(d1fun(lo, S, N) > 0) + lo <- lo/2 + tries <- tries + 1 + } + else break + ## alpha can tend to +Inf: set root = NA etc. + if (tries > 200) { + sol <- list(root = NA, f.root = NA, iter = NA, init.it = NA, + estim.prec = NA) + break + } } - tmp <- as.rad(x) - N <- sum(x) - tmp <- tmp/N - p <- 1/sum(tmp^2) - n.r <- as.fisher(x) - LSeries <- nlm(Dev.logseries, n.r = n.r, p = p, N = N, - hessian = TRUE, ...) - LSeries$df.residual <- sum(x > 0) - 1 - LSeries$nuisance <- N/(N + LSeries$estimate) - LSeries$fisher <- n.r - class(LSeries) <- "fisherfit" - LSeries + ## 'extendInt' arg was added in R r63162 | maechler | 2013-07-03 + ## 11:47:22 +0300 (Wed, 03 Jul 2013). 
Latest release is R 3.0.2 of + ## 2013-09-25, but it still does not have the argument. In the + ## future we may switch to the following: + + ##sol <- uniroot(d1fun, c(1,50), extendInt = "yes", S = S, N = N, ...) + + nuisance <- N/(N + sol$root) + ## we used nlm() earlier, and the following output is compatible + out <- list(estimate = sol$root, hessian = NA, + iterations = sol$iter, df.residual = NA, + nuisance = nuisance, fisher = nr, + estim.prec = sol$estim.prec, + code = 2*is.na(sol$estim.prec) + 1) + class(out) <- "fisherfit" + out } Deleted: pkg/vegan/R/plot.profile.fisherfit.R =================================================================== --- pkg/vegan/R/plot.profile.fisherfit.R 2013-09-30 13:33:14 UTC (rev 2630) +++ pkg/vegan/R/plot.profile.fisherfit.R 2013-10-03 07:51:01 UTC (rev 2631) @@ -1,16 +0,0 @@ -`plot.profile.fisherfit` <- - function (x, type = "l", ...) -{ - tmp <- attr(x, "original.fit") - est <- tmp$coefficients - se <- tmp$std.err - alpha <- x$alpha[, 1] - tau <- x$alpha[, 2] - sp <- spline(tau, alpha) - plot(sp$x, sp$y, type = type, xlab = "alpha", ylab = "tau", - ...) - abline(-est/se, 1/se, lty = 2) - abline(v = est, lty = 3) - abline(h = 0, lty = 3) - invisible() -} Modified: pkg/vegan/R/print.fisherfit.R =================================================================== --- pkg/vegan/R/print.fisherfit.R 2013-09-30 13:33:14 UTC (rev 2630) +++ pkg/vegan/R/print.fisherfit.R 2013-10-03 07:51:01 UTC (rev 2631) @@ -1,12 +1,8 @@ -"print.fisherfit" <- +`print.fisherfit` <- function (x, ...) { cat("\nFisher log series model\n") - cat("No. of species:", sum(x$fisher), "\n\n") - out <- cbind(x$estimate, sqrt(diag(solve(x$hessian)))) - colnames(out) <- c("Estimate", "Std. Error") - rownames(out) <- "alpha" - printCoefmat(out) - cat("\n") + cat("No. of species:", sum(x$fisher), "\n") + cat("Fisher alpha: ", x$estimate, "\n\n") invisible(x) } Deleted: pkg/vegan/R/profile.fisherfit.R =================================================================== --- pkg/vegan/R/profile.fisherfit.R 2013-09-30 13:33:14 UTC (rev 2630) +++ pkg/vegan/R/profile.fisherfit.R 2013-10-03 07:51:01 UTC (rev 2631) @@ -1,43 +0,0 @@ -"profile.fisherfit" <- - function (fitted, alpha = 0.01, maxsteps = 20, del = zmax/5, ...) 
-{ - Dev.logseries <- function(n.r, p, N) { - r <- as.numeric(names(n.r)) - x <- N/(N + p) - logmu <- log(p) + log(x) * r - log(r) - lhood <- -sum(n.r * (logmu - log(n.r)) + 1) - p * log(1 - - x) - lhood - } - par <- fitted$estimate - names(par) <- "alpha" - std.err <- sqrt(diag(solve(fitted$hessian))) - minll <- fitted$minimum - nr <- fitted$fisher - N <- sum(as.numeric(names(nr)) * nr) - zmax <- sqrt(qchisq(1 - alpha/2, 1)) - zi <- 0 - bi <- par - for (sgn in c(-1, 1)) { - step <- 0 - z <- 0 - b <- 0 - while ((step <- step + 1) < maxsteps && abs(z) < zmax) { - b <- par + sgn * step * del * std.err - fm <- Dev.logseries(nr, b, N) - zz <- 2 * (fm - minll) - if (zz > -0.001) - zz <- max(zz, 0) - else stop("profiling has found a better solution, so original fit had not converged") - z <- sgn * sqrt(zz) - bi <- c(bi, b) - zi <- c(zi, z) - } - } - si <- order(bi) - out <- list() - out$alpha <- data.frame(tau = zi[si], par.vals = bi[si]) - attr(out, "original.fit") <- list(coefficients = par, std.err = std.err) - class(out) <- c("profile.fisherfit", "profile.glm", "profile") - out -} Modified: pkg/vegan/inst/ChangeLog =================================================================== --- pkg/vegan/inst/ChangeLog 2013-09-30 13:33:14 UTC (rev 2630) +++ pkg/vegan/inst/ChangeLog 2013-10-03 07:51:01 UTC (rev 2631) @@ -21,7 +21,18 @@ treated differently in factorfit: now they support null hypothesis, previously they decreased the P-values. - * fisherfit: use analytic derivatives in non-linear minimizer nlm(). + * fisherfit: completely rewritten and estimates of standard error + removed: I could not find no justification for these. Actually, it + seems that the value of Fisher alpha as estimated in the function + was independent of the abundance distribution of species, but will + be defined by the number of species (S) and number of individuals + (N). Now the Fisher alpha is estimated from the relationship S = + alpha*(1 + log(N/alpha)) using function uniroot(). Because of + this, standard errors cannot be estimated and they were + removed. In addition, functions confint.fisherfit, + profile.fisherfit and plot.profile.fisherfit were removed. The + estimation of standard errors was also removed in function + fisher.alpha (that only calls fisherfit). * nestednodf: matrix fill was wrongly calculated in weighted analysis. The nominator was length of 'comm', and if input was a Modified: pkg/vegan/man/diversity.Rd =================================================================== --- pkg/vegan/man/diversity.Rd 2013-09-30 13:33:14 UTC (rev 2630) +++ pkg/vegan/man/diversity.Rd 2013-10-03 07:51:01 UTC (rev 2631) @@ -20,7 +20,7 @@ drarefy(x, sample) rarecurve(x, step = 1, sample, xlab = "Sample Size", ylab = "Species", label = TRUE, ...) -fisher.alpha(x, MARGIN = 1, se = FALSE, ...) +fisher.alpha(x, MARGIN = 1, ...) specnumber(x, groups, MARGIN = 1) } @@ -32,7 +32,6 @@ \item{base}{ The logarithm \code{base} used in \code{shannon}.} \item{sample}{Subsample size for rarefying community, either a single value or a vector.} - \item{se}{Estimate standard errors.} \item{step}{Step size for sample sizes in rarefaction curves.} \item{xlab, ylab}{Axis labels in plots of rarefaction curves.} \item{label}{Label rarefaction curves by rownames of \code{x} (logical).} @@ -86,11 +85,7 @@ \code{fisher.alpha} estimates the \eqn{\alpha} parameter of Fisher's logarithmic series (see \code{\link{fisherfit}}). The estimation is possible only for genuine - counts of individuals. 
The function can optionally return standard - errors of \eqn{\alpha}. These should be regarded only as rough - indicators of the accuracy: the confidence limits of \eqn{\alpha} are - strongly non-symmetric and the standard errors cannot be used in - Normal inference. + counts of individuals. Function \code{specnumber} finds the number of species. With \code{MARGIN = 2}, it finds frequencies of species. If \code{groups} Modified: pkg/vegan/man/fisherfit.Rd =================================================================== --- pkg/vegan/man/fisherfit.Rd 2013-09-30 13:33:14 UTC (rev 2630) +++ pkg/vegan/man/fisherfit.Rd 2013-10-03 07:51:01 UTC (rev 2631) @@ -2,9 +2,6 @@ \alias{fisherfit} \alias{as.fisher} \alias{plot.fisherfit} -\alias{profile.fisherfit} -\alias{confint.fisherfit} -\alias{plot.profile.fisherfit} \alias{prestonfit} \alias{prestondistr} \alias{as.preston} @@ -25,9 +22,6 @@ } \usage{ fisherfit(x, ...) -\method{confint}{fisherfit}(object, parm, level = 0.95, ...) -\method{profile}{fisherfit}(fitted, alpha = 0.01, maxsteps = 20, del = zmax/5, - ...) prestonfit(x, tiesplit = TRUE, ...) prestondistr(x, truncate = -1, ...) \method{plot}{prestonfit}(x, xlab = "Frequency", ylab = "Species", bar.col = "skyblue", @@ -45,12 +39,6 @@ \arguments{ \item{x}{Community data vector for fitting functions or their result object for \code{plot} functions.} - \item{object, fitted}{Fitted model.} - \item{parm}{Not used.} - \item{level}{The confidence level required.} - \item{alpha}{The extend of profiling as significance.} - \item{maxsteps}{Maximum number of steps in profiling.} - \item{del}{Step length.} \item{tiesplit}{Split frequencies \eqn{1, 2, 4, 8} etc between adjacent octaves.} \item{truncate}{Truncation point for log-Normal model, in log2 @@ -73,9 +61,8 @@ \details{ In Fisher's logarithmic series the expected number of species \eqn{f} with \eqn{n} observed individuals is - \eqn{f_n = \alpha x^n / n} (Fisher et al. 1943). The estimation - follows Kempton & Taylor (1974) and uses function - \code{\link{nlm}}. The estimation is possible only for genuine + \eqn{f_n = \alpha x^n / n} (Fisher et al. 1943). + The estimation is possible only for genuine counts of individuals. The parameter \eqn{\alpha} is used as a diversity index, and \eqn{\alpha} and its standard error can be estimated with a separate function \code{\link{fisher.alpha}}. The @@ -84,20 +71,6 @@ function \code{as.fisher} transforms abundance data into Fisher frequency table. - Function \code{fisherfit} estimates the standard error of - \eqn{\alpha}{alpha}. However, the confidence limits cannot be directly - estimated from the standard errors, but you should use function - \code{confint} based on profile likelihood. Function \code{confint} - uses function \code{\link[MASS]{confint.glm}} of the \pkg{MASS} - package, using \code{profile.fisherfit} for the profile - likelihood. Function \code{profile.fisherfit} follows - \code{\link[MASS]{profile.glm}} and finds the \eqn{\tau}{tau} parameter or - signed square root of two times log-Likelihood profile. The profile can - be inspected with a \code{plot} function which shows the \eqn{\tau}{tau} - and a dotted line corresponding to the Normal assumption: if standard - errors can be directly used in Normal inference these two lines - are similar. - Preston (1948) was not satisfied with Fisher's model which seemed to imply infinite species richness, and postulated that rare species is a diminishing class and most species are in the middle of frequency @@ -162,11 +135,8 @@ \code{method}. 
Function \code{prestondistr} omits the entry \code{fitted}. The function \code{fisherfit} returns the result of \code{\link{nlm}}, where item \code{estimate} is \eqn{\alpha}. The - result object is amended with the following items: - \item{df.residuals}{Residual degrees of freedom.} - \item{nuisance}{Parameter \eqn{x}.} \item{fisher}{Observed data - from \code{as.fisher}.} - + result object is amended with the \code{nuisance} parameter and item + \code{fisher} for the observed data from \code{as.fisher} } \references{ Fisher, R.A., Corbet, A.S. & Williams, C.B. (1943). The relation @@ -174,10 +144,6 @@ random sample of animal population. \emph{Journal of Animal Ecology} 12: 42--58. - Kempton, R.A. & Taylor, L.R. (1974). Log-series and log-normal - parameters as diversity discriminators for - Lepidoptera. \emph{Journal of Animal Ecology} 43: 381--399. - Preston, F.W. (1948) The commonness and rarity of species. \emph{Ecology} 29, 254--283. @@ -186,7 +152,7 @@ distribution. \emph{Journal of Animal Ecology} 74, 409--422. } -\author{Bob O'Hara (\code{fisherfit}) and Jari Oksanen. } +\author{Bob O'Hara and Jari Oksanen. } \seealso{\code{\link{diversity}}, \code{\link{fisher.alpha}}, \code{\link{radfit}}, \code{\link{specpool}}. Function @@ -200,8 +166,6 @@ data(BCI) mod <- fisherfit(BCI[5,]) mod -plot(profile(mod)) -confint(mod) # prestonfit seems to need large samples mod.oct <- prestonfit(colSums(BCI)) mod.ll <- prestondistr(colSums(BCI)) From noreply at r-forge.r-project.org Thu Oct 3 14:49:48 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Thu, 3 Oct 2013 14:49:48 +0200 (CEST) Subject: [Vegan-commits] r2632 - in pkg/vegan: . man vignettes Message-ID: <20131003124948.966B7185F06@r-forge.r-project.org> Author: jarioksa Date: 2013-10-03 14:49:48 +0200 (Thu, 03 Oct 2013) New Revision: 2632 Modified: pkg/vegan/NAMESPACE pkg/vegan/man/diversity.Rd pkg/vegan/vignettes/diversity-vegan.Rnw Log: clean-up after r2631 (fisherfit re-write and se removal) Modified: pkg/vegan/NAMESPACE =================================================================== --- pkg/vegan/NAMESPACE 2013-10-03 07:51:01 UTC (rev 2631) +++ pkg/vegan/NAMESPACE 2013-10-03 12:49:48 UTC (rev 2632) @@ -132,7 +132,6 @@ # confint: stats -- also uses MASS:::confint.glm & MASS:::profile.glm # does this work with namespaces?? 
S3method(confint, MOStest) -S3method(confint, fisherfit) # cophenetic: stats S3method(cophenetic, spantree) # density: stats @@ -268,7 +267,6 @@ S3method(plot, preston) S3method(plot, prestonfit) S3method(plot, procrustes) -S3method(plot, profile.fisherfit) S3method(plot, rad) S3method(plot, radfit) S3method(plot, radfit.frame) @@ -367,7 +365,6 @@ # profile: stats # see note on 'confint' S3method(profile, MOStest) -S3method(profile, fisherfit) S3method(profile, humpfit) # radfit: vegan S3method(radfit, data.frame) Modified: pkg/vegan/man/diversity.Rd =================================================================== --- pkg/vegan/man/diversity.Rd 2013-10-03 07:51:01 UTC (rev 2631) +++ pkg/vegan/man/diversity.Rd 2013-10-03 12:49:48 UTC (rev 2632) @@ -32,6 +32,7 @@ \item{base}{ The logarithm \code{base} used in \code{shannon}.} \item{sample}{Subsample size for rarefying community, either a single value or a vector.} + \item{se}{Estimate standard errors.} \item{step}{Step size for sample sizes in rarefaction curves.} \item{xlab, ylab}{Axis labels in plots of rarefaction curves.} \item{label}{Label rarefaction curves by rownames of \code{x} (logical).} Modified: pkg/vegan/vignettes/diversity-vegan.Rnw =================================================================== --- pkg/vegan/vignettes/diversity-vegan.Rnw 2013-10-03 07:51:01 UTC (rev 2631) +++ pkg/vegan/vignettes/diversity-vegan.Rnw 2013-10-03 12:49:48 UTC (rev 2632) @@ -326,18 +326,7 @@ (\Sexpr{k}).} \label{fig:fisher} \end{figure} -We already saw $\alpha$ as a diversity index. Now we also obtained -estimate of standard error of $\alpha$ (these also are optionally -available in \code{fisher.alpha}). The standard errors are based on -the second derivatives (curvature) of log-likelihood at the solution -of $\alpha$. The distribution of $\alpha$ is often non-normal -and skewed, and standard errors are of not much use. However, -\code{fisherfit} has a \code{profile} method that can be used to -inspect the validity of normal assumptions, and will be used in -calculations of confidence intervals from profile deviance: -<<>>= -confint(fish) -@ +We already saw $\alpha$ as a diversity index. Preston's log-normal model is the main challenger to Fisher's log-series \citep{Preston48}. Instead of plotting species by From noreply at r-forge.r-project.org Mon Oct 14 09:40:58 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 14 Oct 2013 09:40:58 +0200 (CEST) Subject: [Vegan-commits] r2633 - in pkg/vegan: . R inst Message-ID: <20131014074058.8119E18538D@r-forge.r-project.org> Author: jarioksa Date: 2013-10-14 09:40:57 +0200 (Mon, 14 Oct 2013) New Revision: 2633 Added: pkg/vegan/R/permutest.ccalist.R Modified: pkg/vegan/DESCRIPTION pkg/vegan/inst/ChangeLog Log: add permutest.ccalist() to analyse a sequence of cca models (experimental) Modified: pkg/vegan/DESCRIPTION =================================================================== --- pkg/vegan/DESCRIPTION 2013-10-03 12:49:48 UTC (rev 2632) +++ pkg/vegan/DESCRIPTION 2013-10-14 07:40:57 UTC (rev 2633) @@ -1,7 +1,7 @@ Package: vegan Title: Community Ecology Package -Version: 2.1-36 -Date: September 25, 2013 +Version: 2.1-37 +Date: October 14, 2013 Author: Jari Oksanen, F. Guillaume Blanchet, Roeland Kindt, Pierre Legendre, Peter R. Minchin, R. B. O'Hara, Gavin L. Simpson, Peter Solymos, M. Henry H. 
Stevens, Helene Wagner Added: pkg/vegan/R/permutest.ccalist.R =================================================================== --- pkg/vegan/R/permutest.ccalist.R (rev 0) +++ pkg/vegan/R/permutest.ccalist.R 2013-10-14 07:40:57 UTC (rev 2633) @@ -0,0 +1,74 @@ +`permutest.ccalist` <- + function(x, ..., permutations = 99) +{ + ## Collect cca class objects. FIXME: Eventually this should be in + ## a function that calls permutest.ccalist after collecting model + ## objects from dotargs. + dotargs <- list(...) + if (length(dotargs)) { + isCCA <- sapply(dotargs, function(z) inherits(z, "cca")) + dotargs <- dotargs[isCCA] + if (length(dotargs)) + x <- c(list(x), dotargs) + } + nmodels <- length(x) + ## No. of observations and check + N <- sapply(x, nobs) + if (!all(N = N[1])) + stop("models have different numbers of observations") + else + N <- N[1] + ## Create permutation matrix if it does not exist. FIXME: should + ## take arguments for restricted permutation + if (length(permutations) == 1) + permutations <- shuffleSet(N, permutations) + ## permutations is now matrix + nperm <- nrow(permutations) + ## check + if (ncol(permutations) != N) + stop(gettextf("permutation matrix has %d columns, but you have %d sites", + ncol(nperm), N)) + ## All models are evaluated in permutest.cca with identical + ## permutations so that the differences of single permutations can + ## be used to assess the significance of differences of fitted + ## models. This strictly requires nested models (not checked + ## here): all terms of the smaller model must be included in the + ## larger model. FIXME: should pass arguments to permutest.cca. + mods <- lapply(x, function(z) + permutest(z, permutations = permutations)) + dfs <- sapply(mods, function(z) z$df) + dev <- sapply(mods, function(z) z$chi) + resdf <- dfs[2,] + df <- -diff(resdf) + resdev <- dev[2,] + changedev <- -diff(resdev) + big <- which.min(resdf) + scale <- resdev[big]/resdf[big] + fval <- changedev/df/scale + ## Collect permutation results: denominator of F varies in each + ## permutation. + pscale <- mods[[big]]$den/resdf[big] + ## Numerator of F + pfvals <- sapply(mods, function(z) z$num) + pfvals <- apply(pfvals, 1, diff) + ## dropped to vector? + if (!is.matrix(pfvals)) + pfvals <- matrix(pfvals, nrow=1, ncol=nperm) + pval <- rowSums(sweep(pfvals, 1, fval, ">=")) + pval <- (pval + 1)/(nperm+1) + pfvals <- sweep(pfvals, 1, df, "/") + pfvals <- sweep(pfvals, 2, pscale, "/") + ## collect table + table <- data.frame(resdf, resdev, c(NA, df), + c(NA,changedev), c(NA,fval), c(NA,pval)) + dimnames(table) <- list(1L:nmodels, c("Resid. Df", "Res. Chisq", + "Df", "Chisq", "F", "Pr(>F)")) + ## Collect header information + formulae <- sapply(x, function(z) deparse(formula(z))) + head <- paste0("Permutation tests for ", x[[1]]$method, " under ", + mods[[1]]$model, " model\nwith ", nperm, + " permutations\n") + topnote <- paste("Model ", format(1L:nmodels), ": ", formulae, + sep = "", collapse = "\n") + structure(table, heading=c(head,topnote), class = c("anova", "data.frame")) +} Modified: pkg/vegan/inst/ChangeLog =================================================================== --- pkg/vegan/inst/ChangeLog 2013-10-03 12:49:48 UTC (rev 2632) +++ pkg/vegan/inst/ChangeLog 2013-10-14 07:40:57 UTC (rev 2633) @@ -2,8 +2,29 @@ VEGAN DEVEL VERSIONS at http://r-forge.r-project.org/ -Version 2.1-36 (opened September 25, 2013) +Version 2.1-37 (opened October 14, 2013) + * permutest.cca: added new function permutest.ccalist() to compare + a sequence of models. 
The function is still experimental ("proof + of the concept") and unexported. If this stays in vegan, it should + eventually be called from anova.cca() or permutest.cca(). This + would bring along a change of API to, say, anova.cca(object, ..., + alpha=...): the dots must follow the first argument which turns of + positional and partial matching of arguments so that the function + can collect the "cca" models. We must decide whether the new + function is worth such a change that can make life harder for + ordinary users. + + The function is based on calling permutest.cca for each model with + identical permutations. We can then compare the change in model + for each permutation and collect the test statistics for + differences. This requires that the models really are nested so + that residual deviance certainly decreases in bigger model + (testing theory requires nesting, but this is commonly violated by + users: here nesting is necessary). + +Version 2.1-36 (closed October 14, 2013) + * opened with the release of vegan 2.0-9. * decostand(..., "normalize") uses now .Machine$double.eps to From noreply at r-forge.r-project.org Mon Oct 14 13:24:10 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 14 Oct 2013 13:24:10 +0200 (CEST) Subject: [Vegan-commits] r2634 - pkg/vegan/R Message-ID: <20131014112410.2D3B4184E39@r-forge.r-project.org> Author: jarioksa Date: 2013-10-14 13:24:09 +0200 (Mon, 14 Oct 2013) New Revision: 2634 Modified: pkg/vegan/R/permutest.cca.R Log: return something sensible with insensible input of no constraints Modified: pkg/vegan/R/permutest.cca.R =================================================================== --- pkg/vegan/R/permutest.cca.R 2013-10-14 07:40:57 UTC (rev 2633) +++ pkg/vegan/R/permutest.cca.R 2013-10-14 11:24:09 UTC (rev 2634) @@ -9,6 +9,16 @@ model = c("reduced", "direct", "full"), first = FALSE, strata = NULL, parallel = getOption("mc.cores") , ...) { + ## do something sensible with insensible input (no constraints) + if (is.null(x$CCA)) { + sol <- list(call = match.call(), testcall = x$call, model = NA, + F.0 = NA, F.perm = NA, chi = c(0, x$CA$tot.chi), + num = 0, den = 0, df = c(0, nrow(x$CA$u) - 1), + nperm = 0, method = x$method, first = FALSE, + Random.seed = NA) + class(sol) <- "permutest.cca" + return(sol) + } model <- match.arg(model) isCCA <- !inherits(x, "rda") isPartial <- !is.null(x$pCCA) From noreply at r-forge.r-project.org Mon Oct 14 13:24:54 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 14 Oct 2013 13:24:54 +0200 (CEST) Subject: [Vegan-commits] r2635 - pkg/vegan/R Message-ID: <20131014112454.15138184E39@r-forge.r-project.org> Author: jarioksa Date: 2013-10-14 13:24:53 +0200 (Mon, 14 Oct 2013) New Revision: 2635 Modified: pkg/vegan/R/permutest.ccalist.R Log: permutest.ccalist also accepts a null model with no constraints for comparison Modified: pkg/vegan/R/permutest.ccalist.R =================================================================== --- pkg/vegan/R/permutest.ccalist.R 2013-10-14 11:24:09 UTC (rev 2634) +++ pkg/vegan/R/permutest.ccalist.R 2013-10-14 11:24:53 UTC (rev 2635) @@ -50,6 +50,8 @@ pscale <- mods[[big]]$den/resdf[big] ## Numerator of F pfvals <- sapply(mods, function(z) z$num) + if (is.list(pfvals)) + pfvals <- do.call(cbind, pfvals) pfvals <- apply(pfvals, 1, diff) ## dropped to vector? 
if (!is.matrix(pfvals)) @@ -66,7 +68,7 @@ ## Collect header information formulae <- sapply(x, function(z) deparse(formula(z))) head <- paste0("Permutation tests for ", x[[1]]$method, " under ", - mods[[1]]$model, " model\nwith ", nperm, + mods[[big]]$model, " model\nwith ", nperm, " permutations\n") topnote <- paste("Model ", format(1L:nmodels), ": ", formulae, sep = "", collapse = "\n") From noreply at r-forge.r-project.org Tue Oct 15 15:52:36 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 15 Oct 2013 15:52:36 +0200 (CEST) Subject: [Vegan-commits] r2636 - pkg/vegan/R Message-ID: <20131015135237.141311858CE@r-forge.r-project.org> Author: jarioksa Date: 2013-10-15 15:52:36 +0200 (Tue, 15 Oct 2013) New Revision: 2636 Modified: pkg/vegan/R/permutest.ccalist.R Log: sanity checks for input in permutest.ccalist Modified: pkg/vegan/R/permutest.ccalist.R =================================================================== --- pkg/vegan/R/permutest.ccalist.R 2013-10-14 11:24:53 UTC (rev 2635) +++ pkg/vegan/R/permutest.ccalist.R 2013-10-15 13:52:36 UTC (rev 2636) @@ -12,12 +12,30 @@ x <- c(list(x), dotargs) } nmodels <- length(x) - ## No. of observations and check + ## check that input is valid + ## 1. All models must be fitted with the same method + method <- sapply(x, function(z) z$method) + if (!all(method == method[1])) + stop("same ordination method must be used in all models") + else + method <- method[1] + ## 2. Same response + resp <- sapply(x, function(z) deparse(formula(z)[[2]])) + if (!all(resp == resp[1])) + stop("response must be same in all models") + ## 3. Same no. of observations N <- sapply(x, nobs) if (!all(N = N[1])) - stop("models have different numbers of observations") + stop("number of observations must be same in all models") else N <- N[1] + ## 4. Terms must be nested + trms <- lapply(x, function(z) labels(terms(z))) + o <- order(sapply(trms, length)) + for(i in 2:nmodels) + if(!all(trms[[o[i-1]]] %in% trms[[o[i]]])) + stop("models must be nested") + ## Create permutation matrix if it does not exist. FIXME: should ## take arguments for restricted permutation if (length(permutations) == 1) From noreply at r-forge.r-project.org Tue Oct 15 16:01:11 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 15 Oct 2013 16:01:11 +0200 (CEST) Subject: [Vegan-commits] r2637 - in pkg/vegan: R inst Message-ID: <20131015140111.8F4CC185DD8@r-forge.r-project.org> Author: jarioksa Date: 2013-10-15 16:01:11 +0200 (Tue, 15 Oct 2013) New Revision: 2637 Added: pkg/vegan/R/anova.ccalist.R Removed: pkg/vegan/R/permutest.ccalist.R Modified: pkg/vegan/inst/ChangeLog Log: rename permutest.ccalist to anova.ccalist: function returns an 'anova' table Copied: pkg/vegan/R/anova.ccalist.R (from rev 2636, pkg/vegan/R/permutest.ccalist.R) =================================================================== --- pkg/vegan/R/anova.ccalist.R (rev 0) +++ pkg/vegan/R/anova.ccalist.R 2013-10-15 14:01:11 UTC (rev 2637) @@ -0,0 +1,94 @@ +`anova.ccalist` <- + function(object, ..., permutations = 99) +{ + ## Collect cca class objects. FIXME: Eventually this should be in + ## a function that calls permutest.ccalist after collecting model + ## objects from dotargs. + dotargs <- list(...) + if (length(dotargs)) { + isCCA <- sapply(dotargs, function(z) inherits(z, "cca")) + dotargs <- dotargs[isCCA] + if (length(dotargs)) + object <- c(list(object), dotargs) + } + nmodels <- length(object) + ## check that input is valid + ## 1. 
All models must be fitted with the same method + method <- sapply(object, function(z) z$method) + if (!all(method == method[1])) + stop("same ordination method must be used in all models") + else + method <- method[1] + ## 2. Same response + resp <- sapply(object, function(z) deparse(formula(z)[[2]])) + if (!all(resp == resp[1])) + stop("response must be same in all models") + ## 3. Same no. of observations + N <- sapply(object, nobs) + if (!all(N = N[1])) + stop("number of observations must be same in all models") + else + N <- N[1] + ## 4. Terms must be nested + trms <- lapply(object, function(z) labels(terms(z))) + o <- order(sapply(trms, length)) + for(i in 2:nmodels) + if(!all(trms[[o[i-1]]] %in% trms[[o[i]]])) + stop("models must be nested") + + ## Create permutation matrix if it does not exist. FIXME: should + ## take arguments for restricted permutation + if (length(permutations) == 1) + permutations <- shuffleSet(N, permutations) + ## permutations is now matrix + nperm <- nrow(permutations) + ## check + if (ncol(permutations) != N) + stop(gettextf("permutation matrix has %d columns, but you have %d sites", + ncol(nperm), N)) + ## All models are evaluated in permutest.cca with identical + ## permutations so that the differences of single permutations can + ## be used to assess the significance of differences of fitted + ## models. This strictly requires nested models (not checked + ## here): all terms of the smaller model must be included in the + ## larger model. FIXME: should pass arguments to permutest.cca. + mods <- lapply(object, function(z) + permutest(z, permutations = permutations)) + dfs <- sapply(mods, function(z) z$df) + dev <- sapply(mods, function(z) z$chi) + resdf <- dfs[2,] + df <- -diff(resdf) + resdev <- dev[2,] + changedev <- -diff(resdev) + big <- which.min(resdf) + scale <- resdev[big]/resdf[big] + fval <- changedev/df/scale + ## Collect permutation results: denominator of F varies in each + ## permutation. + pscale <- mods[[big]]$den/resdf[big] + ## Numerator of F + pfvals <- sapply(mods, function(z) z$num) + if (is.list(pfvals)) + pfvals <- do.call(cbind, pfvals) + pfvals <- apply(pfvals, 1, diff) + ## dropped to vector? + if (!is.matrix(pfvals)) + pfvals <- matrix(pfvals, nrow=1, ncol=nperm) + pval <- rowSums(sweep(pfvals, 1, fval, ">=")) + pval <- (pval + 1)/(nperm+1) + pfvals <- sweep(pfvals, 1, df, "/") + pfvals <- sweep(pfvals, 2, pscale, "/") + ## collect table + table <- data.frame(resdf, resdev, c(NA, df), + c(NA,changedev), c(NA,fval), c(NA,pval)) + dimnames(table) <- list(1L:nmodels, c("Resid. Df", "Res. Chisq", + "Df", "Chisq", "F", "Pr(>F)")) + ## Collect header information + formulae <- sapply(object, function(z) deparse(formula(z))) + head <- paste0("Permutation tests for ", method, " under ", + mods[[big]]$model, " model\nwith ", nperm, + " permutations\n") + topnote <- paste("Model ", format(1L:nmodels), ": ", formulae, + sep = "", collapse = "\n") + structure(table, heading=c(head,topnote), class = c("anova", "data.frame")) +} Deleted: pkg/vegan/R/permutest.ccalist.R =================================================================== --- pkg/vegan/R/permutest.ccalist.R 2013-10-15 13:52:36 UTC (rev 2636) +++ pkg/vegan/R/permutest.ccalist.R 2013-10-15 14:01:11 UTC (rev 2637) @@ -1,94 +0,0 @@ -`permutest.ccalist` <- - function(x, ..., permutations = 99) -{ - ## Collect cca class objects. FIXME: Eventually this should be in - ## a function that calls permutest.ccalist after collecting model - ## objects from dotargs. - dotargs <- list(...) 
- if (length(dotargs)) { - isCCA <- sapply(dotargs, function(z) inherits(z, "cca")) - dotargs <- dotargs[isCCA] - if (length(dotargs)) - x <- c(list(x), dotargs) - } - nmodels <- length(x) - ## check that input is valid - ## 1. All models must be fitted with the same method - method <- sapply(x, function(z) z$method) - if (!all(method == method[1])) - stop("same ordination method must be used in all models") - else - method <- method[1] - ## 2. Same response - resp <- sapply(x, function(z) deparse(formula(z)[[2]])) - if (!all(resp == resp[1])) - stop("response must be same in all models") - ## 3. Same no. of observations - N <- sapply(x, nobs) - if (!all(N = N[1])) - stop("number of observations must be same in all models") - else - N <- N[1] - ## 4. Terms must be nested - trms <- lapply(x, function(z) labels(terms(z))) - o <- order(sapply(trms, length)) - for(i in 2:nmodels) - if(!all(trms[[o[i-1]]] %in% trms[[o[i]]])) - stop("models must be nested") - - ## Create permutation matrix if it does not exist. FIXME: should - ## take arguments for restricted permutation - if (length(permutations) == 1) - permutations <- shuffleSet(N, permutations) - ## permutations is now matrix - nperm <- nrow(permutations) - ## check - if (ncol(permutations) != N) - stop(gettextf("permutation matrix has %d columns, but you have %d sites", - ncol(nperm), N)) - ## All models are evaluated in permutest.cca with identical - ## permutations so that the differences of single permutations can - ## be used to assess the significance of differences of fitted - ## models. This strictly requires nested models (not checked - ## here): all terms of the smaller model must be included in the - ## larger model. FIXME: should pass arguments to permutest.cca. - mods <- lapply(x, function(z) - permutest(z, permutations = permutations)) - dfs <- sapply(mods, function(z) z$df) - dev <- sapply(mods, function(z) z$chi) - resdf <- dfs[2,] - df <- -diff(resdf) - resdev <- dev[2,] - changedev <- -diff(resdev) - big <- which.min(resdf) - scale <- resdev[big]/resdf[big] - fval <- changedev/df/scale - ## Collect permutation results: denominator of F varies in each - ## permutation. - pscale <- mods[[big]]$den/resdf[big] - ## Numerator of F - pfvals <- sapply(mods, function(z) z$num) - if (is.list(pfvals)) - pfvals <- do.call(cbind, pfvals) - pfvals <- apply(pfvals, 1, diff) - ## dropped to vector? - if (!is.matrix(pfvals)) - pfvals <- matrix(pfvals, nrow=1, ncol=nperm) - pval <- rowSums(sweep(pfvals, 1, fval, ">=")) - pval <- (pval + 1)/(nperm+1) - pfvals <- sweep(pfvals, 1, df, "/") - pfvals <- sweep(pfvals, 2, pscale, "/") - ## collect table - table <- data.frame(resdf, resdev, c(NA, df), - c(NA,changedev), c(NA,fval), c(NA,pval)) - dimnames(table) <- list(1L:nmodels, c("Resid. Df", "Res. 
Chisq", - "Df", "Chisq", "F", "Pr(>F)")) - ## Collect header information - formulae <- sapply(x, function(z) deparse(formula(z))) - head <- paste0("Permutation tests for ", x[[1]]$method, " under ", - mods[[big]]$model, " model\nwith ", nperm, - " permutations\n") - topnote <- paste("Model ", format(1L:nmodels), ": ", formulae, - sep = "", collapse = "\n") - structure(table, heading=c(head,topnote), class = c("anova", "data.frame")) -} Modified: pkg/vegan/inst/ChangeLog =================================================================== --- pkg/vegan/inst/ChangeLog 2013-10-15 13:52:36 UTC (rev 2636) +++ pkg/vegan/inst/ChangeLog 2013-10-15 14:01:11 UTC (rev 2637) @@ -4,17 +4,19 @@ Version 2.1-37 (opened October 14, 2013) - * permutest.cca: added new function permutest.ccalist() to compare - a sequence of models. The function is still experimental ("proof - of the concept") and unexported. If this stays in vegan, it should - eventually be called from anova.cca() or permutest.cca(). This - would bring along a change of API to, say, anova.cca(object, ..., - alpha=...): the dots must follow the first argument which turns of - positional and partial matching of arguments so that the function - can collect the "cca" models. We must decide whether the new - function is worth such a change that can make life harder for - ordinary users. + * anova.cca: added new function anova.ccalist() to compare a + sequence of models. The function is still experimental ("proof of + the concept") and unexported. If this stays in vegan, it should + eventually be called from anova.cca(). This would bring along a + change of API to anova.cca(object, ..., alpha=...): the dots must + follow the first argument which turns of positional and partial + matching of arguments so that the function can collect the "cca" + models. We must decide whether the new function is worth such a + change that can make life harder for ordinary users. + One potential advantage is that the code in anova.ccaby* functions + could be simplifed to a anova.ccabylist() calls. + The function is based on calling permutest.cca for each model with identical permutations. We can then compare the change in model for each permutation and collect the test statistics for From noreply at r-forge.r-project.org Thu Oct 17 09:46:58 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Thu, 17 Oct 2013 09:46:58 +0200 (CEST) Subject: [Vegan-commits] r2638 - pkg/vegan/man Message-ID: <20131017074658.27DF5185025@r-forge.r-project.org> Author: jarioksa Date: 2013-10-17 09:46:57 +0200 (Thu, 17 Oct 2013) New Revision: 2638 Modified: pkg/vegan/man/oecosimu.Rd Log: fix typo in oecosimu.Rd (reported by Jimmy O'Donnell, UCSC) Modified: pkg/vegan/man/oecosimu.Rd =================================================================== --- pkg/vegan/man/oecosimu.Rd 2013-10-15 14:01:11 UTC (rev 2637) +++ pkg/vegan/man/oecosimu.Rd 2013-10-17 07:46:57 UTC (rev 2638) @@ -186,7 +186,7 @@ Function \code{oecosimu} returns an object of class \code{"oecosimu"}. The result object has items \code{statistic} and \code{oecosimu}. The \code{statistic} contains the complete object - returned by \code{nestedfun} for the original data. The + returned by \code{nestfun} for the original data. 
The \code{oecosimu} component contains the following items: \item{statistic}{Observed values of the statistic.} \item{simulated}{Simulated values of the statistic.} From noreply at r-forge.r-project.org Thu Oct 17 12:47:48 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Thu, 17 Oct 2013 12:47:48 +0200 (CEST) Subject: [Vegan-commits] r2639 - in pkg/lmodel2: . inst vignettes Message-ID: <20131017104748.3FDBB184FA5@r-forge.r-project.org> Author: jarioksa Date: 2013-10-17 12:47:47 +0200 (Thu, 17 Oct 2013) New Revision: 2639 Added: pkg/lmodel2/vignettes/ pkg/lmodel2/vignettes/lmodel2.bib pkg/lmodel2/vignettes/mod2user.Rnw Removed: pkg/lmodel2/inst/doc/ Log: move vignettes from inst/doc/ to vignettes/ Copied: pkg/lmodel2/vignettes/lmodel2.bib (from rev 2638, pkg/lmodel2/inst/doc/lmodel2.bib) =================================================================== --- pkg/lmodel2/vignettes/lmodel2.bib (rev 0) +++ pkg/lmodel2/vignettes/lmodel2.bib 2013-10-17 10:47:47 UTC (rev 2639) @@ -0,0 +1,86 @@ + at Book{Legendre-Legendre98, + author = {P. Legendre and L. Legendre}, + title = {Numerical ecology}, + publisher = {Elsevier}, + year = 1998, + number = 20, + series = {Developments in Environmental Modelling}, + address = {Amsterdam}, + edition = {2nd} +} + at Article{AnderLeg99, + author = {M. J. Anderson and P. Legendre}, + title = {An empirical comparison of permutation methods for + tests of partial regression coefficients in a linear + model}, + journal = {Journal of Statistical Computation and Simulation}, + year = 1999, + volume = 62, + pages = {271-303} +} + at Article{Hines.ea97, + author = {A.H. Hines and R. B. Whitlatch and S. F. Thrush and + J. E. Hewitt and V. J. Cummings and P. K. Dayton and + P. Legendre}, + title = { Nonlinear foraging response of a large marine + predator to benthic prey: eagle ray pits and + bivalves in a {N}ew {Z}ealand sandflat}, + journal = {Journal of Experimental Marine Biology and Ecology}, + year = 1997, + volume = 216, + pages = {191-210} +} + at Article{Mesple.ea96, + author = {F. Mespl{\'e} and M. Troussellier and C. Casellas and + P. Legendre}, + title = {Evaluation of simple statistical criteria to qualify + a simulation}, + journal = {Ecological Modelling}, + year = 1996, + volume = 88, + pages = {9-18} +} + at Book{SokalRohlf95, + author = {R. R. Sokal and F. J. Rohlf}, + title = {Biometry: The principles and practice of statistics + in biological research}, + publisher = {W. H. Freeman}, + year = 1995, + edition = {3rd} +} + at Article{Jolicoeur90, + author = {P. Jolicoeur}, + title = {Bivariate allometry: interval estimation of the + slopes of the ordinary and standardized normal major + axes and structural relationship}, + journal = {Journal of Theoretical Biology}, + year = 1990, + volume = 144, + pages = {275-285} +} + at Article{JoliMosi68, + author = {P. Jolicoeur and J. E. Mosiman}, + title = {Intervalles de confiance pour la pente de l'axe + majeur d'une distribution normale bidimensionnelle}, + journal = {Biom{\'e}trie-Praxim{\'e}trie}, + year = 1968, + volume = 9, + pages = {121-140} +} + at Article{McArdle88, + author = {B. McArdle}, + title = {The structural relationship: regression in biology}, + journal = {Canadian Journal of Zoology}, + year = 1988, + volume = 66, + pages = {2329-2339} +} + at Book{Neter.ea96, + author = {J. Neter and M. H. Kutner and C. J. Nachtsheim and + W. Wasserman}, + title = {Applied linear statistical models}, + publisher = {Richad D. 
Irwin Inc.}, + year = 1996, + edition = {4th} +} + Copied: pkg/lmodel2/vignettes/mod2user.Rnw (from rev 2638, pkg/lmodel2/inst/doc/mod2user.Rnw) =================================================================== --- pkg/lmodel2/vignettes/mod2user.Rnw (rev 0) +++ pkg/lmodel2/vignettes/mod2user.Rnw 2013-10-17 10:47:47 UTC (rev 2639) @@ -0,0 +1,750 @@ +% -*- mode: noweb; noweb-default-code-mode: R-mode; -*- +%\VignetteIndexEntry{Model II Regression User Guide} +\documentclass[a4paper,10pt,reqno]{amsart} +\usepackage{ucs} +\usepackage[utf8x]{inputenc} +\usepackage[T1]{fontenc} +\usepackage[authoryear,round]{natbib} +\usepackage{hyperref} +\usepackage{graphicx} +\usepackage{tikz} +\usepackage{sidecap} +\setlength{\captionindent}{0pt} +\usepackage{url} +\usepackage{booktabs} +\usepackage{alltt} +\renewcommand{\floatpagefraction}{0.8} +\usepackage{ae} + +\title{Model II regression user's guide, R edition} + +\author{Pierre Legendre} + +\address{D{\'e}partement de sciences biologiques, Universit{\'e} de +Montr{\'e}al, C.P. 6128, succursale Centre-ville, Montr{\'e}al, +Qu{\'e}bec H3C 3J7, Canada} +\email{Pierre.Legendre at umontreal.ca} + +\date{$ $Id$ $} + +\begin{document} + +\setkeys{Gin}{width=0.55\linewidth} +\SweaveOpts{strip.white=true} +<>= +require(lmodel2) +options(width=72) +figset <- function() par(mar=c(4,4,1,1)+.1) +options(SweaveHooks = list(fig = figset)) +@ + + +\maketitle + +\tableofcontents + +Function \texttt{lmodel2} computes model II simple linear regression +using the following methods: major axis (MA), standard major axis +(SMA), ordinary least squares (OLS), and ranged major axis +(RMA). Information about these methods is available, for instance, in +section 10.3.2 of \citet{Legendre-Legendre98} and in sections 14.13 +and 15.7 of \citet{SokalRohlf95}\footnote{In Sokal and Rohlf + (Biometry, 2nd edition, 1981: 551), the numerical result for MA + regression for the example data set is wrong. The mistake has been + corrected in the 1995 edition.}. Parametric 95\% confidence +intervals are computed for the slope and intercept parameters. A +permutation test is available to determine the significance of the +slopes of MA, OLS and RMA and also for the correlation +coefficient. This function represents an evolution of a +\textsc{Fortran} program written in 2000 and 2001. + +Bartlett's three-group model II regression method, described by the +above mentioned authors, is not computed by the program because it +suffers several drawbacks. Its main handicap is that the regression +lines are not the same depending on whether the grouping (into three +groups) is made based on $x$ or $y$. The regression line is not +guaranteed to pass through the centroid of the scatter of points and +the slope estimator is not symmetric, i.e. the slope of the regression +$y = f(x)$ is not the reciprocal of the slope of the regression $x = +f(y)$. + +Model II regression should be used when the two variables in the +regression equation are random, i.e. not controlled by the +researcher. Model I regression using least squares underestimates the +slope of the linear relationship between the variables when they both +contain error; see example in chapter \ref{sec:exa4} +(p. \pageref{sec:exa4}). Detailed recommendations follow. 
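The underestimation of the slope by OLS when both variables carry error can be illustrated with a short simulation before turning to the recommendations. The sketch below is an illustration only, with made-up data and object names that are not part of the vignette sources; equal error variances are added to both axes, so MA is the appropriate model II method and its slope estimate stays close to the true value of 1, while the OLS slope is attenuated towards zero (expected value about 0.8 with these variances).

<<>>=
## both variables observed with error around a true slope of 1
set.seed(1)
x.true <- rnorm(100, mean = 10, sd = 2)
x.obs  <- x.true + rnorm(100, sd = 1)          # error added to x
y.obs  <- x.true + rnorm(100, sd = 1)          # error added to y
dat <- data.frame(x.obs, y.obs)
coef(lm(y.obs ~ x.obs, data = dat))[2]         # OLS slope, biased towards 0
lmodel2(y.obs ~ x.obs, data = dat, nperm = 99) # MA slope close to 1
@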
+ +\section{Recommendations on the use of model II regression methods} + +Considering the results of simulation studies, +\citet{Legendre-Legendre98} offer the following recommendations to +ecologists who have to estimate the parameters of the functional +linear relationships between variables that are random and measured +with error (Table \ref{tab:recommend}). + +\begin{table} +\label{tab:recommend} + \caption{Application of the model II regression methods. The numbers + in the left-hand column refer to the corresponding paragraphs in + the text.} +\begin{Small} +\begin{tabular}{lllc} +\toprule +Par.& Method &Conditions of application& Test possible\\ +\midrule +1 &OLS &Error on $y \gg$ error on $x$ &Yes\\ +3 &MA& Distribution is bivariate normal & Yes \\ +& &Variables are in the same physical units or dimensionless \\ +& &Variance of error about the same for $x$ and $y$ \\ + +4 & & +Distribution is bivariate normal\\ +& &Error variance on each axis proportional to variance of\\ +& &corresponding variable \\ + +4a & RMA & +Check scatter diagram: no outlier & Yes\\ +4b &SMA& Correlation $r$ is significant& No\\ +5 &OLS& Distribution is not bivariate normal& Yes\\ +& &Relationship between $x$ and $y$ is linear\\ +6 &OLS& To compute forecasted (fitted) or predicted $y$ values& Yes\\ +& &(Regression equation and confidence intervals are irrelevant) \\ +7& MA& To compare observations to model predictions &Yes\\ +\bottomrule +\end{tabular} +\end{Small} +\end{table} + +\begin{enumerate} +\item If the magnitude of the random variation (i.e. the error + variance\footnote{Contrary to the sample variance, the error + variance on $x$ or $y$ cannot be estimated from the data. An + estimate can only be made from knowledge of the way the variables + were measured.}) on the response variable $y$ is much larger + (i.e. more than three times) than that on the explanatory variable + $x$, use OLS. Otherwise, proceed as follows. +\item Check whether the data are approximately bivariate normal, + either by looking at a scatter diagram or by performing a formal + test of significance. If not, attempt transformations to render them + bivariate normal. For data that are or can be made to be reasonably + bivariate normal, consider recommendations 3 and 4. If not, see + recommendation 5. +\item For bivariate normal data, use major axis (MA) regression if + both variables are expressed in the same physical units + (untransformed variables that were originally measured in the same + units) or are dimensionless (e.g. log-transformed variables), if it + can reasonably be assumed that the error variances of the variables + are approximately equal. + + When no information is available on the ratio of the error variances + and there is no reason to believe that it would differ from 1, MA + may be used provided that the results are interpreted with + caution. MA produces unbiased slope estimates and accurate + confidence intervals \citep{Jolicoeur90}. + + MA may also be used with dimensionally heterogeneous variables when + the purpose of the analysis is (1) to compare the slopes of the + relationships between the same two variables measured under + different conditions (e.g. at two or more sampling sites), or (2) to + test the hypothesis that the major axis does not significantly + differ from a value given by hypothesis (e.g. the relationship $E = + b_1 m$ where, according to the famous equation of Einstein, $b_1 = + c^2$, $c$ being the speed of light in vacuum). 
+ +\item For bivariate normal data, if MA cannot be used because the + variables are not expressed in the same physical units or the error + variances on the two axes differ, two alternative methods are + available to estimate the parameters of the functional linear + relationship if it can reasonably be assumed that the error variance + on each axis is proportional to the variance of the corresponding + variable, i.e. (the error variance of $y$ / the sample variance of + $y$) (the error variance of $x$ / the sample variance of $x$). This + condition is often met with counts (e.g. number of plants or + animals) or log-transformed data \citep{McArdle88}. + +\begin{enumerate} +\item Ranged major axis regression (RMA) can be used. The method is + described below. Prior to RMA, one should check for the presence of + outliers, using a scatter diagram of the objects. +\item Standard major axis regression (SMA) can be used. One should + first test the significance of the correlation coefficient ($r$) to + determine if the hypothesis of a relationship is supported. No SMA + regression equation should be computed when this condition is not + met. + + This remains a less-than-ideal solution since SMA slope estimates + cannot be tested for significance. Confidence intervals should also + be used with caution: simulations have shown that, as the slope + departs from $\pm 1$, the SMA slope estimate is increasingly biased + and the confidence interval includes the true value less and less + often. Even when the slope is near $\pm 1$ (e.g. example \S + \ref{sec:exa5}), the confidence interval is too narrow if $n$ is + very small or if the correlation is weak. +\end{enumerate} + +\item If the distribution is not bivariate normal and the data cannot + be transformed to satisfy that condition (e.g. if the distribution + possesses two or several modes), one should wonder whether the slope + of a regression line is really an adequate model to describe the + functional relationship between the two variables. Since the + distribution is not bivariate normal, there seems little reason to + apply models such as MA, SMA or RMA, which primarily describe the + first principal component of a bivariate normal distribution. So, + (1) if the relationship is linear, OLS is recommended to estimate + the parameters of the regression line. The significance of the slope + should be tested by permutation, however, because the distributional + assumptions of parametric testing are not satisfied. (2) If a + straight line is not an appropriate model, polynomial or nonlinear + regression should be considered. + +\item When the purpose of the study is not to estimate the parameters + of a functional relationship, but simply to forecast or predict + values of $y$ for given $x$'s, use OLS in all cases. OLS is the + only method that minimizes the squared residuals in y. The OLS + regression line itself is meaningless. Do not use the standard error + and confidence bands, however, unless $x$ is known to be free of + error \citep[p. 545, Table 14.3]{SokalRohlf95}; this warning applies + in particular to the 95\% confidence intervals computed for OLS by + this program. +\item Observations may be compared to the predictions of a + statistical or deterministic model (e.g. simulation model) in order + to assess the quality of the model. If the model contains random + variables measured with error, use MA for the comparison since + observations and model predictions should be in the same units. 
+ + If the model fits the data well, the slope is expected to be $1$ and + the intercept $0$. A slope that significantly differs from $1$ + indicates a difference between observed and simulated values which + is proportional to the observed values. For relative-scale + variables, an intercept which significantly differs from $0$ + suggests the existence of a systematic difference between + observations and simulations \citep{Mesple.ea96}. + +\item With all methods, the confidence intervals are large when n is + small; they become smaller as n goes up to about 60, after which + they change much more slowly. Model II regression should ideally be + applied to data sets containing 60 observations or more. Some of the + examples presented below have fewer observations; they are only + presented for illustration. +\end{enumerate} + +\section{Ranged major axis regression} + +Ranged major axis regression (RMA) is described in +\citep[511--512]{Legendre-Legendre98}. It is computed as follows: + +\begin{enumerate} +\item Transform the $y$ and $x$ variables into $y'$ and $x'$, + respectively, whose range is 1. Two formulas are available for + ranging, depending on the nature of the variables: + \begin{itemize} + \item For variables whose variation is expressed relative to an + arbitrary zero (interval-scale variables, e.g. temperature in + $^{\circ}$C), the formula for ranging is: +\begin{equation} +\label{eq:range1} +y'_i = \frac{y_i - y_{\min}}{y_{\max} - y_{\min}} \quad \text{or} \quad +x'_i = \frac{x_i - x_{\min}}{x_{\max}-x_{\min}} +\end{equation} + +\item For variables whose variation is expressed relative to a true + zero value (ratio-scale or relative-scale variables, e.g. species + abundances, or temperature expressed in $^{\circ}$K), the recommended + formula for ranging assumes a minimum value of $0$; + eq. \ref{eq:range1} reduces to: +\begin{equation} +y'_i = \frac{y_i}{y_{\max}} \quad \text{or} \quad +x'_i = \frac{x_i}{x_{\max}} +\end{equation} +\end{itemize} +\item Compute MA regression between the ranged variables $y'$ and + $x'$. Test the significance of the slope estimate by permutation if + needed. +\item Back-transform the estimated slope, as well as its confidence + interval limits, to the original units by multiplying them by the + ratio of the ranges: +\begin{equation} +b_1 = b'_1 \frac{y_{\max} - y_{\min}}{x_{\max} - x_{\min}} +\end{equation} + +\item Recompute the intercept $b_0$ and its confidence interval + limits, using the original centroid ($\bar x$, $\bar y$) of the + scatter of points and the estimates of the slope $b_1$ and its + confidence limits: +\begin{equation} +b_0= \bar y - b_1 \bar x +\end{equation} +\end{enumerate} + +The RMA slope estimator has several desirable properties when the +variables $x$ and $y$ are not expressed in the same units or when the +error variances on the two axes differ. (1) The slope estimator +scales proportionally to the units of the two variables: the position +of the regression line in the scatter of points remains the same +irrespective of any linear change of scale of the variables. (2) The +estimator is sensitive to the covariance of the variables; this is not +the case for SMA. (3) Finally, and contrary to SMA, it is possible to +test the hypothesis that an RMA slope estimate is equal to a stated +value, in particular 0 or 1. As in MA, the test may be done either by +permutation, or by comparing the confidence interval of the slope to +the hypothetical value of interest. 
Thus, whenever MA regression +cannot be used because of incommensurable units or because the error +variances on the two axes differ, RMA regression can be used. There is +no reason, however, to use RMA when MA is justified. + +Prior to RMA, one should check for the presence of outliers, using a +scatter diagram of the objects. RMA should not be used in the presence +of outliers because they cause important changes to the estimates of +the ranges of the variables. Outliers that are not aligned fairly well +with the dispersion ellipse of the objects may have an undesirable +influence on the slope estimate. The identification and treatment of +outliers is discussed in \citep[Section 13.4]{SokalRohlf95}. Outliers +may, in some cases, be eliminated from the data set, or they may be +subjected to a winsorizing procedure described by these authors. + +\section{Input file} + +A data frame with objects in rows and variables in columns. + +\section{Output file} + +The output file obtained by \texttt{print.lmodel2} contains the +following results: + +\begin{enumerate} +\item The call to the function. +\item General regression statistics: number of objects ($n$), + correlation coefficient ($r$), coefficient of determination ($r^2$) + of the OLS regression, parametric $P$-values (2-tailed, one-tailed) + for the test of the correlation coefficient and the OLS slope, angle + between the two OLS regression lines, \texttt{lm(y \~\ x)} and + \texttt{lm(x \~\ y)}. +\item A table with rows corresponding to the four regression + methods. Column 1 gives the method name, followed by the intercept + and slope estimates, the angle between the regression line and + the abscissa, and the permutational probability (one-tailed, for the + tail corresponding to the sign of the slope estimate). + +\item A table with rows corresponding to the four regression + methods. The method name is followed by the parametric 95\% + confidence intervals (2.5 and 97.5 percentiles) for the intercept + and slope estimates. +\item The eigenvalues of the bivariate dispersion, computed during + major axis regression, and the $H$ statistic used for computing the + confidence interval of the major axis slope (notation following + \citealt{SokalRohlf95}). + + For the slopes of MA, OLS and RMA, the permutation tests are carried + out using the slope estimates $b$ themselves as the reference + statistics. In OLS simple linear regression, a permutation test of + significance based on the $r$ statistic is equivalent to a + permutation test based on the pivotal $t$-statistic associated with + $b_{OLS}$ \citep[21]{Legendre-Legendre98}. On the other hand, across + the permutations, the slope estimate ($b_{OLS}$) differs from $r$ by a + constant ($s_y/s_x$) since $b_{OLS} = r_{xy} s_y/s_x$, so that $b_{OLS}$ and + $r$ are equivalent statistics for permutation testing. As a + consequence, a permutation test of $b_{OLS}$ is equivalent to a + permutation test carried out using the pivotal $t$-statistic + associated with $b_{OLS}$. This is not the case in multiple linear + regression, however, as shown by \citet{AnderLeg99}. +\end{enumerate} + +If the objective is simply to assess the relationship between the two +variables under study, one can simply compute the correlation +coefficient r and test its significance. A parametric test can be used +when the assumption of binormality can safely be assumed to hold, or a +permutation test when it cannot. 
+ +For the intercept of OLS, the confidence interval is computed using +the standard formulas found in textbooks of statistics; results are +identical to those of standard statistical software. No such formula, +providing correct $\alpha$-coverage, is known for the other three +methods. In the program, the confidence intervals for the intercepts +of MA, SMA and RMA are computed by projecting the bounds of the +confidence intervals of the slopes onto the ordinate; this results in +an underestimation of these confidence intervals. + +\begin{figure} + \centering +\resizebox{\linewidth}{!}{ + \begin{tikzpicture} + \draw[->, very thick] (0,0) -- (6,0); + \draw[<-, very thick] (3,3) -- (3,-3); + \draw[->, very thick] (8,0) -- (14, 0); + \draw[->, very thick] (11,-3) -- (11,3); + \draw[->, thick] (6.8,-2) -- (8.6,-2); + \draw[<-, thick] (7.7,-1.1) -- (7.7,-2.8); + \draw[font=\footnotesize] (8.2,-1.6) node {I}; + \draw[font=\footnotesize] (7.2,-1.6) node {II}; + \draw[font=\footnotesize] (7.2,-2.4) node {III}; + \draw[font=\footnotesize] (8.2,-2.4) node {IV}; + \draw[dashed] (3,0) -- (3+3/2.75,3) node[above]{$b_{1 \inf} = 2.75$}; + \draw (3,0) -- (3-3/2.75,-3) node[at start,sloped,above]{Lower bound + of CI}; + \draw (3,0) -- (3+3/5.67,-3) node[midway,sloped,above]{MA + regression line} node[below]{$b_1 = -5.67$}; + \draw (3,0) -- (3+2.7/1.19, -2.7) node[midway,sloped,above]{Upper + bound of CI} node[below]{$b_{1 \sup}=-1.19$}; + \draw (11,0) -- (11-3/5.67,3); + \draw[dashed] (11,0) -- (11+3/5.67,-3) + node[at start,sloped,below]{Upper bound of CI} node[below]{$b_{1 \sup} + = -5.67$}; + \draw (11,0) -- (11+3/2.75, 3) node[near end,sloped,above]{MA + regression line} node[right]{$b_1 = 2.75$}; + \draw (11,0) -- (11+2/0.84, 2) node[near end,sloped,above]{Lower + bound of CI} node[right]{$b_{1 \inf} =0.84$}; + \draw[font=\large] (0,3) node{(a)}; + \draw[font=\large] (8,3) node{(b)}; + \end{tikzpicture} +} + \caption{(a) If a MA regression line has the lower bound of its + confidence interval (C.I.) in quadrant III, this bound has a + positive slope ($+2.75$ in example). (b) Likewise, if a MA + regression line has the upper bound of its confidence interval in + quadrant II, this bound has a negative slope ($-5.67$ in example).} + \label{fig:rma} +\end{figure} +In MA or RMA regression, the bounds of the confidence interval (C.I.) +of the slope may, on occasions, lie outside quadrants I and IV of the +plane centred on the centroid of the bivariate distribution. When the +\emph{lower bound} of the confidence interval corresponds to a line in +quadrant III (Fig. \ref{fig:rma}a), it has a positive slope; the RMA +regression line of example in chapter \ref{sec:exa5} +(p. \pageref{sec:exa5}) provides an example of this +phenomenon. Likewise, when the \emph{upper bound} of the confidence +interval corresponds to a line in quadrant II (Fig. \ref{fig:rma}b), +it has a negative slope. In other instances, the confidence interval +of the slope may occupy all 360$^{\circ}$ of the plane, which results +in it having no bounds. The bounds are then noted 0.00000; see chapter +\ref{sec:exa5} (p. \pageref{sec:exa5}). + +In SMA or OLS, confidence interval bounds cannot lie outside quadrants +I and IV. In SMA, the regression line always lies at a $+45^{\circ}$ +or $-45^{\circ}$ angle in the space of the standardized variables; the +SMA slope is a back-transformation of $\pm45^{\circ}$ to the units of +the original variables. 
In OLS, the slope is always at an angle closer to zero than the major
+axis of the dispersion ellipse of the points, i.e. it always
+underestimates the MA slope in absolute value.
+
+
+\section{Examples}
+\subsection{Surgical unit data}
+\label{sec:exa1}
+
+\subsubsection{Input data}
+
+This example compares observations to the values forecasted by a
+model. A hospital surgical unit wanted to forecast survival of
+patients undergoing a particular type of liver surgery. Four
+explanatory variables were measured on patients. The response variable
+$Y$ was survival time, which was $\log_{10}$-transformed. The data are
+described in detail in Section 8.2 of \citet{Neter.ea96} who also
+provide the original data sets. The data were divided into two groups
+of 54 patients. The first group was used to construct forecasting
+models whereas the second group was reserved for model validation.
+Several regression models were studied. One of them, which uses
+variables $X_3$ = enzyme function test score and $X_4$ = liver
+function test score, is used as the basis for the present example. The
+multiple regression equation is the following:
+
+\begin{equation*}
+\hat Y = 1.388778 + 0.005653 X_3 + 0.139015 X_4
+\end{equation*}
+This equation was applied to the second data set (also 54 patients) to
+produce forecasted survival times. In the present example, these
+values are compared to the observed survival times. Fig. \ref{fig:ex1}
+shows the scatter diagram with $\log_{10}$(observed survival time) on
+the abscissa and forecasted values on the ordinate. The MA regression
+line is shown with its 95\% confidence region. The $45^{\circ}$ line,
+which would correspond to perfect forecasting, is also shown for
+comparison.
+
+\subsubsection{Output file}
+
+MA, SMA and OLS equations, 95\% C.I., and tests of significance were
+obtained with the following R commands. The RMA method, which is
+optional, was not computed since MA is the only appropriate method in
+this example.
+
+<<>>=
+data(mod2ex1)
+Ex1.res <- lmodel2(Predicted_by_model ~ Survival, data=mod2ex1, nperm=99)
+Ex1.res
+@
+\begin{SCfigure}
+<<fig=TRUE>>=
+plot(Ex1.res, centro = TRUE, xlab="log(observed survival time)", ylab="Forecasted")
+abline(diff(colMeans(mod2ex1)), 1, lty=2)
+legend("topleft", c("MA regression", "Confidence limits", "45 degree line"), col=c("red", "grey", "black"), lty=c(1,1,2))
+@
+\caption{Scatter diagram of the Example 1 data showing the major
+  axis (MA) regression line and its 95\% confidence region. The
+  45$^{\circ}$ line is drawn for reference. The cross indicates the
+  centroid of the bivariate distribution. The MA regression line
+  passes through this centroid.}
+\label{fig:ex1}
+\end{SCfigure}
+
+The interesting aspect of the MA regression equation in this example
+is that the regression line is not parallel to the 45$^{\circ}$ line
+drawn in Fig. \ref{fig:ex1}. The 45$^{\circ}$ line is not included in
+the 95\% confidence interval of the MA slope, which goes from
+$\tan^{-1}(0.62166) = 31.87^{\circ}$ to $\tan^{-1}(0.89456) =
+41.81^{\circ}$. The figure shows that the forecasting equation
+overestimated survival below the mean and underestimated it above the
+mean. The OLS regression line, which is often (erroneously) used by
+researchers for comparisons of this type, would show an even greater
+discrepancy (33.3$^{\circ}$ angle) from the 45$^{\circ}$ line,
+compared to the MA regression line (36.8$^{\circ}$ angle).
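As a quick aside (not part of the vignette code), the angles quoted above are simply the slope values passed through $\tan^{-1}$ and converted to degrees; the same conversion underlies the angle column reported by \texttt{lmodel2}. A one-line check, using the confidence-limit values printed above:
<<eval=FALSE>>=
## MA slope 95% confidence limits converted to angles in degrees
atan(c(0.62166, 0.89456)) * 180/pi   # about 31.87 and 41.81 degrees
@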
+
+\subsection{Eagle rays and \emph{Macomona} bivalves}
+\label{sec:exa2}
+
+\subsubsection{Input data}
+
+The following table presents observations at 20 sites from a study on
+predator-prey relationships \citep{Hines.ea97}. $y$ is the number of
+bivalves (\emph{Macomona liliana}) larger than 15 mm in size, found in
+0.25 m$^2$ quadrats of sediment; $x$ is the number of sediment
+disturbance pits of a predator, the eagle ray (\emph{Myliobatis
+  tenuicaudatus}), found within circles of a 15 m radius around the
+bivalve quadrats.
+
+The variables $x$ and $y$ are expressed in the same physical units and
+are estimated with sampling error, and their distribution is
+approximately bivariate normal. The error variance is not the same for
+$x$ and $y$ but, since the data are animal counts, it seems reasonable
+to assume that the error variance along each axis is proportional to
+the variance of the corresponding variable. The correlation is
+significant: $r = 0.86$, $p < 0.001$. RMA and SMA are thus appropriate
+for this data set; MA and OLS are not. Fig. \ref{fig:ex2} shows the
+scatter diagram. The various regression lines are presented to allow
+their comparison.
+
+\subsubsection{Output file}
+
+MA, SMA, OLS and RMA regression equations, confidence intervals, and
+tests of significance were obtained with the following R commands.
+That the 95\% confidence intervals of the SMA and RMA intercepts do
+not include 0 may be due to different reasons: (1) the relationship
+may not be perfectly linear; (2) the C.I. of the intercepts are
+underestimated; (3) the predators (eagle rays) may not be attracted to
+sampling locations containing few prey (bivalves).
+<<>>=
+data(mod2ex2)
+Ex2.res <- lmodel2(Prey ~ Predators, data=mod2ex2, range.y="relative",
+                   range.x="relative", nperm=99)
+Ex2.res
+@
+\begin{SCfigure}
+<<fig=TRUE>>=
+plot(Ex2.res, confid=FALSE, xlab="Eagle rays (predators)", ylab="Bivalves (prey)", main = "", centr=TRUE)
+lines(Ex2.res, "OLS", col=1, confid=FALSE)
+lines(Ex2.res, "SMA", col=3, confid=FALSE)
+lines(Ex2.res, "RMA", col=4, confid=FALSE)
+legend("topleft", c("OLS", "MA", "SMA", "RMA"), col=1:4, lty=1)
+@
+\caption{Scatter diagram of the Example 2 data (number of bivalves as
+  a function of the number of eagle rays) showing the major axis (MA),
+  standard major axis (SMA), ordinary least-squares (OLS) and
+  ranged major axis (RMA) regression lines. SMA and RMA are the
+  appropriate regression lines in this example. The cross indicates
+  the centroid of the bivariate distribution. The four regression
+  lines pass through this centroid.}
+\label{fig:ex2}
+\end{SCfigure}
+
+[1] In this table of the output file, the rows correspond,
+respectively, to the MA, OLS and RMA slopes and to the coefficient of
+correlation $r$ (\texttt{Corr}). \texttt{Stat.} is the value of the
+statistic being tested for significance. As explained in the
+\emph{Output file} section, the statistic actually used by the program
+for the test of the MA slope, in this example, is the inverse of the
+$b_{MA}$ slope estimate ($1/3.46591 = 0.28852$) because the reference
+value of the statistic in this permutation test must not exceed 1.
+One-tailed probabilities (\texttt{One-tailed p}) are computed in the
+direction of the sign of the coefficient. For a one-tailed test in the
+upper tail (i.e. for a coefficient with a positive sign),
+$p =$ (EQ + GT)/(Number of permutations + 1). For a test in the lower
+tail (i.e. for a coefficient with a negative sign),
+$p =$ (LT + EQ)/(Number of permutations + 1), where
+\begin{itemize}
+\item LT is the number of values under permutation that are smaller
+  than the reference value;
+\item EQ is the number of values under permutation that are equal to
+  the reference value of the statistic, plus 1 for the reference value
+  itself;
+\item GT is the number of values under permutation that are greater
+  than the reference value.
+\end{itemize}
+
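Expressed in code, the rule above amounts to counting the permuted values that are at least as extreme as the reference statistic. The following minimal sketch is only an illustration (the function name and arguments are hypothetical, not part of lmodel2), assuming \texttt{stat.ref} is the reference value and \texttt{stat.perm} a vector of values obtained under permutation:
<<eval=FALSE>>=
perm.pval <- function(stat.ref, stat.perm, tail = c("upper", "lower")) {
    tail <- match.arg(tail)
    nperm <- length(stat.perm)
    ## the + 1 counts the reference value itself, as in the EQ definition above
    if (tail == "upper")              # p = (EQ + GT)/(nperm + 1)
        (sum(stat.perm >= stat.ref) + 1)/(nperm + 1)
    else                              # p = (LT + EQ)/(nperm + 1)
        (sum(stat.perm <= stat.ref) + 1)/(nperm + 1)
}
@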
+\subsection{Cabezon spawning}%
+\label{sec:exa3}%
+\subsubsection{Input data}
+
+The following table presents data used by \citet[Box
+14.12]{SokalRohlf95} to illustrate model II regression analysis. They
+concern the mass ($x$) of unspawned females of a California fish, the
+cabezon (\emph{Scorpaenichthys marmoratus}), and the number of eggs
+they subsequently produced ($y$). One may be interested in estimating
+the functional equation relating the number of eggs to the mass of
+females before spawning. The physical units of the variables are as in
+the table published by \citet[546]{SokalRohlf95}.
+
+Since the variables are in different physical units and are estimated
+with error, and their distribution is approximately bivariate normal,
+RMA and SMA are appropriate for this example; MA is inappropriate. The
+OLS regression line is meaningless; in model II regression, OLS should
+only be used for forecasting or prediction. It is plotted in
+Fig. \ref{fig:ex3} only to allow comparison.
+
+The RMA and SMA regression lines are nearly indistinguishable in this
+example. The slope of RMA can be tested for significance ($H_0$:
+$b_{RMA} = 0$), however, whereas the SMA slope cannot. The 95\%
+confidence intervals of the intercepts of RMA and SMA, although
+underestimated, include the value 0, as expected if a linear model
+applies to the data: a female with a mass of 0 is expected to produce
+no eggs.
+
+Another interesting property of RMA and SMA is that their estimates of
+slope and intercept change proportionally to changes in the units of
+measurement. One can easily verify that by changing the decimal places
+in the Example 2 data file and recomputing the regression
+equations. RMA and SMA share this property with OLS. MA regression
+does not have this property; this is why it should only be used with
+variables that are in the same physical units, as those of Example 1.
+
+\subsubsection{Output file}
+
[TRUNCATED]

To get the complete diff run: svnlook diff /svnroot/vegan -r 2639

From noreply at r-forge.r-project.org Thu Oct 17 12:54:27 2013
From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org)
Date: Thu, 17 Oct 2013 12:54:27 +0200 (CEST)
Subject: [Vegan-commits] r2640 - in pkg/lmodel2: . 
R Message-ID: <20131017105427.47A11184FA5@r-forge.r-project.org> Author: jarioksa Date: 2013-10-17 12:54:26 +0200 (Thu, 17 Oct 2013) New Revision: 2640 Modified: pkg/lmodel2/DESCRIPTION pkg/lmodel2/R/lmodel2.R Log: remove superfluous blanks from column headers (suggested by Ben Harrison) Modified: pkg/lmodel2/DESCRIPTION =================================================================== --- pkg/lmodel2/DESCRIPTION 2013-10-17 10:47:47 UTC (rev 2639) +++ pkg/lmodel2/DESCRIPTION 2013-10-17 10:54:26 UTC (rev 2640) @@ -1,8 +1,8 @@ Package: lmodel2 Type: Package Title: Model II Regression -Version: 1.7-1 -Date: 2013-01-08 +Version: 1.7-2 +Date: 2013-10-17 Author: Pierre Legendre Maintainer: Jari Oksanen Description: Computes model II simple linear regression using ordinary Modified: pkg/lmodel2/R/lmodel2.R =================================================================== --- pkg/lmodel2/R/lmodel2.R 2013-10-17 10:47:47 UTC (rev 2639) +++ pkg/lmodel2/R/lmodel2.R 2013-10-17 10:54:26 UTC (rev 2640) @@ -1,5 +1,5 @@ `lmodel2` <- -function(formula, data = NULL, range.y = NULL, range.x = NULL, + function(formula, data = NULL, range.y = NULL, range.x = NULL, nperm = 0) ### ### Bivariate model II regression. @@ -209,10 +209,10 @@ H <- NA reg.res <- data.frame(met,res1,res2,res3,res8) CI.res <- data.frame(met,res4,res5,res6,res7) - colnames(reg.res) <- c("Method","Intercept"," Slope", - " Angle (degrees)"," P-perm (1-tailed)") - colnames(CI.res) <- c("Method"," 2.5%-Intercept","97.5%-Intercept", - " 2.5%-Slope","97.5%-Slope") + colnames(reg.res) <- c("Method","Intercept","Slope", + "Angle (degrees)","P-perm (1-tailed)") + colnames(CI.res) <- c("Method","2.5%-Intercept","97.5%-Intercept", + "2.5%-Slope","97.5%-Slope") out <- list(y=y, x=x, regression.results=reg.res, confidence.intervals=CI.res, From noreply at r-forge.r-project.org Thu Oct 31 21:24:59 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Thu, 31 Oct 2013 21:24:59 +0100 (CET) Subject: [Vegan-commits] r2641 - in pkg/vegan: R inst man Message-ID: <20131031202459.EA97D18622A@r-forge.r-project.org> Author: jarioksa Date: 2013-10-31 21:24:59 +0100 (Thu, 31 Oct 2013) New Revision: 2641 Modified: pkg/vegan/R/renyiaccum.R pkg/vegan/R/specaccum.R pkg/vegan/R/tsallisaccum.R pkg/vegan/inst/ChangeLog pkg/vegan/man/renyi.Rd pkg/vegan/man/specaccum.Rd pkg/vegan/man/tsallis.Rd Log: spec/renyi/tsallis accum gained 'subset' Modified: pkg/vegan/R/renyiaccum.R =================================================================== --- pkg/vegan/R/renyiaccum.R 2013-10-17 10:54:26 UTC (rev 2640) +++ pkg/vegan/R/renyiaccum.R 2013-10-31 20:24:59 UTC (rev 2641) @@ -1,6 +1,9 @@ `renyiaccum` <- -function(x, scales=c(0, 0.5, 1, 2, 4, Inf), permutations = 100, raw = FALSE, ...) -{ +function(x, scales=c(0, 0.5, 1, 2, 4, Inf), permutations = 100, + raw = FALSE, subset, ...) +{ + if (!missing(subset)) + x <- subset(x, subset) x <- as.matrix(x) n <- nrow(x) p <- ncol(x) Modified: pkg/vegan/R/specaccum.R =================================================================== --- pkg/vegan/R/specaccum.R 2013-10-17 10:54:26 UTC (rev 2640) +++ pkg/vegan/R/specaccum.R 2013-10-31 20:24:59 UTC (rev 2641) @@ -1,11 +1,15 @@ `specaccum` <- function (comm, method = "exact", permutations = 100, conditioned=TRUE, - gamma="jack1", w = NULL, ...) + gamma="jack1", w = NULL, subset, ...) 
{ METHODS <- c("collector", "random", "exact", "rarefaction", "coleman") method <- match.arg(method, METHODS) if (!is.null(w) && !(method %in% c("random", "collector"))) stop(gettextf("weights 'w' can be only used with methods 'random' and 'collector'")) + if (!missing(subset)) { + comm <- subset(comm, subset) + w <- subset(w, subset) + } x <- comm x <- as.matrix(x) x <- x[, colSums(x) > 0, drop=FALSE] Modified: pkg/vegan/R/tsallisaccum.R =================================================================== --- pkg/vegan/R/tsallisaccum.R 2013-10-17 10:54:26 UTC (rev 2640) +++ pkg/vegan/R/tsallisaccum.R 2013-10-31 20:24:59 UTC (rev 2641) @@ -1,6 +1,9 @@ tsallisaccum <- -function (x, scales = seq(0, 2, 0.2), permutations = 100, raw = FALSE, ...) +function (x, scales = seq(0, 2, 0.2), permutations = 100, raw = FALSE, + subset, ...) { + if (!missing(subset)) + x <- subset(x, subset) x <- as.matrix(x) n <- nrow(x) p <- ncol(x) Modified: pkg/vegan/inst/ChangeLog =================================================================== --- pkg/vegan/inst/ChangeLog 2013-10-17 10:54:26 UTC (rev 2640) +++ pkg/vegan/inst/ChangeLog 2013-10-31 20:24:59 UTC (rev 2641) @@ -24,6 +24,9 @@ that residual deviance certainly decreases in bigger model (testing theory requires nesting, but this is commonly violated by users: here nesting is necessary). + + * specaccum, renyiaccum, tsallisaccum: gained argument to select a + 'subset' of sites (looks like my student would need them). Version 2.1-36 (closed October 14, 2013) Modified: pkg/vegan/man/renyi.Rd =================================================================== --- pkg/vegan/man/renyi.Rd 2013-10-17 10:54:26 UTC (rev 2640) +++ pkg/vegan/man/renyi.Rd 2013-10-31 20:24:59 UTC (rev 2641) @@ -14,16 +14,17 @@ \code{renyiaccum} finds these statistics with accumulating sites. } \usage{ -renyi(x, scales = c(0, 0.25, 0.5, 1, 2, 4, 8, 16, 32, 64, Inf), hill = FALSE) +renyi(x, scales = c(0, 0.25, 0.5, 1, 2, 4, 8, 16, 32, 64, Inf), + hill = FALSE) \method{plot}{renyi}(x, ...) renyiaccum(x, scales = c(0, 0.5, 1, 2, 4, Inf), permutations = 100, - raw = FALSE, ...) -\method{plot}{renyiaccum} (x, what = c("mean", "Qnt 0.025", "Qnt 0.975"), type = "l", + raw = FALSE, subset, ...) +\method{plot}{renyiaccum}(x, what = c("mean", "Qnt 0.025", "Qnt 0.975"), type = "l", ...) -\method{persp}{renyiaccum} (x, theta = 220, col = heat.colors(100), zlim, ...) +\method{persp}{renyiaccum}(x, theta = 220, col = heat.colors(100), zlim, ...) rgl.renyiaccum(x, rgl.height = 0.2, ...) } -%- maybe also 'usage' for other objects documented here. + \arguments{ \item{x}{Community data matrix or plotting object. } \item{scales}{Scales of \enc{R?nyi}{Renyi} diversity.} @@ -33,6 +34,8 @@ \item{raw}{if \code{FALSE} then return summary statistics of permutations, and if \code{TRUE} then returns the individual permutations.} + \item{subset}{logical expression indicating sites (rows) to keep: missing + values are taken as \code{FALSE}.} \item{what}{Items to be plotted.} \item{type}{Type of plot, where \code{type = "l"} means lines.} \item{theta}{Angle defining the viewing direction (azimuthal) in Modified: pkg/vegan/man/specaccum.Rd =================================================================== --- pkg/vegan/man/specaccum.Rd 2013-10-17 10:54:26 UTC (rev 2640) +++ pkg/vegan/man/specaccum.Rd 2013-10-31 20:24:59 UTC (rev 2641) @@ -19,7 +19,7 @@ } \usage{ specaccum(comm, method = "exact", permutations = 100, - conditioned =TRUE, gamma = "jack1", w = NULL, ...) 
+ conditioned =TRUE, gamma = "jack1", w = NULL, subset, ...) \method{plot}{specaccum}(x, add = FALSE, random = FALSE, ci = 2, ci.type = c("bar", "line", "polygon"), col = par("fg"), ci.col = col, ci.lty = 1, xlab, ylab = x$method, ylim, @@ -48,6 +48,8 @@ \item{gamma}{Method for estimating the total extrapolated number of species in the survey area by function \code{\link{specpool}}} \item{w}{Weights giving the sampling effort.} + \item{subset}{logical expression indicating sites (rows) to keep: missing + values are taken as \code{FALSE}.} \item{x}{A \code{specaccum} result object} \item{add}{Add to an existing graph.} \item{random}{\dots} Modified: pkg/vegan/man/tsallis.Rd =================================================================== --- pkg/vegan/man/tsallis.Rd 2013-10-17 10:54:26 UTC (rev 2640) +++ pkg/vegan/man/tsallis.Rd 2013-10-31 20:24:59 UTC (rev 2641) @@ -9,65 +9,112 @@ } \usage{ tsallis(x, scales = seq(0, 2, 0.2), norm = FALSE, hill = FALSE) -tsallisaccum(x, scales = seq(0, 2, 0.2), permutations = 100, raw = FALSE, ...) +tsallisaccum(x, scales = seq(0, 2, 0.2), permutations = 100, + raw = FALSE, subset, ...) \method{persp}{tsallisaccum}(x, theta = 220, phi = 15, col = heat.colors(100), zlim, ...) } -%- maybe also 'usage' for other objects documented here. + \arguments{ \item{x}{Community data matrix or plotting object. } \item{scales}{Scales of Tsallis diversity.} - \item{norm}{Logical, if \code{TRUE} diversity values are normalized by their maximum (diversity value at equiprobability conditions).} + + \item{norm}{Logical, if \code{TRUE} diversity values are normalized + by their maximum (diversity value at equiprobability conditions).} + \item{hill}{Calculate Hill numbers.} - \item{permutations}{Number of random permutations in accumulating sites.} - \item{raw}{If \code{FALSE} then return summary statistics of permutations, and if TRUE then returns the individual permutations.} - \item{theta, phi}{angles defining the viewing direction. \code{theta} gives the azimuthal direction and \code{phi} the colatitude.} - \item{col}{Colours used for surface.} - \item{zlim}{Limits of vertical axis.} - \item{\dots}{Other arguments which are passed to \code{tsallis} and to graphical functions.} + + \item{permutations}{Number of random permutations in accumulating + sites.} -} -\details{ -The Tsallis diversity (also equivalent to Patil and Taillie diversity) is a one-parametric generalised entropy function, defined as: + \item{raw}{If \code{FALSE} then return summary statistics of + permutations, and if TRUE then returns the individual + permutations.} + \item{subset}{logical expression indicating sites (rows) to keep: + missing values are taken as \code{FALSE}.} + + \item{theta, phi}{angles defining the viewing + direction. \code{theta} gives the azimuthal direction and + \code{phi} the colatitude.} + + \item{col}{Colours used for surface.} \item{zlim}{Limits of + vertical axis.} + + \item{\dots}{Other arguments which are passed to \code{tsallis} and + to graphical functions.} + +} + +\details{ The Tsallis diversity (also equivalent to Patil and Taillie +diversity) is a one-parametric generalised entropy function, defined +as: + \deqn{H_q = \frac{1}{q-1} (1-\sum_{i=1}^S p_i^q)}{H.q = 1/(q-1)(1-sum(p^q))} -where \eqn{q} is a scale parameter, \eqn{S} the number of species in the sample (Tsallis 1988, Tothmeresz 1995). This diversity is concave for all \eqn{q>0}, but non-additive (Keylock 2005). 
For \eqn{q=0} it gives the number of species minus one, as \eqn{q} tends to 1 this gives Shannon diversity, for \eqn{q=2} this gives the Simpson index (see function \code{\link{diversity}}). +where \eqn{q} is a scale parameter, \eqn{S} the number of species in +the sample (Tsallis 1988, Tothmeresz 1995). This diversity is concave +for all \eqn{q>0}, but non-additive (Keylock 2005). For \eqn{q=0} it +gives the number of species minus one, as \eqn{q} tends to 1 this +gives Shannon diversity, for \eqn{q=2} this gives the Simpson index +(see function \code{\link{diversity}}). -If \code{norm = TRUE}, \code{tsallis} gives values normalized by the maximum: +If \code{norm = TRUE}, \code{tsallis} gives values normalized by the +maximum: \deqn{H_q(max) = \frac{S^{1-q}-1}{1-q}}{H.q(max) = (S^(1-q)-1)/(1-q)} -where \eqn{S} is the number of species. As \eqn{q} tends to 1, maximum is defined as \eqn{ln(S)}. +where \eqn{S} is the number of species. As \eqn{q} tends to 1, maximum +is defined as \eqn{ln(S)}. -If \code{hill = TRUE}, \code{tsallis} gives Hill numbers (numbers equivalents, see Jost 2007): +If \code{hill = TRUE}, \code{tsallis} gives Hill numbers (numbers +equivalents, see Jost 2007): \deqn{D_q = (1-(q-1) H)^{1/(1-q)}}{D.q = (1-(q-1)*H)^(1/(1-q))} -Details on plotting methods and accumulating values can be found on the help pages of the functions \code{\link{renyi}} and \code{\link{renyiaccum}}. +Details on plotting methods and accumulating values can be found on +the help pages of the functions \code{\link{renyi}} and +\code{\link{renyiaccum}}. } -\value{ -Function \code{tsallis} returns a data frame of selected indices. Function \code{tsallisaccum} with argument \code{raw = FALSE} returns a three-dimensional array, where the first dimension are the accumulated sites, second dimension are the diversity scales, and third dimension are the summary statistics \code{mean}, \code{stdev}, \code{min}, \code{max}, \code{Qnt 0.025} and \code{Qnt 0.975}. With argument \code{raw = TRUE} the statistics on the third dimension are replaced with individual permutation results. -} + +\value{ +Function \code{tsallis} returns a data frame of selected +indices. Function \code{tsallisaccum} with argument \code{raw = FALSE} +returns a three-dimensional array, where the first dimension are the +accumulated sites, second dimension are the diversity scales, and +third dimension are the summary statistics \code{mean}, \code{stdev}, +\code{min}, \code{max}, \code{Qnt 0.025} and \code{Qnt 0.975}. With +argument \code{raw = TRUE} the statistics on the third dimension are +replaced with individual permutation results. } + \references{ -Tsallis, C. (1988) Possible generalization of Boltzmann-Gibbs statistics. - \emph{J. Stat. Phis.} 52, 479--487. +Tsallis, C. (1988) Possible generalization of Boltzmann-Gibbs + statistics. \emph{J. Stat. Phis.} 52, 479--487. + Tothmeresz, B. (1995) Comparison of different methods for diversity ordering. \emph{Journal of Vegetation Science} \bold{6}, 283--290. -Patil, G. P. and Taillie, C. (1982) Diversity as a concept and its measurement. - \emph{J. Am. Stat. Ass.} \bold{77}, 548--567. +Patil, G. P. and Taillie, C. (1982) Diversity as a concept and its + measurement. \emph{J. Am. Stat. Ass.} \bold{77}, 548--567. -Keylock, C. J. (2005) Simpson diversity and the Shannon-Wiener index as special cases of a generalized entropy. - \emph{Oikos} \bold{109}, 203--207. +Keylock, C. J. (2005) Simpson diversity and the Shannon-Wiener index + as special cases of a generalized entropy. 
\emph{Oikos} \bold{109}, + 203--207. -Jost, L (2007) Partitioning diversity into independent alpha and beta components. - \emph{Ecology} \bold{88}, 2427--2439. +Jost, L (2007) Partitioning diversity into independent alpha and beta + components. \emph{Ecology} \bold{88}, 2427--2439. } -\author{\enc{P?ter S?lymos}{Peter Solymos}, \email{solymos at ualberta.ca}, based on the code of Roeland Kindt and Jari Oksanen written for \code{renyi}} -\seealso{ -Plotting methods and accumulation routines are based on functions \code{\link{renyi}} and \code{\link{renyiaccum}}. An object of class 'tsallisaccum' can be used with function \code{\link{rgl.renyiaccum}} as well. See also settings for \code{\link{persp}}. -} + +\author{\enc{P?ter S?lymos}{Peter Solymos}, +\email{solymos at ualberta.ca}, based on the code of Roeland Kindt and +Jari Oksanen written for \code{renyi}} + +\seealso{ Plotting methods and accumulation routines are based on +functions \code{\link{renyi}} and \code{\link{renyiaccum}}. An object +of class 'tsallisaccum' can be used with function +\code{\link{rgl.renyiaccum}} as well. See also settings for +\code{\link{persp}}. } + \examples{ data(BCI) i <- sample(nrow(BCI), 12)