[Vegan-commits] r639 - in pkg/vegan: R inst man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Dec 17 19:45:13 CET 2008
Author: jarioksa
Date: 2008-12-17 19:45:13 +0100 (Wed, 17 Dec 2008)
New Revision: 639
Added:
pkg/vegan/R/kendall.global.R
pkg/vegan/R/kendall.post.R
pkg/vegan/man/kendall.global.Rd
Modified:
pkg/vegan/inst/ChangeLog
Log:
Kendall coefficient of concordance to identify significant species association (written by Guillaume Blanchet and Pierre Legendre
Added: pkg/vegan/R/kendall.global.R
===================================================================
--- pkg/vegan/R/kendall.global.R (rev 0)
+++ pkg/vegan/R/kendall.global.R 2008-12-17 18:45:13 UTC (rev 639)
@@ -0,0 +1,118 @@
+`kendall.global` <-
+ function(Y, group, nperm=999, mult="holm")
+{
+### Function to test the overall significance of the Kendall coefficient of
+### concordance for a single group or several groups of judges (e.g. species)
+###
+### copyleft - Guillaume Blanchet and Pierre Legendre, October 2008
+################################################################################
+
+ mult <- match.arg(mult, c("sidak", "holm", "bonferroni"))
+
+ a <- system.time({
+
+ ##CC# Make sure Y is a matrix and find number of rows and columns of Y
+ Y <- as.matrix(Y)
+ n <- nrow(Y)
+ p <- ncol(Y)
+ if(p < 2) stop("There is a single variable in the data matrix")
+
+ ##CC# Transform the species abundances to ranks, by column
+ R <- apply(Y,2,rank)
+
+ if(missing(group)) group <- rep(1,p)
+ if(length(group) != p){
+ stop("The number of species in the vector differs from the total number of species")
+ }
+ group <- as.factor(group)
+ gr.lev <- levels(group)
+ ngr <- nlevels(group)
+
+ gr <- as.list(1:ngr)
+
+ n.per.gr <- vector(length=ngr)
+ for(i in 1:ngr){
+ gr[[i]] <- which(group==gr.lev[i])
+ n.per.gr[i] <- length(gr[[i]])
+ ## Vector containing the number of species per group
+ }
+
+ ## Initialise the vectors of results
+ W.gr <- vector("numeric",ngr)
+ Chi2.gr <- vector("numeric",ngr)
+ P.Chi2.gr <- vector("numeric",ngr)
+ vec <- NA
+
+ for(i in 1:ngr){
+ p.i <- n.per.gr[i]
+ if(p.i < 2) stop("There is a single variable in group ",gr.lev[i])
+ ##CC# Correction factors for tied ranks (eq. 3.3)
+ t.ranks <- apply(R[,gr[[i]]], 2, function(x) summary(as.factor(x)))
+ TT <- sum(unlist(lapply(t.ranks, function(x) sum((x^3)-x))))
+
+ ##CC# Calculate the Sum of squares of the uncentred ranks (S) (eq. 3.1)
+ S <- sum(apply(R[,gr[[i]]], 1, sum)^2)
+
+ ##CC# Calculate Kendall's W (eq. 3.2)
+ W.gr[i] <- ((12*S)-(3*(p.i^2)*n*(n+1)^2))/(((p.i^2)*((n^3)-n))-(p.i*TT))
+
+ ##CC# Calculate Friedman's Chi-square (eq. 3.4)
+ Chi2.gr[i] <- p.i*(n-1)*W.gr[i]
+
+ counter <- 1
+ for(j in 1:nperm) { # Each species is permuted independently
+ R.perm <- apply(R[,gr[[i]]], 2, sample)
+ S.perm <- sum(apply(R.perm, 1, sum)^2)
+ W.perm <- ((12*S.perm)-(3*(p^2)*n*(n+1)^2))/(((p^2)*((n^3)-n))-(p*TT))
+ Chi2.perm <- p*(n-1)*W.perm
+ if(Chi2.perm >= Chi2.gr[i]) counter <- counter+1
+ }
+ P.Chi2.gr[i] <- counter/(nperm+1)
+ vec <- c(vec, P.Chi2.gr[i])
+ }
+ ## Correction to P-values for multiple testing
+ ## Write all P-values to vector 'vec'
+ if(ngr > 1) {
+ vec <- vec[-1]
+ if(length(vec) != ngr) stop("Error in putting together vector 'vec'")
+
+ if(mult == "sidak") {
+ vec.corr = NA
+ for(i in 1:ngr) vec.corr = c(vec.corr, (1-(1-vec[i])^ngr))
+ vec.corr <- vec.corr[-1]
+ }
+ if(mult == "holm") vec.corr <- p.adjust(vec, method="holm")
+ if(mult == "bonferroni") vec.corr <- p.adjust(vec, method="bonferroni")
+
+ vec.gr <- vector("list",ngr)
+ end <- 0
+ for(i in 1:ngr){
+ beg <- end+1
+ end <- end + n.per.gr[i]
+ vec.gr[[i]] <- vec.corr[beg:end]
+ }
+ }
+ ## Create a data frame containing the results
+ if(ngr == 1) {
+ table <- rbind(W.gr, Chi2.gr, P.Chi2.gr)
+ colnames(table) <- colnames(table,do.NULL = FALSE, prefix = "Group.")
+ rownames(table) <- c("W", "Chi2", "Prob")
+ } else {
+ table <- rbind(W.gr, Chi2.gr, P.Chi2.gr, vec.corr)
+ colnames(table) <- colnames(table,do.NULL = FALSE, prefix = "Group.")
+ rownames(table) <- c("W", "Chi2", "Prob", "Corrected prob")
+ }
+ #
+ })
+ a[3] <- sprintf("%2f",a[3])
+ cat("Time to compute global test(s) =",a[3]," sec",'\n')
+ #
+ if(ngr == 1) {
+ out <- list(Concordance_analysis=table)
+ } else {
+ out <- list(Concordance_analysis=table, Correction.type=mult)
+ }
+ #
+ class(out) <- "kendall.global"
+ out
+}
Added: pkg/vegan/R/kendall.post.R
===================================================================
--- pkg/vegan/R/kendall.post.R (rev 0)
+++ pkg/vegan/R/kendall.post.R 2008-12-17 18:45:13 UTC (rev 639)
@@ -0,0 +1,142 @@
+`kendall.post` <-
+ function(Y, group, nperm=999, mult="holm")
+{
+### Function to carry out a posteriori tests on individual judges (e.g. species)
+### for a single group or several groups of judges.
+###
+### copyleft - Guillaume Blanchet and Pierre Legendre, October 2008
+################################################################################
+
+ mult <- match.arg(mult, c("sidak", "holm", "bonferroni"))
+
+ a <- system.time({
+
+ ##CC# Make sure Y is a matrix and find number of rows and columns of Y
+ Y <- as.matrix(Y)
+ n <- nrow(Y)
+ p <- ncol(Y)
+ if(p < 2) stop("There is a single variable in the data matrix")
+
+ ##CC# Transform the species abundances to ranks, by column
+ R <- apply(Y,2,rank)
+
+ if(missing(group)) group <- rep(1,p)
+ if(length(group) != p){
+ stop("The number of species in the vector differs from the total number of species")
+ }
+
+ ##CC# Separate tests for the variables in each group
+ group <- as.factor(group)
+ gr.lev <- levels(group)
+ ngr <- nlevels(group)
+
+ gr <- as.list(1:ngr)
+
+ n.per.gr <- vector(length=ngr)
+ for(i in 1:ngr){
+ gr[[i]] <- which(group==gr.lev[i])
+ n.per.gr[i] <- length(gr[[i]])
+ ## Vector with the number of variables per group
+ }
+
+ ##===============================
+ ##CC# start permutation procedure
+ ##===============================
+ ##CC# Initialize the counters
+ counter <- as.list(1:ngr)
+ for(i in 1:ngr){
+ counter[[i]] <- vector(l=n.per.gr[i])
+ }
+ W.gr <- vector("list",ngr)
+ if(ngr > 1) spear.gr <- vector("list",ngr)
+
+ for(i in 1:ngr){
+ p.i <- n.per.gr[i]
+ if(p.i < 2) stop("There is a single variable in group ",gr.lev[i])
+ ##CC# Extract variables part of group i
+ R.gr <- R[,gr[[i]]]
+ ## Table with species of group 'i' only as columns CC#
+ ##Calculate the mean of the Spearman correlations for each
+ ##species in a group
+ spear.mat <- cor(R.gr)
+ diag(spear.mat) <- NA
+ spear.mean <- apply(spear.mat, 1, mean, na.rm=TRUE)
+ ##CC# Calculate Kendall's W for each variable
+ W.var <- ((p.i-1)*spear.mean+1)/p.i
+
+ #for(j in 1:n.per.gr[i]){
+ for(j in 1:p.i){ # Test each species in turn
+ #CC# Create a new R where the
+ #permuted variable has been
+ #removed
+ R.gr.mod <- R.gr[,-j]
+ #CC# Set counter
+ counter[[i]][j] <- 1
+ #CC# The actual permutation
+ #procedure
+ for(k in 1:nperm){
+ var.perm <- sample(R.gr[,j])
+ spear.mat.perm <- cor(cbind(R.gr.mod, var.perm))
+ diag(spear.mat.perm) <- NA
+ spear.mean.j.perm <- mean(spear.mat.perm[p.i,], na.rm=TRUE)
+ W.perm <- ((p.i-1)*spear.mean.j.perm+1)/p.i
+ if(W.perm >= W.var[j]) counter[[i]][j] <- counter[[i]][j]+1
+ }
+ }
+ W.gr[[i]] <- W.var
+ if(ngr > 1) spear.gr[[i]] <- spear.mean
+ }
+ ##CC# Calculate P-values
+ for(i in 1:ngr) {
+ counter[[i]] <- counter[[i]]/(nperm+1)
+ }
+ ## Correction to P-values for multiple testing
+ ## Write all P-values to a long vector 'vec'
+ vec <- counter[[1]]
+ if(ngr > 1) {
+ for(i in 2:ngr) vec = c(vec, counter[[i]])
+ }
+ if(length(vec) != p) stop("Error in putting together vector 'vec'")
+
+ if(mult == "sidak") {
+ vec.corr = NA
+ for(i in 1:p) vec.corr = c(vec.corr, (1-(1-vec[i])^p))
+ vec.corr <- vec.corr[-1]
+ }
+ if(mult == "holm") vec.corr <- p.adjust(vec, method="holm")
+ if(mult == "bonferroni") vec.corr <- p.adjust(vec, method="bonferroni")
+
+ if(ngr > 1) {
+ vec.gr <- vector("list",ngr)
+ end <- 0
+ for(i in 1:ngr){
+ beg <- end+1
+ end <- end + n.per.gr[i]
+ vec.gr[[i]] <- vec.corr[beg:end]
+ }
+ }
+ ## Create data frames containing the results
+ if(ngr == 1) {
+ table <- rbind(spear.mean, W.var, counter[[1]], vec.corr)
+ rownames(table) <- c("Spearman.mean", "W.per.species", "Prob", "Corrected prob")
+ } else {
+ table <- as.list(1:ngr)
+ for(i in 1:ngr) {
+ table[[i]] <- rbind(spear.gr[[i]], W.gr[[i]], counter[[i]], vec.gr[[i]])
+ rownames(table[[i]]) <- c("Spearman.mean", "W.per.species", "Prob", "Corrected prob")
+ }
+ }
+
+ })
+ a[3] <- sprintf("%2f",a[3])
+ cat("Time to compute a posteriori tests (per species) =",a[3]," sec",'\n')
+
+ if(ngr == 1) {
+ out <- list(A_posteriori_tests=table, Correction.type=mult)
+ } else {
+ out <- list(A_posteriori_tests_Group=table, Correction.type=mult)
+ }
+
+ class(out) <- "kendall.post"
+ out
+}
Modified: pkg/vegan/inst/ChangeLog
===================================================================
--- pkg/vegan/inst/ChangeLog 2008-12-17 08:47:04 UTC (rev 638)
+++ pkg/vegan/inst/ChangeLog 2008-12-17 18:45:13 UTC (rev 639)
@@ -4,12 +4,18 @@
Version 1.16-8 (opened Dec 17, 2008)
- * nestedness: added nestendess index based on overlap and
- decreasing fill (Almeida-Neto et al., Oikos 117, 1227-1239;
- 2008). Not all properties of the Oikos paper are not yet
- implemented, and the UI needs work (print, plot methods). Coding
- by Gustavo Carvalho as a part of R-Forge Feature Request #265.
- Documented with nestedtemp.
+ * kendall.global & kendall.post: new functions to analyse the
+ Kendall's coefficient of concordance --- use in ecology: to
+ identify significant species associations. See P. Legendre,
+ J. Agric. Biol. Envir. Statistics 10, 226-245; 2005. Written by
+ Guillaume Blanchet and Pierre Legendre.
+
+ * nestedness: added nestedness index based on overlap and
+ decreasing fill (Almeida-Neto et al., Oikos 117, 1227-1239; 2008).
+ Not yet all properties of the Oikos paper are implemented, and the
+ UI needs work (print, plot methods). Coding by Gustavo Carvalho
+ as a part of R-Forge Feature Request #265. Documented with
+ nestedtemp.
Version 1.16-7 (closed Dec 17, 2008)
@@ -22,16 +28,15 @@
Version 1.16-6 (closed Dec 7, 2008)
* adipart: got a formula interface, and aggregate() was replaced
- by matrix multiplication. Now it 10 times faster. The formula
+ by matrix multiplication. Now it is 10 times faster. The formula
interface has some consequences on the specification of the
sampling design.
-
* permatfull: Jari Oksanen made the C port for the quantitative
- quasiswap algorithm. So the permat* null model family is now can
- be used for a wide array of null model analyses and is quick
- enough to make reliable testing. The permatswap function and help
- page were modified accordingly.
+ quasiswap algorithm. So the permat* null model family now can be
+ used for a wide array of null model analyses and is quick enough
+ to make reliable testing. The permatswap function and help page
+ were modified accordingly.
* plot.rad: gained argument log = "y", allowing log = "xy" so that
Zipf model is a straight line, or log = "" with arithmetic
Added: pkg/vegan/man/kendall.global.Rd
===================================================================
--- pkg/vegan/man/kendall.global.Rd (rev 0)
+++ pkg/vegan/man/kendall.global.Rd 2008-12-17 18:45:13 UTC (rev 639)
@@ -0,0 +1,160 @@
+\encoding{UTF-8}
+\name{kendall.global}
+\alias{kendall.global}
+\alias{kendall.post}
+
+\title{ Kendall coefficient of concordance }
+
+\description{
+ Function \code{kendall.global} computes and tests the coefficient of
+ concordance among several judges (variables, species) through a
+ permutation test.
+
+ Function \code{kendall.post} carries out \emph{a posteriori} tests
+ of the contributions of individual judges (variables, species) to
+ the overall concordance of their group through permutation tests.
+
+ If several groups of judges are identified in the data table,
+ coefficients of concordance (\code{kendall.global}) or a posteriori
+ tests (\code{kendall.post}) will be computed for each group
+ separately. Use in ecology: to identify significant species
+ associations.
+}
+\usage{
+kendall.global(Y, group, nperm = 999, mult = "holm")
+kendall.post(Y, group, nperm = 999, mult = "holm")
+}
+\arguments{
+ \item{Y}{ Data file (data frame or matrix) containing quantitative
+ data. Rows are objects and columns are judges (variables). In
+ community ecology, that table is often a site-by-species table. }
+ \item{group}{ A vector defining how judges should be divided into
+ groups. See example below. If groups are not explicitly defined,
+ all judges in the data file will be considered as forming a single
+ group. }
+ \item{nperm}{ Number of permutations to be performed. Default is
+ 999. }
+ \item{mult}{ Correct P-values for multiple testing. The methods are
+ "holm", "sidak", and "bonferroni". The Bonferroni correction is
+ overly conservative; it is not recommended. It is included to
+ allow comparisons with the other methods. }
+}
+\details{
+ \code{Y} must contain quantitative data. They will be transformed to
+ ranks within each column before computation of the coefficient of
+ concordance.
+
+ The search for species associations described in Legendre (2005)
+ proceeds in 3 steps:
+
+ (1) Correlation analysis of the species. A possible method is to
+ compute Ward's agglomerative clustering of a matrix of correlations
+ among the species. In detail: (1.1) compute a Pearson or Spearman
+ correlation matrix (\code{correl.matrix}) among the species; (1.2)
+ turn it into a distance matrix: \code{mat.D =
+ as.dist(1-correl.matrix)}; (1.3) carry out Ward's hierarchical
+ clustering of that matrix using \code{hclust}: \code{clust.ward =
+ hclust(mat.D, "ward")}; (1.4) plot the dendrogram:
+ \code{plot(clust.ward, hang=-1)}; (1.5) cut the dendrogram in two
+ groups, retrieve the vector of species membership: \code{group.2 =
+ cutree(clust.ward, k=2)}. (1.6) After steps 2 and 3 below, you may
+ have to come back and try divisions of the species into k = 3, 4, 5,
+ \dots groups.
+
+ (2) Compute global tests of significance of the 2 (or more) groups
+ using the function \code{kendall.global} and the vector defining the
+ groups. Groups that are not globally significant must be refined or
+ abandoned.
+
+ (3) Compute a posteriori tests of the contribution of individual
+ species to the concordance of their group using the function
+ \code{kendall.post} and the vector defining the groups. If some
+ species have negative values for "Spearman.mean", this means that
+ these species clearly do not belong to the group, hence that group
+ is too inclusive. Go back to (1.5) and cut the dendrogram more
+ finely. The left and right groups can be cut separately,
+ independently of the levels along the dendrogram; write your own
+ vector of group membership if \code{cutree} does not produce the
+ desired groups.
+
+ The corrections used for multiple testing are applied to the list of
+ P-values (P); they take into account the number of tests (k) carried
+ out simulatenously (number of groups in \code{kendall.global}, or
+ number of species in \code{kendall.post}).
+
+ The Holm correction is computed after ordering the P-values in a
+ list with the smallest value to the left. Compute adjusted P-values
+ as:
+ \eqn{P_{corr} = (k-i+1) P}{P_corr = (k-i+1)*P}
+
+ where i is the position in the ordered list. Final step: from left
+ to right, if an adjusted \eqn{P_corr} in the ordered list is smaller
+ than the one occurring at its left, make the smallest one equal to
+ the largest one.
+
+ The \enc{Šidák}{Sidak} correction is:
+
+ \eqn{P_{corr} = 1 - (1 - P)^k}
+
+ The Bonferonni correction is:
+
+ \eqn{P_{corr} = k P}{P_corr = k*P}
+}
+\value{
+
+ A table containing the following information in rows. The columns
+ correspond to the groups of "judges" defined in vector "group". When
+ function \code{Kendall.post} is used, there are as many tables as
+ the number of predefined groups.
+
+ \item{W }{Kendall's coefficient of concordance, W. }
+
+ \item{Chi2 }{Friedman's chi-square statistic used to test W. }
+
+ \item{Prob }{Permutational probability, uncorrected. }
+
+ \item{Corrected prob }{Probabilities corrected using the method
+ selected in parameter "mult". Shown only if there are more than one
+ group. }
+
+ \item{Spearman.mean }{Mean of the Spearman correlations between the
+ judge under test and all the other judges in the same group. }
+
+ \item{W.per.species }{Contribution of the judge under test to the
+ overall concordance statistic for that group. }
+
+}
+\references{
+
+ Legendre, P. 2005. Species associations: the Kendall coefficient of
+ concordance revisited. Journal of Agricultural, Biological, and
+ Environmental Statistics 10: 226-245.
+
+ Siegel, S. and N. J. Castellan, Jr. 1988. Nonparametric statistics for the beh avioral sciences. 2nd edition. McGraw-Hill, New York.
+}
+
+\seealso{\code{\link{cor}}, \code{\link{hclust}},
+\code{\link{cutree}}, \code{\link{kmeans}},
+\code{\link[vegan]{cascadeKM}}, \code{\link[labdsv]{duleg}}}
+
+\author{ F. Guillaume Blanchet, University of Alberta, and Pierre
+ Legendre, Université de Montréal }
+
+\examples{
+data(mite)
+mite.hel <- decostand(mite, "hel")
+
+## Reproduce the results shown in Table 2 of Legendre (2005), a single group
+mite.small <- mite.hel[c(4,9,14,22,31,34,45,53,61,69),c(13:15,23)]
+out1 <- kendall.global(mite.small)
+out2 <- kendall.post(mite.small, mult="holm")
+
+## Reproduce the results shown in Tables 3 and 4 of Legendre (2005), 2 groups
+group <-c(1,1,2,1,1,1,1,1,2,1,1,1,1,1,1,2,1,2,1,1,1,1,2,1,2,1,1,1,1,1,2,2,2,2,2)
+out3 <- kendall.global(mite.hel, group=group)
+out4 <- kendall.post(mite.hel, group=group, mult="holm", nperm=99)
+
+}
+
+\keyword{ multivariate }
+\keyword{ nonparametric }
More information about the Vegan-commits
mailing list