[Vegan-commits] r639 - in pkg/vegan: R inst man

Wed Dec 17 19:45:13 CET 2008

Author: jarioksa
Date: 2008-12-17 19:45:13 +0100 (Wed, 17 Dec 2008)
New Revision: 639

Added:
   pkg/vegan/R/kendall.global.R
   pkg/vegan/R/kendall.post.R
   pkg/vegan/man/kendall.global.Rd
Modified:
   pkg/vegan/inst/ChangeLog
Log:
Kendall coefficient of concordance to identify significant species association (written by Guillaume Blanchet and Pierre Legendre

Added: pkg/vegan/R/kendall.global.R
===================================================================

--- pkg/vegan/R/kendall.global.R	                        (rev 0)
+++ pkg/vegan/R/kendall.global.R	2008-12-17 18:45:13 UTC (rev 639)
@@ -0,0 +1,118 @@
+`kendall.global` <-
+	function(Y, group, nperm=999, mult="holm")
+{
+### Function to test the overall significance of the Kendall coefficient of  
+### concordance for a single group or several groups of judges (e.g. species)
+###
+### copyleft - Guillaume Blanchet and Pierre Legendre, October 2008
+################################################################################
+	
+    mult <- match.arg(mult, c("sidak", "holm", "bonferroni"))
+	
+    a <- system.time({
+
+        ##CC# Make sure Y is a matrix and find number of rows and columns of Y
+	Y <- as.matrix(Y)
+	n <- nrow(Y)
+	p <- ncol(Y)
+	if(p < 2) stop("There is a single variable in the data matrix")
+
+        ##CC# Transform the species abundances to ranks, by column
+	R <- apply(Y,2,rank)
+
+	if(missing(group)) group <- rep(1,p)
+	if(length(group) != p){
+            stop("The number of species in the vector differs from the total number of species")
+	}
+	group <- as.factor(group)
+	gr.lev <- levels(group)
+	ngr <- nlevels(group)
+		
+	gr <- as.list(1:ngr)
+		
+	n.per.gr <- vector(length=ngr)
+	for(i in 1:ngr){
+            gr[[i]] <- which(group==gr.lev[i])
+            n.per.gr[i] <- length(gr[[i]])
+            ## Vector containing the number of species per group
+	}
+
+        ## Initialise the vectors of results
+	W.gr <- vector("numeric",ngr)
+	Chi2.gr <- vector("numeric",ngr)
+	P.Chi2.gr <- vector("numeric",ngr)
+	vec <- NA
+
+	for(i in 1:ngr){
+            p.i <- n.per.gr[i]
+            if(p.i < 2) stop("There is a single variable in group ",gr.lev[i])
+            ##CC# Correction factors for tied ranks (eq. 3.3)
+            t.ranks <- apply(R[,gr[[i]]], 2, function(x) summary(as.factor(x)))
+            TT <- sum(unlist(lapply(t.ranks, function(x) sum((x^3)-x))))
+	
+            ##CC# Calculate the Sum of squares of the uncentred ranks (S) (eq. 3.1)
+            S <- sum(apply(R[,gr[[i]]], 1, sum)^2)
+	
+            ##CC# Calculate Kendall's W (eq. 3.2)
+            W.gr[i] <- ((12*S)-(3*(p.i^2)*n*(n+1)^2))/(((p.i^2)*((n^3)-n))-(p.i*TT))
+	
+            ##CC# Calculate Friedman's Chi-square (eq. 3.4)
+            Chi2.gr[i] <- p.i*(n-1)*W.gr[i]
+
+            counter <- 1
+            for(j in 1:nperm) { # Each species is permuted independently
+                R.perm <- apply(R[,gr[[i]]], 2, sample)
+                S.perm <- sum(apply(R.perm, 1, sum)^2)
+                W.perm <- ((12*S.perm)-(3*(p^2)*n*(n+1)^2))/(((p^2)*((n^3)-n))-(p*TT))
+                Chi2.perm <- p*(n-1)*W.perm
+                if(Chi2.perm >= Chi2.gr[i]) counter <- counter+1
+            }
+            P.Chi2.gr[i] <- counter/(nperm+1)
+            vec <- c(vec, P.Chi2.gr[i])
+	}
+        ## Correction to P-values for multiple testing
+        ## Write all P-values to vector 'vec'
+        if(ngr > 1) {
+            vec <- vec[-1]
+            if(length(vec) != ngr) stop("Error in putting together vector 'vec'")
+
+            if(mult == "sidak") {
+                vec.corr = NA
+                for(i in 1:ngr) vec.corr = c(vec.corr, (1-(1-vec[i])^ngr))
+                vec.corr <- vec.corr[-1]
+            }
+            if(mult == "holm") vec.corr <- p.adjust(vec, method="holm")
+            if(mult == "bonferroni") vec.corr <- p.adjust(vec, method="bonferroni")
+
+            vec.gr <- vector("list",ngr)
+            end <- 0
+            for(i in 1:ngr){
+                beg <- end+1
+                end <- end + n.per.gr[i]
+                vec.gr[[i]] <- vec.corr[beg:end]
+            }
+        }
+        ## Create a data frame containing the results
+	if(ngr == 1) {
+            table <- rbind(W.gr, Chi2.gr, P.Chi2.gr)
+            colnames(table) <- colnames(table,do.NULL = FALSE, prefix = "Group.")
+            rownames(table) <- c("W", "Chi2", "Prob")
+        }  else {
+            table <- rbind(W.gr, Chi2.gr, P.Chi2.gr, vec.corr)
+            colnames(table) <- colnames(table,do.NULL = FALSE, prefix = "Group.")
+            rownames(table) <- c("W", "Chi2", "Prob", "Corrected prob")		
+        }
+                                        #
+    })
+    a[3] <- sprintf("%2f",a[3])
+    cat("Time to compute global test(s) =",a[3]," sec",'\n')
+                                        #
+    if(ngr == 1) {
+        out <- list(Concordance_analysis=table)
+    }  else {
+        out <- list(Concordance_analysis=table, Correction.type=mult)
+    }
+                                        #
+    class(out) <- "kendall.global"
+    out
+}

Added: pkg/vegan/R/kendall.post.R
===================================================================
--- pkg/vegan/R/kendall.post.R	                        (rev 0)
+++ pkg/vegan/R/kendall.post.R	2008-12-17 18:45:13 UTC (rev 639)
@@ -0,0 +1,142 @@
+`kendall.post` <-
+	function(Y, group, nperm=999, mult="holm")
+{
+### Function to carry out a posteriori tests on individual judges (e.g. species)
+### for a single group or several groups of judges.
+###
+### copyleft - Guillaume Blanchet and Pierre Legendre, October 2008
+################################################################################
+	
+    mult <- match.arg(mult, c("sidak", "holm", "bonferroni"))
+	
+    a <- system.time({
+	
+        ##CC# Make sure Y is a matrix and find number of rows and columns of Y
+        Y <- as.matrix(Y)
+        n <- nrow(Y)
+        p <- ncol(Y)
+        if(p < 2) stop("There is a single variable in the data matrix")
+
+        ##CC# Transform the species abundances to ranks, by column
+        R <- apply(Y,2,rank)
+
+        if(missing(group)) group <- rep(1,p)
+        if(length(group) != p){
+            stop("The number of species in the vector differs from the total number of species")
+        }
+		
+        ##CC# Separate tests for the variables in each group
+        group <- as.factor(group)
+        gr.lev <- levels(group)
+        ngr <- nlevels(group)
+		
+        gr <- as.list(1:ngr)
+		
+        n.per.gr <- vector(length=ngr)
+        for(i in 1:ngr){
+            gr[[i]] <- which(group==gr.lev[i])
+            n.per.gr[i] <- length(gr[[i]])
+            ## Vector with the number of variables per group
+        }
+		
+        ##===============================
+        ##CC# start permutation procedure
+        ##===============================
+        ##CC# Initialize the counters
+        counter <- as.list(1:ngr)
+        for(i in 1:ngr){
+            counter[[i]] <- vector(l=n.per.gr[i])
+        }
+        W.gr <- vector("list",ngr)
+        if(ngr > 1) spear.gr <- vector("list",ngr)
+		
+        for(i in 1:ngr){
+            p.i <- n.per.gr[i]
+            if(p.i < 2) stop("There is a single variable in group ",gr.lev[i])
+            ##CC# Extract variables part of group i
+            R.gr <- R[,gr[[i]]]
+            ## Table with species of group 'i' only as columns CC#
+            ##Calculate the mean of the Spearman correlations for each
+            ##species in a group
+            spear.mat <- cor(R.gr)
+            diag(spear.mat) <- NA
+            spear.mean <- apply(spear.mat, 1, mean, na.rm=TRUE)
+            ##CC# Calculate Kendall's W for each variable
+            W.var <- ((p.i-1)*spear.mean+1)/p.i
+
+                                        #for(j in 1:n.per.gr[i]){
+            for(j in 1:p.i){            # Test each species in turn
+                                        #CC# Create a new R where the
+                                        #permuted variable has been
+                                        #removed
+                R.gr.mod <- R.gr[,-j]
+                                        #CC# Set counter
+                counter[[i]][j] <- 1
+                                        #CC# The actual permutation
+                                        #procedure
+                for(k in 1:nperm){
+                    var.perm <- sample(R.gr[,j])
+                    spear.mat.perm <- cor(cbind(R.gr.mod, var.perm))
+                    diag(spear.mat.perm) <- NA
+                    spear.mean.j.perm <- mean(spear.mat.perm[p.i,], na.rm=TRUE)
+                    W.perm <- ((p.i-1)*spear.mean.j.perm+1)/p.i
+                    if(W.perm >= W.var[j]) counter[[i]][j] <- counter[[i]][j]+1
+                }
+            }
+            W.gr[[i]] <- W.var
+            if(ngr > 1) spear.gr[[i]] <- spear.mean
+        }
+        ##CC# Calculate P-values
+        for(i in 1:ngr) {
+            counter[[i]] <- counter[[i]]/(nperm+1)
+        }
+        ## Correction to P-values for multiple testing
+        ## Write all P-values to a long vector 'vec'
+        vec <- counter[[1]]
+        if(ngr > 1) {
+            for(i in 2:ngr) vec = c(vec, counter[[i]])
+        }
+        if(length(vec) != p) stop("Error in putting together vector 'vec'")
+
+        if(mult == "sidak") {
+            vec.corr = NA
+            for(i in 1:p) vec.corr = c(vec.corr, (1-(1-vec[i])^p))
+            vec.corr <- vec.corr[-1]
+        }
+        if(mult == "holm") vec.corr <- p.adjust(vec, method="holm")
+        if(mult == "bonferroni") vec.corr <- p.adjust(vec, method="bonferroni")
+
+        if(ngr > 1) {
+            vec.gr <- vector("list",ngr)
+            end <- 0
+            for(i in 1:ngr){
+                beg <- end+1
+                end <- end + n.per.gr[i]
+                vec.gr[[i]] <- vec.corr[beg:end]
+            }
+        }
+        ## Create data frames containing the results
+	if(ngr == 1) {
+            table <- rbind(spear.mean, W.var, counter[[1]], vec.corr)
+            rownames(table) <- c("Spearman.mean", "W.per.species", "Prob", "Corrected prob")
+        } else {
+            table <- as.list(1:ngr)
+            for(i in 1:ngr) {
+                table[[i]] <- rbind(spear.gr[[i]], W.gr[[i]], counter[[i]], vec.gr[[i]])
+                rownames(table[[i]]) <- c("Spearman.mean", "W.per.species", "Prob", "Corrected prob")
+            }
+        }
+
+    })
+    a[3] <- sprintf("%2f",a[3])
+    cat("Time to compute a posteriori tests (per species) =",a[3]," sec",'\n')
+
+    if(ngr == 1) {
+        out <- list(A_posteriori_tests=table, Correction.type=mult)
+    } else {
+        out <- list(A_posteriori_tests_Group=table, Correction.type=mult)
+    }
+
+    class(out) <- "kendall.post"
+    out
+}

Modified: pkg/vegan/inst/ChangeLog
===================================================================
--- pkg/vegan/inst/ChangeLog	2008-12-17 08:47:04 UTC (rev 638)
+++ pkg/vegan/inst/ChangeLog	2008-12-17 18:45:13 UTC (rev 639)
@@ -4,12 +4,18 @@
 
 Version 1.16-8 (opened Dec 17, 2008)
 
-	* nestedness: added nestendess index based on overlap and
-	decreasing fill (Almeida-Neto et al., Oikos 117, 1227-1239;
-	2008).  Not all properties of the Oikos paper are not yet
-	implemented, and the UI needs work (print, plot methods).  Coding
-	by Gustavo Carvalho as a part of R-Forge Feature Request #265.
-	Documented with nestedtemp.
+	* kendall.global & kendall.post: new functions to analyse the
+	Kendall's coefficient of concordance  --- use in ecology: to
+	identify significant species associations. See P. Legendre,
+	J. Agric. Biol. Envir. Statistics 10, 226-245; 2005. Written by
+	Guillaume Blanchet and Pierre Legendre.
+
+	* nestedness: added nestedness index based on overlap and
+	decreasing fill (Almeida-Neto et al., Oikos 117, 1227-1239; 2008).
+	Not yet all properties of the Oikos paper are implemented, and the
+	UI needs work (print, plot methods).  Coding by Gustavo Carvalho
+	as a part of R-Forge Feature Request #265.  Documented with
+	nestedtemp.
 	
 Version 1.16-7 (closed Dec 17, 2008)
 
@@ -22,16 +28,15 @@
 Version 1.16-6 (closed Dec 7, 2008)
 
 	* adipart: got a formula interface, and aggregate() was replaced
-	by matrix multiplication. Now it 10 times faster.  The formula
+	by matrix multiplication. Now it is 10 times faster.  The formula
 	interface has some consequences on the specification of the
 	sampling design.
 
-
 	* permatfull: Jari Oksanen made the C port for the quantitative
-	quasiswap algorithm. So the permat* null model family is now can
-	be used for a wide array of null model analyses and is quick
-	enough to make reliable testing. The permatswap function and help
-	page were modified accordingly.
+	quasiswap algorithm. So the permat* null model family now can be
+	used for a wide array of null model analyses and is quick enough
+	to make reliable testing. The permatswap function and help page
+	were modified accordingly.
 
 	* plot.rad: gained argument log = "y", allowing log = "xy" so that
 	Zipf model is a straight line, or log = "" with arithmetic

Added: pkg/vegan/man/kendall.global.Rd
===================================================================
--- pkg/vegan/man/kendall.global.Rd	                        (rev 0)
+++ pkg/vegan/man/kendall.global.Rd	2008-12-17 18:45:13 UTC (rev 639)
@@ -0,0 +1,160 @@
+\encoding{UTF-8}
+\name{kendall.global}
+\alias{kendall.global}
+\alias{kendall.post}
+
+\title{ Kendall coefficient of concordance }
+
+\description{ 
+   Function \code{kendall.global} computes and tests the coefficient of
+  concordance among several judges (variables, species) through a
+  permutation test.
+
+  Function \code{kendall.post} carries out \emph{a posteriori} tests
+  of the contributions of individual judges (variables, species) to
+  the overall concordance of their group through permutation tests.
+
+  If several groups of judges are identified in the data table,
+  coefficients of concordance (\code{kendall.global}) or a posteriori
+  tests (\code{kendall.post}) will be computed for each group
+  separately. Use in ecology: to identify significant species
+  associations.
+}
+\usage{
+kendall.global(Y, group, nperm = 999, mult = "holm")
+kendall.post(Y, group, nperm = 999, mult = "holm")
+}
+\arguments{
+  \item{Y}{ Data file (data frame or matrix) containing quantitative
+    data. Rows are objects and columns are judges (variables). In
+    community ecology, that table is often a site-by-species table. }
+  \item{group}{ A vector defining how judges should be divided into
+    groups. See example below. If groups are not explicitly defined,
+    all judges in the data file will be considered as forming a single
+    group. }
+  \item{nperm}{ Number of permutations to be performed. Default is
+    999. }
+  \item{mult}{ Correct P-values for multiple testing. The methods are
+    "holm", "sidak", and "bonferroni". The Bonferroni correction is
+    overly conservative; it is not recommended. It is included to
+    allow comparisons with the other methods. }
+}
+\details{
+  \code{Y} must contain quantitative data. They will be transformed to
+  ranks within each column before computation of the coefficient of
+  concordance.
+
+  The search for species associations described in Legendre (2005)
+  proceeds in 3 steps:
+
+  (1) Correlation analysis of the species. A possible method is to
+  compute Ward's agglomerative clustering of a matrix of correlations
+  among the species. In detail: (1.1) compute a Pearson or Spearman
+  correlation matrix (\code{correl.matrix}) among the species; (1.2)
+  turn it into a distance matrix: \code{mat.D =
+  as.dist(1-correl.matrix)}; (1.3) carry out Ward's hierarchical
+  clustering of that matrix using \code{hclust}: \code{clust.ward =
+  hclust(mat.D, "ward")}; (1.4) plot the dendrogram:
+  \code{plot(clust.ward, hang=-1)}; (1.5) cut the dendrogram in two
+  groups, retrieve the vector of species membership: \code{group.2 =
+  cutree(clust.ward, k=2)}. (1.6) After steps 2 and 3 below, you may
+  have to come back and try divisions of the species into k = 3, 4, 5,
+  \dots groups.
+
+  (2) Compute global tests of significance of the 2 (or more) groups
+  using the function \code{kendall.global} and the vector defining the
+  groups. Groups that are not globally significant must be refined or
+  abandoned.
+
+  (3) Compute a posteriori tests of the contribution of individual
+  species to the concordance of their group using the function
+  \code{kendall.post} and the vector defining the groups. If some
+  species have negative values for "Spearman.mean", this means that
+  these species clearly do not belong to the group, hence that group
+  is too inclusive. Go back to (1.5) and cut the dendrogram more
+  finely. The left and right groups can be cut separately,
+  independently of the levels along the dendrogram; write your own
+  vector of group membership if \code{cutree} does not produce the
+  desired groups.
+
+  The corrections used for multiple testing are applied to the list of
+  P-values (P); they take into account the number of tests (k) carried
+  out simulatenously (number of groups in \code{kendall.global}, or
+  number of species in \code{kendall.post}).
+
+  The Holm correction is computed after ordering the P-values in a
+  list with the smallest value to the left. Compute adjusted P-values
+  as:
+    \eqn{P_{corr} = (k-i+1) P}{P_corr = (k-i+1)*P}
+
+  where i is the position in the ordered list. Final step: from left
+  to right, if an adjusted \eqn{P_corr} in the ordered list is smaller
+  than the one occurring at its left, make the smallest one equal to
+  the largest one.
+
+  The \enc{Šidák}{Sidak} correction is:
+
+     \eqn{P_{corr} = 1 - (1 - P)^k}
+
+  The Bonferonni correction is:
+
+   \eqn{P_{corr} = k P}{P_corr = k*P}
+}
+\value{
+
+  A table containing the following information in rows. The columns
+  correspond to the groups of "judges" defined in vector "group". When
+  function \code{Kendall.post} is used, there are as many tables as
+  the number of predefined groups.
+  
+  \item{W }{Kendall's coefficient of concordance, W. }
+  
+  \item{Chi2 }{Friedman's chi-square statistic used to test W. }
+  
+  \item{Prob }{Permutational probability, uncorrected. }
+  
+  \item{Corrected prob }{Probabilities corrected using the method
+  selected in parameter "mult". Shown only if there are more than one
+  group. } 
+
+  \item{Spearman.mean }{Mean of the Spearman correlations between the
+  judge under test and all the other judges in the same group. }
+  
+  \item{W.per.species }{Contribution of the judge under test to the
+  overall concordance statistic for that group. }
+
+}
+\references{ 
+
+  Legendre, P. 2005. Species associations: the Kendall coefficient of
+  concordance revisited. Journal of Agricultural, Biological, and
+  Environmental Statistics 10: 226-245.
+
+  Siegel, S. and N. J. Castellan, Jr. 1988. Nonparametric statistics for the beh  avioral sciences. 2nd edition. McGraw-Hill, New York.
+}
+
+\seealso{\code{\link{cor}}, \code{\link{hclust}},
+\code{\link{cutree}}, \code{\link{kmeans}},
+\code{\link[vegan]{cascadeKM}}, \code{\link[labdsv]{duleg}}}
+
+\author{ F. Guillaume Blanchet, University of Alberta, and Pierre
+  Legendre, Université de Montréal }
+
+\examples{
+data(mite)
+mite.hel <- decostand(mite, "hel")
+
+## Reproduce the results shown in Table 2 of Legendre (2005), a single group
+mite.small <- mite.hel[c(4,9,14,22,31,34,45,53,61,69),c(13:15,23)]
+out1 <- kendall.global(mite.small)
+out2 <- kendall.post(mite.small, mult="holm")
+
+## Reproduce the results shown in Tables 3 and 4 of Legendre (2005), 2 groups
+group <-c(1,1,2,1,1,1,1,1,2,1,1,1,1,1,1,2,1,2,1,1,1,1,2,1,2,1,1,1,1,1,2,2,2,2,2)
+out3 <- kendall.global(mite.hel, group=group)
+out4 <- kendall.post(mite.hel, group=group, mult="holm", nperm=99)
+
+}
+
+\keyword{ multivariate }
+\keyword{ nonparametric }