[Vegan-commits] r768 - in branches/1.15: R inst man

Mon Mar 30 18:15:43 CEST 2009

Author: jarioksa
Date: 2009-03-30 18:15:43 +0200 (Mon, 30 Mar 2009)
New Revision: 768

Added:
   branches/1.15/R/kendall.global.R
   branches/1.15/R/kendall.post.R
   branches/1.15/man/kendall.global.Rd
Modified:
   branches/1.15/inst/ChangeLog
Log:
copied kendall.global etc to branches/1.15

Copied: branches/1.15/R/kendall.global.R (from rev 767, pkg/vegan/R/kendall.global.R)
===================================================================

--- branches/1.15/R/kendall.global.R	                        (rev 0)
+++ branches/1.15/R/kendall.global.R	2009-03-30 16:15:43 UTC (rev 768)
@@ -0,0 +1,111 @@
+`kendall.global` <-
+	function(Y, group, nperm=999, mult="holm")
+{
+### Function to test the overall significance of the Kendall coefficient of  
+### concordance for a single group or several groups of judges (e.g. species)
+###
+### copyleft - Guillaume Blanchet and Pierre Legendre, October 2008
+################################################################################
+	
+    mult <- match.arg(mult, c("sidak", p.adjust.methods))
+	
+    ##CC# Make sure Y is a matrix and find number of rows and columns of Y
+    Y <- as.matrix(Y)
+    n <- nrow(Y)
+    p <- ncol(Y)
+    if(p < 2) stop("There is a single variable in the data matrix")
+
+    ##CC# Transform the species abundances to ranks, by column
+    R <- apply(Y,2,rank)
+
+    if(missing(group)) group <- rep(1,p)
+    if(length(group) != p){
+        stop("The number of species in the vector differs from the total number of species")
+    }
+    group <- as.factor(group)
+    gr.lev <- levels(group)
+    ngr <- nlevels(group)
+		
+    gr <- as.list(1:ngr)
+		
+    n.per.gr <- vector(length=ngr)
+    for(i in 1:ngr){
+        gr[[i]] <- which(group==gr.lev[i])
+        n.per.gr[i] <- length(gr[[i]])
+        ## Vector containing the number of species per group
+    }
+
+    ## Initialise the vectors of results
+    W.gr <- vector("numeric",ngr)
+    F.gr <- vector("numeric",ngr)
+    prob.F.gr <- vector("numeric",ngr)
+    Chi2.gr <- vector("numeric",ngr)
+    prob.perm.gr <- vector("numeric",ngr)
+    vec <- NA
+
+    for(i in 1:ngr){
+        p.i <- n.per.gr[i]
+        if(p.i < 2) stop("There is a single variable in group ",gr.lev[i])
+        ##CC# Correction factors for tied ranks (eq. 3.3)
+        t.ranks <- apply(R[,gr[[i]]], 2, function(x) summary(as.factor(x)))
+        T. <- sum(unlist(lapply(t.ranks, function(x) sum((x^3)-x))))
+	
+        ##CC# Compute the Sum of squares of the uncentred ranks (S) (eq. 3.1)
+        S <- sum(apply(R[,gr[[i]]], 1, sum)^2)
+	
+        ##CC# Compute Kendall's W (eq. 3.2)
+        W.gr[i] <- ((12*S)-(3*(p.i^2)*n*(n+1)^2))/(((p.i^2)*((n^3)-n))-(p.i*T.))
+		
+        ##C# Compute Fisher F statistic and associated probability
+        F.gr[i] <- (p.i-1)*W.gr[i]/(1-W.gr[i])
+        nu1 <- n-1-(2/p.i)
+        nu2 <- nu1*(p.i-1)
+        prob.F.gr[i] <- pf(F.gr[i], nu1, nu2, lower.tail=FALSE)
+	
+        ##CC# Calculate Friedman's Chi-square (eq. 3.4)
+        Chi2.gr[i] <- p.i*(n-1)*W.gr[i]
+
+        counter <- 1
+        for(j in 1:nperm) {   # Each species is permuted independently
+            R.perm <- apply(R[,gr[[i]]], 2, sample)
+            S.perm <- sum(apply(R.perm, 1, sum)^2)
+            W.perm <- ((12*S.perm)-(3*(p.i^2)*n*(n+1)^2))/(((p.i^2)*((n^3)-n))-(p.i*T.))
+            Chi2.perm <- p.i*(n-1)*W.perm
+            if(Chi2.perm >= Chi2.gr[i]) counter <- counter+1
+        }
+        prob.perm.gr[i] <- counter/(nperm+1)
+    }
+    ## Correction to P-values for multiple testing
+    if(ngr > 1) {
+        if(mult == "sidak") {
+            perm.corr <- NA
+            for(i in 1:ngr) perm.corr = c(perm.corr, (1-(1-prob.perm.gr[i])^ngr))
+            perm.corr <- perm.corr[-1]
+                                        #
+            prob.F.corr <- NA
+            for(i in 1:ngr) prob.F.corr = c(prob.F.corr, (1-(1-prob.F.gr[i])^ngr))
+            prob.F.corr <- prob.F.corr[-1]
+        } else {
+            perm.corr <- p.adjust(prob.perm.gr, method=mult)
+            prob.F.corr <- p.adjust(prob.F.gr, method=mult)
+        }
+    }
+    ## Create a data frame containing the results
+    if(ngr == 1) {
+        table <- rbind(W.gr, F.gr, prob.F.gr, Chi2.gr, prob.perm.gr)
+        colnames(table) <- colnames(table,do.NULL = FALSE, prefix = "Group.")
+        rownames(table) <- c("W", "F", "Prob.F", "Chi2", "Prob.perm")
+    }  else {
+        table <- rbind(W.gr, F.gr, prob.F.gr, prob.F.corr, Chi2.gr, prob.perm.gr, perm.corr)
+        colnames(table) <- colnames(table,do.NULL = FALSE, prefix = "Group.")
+        rownames(table) <- c("W", "F", "Prob.F", "Corrected prob.F", "Chi2", "Prob.perm", "Corrected prob.perm")		
+    }
+    if(ngr == 1) {
+        out <- list(Concordance_analysis=table)
+    }  else {
+        out <- list(Concordance_analysis=table, Correction.type=mult)
+    }
+                                        #
+    class(out) <- "kendall.global"
+    out
+}

Copied: branches/1.15/R/kendall.post.R (from rev 767, pkg/vegan/R/kendall.post.R)
===================================================================
--- branches/1.15/R/kendall.post.R	                        (rev 0)
+++ branches/1.15/R/kendall.post.R	2009-03-30 16:15:43 UTC (rev 768)
@@ -0,0 +1,140 @@
+`kendall.post` <-
+	function(Y, group, nperm=999, mult="holm")
+{
+### Function to carry out a posteriori tests on individual judges (e.g. species)
+### for a single group or several groups of judges.
+###
+### copyleft - Guillaume Blanchet and Pierre Legendre, October 2008
+################################################################################
+
+    mult <- match.arg(mult, c("sidak", p.adjust.methods))
+	
+    ##CC# Make sure Y is a matrix and find number of rows and columns of Y
+    Y <- as.matrix(Y)
+    n <- nrow(Y)
+    p <- ncol(Y)
+    if(p < 2) stop("There is a single variable in the data matrix")
+
+    ##CC# Transform the species abundances to ranks, by column
+    R <- apply(Y,2,rank)
+
+    if(missing(group)) group <- rep(1,p)
+    if(length(group) != p){
+        stop("The number of species in the vector differs from the total number of species")
+    }
+		
+    ##CC# Separate tests for the variables in each group
+    group <- as.factor(group)
+    gr.lev <- levels(group)
+    ngr <- nlevels(group)
+		
+    gr <- as.list(1:ngr)
+		
+    n.per.gr <- vector(length=ngr)
+    for(i in 1:ngr){
+        gr[[i]] <- which(group==gr.lev[i])
+        n.per.gr[i] <- length(gr[[i]])  # Vector with the number of
+                                        # variables per group
+    }
+		
+    ##===============================
+    ##CC# start permutation procedure
+    ##===============================
+    ##CC# Initialize the counters
+    counter <- as.list(1:ngr)
+    for(i in 1:ngr){
+        counter[[i]] <- vector(l=n.per.gr[i])
+    }
+    W.gr <- vector("list",ngr)
+    if(ngr > 1) spear.gr <- vector("list",ngr)
+		
+    for(i in 1:ngr){
+        p.i <- n.per.gr[i]
+        if(p.i < 2) stop("There is a single variable in group ",gr.lev[i])
+                                        #CC# Extract variables part of
+                                        #group i
+        R.gr <- R[,gr[[i]]]     # Table with species of group 'i' only
+                                        #as columns CC# Calculate the
+                                        #mean of the Spearman
+                                        #correlations for each species
+                                        #in a group
+        spear.mat <- cor(R.gr)
+        diag(spear.mat) <- NA
+        spear.mean <- apply(spear.mat, 1, mean, na.rm=TRUE)
+                                        #CC# Calculate Kendall's W for each variable
+        W.var <- ((p.i-1)*spear.mean+1)/p.i
+
+                                        #for(j in 1:n.per.gr[i]){
+        for(j in 1:p.i){                # Test each species in turn
+                                        #CC# Create a new R where the
+                                        #permuted variable has been
+                                        #removed
+            R.gr.mod <- R.gr[,-j]
+            ##CC# Set counter
+            counter[[i]][j] <- 1
+            ##CC# The actual permutation procedure
+            for(k in 1:nperm){
+                var.perm <- sample(R.gr[,j])
+                spear.mat.perm <- cor(cbind(R.gr.mod, var.perm))
+                diag(spear.mat.perm) <- NA
+                spear.mean.j.perm <- mean(spear.mat.perm[p.i,], na.rm=TRUE)
+                W.perm <- ((p.i-1)*spear.mean.j.perm+1)/p.i
+                if(W.perm >= W.var[j]) counter[[i]][j] <- counter[[i]][j]+1
+            }
+        }
+        W.gr[[i]] <- W.var
+        if(ngr > 1) spear.gr[[i]] <- spear.mean
+    }
+    ##CC# Calculate P-values
+    for(i in 1:ngr) {
+        counter[[i]] <- counter[[i]]/(nperm+1)
+    }
+             
+    ## Correction to P-values for multiple testing
+    ## Write all P-values to a long vector 'vec'
+    vec <- counter[[1]]
+    if(ngr > 1) {
+        for(i in 2:ngr) vec = c(vec, counter[[i]])
+    }
+    if(length(vec) != p) stop("Error in putting together vector 'vec'")
+
+    if(mult == "sidak") {
+        vec.corr = NA
+        for(i in 1:p) vec.corr = c(vec.corr, (1-(1-vec[i])^p))
+        vec.corr <- vec.corr[-1]
+    } else {
+        vec.corr <- p.adjust(vec, method=mult)
+    }
+
+    if(ngr > 1) {
+        vec.gr <- vector("list",ngr)
+        end <- 0
+        for(i in 1:ngr){
+            beg <- end+1
+            end <- end + n.per.gr[i]
+            vec.gr[[i]] <- vec.corr[beg:end]
+        }
+    }
+    ## Create data frames containing the results
+    if(ngr == 1) {
+        table <- rbind(spear.mean, W.var, counter[[1]], vec.corr)
+        rownames(table) <- c("Spearman.mean", "W.per.species", "Prob", "Corrected prob")
+    } else {
+        table <- as.list(1:ngr)
+        for(i in 1:ngr) {
+            table[[i]] <- rbind(spear.gr[[i]], W.gr[[i]], counter[[i]], vec.gr[[i]])
+            rownames(table[[i]]) <- c("Spearman.mean", "W.per.species", "Prob", "Corrected prob")
+            ## PL: Next line had been lost
+            colnames(table[[i]]) <- colnames(table[[i]], do.NULL = FALSE,
+                                             prefix = "Spec")
+        }
+    }
+    if(ngr == 1) {
+        out <- list(A_posteriori_tests=table, Correction.type=mult)
+    } else {
+        out <- list(A_posteriori_tests_Group=table, Correction.type=mult)
+    }
+                                        #
+    class(out) <- "kendall.post"
+    out
+}

Modified: branches/1.15/inst/ChangeLog
===================================================================
--- branches/1.15/inst/ChangeLog	2009-03-30 15:47:59 UTC (rev 767)
+++ branches/1.15/inst/ChangeLog	2009-03-30 16:15:43 UTC (rev 768)
@@ -5,6 +5,9 @@
 
 Version 1.15-2 (opened January 14, 2009)
 
+	* copied kendall.global at rev767 from pkg/vegan tree to
+	branches/1.15.
+
 	* merged r608: plot.radfit gained argument log = "y".
 
 	* merged r538: orditkplot makes TIFF graphics.

Copied: branches/1.15/man/kendall.global.Rd (from rev 767, pkg/vegan/man/kendall.global.Rd)
===================================================================
--- branches/1.15/man/kendall.global.Rd	                        (rev 0)
+++ branches/1.15/man/kendall.global.Rd	2009-03-30 16:15:43 UTC (rev 768)
@@ -0,0 +1,169 @@
+\encoding{UTF-8}
+\name{kendall.global}
+\alias{kendall.global}
+\alias{kendall.post}
+
+\title{ Kendall coefficient of concordance }
+
+\description{ 
+   Function \code{kendall.global} computes and tests the coefficient of
+  concordance among several judges (variables, species) through a
+  permutation test.
+
+  Function \code{kendall.post} carries out \emph{a posteriori} tests
+  of the contributions of individual judges (variables, species) to
+  the overall concordance of their group through permutation tests.
+
+  If several groups of judges are identified in the data table,
+  coefficients of concordance (\code{kendall.global}) or a posteriori
+  tests (\code{kendall.post}) will be computed for each group
+  separately. Use in ecology: to identify significant species
+  associations.
+}
+\usage{
+kendall.global(Y, group, nperm = 999, mult = "holm")
+kendall.post(Y, group, nperm = 999, mult = "holm")
+}
+\arguments{
+  \item{Y}{ Data file (data frame or matrix) containing quantitative or
+    semiquantitative data. Rows are objects and columns are judges
+    (variables). In community ecology, that table is often a
+    site-by-species table. }
+  \item{group}{ A vector defining how judges should be divided into
+    groups. See example below. If groups are not explicitly defined,
+    all judges in the data file will be considered as forming a single
+    group. }
+  \item{nperm}{ Number of permutations to be performed. Default is
+    999. }
+  \item{mult}{Correct P-values for multiple testing using the
+    alternatives described in \code{\link{p.adjust}} and in addition
+    \code{"sidak"} (see Details). The Bonferroni correction is overly
+    conservative; it is not recommended. It is included to allow
+    comparisons with the other methods.}
+}
+\details{
+  \code{Y} must contain quantitative data. They will be transformed to
+  ranks within each column before computation of the coefficient of
+  concordance.
+
+  The search for species associations described in Legendre (2005)
+  proceeds in 3 steps:
+
+  (1) Correlation analysis of the species. A possible method is to
+  compute Ward's agglomerative clustering of a matrix of correlations
+  among the species. In detail: (1.1) compute a Pearson or Spearman
+  correlation matrix (\code{correl.matrix}) among the species; (1.2)
+  turn it into a distance matrix: \code{mat.D =
+  as.dist(1-correl.matrix)}; (1.3) carry out Ward's hierarchical
+  clustering of that matrix using \code{hclust}: \code{clust.ward =
+  hclust(mat.D, "ward")}; (1.4) plot the dendrogram:
+  \code{plot(clust.ward, hang=-1)}; (1.5) cut the dendrogram in two
+  groups, retrieve the vector of species membership: \code{group.2 =
+  cutree(clust.ward, k=2)}. (1.6) After steps 2 and 3 below, you may
+  have to come back and try divisions of the species into k = 3, 4, 5,
+  \dots groups.
+
+  (2) Compute global tests of significance of the 2 (or more) groups
+  using the function \code{kendall.global} and the vector defining the
+  groups. Groups that are not globally significant must be refined or
+  abandoned.
+
+  (3) Compute a posteriori tests of the contribution of individual
+  species to the concordance of their group using the function
+  \code{kendall.post} and the vector defining the groups. If some
+  species have negative values for "Spearman.mean", this means that
+  these species clearly do not belong to the group, hence that group
+  is too inclusive. Go back to (1.5) and cut the dendrogram more
+  finely. The left and right groups can be cut separately,
+  independently of the levels along the dendrogram; write your own
+  vector of group membership if \code{cutree} does not produce the
+  desired groups.
+
+  The corrections used for multiple testing are applied to the list of
+  P-values (P); they take into account the number of tests (k) carried
+  out simulatenously (number of groups in \code{kendall.global}, or
+  number of species in \code{kendall.post}). The corrections are
+  performed using funtion \code{\link{p.adjust}}; see that function
+  for the describtion of the correction methods. In addition, there is
+  \enc{Šidák}{Sidak} correction which defined as 
+  \eqn{P_{corr} = 1 -(1 - P)^k}.
+}
+\value{
+
+  A table containing the following information in rows. The columns
+  correspond to the groups of "judges" defined in vector "group". When
+  function \code{Kendall.post} is used, there are as many tables as
+  the number of predefined groups.
+
+  \item{W }{Kendall's coefficient of concordance, W. }
+
+  \item{F }{F statistic. F = W*(m-1)/(1-W) where m is the number of
+  judges. }
+
+  \item{Prob.F }{Probability associated with the F statistic, computed
+  from the F distribution with nu1 = n-1-(2/m) and nu2 = nu1*(m-1); n is
+  the number of objects. }
+
+  \item{Corrected prob.F }{Probabilities associated with F, corrected
+  using the method selected in parameter \code{mult}. Shown only if
+  there are more than one group. }
+
+  \item{Chi2 }{Friedman's chi-square statistic (Friedman 1937) used in
+  the permutation test of W. }
+
+  \item{Prob.perm }{Permutational probabilities, uncorrected. }
+
+  \item{Corrected prob.perm }{Permutational probabilities corrected
+  using the method selected in parameter \code{mult}. Shown only if
+  there are more than one group. }
+
+  \item{Spearman.mean }{Mean of the Spearman correlations between the
+  judge under test and all the other judges in the same group. }
+
+  \item{W.per.species }{Contribution of the judge under test to the
+  overall concordance statistic for that group. }
+}
+\references{ 
+
+Friedman, M. 1937. The use of ranks to avoid the assumption of normality
+implicit in the analysis of variance. Journal of the American
+Statistical Association 32: 675-701.
+
+Kendall, M. G. and B. Babington Smith. 1939. The problem of m
+rankings. Annals of Mathematical Statistics 10: 275-287.
+
+Legendre, P. 2005. Species associations: the Kendall coefficient of
+concordance revisited. Journal of Agricultural, Biological, and
+Environmental Statistics 10: 226-245.
+
+Legendre, P. 2009. Coefficient of concordance. In: Encyclopedia of
+Research Design. SAGE Publications (in press).
+
+Siegel, S. and N. J. Castellan, Jr. 1988. Nonparametric statistics for
+the behavioral sciences. 2nd edition. McGraw-Hill, New York.
+}
+
+\seealso{\code{\link{cor}}, \code{\link{friedman.test}},
+\code{\link{hclust}}, \code{\link{cutree}}, \code{\link{kmeans}},
+\code{\link[vegan]{cascadeKM}}, \code{\link[labdsv]{duleg}}}
+
+\author{ F. Guillaume Blanchet, University of Alberta, and Pierre
+  Legendre, Université de Montréal }
+
+\examples{
+data(mite)
+mite.hel <- decostand(mite, "hel")
+
+# Reproduce the results shown in Table 2 of Legendre (2005), a single group
+mite.small <- mite.hel[c(4,9,14,22,31,34,45,53,61,69),c(13:15,23)]
+kendall.global(mite.small)
+kendall.post(mite.small, mult="holm")
+
+# Reproduce the results shown in Tables 3 and 4 of Legendre (2005), 2 groups
+group <-c(1,1,2,1,1,1,1,1,2,1,1,1,1,1,1,2,1,2,1,1,1,1,2,1,2,1,1,1,1,1,2,2,2,2,2)
+kendall.global(mite.hel, group=group)
+kendall.post(mite.hel, group=group, mult="holm", nperm=99)
+}
+
+\keyword{ multivariate }
+\keyword{ nonparametric }