[Vegan-commits] r768 - in branches/1.15: R inst man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Mar 30 18:15:43 CEST 2009
Author: jarioksa
Date: 2009-03-30 18:15:43 +0200 (Mon, 30 Mar 2009)
New Revision: 768
Added:
branches/1.15/R/kendall.global.R
branches/1.15/R/kendall.post.R
branches/1.15/man/kendall.global.Rd
Modified:
branches/1.15/inst/ChangeLog
Log:
copied kendall.global etc to branches/1.15
Copied: branches/1.15/R/kendall.global.R (from rev 767, pkg/vegan/R/kendall.global.R)
===================================================================
--- branches/1.15/R/kendall.global.R (rev 0)
+++ branches/1.15/R/kendall.global.R 2009-03-30 16:15:43 UTC (rev 768)
@@ -0,0 +1,111 @@
+`kendall.global` <-
+ function(Y, group, nperm=999, mult="holm")
+{
+### Function to test the overall significance of the Kendall coefficient of
+### concordance for a single group or several groups of judges (e.g. species)
+###
+### copyleft - Guillaume Blanchet and Pierre Legendre, October 2008
+################################################################################
+
+ mult <- match.arg(mult, c("sidak", p.adjust.methods))
+
+ ##CC# Make sure Y is a matrix and find number of rows and columns of Y
+ Y <- as.matrix(Y)
+ n <- nrow(Y)
+ p <- ncol(Y)
+ if(p < 2) stop("There is a single variable in the data matrix")
+
+ ##CC# Transform the species abundances to ranks, by column
+ R <- apply(Y,2,rank)
+
+ if(missing(group)) group <- rep(1,p)
+ if(length(group) != p){
+ stop("The number of species in the vector differs from the total number of species")
+ }
+ group <- as.factor(group)
+ gr.lev <- levels(group)
+ ngr <- nlevels(group)
+
+ gr <- as.list(1:ngr)
+
+ n.per.gr <- vector(length=ngr)
+ for(i in 1:ngr){
+ gr[[i]] <- which(group==gr.lev[i])
+ n.per.gr[i] <- length(gr[[i]])
+ ## Vector containing the number of species per group
+ }
+
+ ## Initialise the vectors of results
+ W.gr <- vector("numeric",ngr)
+ F.gr <- vector("numeric",ngr)
+ prob.F.gr <- vector("numeric",ngr)
+ Chi2.gr <- vector("numeric",ngr)
+ prob.perm.gr <- vector("numeric",ngr)
+ vec <- NA
+
+ for(i in 1:ngr){
+ p.i <- n.per.gr[i]
+ if(p.i < 2) stop("There is a single variable in group ",gr.lev[i])
+ ##CC# Correction factors for tied ranks (eq. 3.3)
+ t.ranks <- apply(R[,gr[[i]]], 2, function(x) summary(as.factor(x)))
+ T. <- sum(unlist(lapply(t.ranks, function(x) sum((x^3)-x))))
+
+ ##CC# Compute the Sum of squares of the uncentred ranks (S) (eq. 3.1)
+ S <- sum(apply(R[,gr[[i]]], 1, sum)^2)
+
+ ##CC# Compute Kendall's W (eq. 3.2)
+ W.gr[i] <- ((12*S)-(3*(p.i^2)*n*(n+1)^2))/(((p.i^2)*((n^3)-n))-(p.i*T.))
+
+ ##C# Compute Fisher F statistic and associated probability
+ F.gr[i] <- (p.i-1)*W.gr[i]/(1-W.gr[i])
+ nu1 <- n-1-(2/p.i)
+ nu2 <- nu1*(p.i-1)
+ prob.F.gr[i] <- pf(F.gr[i], nu1, nu2, lower.tail=FALSE)
+
+ ##CC# Calculate Friedman's Chi-square (eq. 3.4)
+ Chi2.gr[i] <- p.i*(n-1)*W.gr[i]
+
+ counter <- 1
+ for(j in 1:nperm) { # Each species is permuted independently
+ R.perm <- apply(R[,gr[[i]]], 2, sample)
+ S.perm <- sum(apply(R.perm, 1, sum)^2)
+ W.perm <- ((12*S.perm)-(3*(p.i^2)*n*(n+1)^2))/(((p.i^2)*((n^3)-n))-(p.i*T.))
+ Chi2.perm <- p.i*(n-1)*W.perm
+ if(Chi2.perm >= Chi2.gr[i]) counter <- counter+1
+ }
+ prob.perm.gr[i] <- counter/(nperm+1)
+ }
+ ## Correction to P-values for multiple testing
+ if(ngr > 1) {
+ if(mult == "sidak") {
+ perm.corr <- NA
+ for(i in 1:ngr) perm.corr = c(perm.corr, (1-(1-prob.perm.gr[i])^ngr))
+ perm.corr <- perm.corr[-1]
+ #
+ prob.F.corr <- NA
+ for(i in 1:ngr) prob.F.corr = c(prob.F.corr, (1-(1-prob.F.gr[i])^ngr))
+ prob.F.corr <- prob.F.corr[-1]
+ } else {
+ perm.corr <- p.adjust(prob.perm.gr, method=mult)
+ prob.F.corr <- p.adjust(prob.F.gr, method=mult)
+ }
+ }
+ ## Create a data frame containing the results
+ if(ngr == 1) {
+ table <- rbind(W.gr, F.gr, prob.F.gr, Chi2.gr, prob.perm.gr)
+ colnames(table) <- colnames(table,do.NULL = FALSE, prefix = "Group.")
+ rownames(table) <- c("W", "F", "Prob.F", "Chi2", "Prob.perm")
+ } else {
+ table <- rbind(W.gr, F.gr, prob.F.gr, prob.F.corr, Chi2.gr, prob.perm.gr, perm.corr)
+ colnames(table) <- colnames(table,do.NULL = FALSE, prefix = "Group.")
+ rownames(table) <- c("W", "F", "Prob.F", "Corrected prob.F", "Chi2", "Prob.perm", "Corrected prob.perm")
+ }
+ if(ngr == 1) {
+ out <- list(Concordance_analysis=table)
+ } else {
+ out <- list(Concordance_analysis=table, Correction.type=mult)
+ }
+ #
+ class(out) <- "kendall.global"
+ out
+}
Copied: branches/1.15/R/kendall.post.R (from rev 767, pkg/vegan/R/kendall.post.R)
===================================================================
--- branches/1.15/R/kendall.post.R (rev 0)
+++ branches/1.15/R/kendall.post.R 2009-03-30 16:15:43 UTC (rev 768)
@@ -0,0 +1,140 @@
+`kendall.post` <-
+ function(Y, group, nperm=999, mult="holm")
+{
+### Function to carry out a posteriori tests on individual judges (e.g. species)
+### for a single group or several groups of judges.
+###
+### copyleft - Guillaume Blanchet and Pierre Legendre, October 2008
+################################################################################
+
+ mult <- match.arg(mult, c("sidak", p.adjust.methods))
+
+ ##CC# Make sure Y is a matrix and find number of rows and columns of Y
+ Y <- as.matrix(Y)
+ n <- nrow(Y)
+ p <- ncol(Y)
+ if(p < 2) stop("There is a single variable in the data matrix")
+
+ ##CC# Transform the species abundances to ranks, by column
+ R <- apply(Y,2,rank)
+
+ if(missing(group)) group <- rep(1,p)
+ if(length(group) != p){
+ stop("The number of species in the vector differs from the total number of species")
+ }
+
+ ##CC# Separate tests for the variables in each group
+ group <- as.factor(group)
+ gr.lev <- levels(group)
+ ngr <- nlevels(group)
+
+ gr <- as.list(1:ngr)
+
+ n.per.gr <- vector(length=ngr)
+ for(i in 1:ngr){
+ gr[[i]] <- which(group==gr.lev[i])
+ n.per.gr[i] <- length(gr[[i]]) # Vector with the number of
+ # variables per group
+ }
+
+ ##===============================
+ ##CC# start permutation procedure
+ ##===============================
+ ##CC# Initialize the counters
+ counter <- as.list(1:ngr)
+ for(i in 1:ngr){
+ counter[[i]] <- vector(l=n.per.gr[i])
+ }
+ W.gr <- vector("list",ngr)
+ if(ngr > 1) spear.gr <- vector("list",ngr)
+
+ for(i in 1:ngr){
+ p.i <- n.per.gr[i]
+ if(p.i < 2) stop("There is a single variable in group ",gr.lev[i])
+ #CC# Extract variables part of
+ #group i
+ R.gr <- R[,gr[[i]]] # Table with species of group 'i' only
+ #as columns CC# Calculate the
+ #mean of the Spearman
+ #correlations for each species
+ #in a group
+ spear.mat <- cor(R.gr)
+ diag(spear.mat) <- NA
+ spear.mean <- apply(spear.mat, 1, mean, na.rm=TRUE)
+ #CC# Calculate Kendall's W for each variable
+ W.var <- ((p.i-1)*spear.mean+1)/p.i
+
+ #for(j in 1:n.per.gr[i]){
+ for(j in 1:p.i){ # Test each species in turn
+ #CC# Create a new R where the
+ #permuted variable has been
+ #removed
+ R.gr.mod <- R.gr[,-j]
+ ##CC# Set counter
+ counter[[i]][j] <- 1
+ ##CC# The actual permutation procedure
+ for(k in 1:nperm){
+ var.perm <- sample(R.gr[,j])
+ spear.mat.perm <- cor(cbind(R.gr.mod, var.perm))
+ diag(spear.mat.perm) <- NA
+ spear.mean.j.perm <- mean(spear.mat.perm[p.i,], na.rm=TRUE)
+ W.perm <- ((p.i-1)*spear.mean.j.perm+1)/p.i
+ if(W.perm >= W.var[j]) counter[[i]][j] <- counter[[i]][j]+1
+ }
+ }
+ W.gr[[i]] <- W.var
+ if(ngr > 1) spear.gr[[i]] <- spear.mean
+ }
+ ##CC# Calculate P-values
+ for(i in 1:ngr) {
+ counter[[i]] <- counter[[i]]/(nperm+1)
+ }
+
+ ## Correction to P-values for multiple testing
+ ## Write all P-values to a long vector 'vec'
+ vec <- counter[[1]]
+ if(ngr > 1) {
+ for(i in 2:ngr) vec = c(vec, counter[[i]])
+ }
+ if(length(vec) != p) stop("Error in putting together vector 'vec'")
+
+ if(mult == "sidak") {
+ vec.corr = NA
+ for(i in 1:p) vec.corr = c(vec.corr, (1-(1-vec[i])^p))
+ vec.corr <- vec.corr[-1]
+ } else {
+ vec.corr <- p.adjust(vec, method=mult)
+ }
+
+ if(ngr > 1) {
+ vec.gr <- vector("list",ngr)
+ end <- 0
+ for(i in 1:ngr){
+ beg <- end+1
+ end <- end + n.per.gr[i]
+ vec.gr[[i]] <- vec.corr[beg:end]
+ }
+ }
+ ## Create data frames containing the results
+ if(ngr == 1) {
+ table <- rbind(spear.mean, W.var, counter[[1]], vec.corr)
+ rownames(table) <- c("Spearman.mean", "W.per.species", "Prob", "Corrected prob")
+ } else {
+ table <- as.list(1:ngr)
+ for(i in 1:ngr) {
+ table[[i]] <- rbind(spear.gr[[i]], W.gr[[i]], counter[[i]], vec.gr[[i]])
+ rownames(table[[i]]) <- c("Spearman.mean", "W.per.species", "Prob", "Corrected prob")
+ ## PL: Next line had been lost
+ colnames(table[[i]]) <- colnames(table[[i]], do.NULL = FALSE,
+ prefix = "Spec")
+ }
+ }
+ if(ngr == 1) {
+ out <- list(A_posteriori_tests=table, Correction.type=mult)
+ } else {
+ out <- list(A_posteriori_tests_Group=table, Correction.type=mult)
+ }
+ #
+ class(out) <- "kendall.post"
+ out
+}
Modified: branches/1.15/inst/ChangeLog
===================================================================
--- branches/1.15/inst/ChangeLog 2009-03-30 15:47:59 UTC (rev 767)
+++ branches/1.15/inst/ChangeLog 2009-03-30 16:15:43 UTC (rev 768)
@@ -5,6 +5,9 @@
Version 1.15-2 (opened January 14, 2009)
+ * copied kendall.global at rev767 from pkg/vegan tree to
+ branches/1.15.
+
* merged r608: plot.radfit gained argument log = "y".
* merged r538: orditkplot makes TIFF graphics.
Copied: branches/1.15/man/kendall.global.Rd (from rev 767, pkg/vegan/man/kendall.global.Rd)
===================================================================
--- branches/1.15/man/kendall.global.Rd (rev 0)
+++ branches/1.15/man/kendall.global.Rd 2009-03-30 16:15:43 UTC (rev 768)
@@ -0,0 +1,169 @@
+\encoding{UTF-8}
+\name{kendall.global}
+\alias{kendall.global}
+\alias{kendall.post}
+
+\title{ Kendall coefficient of concordance }
+
+\description{
+ Function \code{kendall.global} computes and tests the coefficient of
+ concordance among several judges (variables, species) through a
+ permutation test.
+
+ Function \code{kendall.post} carries out \emph{a posteriori} tests
+ of the contributions of individual judges (variables, species) to
+ the overall concordance of their group through permutation tests.
+
+ If several groups of judges are identified in the data table,
+ coefficients of concordance (\code{kendall.global}) or a posteriori
+ tests (\code{kendall.post}) will be computed for each group
+ separately. Use in ecology: to identify significant species
+ associations.
+}
+\usage{
+kendall.global(Y, group, nperm = 999, mult = "holm")
+kendall.post(Y, group, nperm = 999, mult = "holm")
+}
+\arguments{
+ \item{Y}{ Data file (data frame or matrix) containing quantitative or
+ semiquantitative data. Rows are objects and columns are judges
+ (variables). In community ecology, that table is often a
+ site-by-species table. }
+ \item{group}{ A vector defining how judges should be divided into
+ groups. See example below. If groups are not explicitly defined,
+ all judges in the data file will be considered as forming a single
+ group. }
+ \item{nperm}{ Number of permutations to be performed. Default is
+ 999. }
+ \item{mult}{Correct P-values for multiple testing using the
+ alternatives described in \code{\link{p.adjust}} and in addition
+ \code{"sidak"} (see Details). The Bonferroni correction is overly
+ conservative; it is not recommended. It is included to allow
+ comparisons with the other methods.}
+}
+\details{
+ \code{Y} must contain quantitative data. They will be transformed to
+ ranks within each column before computation of the coefficient of
+ concordance.
+
+ The search for species associations described in Legendre (2005)
+ proceeds in 3 steps:
+
+ (1) Correlation analysis of the species. A possible method is to
+ compute Ward's agglomerative clustering of a matrix of correlations
+ among the species. In detail: (1.1) compute a Pearson or Spearman
+ correlation matrix (\code{correl.matrix}) among the species; (1.2)
+ turn it into a distance matrix: \code{mat.D =
+ as.dist(1-correl.matrix)}; (1.3) carry out Ward's hierarchical
+ clustering of that matrix using \code{hclust}: \code{clust.ward =
+ hclust(mat.D, "ward")}; (1.4) plot the dendrogram:
+ \code{plot(clust.ward, hang=-1)}; (1.5) cut the dendrogram in two
+ groups, retrieve the vector of species membership: \code{group.2 =
+ cutree(clust.ward, k=2)}. (1.6) After steps 2 and 3 below, you may
+ have to come back and try divisions of the species into k = 3, 4, 5,
+ \dots groups.
+
+ (2) Compute global tests of significance of the 2 (or more) groups
+ using the function \code{kendall.global} and the vector defining the
+ groups. Groups that are not globally significant must be refined or
+ abandoned.
+
+ (3) Compute a posteriori tests of the contribution of individual
+ species to the concordance of their group using the function
+ \code{kendall.post} and the vector defining the groups. If some
+ species have negative values for "Spearman.mean", this means that
+ these species clearly do not belong to the group, hence that group
+ is too inclusive. Go back to (1.5) and cut the dendrogram more
+ finely. The left and right groups can be cut separately,
+ independently of the levels along the dendrogram; write your own
+ vector of group membership if \code{cutree} does not produce the
+ desired groups.
+
+ The corrections used for multiple testing are applied to the list of
+ P-values (P); they take into account the number of tests (k) carried
+ out simulatenously (number of groups in \code{kendall.global}, or
+ number of species in \code{kendall.post}). The corrections are
+ performed using funtion \code{\link{p.adjust}}; see that function
+ for the describtion of the correction methods. In addition, there is
+ \enc{Šidák}{Sidak} correction which defined as
+ \eqn{P_{corr} = 1 -(1 - P)^k}.
+}
+\value{
+
+ A table containing the following information in rows. The columns
+ correspond to the groups of "judges" defined in vector "group". When
+ function \code{Kendall.post} is used, there are as many tables as
+ the number of predefined groups.
+
+ \item{W }{Kendall's coefficient of concordance, W. }
+
+ \item{F }{F statistic. F = W*(m-1)/(1-W) where m is the number of
+ judges. }
+
+ \item{Prob.F }{Probability associated with the F statistic, computed
+ from the F distribution with nu1 = n-1-(2/m) and nu2 = nu1*(m-1); n is
+ the number of objects. }
+
+ \item{Corrected prob.F }{Probabilities associated with F, corrected
+ using the method selected in parameter \code{mult}. Shown only if
+ there are more than one group. }
+
+ \item{Chi2 }{Friedman's chi-square statistic (Friedman 1937) used in
+ the permutation test of W. }
+
+ \item{Prob.perm }{Permutational probabilities, uncorrected. }
+
+ \item{Corrected prob.perm }{Permutational probabilities corrected
+ using the method selected in parameter \code{mult}. Shown only if
+ there are more than one group. }
+
+ \item{Spearman.mean }{Mean of the Spearman correlations between the
+ judge under test and all the other judges in the same group. }
+
+ \item{W.per.species }{Contribution of the judge under test to the
+ overall concordance statistic for that group. }
+}
+\references{
+
+Friedman, M. 1937. The use of ranks to avoid the assumption of normality
+implicit in the analysis of variance. Journal of the American
+Statistical Association 32: 675-701.
+
+Kendall, M. G. and B. Babington Smith. 1939. The problem of m
+rankings. Annals of Mathematical Statistics 10: 275-287.
+
+Legendre, P. 2005. Species associations: the Kendall coefficient of
+concordance revisited. Journal of Agricultural, Biological, and
+Environmental Statistics 10: 226-245.
+
+Legendre, P. 2009. Coefficient of concordance. In: Encyclopedia of
+Research Design. SAGE Publications (in press).
+
+Siegel, S. and N. J. Castellan, Jr. 1988. Nonparametric statistics for
+the behavioral sciences. 2nd edition. McGraw-Hill, New York.
+}
+
+\seealso{\code{\link{cor}}, \code{\link{friedman.test}},
+\code{\link{hclust}}, \code{\link{cutree}}, \code{\link{kmeans}},
+\code{\link[vegan]{cascadeKM}}, \code{\link[labdsv]{duleg}}}
+
+\author{ F. Guillaume Blanchet, University of Alberta, and Pierre
+ Legendre, Université de Montréal }
+
+\examples{
+data(mite)
+mite.hel <- decostand(mite, "hel")
+
+# Reproduce the results shown in Table 2 of Legendre (2005), a single group
+mite.small <- mite.hel[c(4,9,14,22,31,34,45,53,61,69),c(13:15,23)]
+kendall.global(mite.small)
+kendall.post(mite.small, mult="holm")
+
+# Reproduce the results shown in Tables 3 and 4 of Legendre (2005), 2 groups
+group <-c(1,1,2,1,1,1,1,1,2,1,1,1,1,1,1,2,1,2,1,1,1,1,2,1,2,1,1,1,1,1,2,2,2,2,2)
+kendall.global(mite.hel, group=group)
+kendall.post(mite.hel, group=group, mult="holm", nperm=99)
+}
+
+\keyword{ multivariate }
+\keyword{ nonparametric }
More information about the Vegan-commits
mailing list