[Vegan-commits] r2850 - in pkg/vegan: . R inst man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Feb 6 10:51:23 CET 2014


Author: jarioksa
Date: 2014-02-06 10:51:23 +0100 (Thu, 06 Feb 2014)
New Revision: 2850

Added:
   pkg/vegan/man/reorder.hclust.Rd
Modified:
   pkg/vegan/NAMESPACE
   pkg/vegan/R/as.hclust.spantree.R
   pkg/vegan/inst/ChangeLog
Log:
provide reorder() and rev() methods for 'hclust' trees

Modified: pkg/vegan/NAMESPACE
===================================================================
--- pkg/vegan/NAMESPACE	2014-02-05 13:37:22 UTC (rev 2849)
+++ pkg/vegan/NAMESPACE	2014-02-06 09:51:23 UTC (rev 2850)
@@ -192,6 +192,9 @@
 # hiersimu: vegan
 S3method(hiersimu, default)
 S3method(hiersimu, formula)
+# methods for hclust object in base R: these would be better in R
+S3method(reorder, hclust)
+S3method(rev, hclust)
 # identify: graphics
 S3method(identify, ordiplot)
 # labels: base

Modified: pkg/vegan/R/as.hclust.spantree.R
===================================================================
--- pkg/vegan/R/as.hclust.spantree.R	2014-02-05 13:37:22 UTC (rev 2849)
+++ pkg/vegan/R/as.hclust.spantree.R	2014-02-06 09:51:23 UTC (rev 2850)
@@ -5,10 +5,7 @@
 ### vector that gives the order of leaves in the plotted
 ### dendrogram. The 'height's are only sorted spantree segment
 ### distances, but for 'merge' we need to establish cluster
-### memberships, and for 'order' we must traverse the tree. The
-### plot.hclust() function seems to require that the left kid is
-### always more compact (a single point or fused earlier than the
-### right kid).
+### memberships, and for 'order' we must traverse the tree.
 
 `as.hclust.spantree` <-
     function(x, ...)
@@ -27,16 +24,11 @@
     merge <- matrix(0, nrow=npoints-1, ncol=2)
     for(i in 1:nrow(merge)) {
         ## add items of labs, keep tighter cluster on the left
-        items <- c(labs[dad[i]], labs[kid[i]])
-        if (items[1] > 0 || items[2] > 0)
-            items <- sort(items)
-        else
-            items <- rev(sort(items))
-        merge[i, ] <- items
+        merge[i, ] <- c(labs[dad[i]], labs[kid[i]])
         ## update labs for the current group and its kids
         labs[labs %in% labs[c(dad[i], kid[i])]] <- i
     }
-    ## Get order of leaves with recursive search from the root
+    ## Get order of leaves with recursive search from the root.
     visited <- matrix(FALSE, nrow = nrow(merge), ncol=ncol(merge))
     order <- numeric(npoints)
     ind <- 0
@@ -67,3 +59,98 @@
     class(out) <- "hclust"
     out
 }
+
+### Internal vegan function to get the 'order' from a merge matrix of
+### an hclust tree
+
+`hclustMergeOrder` <-
+    function(merge)
+{
+    ## Get order of leaves with recursive search from the root
+    visited <- matrix(FALSE, nrow = nrow(merge), ncol=ncol(merge))
+    order <- numeric(nrow(merge)+1)
+    ind <- 0
+    ## "<<-" updates data only within this function, but outside the
+    ## visit() function.
+    visit <- function(i, j) {
+        if(visited[i,j])
+            return(NULL)
+        else {
+            visited[i,j] <<- TRUE
+        }
+        if (merge[i,j] < 0) {
+            ind <<- ind+1
+            order[ind] <<- -merge[i,j]
+            if (j == 1)
+                visit(i, 2)
+        } else {
+            visit(merge[i,j], 1)
+            visit(merge[i,j], 2)
+        }
+    }
+    visit(nrow(merge), 1)
+    visit(nrow(merge), 2)
+    return(order)
+}
+
+### Reorder an hclust tree. Base R provides reorder.dendrogram, but
+### this function works with 'hclust' objects, and also differs in
+### implementation. We use either weighted mean, min, max or
+### sum. The dendrogram is always ordered in ascending order, so that
+### the left kid always has the lower value. For instance, with 'max'
+### the largest value of the left group is not larger than that of
+### the right group. The choice 'sum' hardly makes sense, but it is
+### the default in reorder.dendrogram. The ordering with 'mean' differs
+### from reorder.dendrogram which uses unweighted means, but here we
+### weight means by group sizes so that the mean of an internal node
+### is the mean of its leaves.
+
+`reorder.hclust` <-
+    function(x, wts, agglo.FUN = c("mean", "min", "max", "sum"), ...)
+{
+    agglo.FUN <- match.arg(agglo.FUN)
+    merge <- x$merge
+    nlev <- nrow(merge)
+    stats <- numeric(nlev)
+    counts <- numeric(nlev)
+    pair <- numeric(2)
+    pairw <- numeric(2)
+    ## Go through merge, order each level and update the statistic.
+    for(i in 1:nlev) {
+        for(j in 1:2) {
+            if (merge[i,j] < 0) {
+                pair[j] <- wts[-merge[i,j]]
+                pairw[j] <- 1
+            } else {
+                pair[j] <- stats[merge[i,j]]
+                pairw[j] <- counts[merge[i,j]]
+            }
+        }
+        ## reorder
+        merge[i,] <- merge[i, order(pair)]
+        ## statistic for this merge level
+        stats[i] <-
+            switch(agglo.FUN,
+                   "mean" = weighted.mean(pair, pairw),
+                   "min" = min(pair),
+                   "max" = max(pair),
+                   "sum" = sum(pair))
+        counts[i] <- sum(pairw)
+    }
+    ## Get the 'order' of the reordered dendrogram
+    order <- hclustMergeOrder(merge)
+    x$merge <- merge
+    x$order <- order
+    x$value <- stats
+    x
+}
+
+### Trivial function to reverse the order of an hclust tree (why is
+### this not in base R?)
+
+`rev.hclust` <-
+    function(x)
+{
+    x$order <- rev(x$order)
+    x
+}

Modified: pkg/vegan/inst/ChangeLog
===================================================================
--- pkg/vegan/inst/ChangeLog	2014-02-05 13:37:22 UTC (rev 2849)
+++ pkg/vegan/inst/ChangeLog	2014-02-06 09:51:23 UTC (rev 2850)
@@ -14,6 +14,15 @@
 	* as.hclust.spantree: a new function to cast a "spantree" result
 	object to an "hclust" tree. 
 
+	* hclust: add reorder() and rev() methods for standard "hclust"
+	trees of R.  I have no clue why base R does not have these
+	methods, but I provide them now in vegan. An additional reason for
+	providing these methods is that reorder(<dendrogram-object>, wts,
+	agglo.FUN = mean) will use unweighted mean of merged groups even
+	when these are of very unequal sizes. The reorder method provided
+	here will use group sizes as weights and the value of the group
+	will be the mean of its leaves (terminal nodes).
+
 	* biplot.rda: failed in axis scaling with negative 'scaling'
 	values when some species had zero variance (and hence species
 	scores was 0/0 = NaN).

Added: pkg/vegan/man/reorder.hclust.Rd
===================================================================
--- pkg/vegan/man/reorder.hclust.Rd	                        (rev 0)
+++ pkg/vegan/man/reorder.hclust.Rd	2014-02-06 09:51:23 UTC (rev 2850)
@@ -0,0 +1,94 @@
+\name{reorder.hclust}
+\alias{reorder.hclust}
+\alias{rev.hclust}
+
+\title{
+Reorder a Hierarchical Clustering Tree
+}
+
+\description{
+
+  Function takes a hierarchical clustering tree from
+  \code{\link{hclust}} and a vector of values and reorders the
+  clustering tree in the order of the supplied vector, maintaining the
+  constraints on the tree. This is a method of generic function
+  \code{\link{reorder}} and an alternative to reordering a
+  \code{"dendrogram"} object with \code{\link{reorder.dendrogram}}.
+
+}
+
+\usage{
+\method{reorder}{hclust}(x, wts, agglo.FUN = c("mean", "min", "max", "sum"), ...)
+\method{rev}{hclust}(x)
+}
+
+\arguments{
+  \item{x}{
+    hierarchical clustering from \code{\link{hclust}}.
+}
+  \item{wts}{
+    numeric vector for reordering.
+}
+  \item{agglo.FUN}{
+    name of the function for weights agglomeration, see below.
+}
+  \item{\dots}{
+    additional arguments (ignored).
+}
+}
+
+\details{
+  
+  Dendrograms can be ordered in many ways. The \code{reorder} function
+  reorders an \code{\link{hclust}} tree and provides an alternative to
+  \code{\link{reorder.dendrogram}} which can reorder a
+  \code{\link{dendrogram}}. The current function will also work
+  differently when the \code{agglo.FUN} is \code{"mean"}: the
+  \code{\link{reorder.dendrogram}} will always take the direct mean of
+  member groups ignoring their sizes, but this function will use
+  \code{\link{weighted.mean}} weighted by group sizes, so that the group
+  mean is always the mean of member leaves (terminal nodes).
+
+  The function accepts only a limited list of \code{agglo.FUN}
+  functions for assessing the value of \code{wts} for groups. The
+  ordering is always ascending, but the order of leaves can be reversed
+  with \code{rev}.
+
+}
+
+\value{
+  Reordered \code{\link{hclust}} result object with added item
+  \code{value} that gives the value of the statistic at each merge
+  level. 
+}
+
+\author{
+  Jari Oksanen
+}
+\note{
+  These functions should really be in base \R.
+}
+
+
+\seealso{
+  \code{\link{hclust}} for getting clustering trees,
+  \code{\link{as.hclust.spantree}} to change a \pkg{vegan} minimum
+  spanning tree to an \code{\link{hclust}} object, and
+  \code{\link{dendrogram}} and \code{\link{reorder.dendrogram}} for an
+  alternative implementation.
+}
+\examples{
+data(mite, mite.env)
+hc <- hclust(vegdist(wisconsin(sqrt(mite))))
+ohc <- with(mite.env, reorder(hc, WatrCont))
+plot(hc)
+plot(ohc)
+## Slightly different from reordered 'dendrogram' which ignores group
+## sizes in assessing means.
+den <- as.dendrogram(hc)
+den <- with(mite.env, reorder(den, WatrCont, agglo.FUN = mean))
+plot(den)
+}
+
+\keyword{multivariate}
+



More information about the Vegan-commits mailing list