[Vegan-commits] r2849 - in pkg/vegan: . R inst man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Feb 5 14:37:23 CET 2014


Author: jarioksa
Date: 2014-02-05 14:37:22 +0100 (Wed, 05 Feb 2014)
New Revision: 2849

Added:
   pkg/vegan/R/as.hclust.spantree.R
Modified:
   pkg/vegan/NAMESPACE
   pkg/vegan/inst/ChangeLog
   pkg/vegan/man/spantree.Rd
Log:
add as.hclust.spantree

Modified: pkg/vegan/NAMESPACE
===================================================================
--- pkg/vegan/NAMESPACE	2014-02-02 21:00:04 UTC (rev 2848)
+++ pkg/vegan/NAMESPACE	2014-02-05 13:37:22 UTC (rev 2849)
@@ -94,6 +94,8 @@
 S3method(anova, betadisper)
 S3method(anova, cca)
 S3method(anova, prc)
+# as.hclust: stats
+S3method(as.hclust, spantree)
 ## Do not export as.mcmc now: would need import(coda)
 # as.mcmc: coda <======= rare
 #S3method(as.mcmc, oecosimu)

Added: pkg/vegan/R/as.hclust.spantree.R
===================================================================
--- pkg/vegan/R/as.hclust.spantree.R	                        (rev 0)
+++ pkg/vegan/R/as.hclust.spantree.R	2014-02-05 13:37:22 UTC (rev 2849)
@@ -0,0 +1,69 @@
+### Casts a vegan spantree object into single linkage dendrogram of
+### class hclust. The non-trivial items in "hclust" object are a
+### 'merge' matrix for fusions of points and/or clusters, a 'height'
+### vector which gives the heights of each fusion, and an 'order'
+### vector that gives the order of leaves in the plotted
+### dendrogram. The 'height's are only sorted spantree segment
+### distances, but for 'merge' we need to establish cluster
+### memberships, and for 'order' we must traverse the tree. The
+### plot.hclust() function seems to require that the left kid is
+### always more compact (a single point or fused earlier than the
+### right kid).
+
+`as.hclust.spantree` <-
+    function(x, ...)
+{
+    ## Permutation that sorts the spanning tree links by increasing length
+    o <- order(x$dist)
+    npoints <- length(o) + 1
+    ## Link k joins point k+1 ('dad') to point x$kid[k]; put both ends in order o
+    dad <- (2:npoints)[o]
+    kid <- x$kid[o]
+    ## 'labs' holds the current merge-matrix label of every point:
+    ## -k while point k is still a singleton, and after that the
+    ## (positive) row number i of the latest fusion that absorbed its
+    ## group. This is the sign convention of the hclust 'merge' matrix.
+    labs <- -seq_len(npoints)
+    merge <- matrix(0, nrow=npoints-1, ncol=2)
+    for(i in 1:nrow(merge)) {
+        ## left column: singleton before cluster, otherwise lower number first
+        items <- c(labs[dad[i]], labs[kid[i]])
+        if (items[1] > 0 || items[2] > 0)
+            items <- sort(items)
+        else
+            items <- rev(sort(items))
+        merge[i, ] <- items
+        ## relabel every point of the two fused groups with fusion number i
+        labs[labs %in% labs[c(dad[i], kid[i])]] <- i
+    }
+    ## Leaf order: depth-first walk of 'merge' starting from the last fusion
+    visited <- matrix(FALSE, nrow = nrow(merge), ncol=ncol(merge))
+    order <- numeric(npoints)
+    ind <- 0
+    ## visit() writes 'visited', 'ind' and 'order' with "<<-"; these are
+    ## the variables of this enclosing function, not global variables.
+    visit <- function(i, j) {
+        if(visited[i,j])
+            return(NULL)
+        else {
+            visited[i,j] <<- TRUE
+        }
+        if (merge[i,j] < 0) {
+            ind <<- ind+1
+            order[ind] <<- -merge[i,j]
+            if (j == 1)
+                visit(i, 2)
+        } else {
+            visit(merge[i,j], 1)
+            visit(merge[i,j], 2)
+        }
+    }
+    visit(nrow(merge), 1)
+    visit(nrow(merge), 2)
+    
+    out <- list(merge = merge, height = x$dist[o], order = order,
+                labels = x$labels, method = "spantree", call =
+                match.call())
+    class(out) <- "hclust"
+    out
+}

Modified: pkg/vegan/inst/ChangeLog
===================================================================
--- pkg/vegan/inst/ChangeLog	2014-02-02 21:00:04 UTC (rev 2848)
+++ pkg/vegan/inst/ChangeLog	2014-02-05 13:37:22 UTC (rev 2849)
@@ -11,6 +11,9 @@
 	be used in an incorrect context". The dots were added in r2765,
 	and now removed.
 
+	* as.hclust.spantree: a new function to cast a "spantree" result
+	object to an "hclust" tree. 
+
 	* biplot.rda: failed in axis scaling with negative 'scaling'
 	values when some species had zero variance (and hence species
 	scores was 0/0 = NaN).

Modified: pkg/vegan/man/spantree.Rd
===================================================================
--- pkg/vegan/man/spantree.Rd	2014-02-02 21:00:04 UTC (rev 2848)
+++ pkg/vegan/man/spantree.Rd	2014-02-05 13:37:22 UTC (rev 2849)
@@ -1,6 +1,7 @@
 \name{spantree}
 \alias{spantree}
 \alias{cophenetic.spantree}
+\alias{as.hclust.spantree}
 \alias{plot.spantree}
 \alias{lines.spantree}
 \alias{spandepth}
@@ -13,6 +14,7 @@
 }
 \usage{
 spantree(d, toolong = 0)
+\method{as.hclust}{spantree}(x, ...)
 \method{cophenetic}{spantree}(x)
 spandepth(x)
 \method{plot}{spantree}(x, ord, cex = 0.7, type = "p", labels, dlim,
@@ -58,6 +60,15 @@
   corresponding link is \code{NA}. Connected subtrees can be identified
   using \code{\link{distconnected}}.
 
+  A minimum spanning tree is closely related to single linkage
+  clustering (also known as nearest neighbour clustering, and in genetics
+  as neighbour joining tree), which is available in \code{\link{hclust}} and
+  \code{\link[cluster]{agnes}} functions. The most important practical
+  difference is that minimum spanning tree has no concept of cluster
+  membership, but always joins individual points to each other. Function
+  \code{as.hclust} can change the \code{spantree} result into a
+  corresponding \code{\link{hclust}} object.
+
   Function \code{cophenetic} finds distances between all points along
   the tree segments. Function \code{spandepth} returns the depth of
   each node. The nodes of a tree are either leaves (with one link) or
@@ -132,6 +143,8 @@
 ## Depths of nodes
 depths <- spandepth(tr)
 plot(tr, type = "t", label = depths)
+## Plot as a dendrogram
+plot(as.hclust(tr))
 }
 \keyword{ multivariate}
 



More information about the Vegan-commits mailing list