[Vegan-commits] r1471 - in pkg/vegan: R inst man

Sun Jan 30 19:38:47 CET 2011

Author: jarioksa
Date: 2011-01-30 19:38:47 +0100 (Sun, 30 Jan 2011)
New Revision: 1471

Modified:
   pkg/vegan/R/as.preston.R
   pkg/vegan/R/prestonfit.R
   pkg/vegan/inst/ChangeLog
   pkg/vegan/man/fisherfit.Rd
Log:
implemented splitting ties in prestonfit following Williamson & Gaston, J Anim Ecol 43, 381-399; 2005

Modified: pkg/vegan/R/as.preston.R
===================================================================

--- pkg/vegan/R/as.preston.R	2011-01-30 07:37:51 UTC (rev 1470)
+++ pkg/vegan/R/as.preston.R	2011-01-30 18:38:47 UTC (rev 1471)
@@ -1,16 +1,32 @@
-"as.preston" <-
-    function (x, ...) 
+`as.preston` <-
+    function (x, tiesplit = FALSE, ...) 
 {
     if (inherits(x, "preston")) 
         return(x)
     if (!identical(all.equal(x, round(x)), TRUE))
         stop("function accepts only integers (counts)")
-    freq <- x[x > 0]
-    freq <- ceiling(log2(freq))
-    freq <- table(freq)
-    nm <- names(freq)
-    freq <- as.vector(freq)
-    names(freq) <- nm
+    x <- x[x > 0]
+    if (tiesplit) {
+        ## Assume log2(2^k) == k exactly for integer k
+        xlog2 <- log2(x)
+        ties <- xlog2 == ceiling(xlog2)
+        tiefreq <- table(xlog2[ties])
+        notiefreq <- table(ceiling(xlog2[!ties]))
+        itie <- as.numeric(names(tiefreq)) + 1
+        nitie <- as.numeric(names(notiefreq)) + 1
+        freq <- numeric(max(itie, nitie))
+        ## split tied values between two adjacent octaves
+        freq[itie] <- tiefreq/2
+        freq[itie+1] <- freq[itie+1] + tiefreq/2
+        freq[nitie] <- freq[nitie] + notiefreq
+    } else {
+        xlog2 <- ceiling(log2(x))
+        tmp <- table(xlog2)
+        indx <- as.numeric(names(tmp)) + 1
+        freq <- numeric(max(indx))
+        freq[indx] <- tmp
+    }
+    names(freq) <- seq_along(freq) - 1
     class(freq) <- "preston"
     freq
 }

Modified: pkg/vegan/R/prestonfit.R
===================================================================
--- pkg/vegan/R/prestonfit.R	2011-01-30 07:37:51 UTC (rev 1470)
+++ pkg/vegan/R/prestonfit.R	2011-01-30 18:38:47 UTC (rev 1471)
@@ -1,9 +1,10 @@
-"prestonfit" <-
-    function (x, ...) 
+`prestonfit` <-
+    function (x, tiesplit = FALSE, ...) 
 {
-    x <- as.preston(x)
+    x <- as.preston(x, tiesplit = tiesplit)
     oct <- as.numeric(names(x))
-    fit <- glm(x ~ oct + I(oct^2), family = poisson)
+    fit <- glm(x ~ oct + I(oct^2),
+               family = if (tiesplit) quasipoisson else poisson)
     fv <- fitted(fit)
     p <- coef(fit)
     if (!is.na(p[3]) && p[3] < 0) {
@@ -18,6 +19,8 @@
     names(p) <- c("mode", "width", "S0")
     out <- list(freq = unclass(x), fitted = fv, coefficients = p)
     out$method = "Poisson fit to octaves"
+    if(tiesplit)
+        out$method <- paste("Quasi-", out$method, sep="")
     class(out) <- c("prestonfit")
     out
 }

Modified: pkg/vegan/inst/ChangeLog
===================================================================
--- pkg/vegan/inst/ChangeLog	2011-01-30 07:37:51 UTC (rev 1470)
+++ pkg/vegan/inst/ChangeLog	2011-01-30 18:38:47 UTC (rev 1471)
@@ -4,9 +4,13 @@
 
 Version 1.18-22 (opened January 19, 2011)
 
+	* prestonfit: implemented splitting "tied" counts (1, 2, 4, 8 etc)
+	between octaves following Williamson & Gaston (J Anim Ecol 43,
+	381-399; 2005) with argument 'tiesplit = TRUE'
+	
 	* specaccum: fixed typo -- 'individuals' instead of
 	'invidividuals'. plot() gained argument to use "individuals" as
-	x-axis instead of "sites" with method = "rarefaction"
+	x-axis instead of "sites" with method = "rarefaction".
 	
 Version 1.18-21 (closed January 19, 2011)
 

Modified: pkg/vegan/man/fisherfit.Rd
===================================================================
--- pkg/vegan/man/fisherfit.Rd	2011-01-30 07:37:51 UTC (rev 1470)
+++ pkg/vegan/man/fisherfit.Rd	2011-01-30 18:38:47 UTC (rev 1471)
@@ -27,7 +27,7 @@
 \method{confint}{fisherfit}(object, parm, level = 0.95, ...)
 \method{profile}{fisherfit}(fitted, alpha = 0.01, maxsteps = 20, del = zmax/5, 
     ...)
-prestonfit(x, ...)
+prestonfit(x, tiesplit = FALSE, ...)
 prestondistr(x, truncate = -1, ...)
 \method{plot}{prestonfit}(x, xlab = "Frequency", ylab = "Species", bar.col = "skyblue", 
     line.col = "red", lwd = 2, ...)
@@ -45,6 +45,8 @@
   \item{alpha}{The extend of profiling as significance.}
   \item{maxsteps}{Maximum number of steps in profiling.}
   \item{del}{Step length.}
+  \item{tiesplit}{Split frequencies \eqn{1, 2, 4, 8} etc between adjacent 
+    octaves.}
   \item{truncate}{Truncation point for log-Normal model, in log2
     units. Default value \eqn{-1} corresponds to the left border of zero
     Octave. The choice strongly influences the fitting results.}
@@ -83,52 +85,70 @@
   are similar.
 
   Preston (1948) was not satisfied with Fisher's model which seemed to
-  imply infinite species richness, and postulated that rare species is a
-  diminishing class and most species are in the middle of frequency
+  imply infinite species richness, and postulated that rare species is
+  a diminishing class and most species are in the middle of frequency
   scale. This was achieved by collapsing higher frequency classes into
   wider and wider ``octaves'' of doubling class limits: 1, 2, 3--4,
-  5--8, 9--16 etc. occurrences. Any logseries data will look like
-  lognormal when plotted this way. The expected frequency \eqn{f} at abundance
-  octave \eqn{o} is defined by \eqn{f_o = S_0 \exp(-(\log_2(o) -
-    \mu)^2/2/\sigma^2)}{f = S0 exp(-(log2(o)-mu)^2/2/sigma^2)}, where
-  \eqn{\mu} is the location of the mode and \eqn{\sigma} the width, both
-  in \eqn{\log_2}{log2} scale, and \eqn{S_0}{S0} is the expected number
-  of species at mode. The lognormal model is usually truncated on the
-  left so that some rare species are not observed. Function
+  5--8, 9--16 etc. occurrences. It seems that Preston regarded
+  frequencies 1, 2, 4, \emph{etc.}. as \dQuote{tied} between octaves
+  (Williamson & Gaston 2005). This means that only half of the species
+  with frequency 1 are shown in the lowest octave, and the rest are
+  transferred to the second octave. Half of the species from the
+  second octave are transferred to the higher one as well, but this is
+  usually not as large a number of species. This practise makes data
+  look more lognormal by reducing the usually high lowest
+  octaves. This can be achieved by setting argument \code{tiesplit =
+  TRUE}. With \code{tiesplit = FALSE} the frequencies are not split,
+  but all ones are in the lowest octave, all twos in the second, etc.
+  Williamson & Gaston (2005) discuss alternative definitions in
+  detail, and they should be consulted for a critical review of
+  log-Normal model.
+
+  Any logseries data will look like lognormal when plotted in
+  Preston's way. The expected frequency \eqn{f} at abundance octave
+  \eqn{o} is defined by \eqn{f_o = S_0 \exp(-(\log_2(o) -
+  \mu)^2/2/\sigma^2)}{f = S0 exp(-(log2(o)-mu)^2/2/sigma^2)}, where
+  \eqn{\mu} is the location of the mode and \eqn{\sigma} the width,
+  both in \eqn{\log_2}{log2} scale, and \eqn{S_0}{S0} is the expected
+  number of species at mode. The lognormal model is usually truncated
+  on the left so that some rare species are not observed. Function
   \code{prestonfit} fits the truncated lognormal model as a second
-  degree log-polynomial to the octave pooled data using Poisson
-  error. Function \code{prestondistr} fits left-truncated Normal distribution to
-  \eqn{\log_2}{log2} transformed non-pooled observations with direct
-  maximization of log-likelihood. Function \code{prestondistr} is
-  modelled after function \code{\link[MASS]{fitdistr}} which can be used
-  for alternative distribution models. The functions have common \code{print},
-  \code{plot} and \code{lines} methods. The \code{lines} function adds
-  the fitted curve to the octave range with line segments showing the
-  location of the mode and the width (sd) of the response.
+  degree log-polynomial to the octave pooled data using Poisson (when
+  \eqn{tiesplit = FALSE}) or quasi-Poisson (when \eqn{tiesplit =
+  TRUE}).  error. Function \code{prestondistr} fits left-truncated
+  Normal distribution to \eqn{\log_2}{log2} transformed non-pooled
+  observations with direct maximization of log-likelihood. Function
+  \code{prestondistr} is modelled after function
+  \code{\link[MASS]{fitdistr}} which can be used for alternative
+  distribution models. 
 
-  The total
-  extrapolated richness from a fitted Preston model can be found with
-  function \code{veiledspec}. The function accepts results both from
-  \code{prestonfit} and from \code{prestondistr}. If \code{veiledspec} is
-  called with a species count vector, it will internally use
-  \code{prestonfit}. Function \code{\link{specpool}} provides
-  alternative ways of estimating the number of unseen species. In fact,
-  Preston's lognormal model seems to be truncated at both ends, and this
-  may be the main reason why its result differ from lognormal models
-  fitted in Rank--Abundance diagrams with functions
-  \code{\link{rad.lognormal}}. 
+  The functions have common \code{print}, \code{plot} and \code{lines}
+  methods. The \code{lines} function adds the fitted curve to the
+  octave range with line segments showing the location of the mode and
+  the width (sd) of the response.
+
+  The total extrapolated richness from a fitted Preston model can be
+  found with function \code{veiledspec}. The function accepts results
+  both from \code{prestonfit} and from \code{prestondistr}. If
+  \code{veiledspec} is called with a species count vector, it will
+  internally use \code{prestonfit}. Function \code{\link{specpool}}
+  provides alternative ways of estimating the number of unseen
+  species. In fact, Preston's lognormal model seems to be truncated at
+  both ends, and this may be the main reason why its result differ
+  from lognormal models fitted in Rank--Abundance diagrams with
+  functions \code{\link{rad.lognormal}}.  
 }
-\value{
-  The function \code{prestonfit} returns an object with fitted
+
+\value{ The function \code{prestonfit} returns an object with fitted
   \code{coefficients}, and with observed (\code{freq}) and fitted
   (\code{fitted}) frequencies, and a string describing the fitting
-  \code{method}. Function \code{prestondistr} omits the entry \code{fitted}.
-  The function \code{fisherfit} returns the result of \code{\link{nlm}}, where item
-  \code{estimate} is \eqn{\alpha}. The result object is amended with the
-  following items:
+  \code{method}. Function \code{prestondistr} omits the entry
+  \code{fitted}.  The function \code{fisherfit} returns the result of
+  \code{\link{nlm}}, where item \code{estimate} is \eqn{\alpha}. The
+  result object is amended with the following items:
   \item{df.residuals}{Residual degrees of freedom.}
-  \item{nuisance}{Parameter \eqn{x}.}
-  \item{fisher}{Observed data from \code{as.fisher}.}
+  \item{nuisance}{Parameter \eqn{x}.}  \item{fisher}{Observed data
+  from \code{as.fisher}.}
 
 }
 \references{
@@ -138,8 +158,8 @@
   12: 42--58.
 
   Kempton, R.A. & Taylor, L.R. (1974). Log-series and log-normal
-  parameters as diversity discriminators for Lepidoptera. \emph{Journal
-    of Animal Ecology} 43: 381--399.
+  parameters as diversity discriminators for
+  Lepidoptera. \emph{Journal of Animal Ecology} 43: 381--399.
 
   Preston, F.W. (1948) The commonness and rarity of
   species. \emph{Ecology} 29, 254--283.
@@ -148,21 +168,9 @@
   not an appropriate null hypothesis for the species--abundance
   distribution. \emph{Journal of Animal Ecology} 74, 409--422.
 }
-\author{Bob O'Hara \email{bob.ohara at helsinki.fi} (\code{fisherfit}) and Jari Oksanen. }
 
-\note{It seems that Preston regarded frequencies 1, 2, 4, \emph{etc.}. as ``tied''
-  between octaves. This means that only half of the species with
-  frequency 1 were shown in the lowest octave, and the rest were
-  transferred to the second octave. Half of the species from the second
-  octave were transferred to the higher one as well, but this is usually
-  not as large number of species. This practise makes data look more
-  lognormal by reducing the usually high lowest octaves, but is too
-  unfair to be followed. Therefore the octaves used in this function
-  include the upper limit. If you do not accept this, you must change
-  the function yourself. Williamson & Gaston (2005) discuss alternative
-  definitions in detail, and they should be consulted for a
-  critical review of log-Normal model.
-}
+\author{Bob O'Hara \email{bob.ohara at helsinki.fi} (\code{fisherfit})
+and Jari Oksanen. }
 
 \seealso{\code{\link{diversity}}, \code{\link{fisher.alpha}},
   \code{\link{radfit}}, \code{\link{specpool}}. Function