[Dplr-commits] r735 - in pkg/dplR: . R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Mar 26 01:50:22 CET 2014


Author: andybunn
Date: 2014-03-26 01:50:21 +0100 (Wed, 26 Mar 2014)
New Revision: 735

Modified:
   pkg/dplR/ChangeLog
   pkg/dplR/R/rwi.stats.running.R
   pkg/dplR/man/rwi.stats.running.Rd
Log:
added SNR to rwi.stats at request of user. modified examples.

Modified: pkg/dplR/ChangeLog
===================================================================
--- pkg/dplR/ChangeLog	2014-03-25 08:44:04 UTC (rev 734)
+++ pkg/dplR/ChangeLog	2014-03-26 00:50:21 UTC (rev 735)
@@ -1,5 +1,12 @@
 * CHANGES IN dplR VERSION 1.6.0
 
+File: rwi.stats.running.R
+-------------------------
+
+- Added signal-to-noise ratio as an output. Followed pg 109 in
+  Cook's chapter in Huhges et al. 2011.
+
+
 * CHANGES IN dplR VERSION 1.5.9
 
 Files: dplR.h, rcompact.c, redfit.c

Modified: pkg/dplR/R/rwi.stats.running.R
===================================================================
--- pkg/dplR/R/rwi.stats.running.R	2014-03-25 08:44:04 UTC (rev 734)
+++ pkg/dplR/R/rwi.stats.running.R	2014-03-26 00:50:21 UTC (rev 735)
@@ -1,368 +1,370 @@
-### Helper functions
-
-### Computes the correlation coefficients between columns of x and y.
-### Requires "limit" overlapping values in each pair.
-cor.with.limit <- function(limit, x, y) {
-    n.x <- ncol(x) # caller makes sure that n.x
-    n.y <- ncol(y) # and n.y >= 1
-    r.mat <- matrix(NA_real_, n.x, n.y)
-    for (i in seq_len(n.x)) {
-        this.x <- x[, i]
-        good.x <- !is.na(this.x)
-        for (j in seq_len(n.y)) {
-            this.y <- y[, j]
-            good.y <- !is.na(this.y)
-            good.both <- which(good.x & good.y)
-            n.good <- length(good.both)
-            if (n.good >= limit && n.good > 0) {
-                r.mat[i, j] <- cor(this.x[good.both], this.y[good.both])
-            }
-        }
-    }
-    r.mat
-}
-
-### Computes the correlation coefficients between different columns of x.
-cor.with.limit.upper <- function(limit, x) {
-    n.x <- ncol(x) # caller makes sure that n.x >= 2
-    r.vec <- rep.int(NA_real_, n.x * (n.x - 1) / 2)
-    good.x <- !is.na(x)
-    k <- 0
-    for (i in seq_len(n.x - 1)) {
-        good.i <- good.x[, i]
-        for (j in (i + 1):n.x) {
-            k <- k + 1
-            good.both <- which(good.i & good.x[, j])
-            if (length(good.both) >= limit) {
-                r.vec[k] <- cor(x[good.both, i], x[good.both, j])
-            }
-        }
-    }
-    r.vec
-}
-
-rwi.stats <- function(rwi, ids=NULL, period=c("max", "common"), ...) {
-    args <- list(...)
-    args[["rwi"]] <- rwi
-    args[["ids"]] <- ids
-    args[["period"]] <- period
-    args[["running.window"]] <- FALSE
-    do.call(rwi.stats.running, args)
-}
-
-### Main function, exported to user
-rwi.stats.running <- function(rwi, ids=NULL, period=c("max", "common"),
-                              prewhiten=FALSE,n=NULL,
-                              running.window=TRUE,
-                              window.length=min(50, nrow(rwi)),
-                              window.overlap=floor(window.length / 2),
-                              first.start=NULL,
-                              min.corr.overlap=min(30, window.length),
-                              round.decimals=3,
-                              zero.is.missing=TRUE) {
-    period2 <- match.arg(period)
-
-    if (running.window) {
-        if (window.length < 3) {
-            stop("minimum 'window.length' is 3")
-        }
-        window.advance <- window.length - window.overlap
-        if (window.advance < 1) {
-            stop(gettextf("'window.overlap' is too large, max value is 'window.length'-1 (%d)",
-                          window.length - 1))
-        }
-        if (window.length < min.corr.overlap) {
-            stop("'window.length' is smaller than 'min.corr.overlap'")
-        }
-    }
-    tmp <- normalize1(rwi, n, prewhiten)
-    if(!all(tmp$idx.good)) {
-        warning("after prewhitening, 'rwi' contains column(s) without at least four observations",
-                call.=FALSE)
-        cat(gettext("note that there is no error checking on column lengths if filtering is not performed\n",
-                    domain="R-dplR"))
-    }
-    rwi2 <- as.matrix(tmp$master)
-    n.cores <- ncol(rwi2)
-
-    zero.flag <- rwi2 == 0
-    if (any(zero.flag, na.rm=TRUE)) {
-        if (!zero.is.missing) {
-            warning("There are zeros in the data. Consider the option 'zero.is.missing'.")
-        } else {
-            rwi2[zero.flag] <- NA
-        }
-    }
-
-    ## If 'ids' is NULL then assume one core per tree
-    if (is.null(ids)) {
-        ids3 <- data.frame(tree=seq_len(n.cores), core=rep.int(1, n.cores))
-        rwi3 <- rwi2
-    } else {
-        ## Make error checks here
-        if (!is.data.frame(ids) || !all(c("tree", "core") %in% names(ids))) {
-            stop("'ids' must be a data.frame with columns 'tree' and 'core'")
-        }
-        if (!all(vapply(ids, is.numeric, TRUE))) {
-            stop("'ids' must have numeric columns")
-        }
-        colnames.rwi <- colnames(rwi2)
-        ## If all column names in 'rwi' are present in the set of row
-        ## names in 'ids', arrange 'ids' to matching order
-        rownames.ids <- row.names(ids)
-        if (!is.null(rownames.ids) && all(colnames.rwi %in% rownames.ids)) {
-            ids2 <- ids[colnames.rwi, c("tree", "core")]
-        } else if (nrow(ids) == n.cores) {
-            ids2 <- ids[c("tree", "core")]
-        } else {
-            stop("dimension problem: ", "'ncol(rwi)' != 'nrow(ids)'")
-        }
-        row.names(ids2) <- NULL
-        unique.ids <- unique(ids2)
-        n.unique <- nrow(unique.ids)
-        if (n.unique < n.cores) {
-            ## If more than one columns of 'rwi' share a tree/core ID pair,
-            ## the columns are averaged and treated as one core
-            ids3 <- unique.ids
-            rwi3 <- matrix(data=as.numeric(NA), nrow=nrow(rwi2), ncol=n.unique,
-                           dimnames=list(rownames(rwi2)))
-            for (i in seq_len(n.unique)) {
-                these.cols <- row.match(ids2, unique.ids[i, ])
-                rwi3[, i] <-
-                    rowMeans(rwi2[, these.cols, drop=FALSE], na.rm=TRUE)
-            }
-            message("Series with matching tree/core IDs have been averaged")
-        } else {
-            ids3 <- ids2
-            rwi3 <- rwi2
-        }
-    }
-
-    n.years <- nrow(rwi3)
-    if (running.window && window.length > n.years) {
-        stop("'window.length' is larger than the number of years in 'rwi'")
-    }
-
-    unique.trees <- unique(ids3$tree)
-    n.trees <- length(unique.trees)
-    if (n.trees < 2) {
-        stop("at least 2 trees are needed")
-    }
-    cores.of.tree <- list()
-    seq.tree <- seq_len(n.trees)
-    for (i in seq.tree) {
-        cores.of.tree[[i]] <- which(ids3$tree==unique.trees[i])
-    }
-
-    ## n.trees.by.year is recorded before setting rows with missing
-    ## data to NA
-    tree.any <- matrix(FALSE, n.years, n.trees)
-    for (i in seq.tree) {
-        tree.any[, i] <-
-            apply(!is.na(rwi3[, ids3$tree == unique.trees[i], drop=FALSE]),
-                  1, any)
-    }
-    n.trees.by.year <- rowSums(tree.any)
-    good.rows <- which(n.trees.by.year > 1)
-
-    ## Easy way to force complete overlap of data
-    if (period2 == "common") {
-        bad.rows <- which(apply(is.na(rwi3), 1, any))
-        rwi3[bad.rows, ] <- NA
-        good.rows <- setdiff(good.rows, bad.rows)
-        period.common <- TRUE
-    } else {
-        period.common <- FALSE
-    }
-
-    if (length(good.rows) < min.corr.overlap) {
-        stop("too few years with enough trees for correlation calculations")
-    }
-
-    if (running.window) {
-        if (is.numeric(first.start)) {
-            if (first.start < 1) {
-                stop("'first.start' too small, must be >= 1")
-            } else if (first.start > n.years - window.length + 1) {
-                stop("'first.start' too large")
-            }
-            first.start2 <- first.start
-        } else {
-            ## Select locations of running windows by maximizing the
-            ## number of data points (sum of number of series for each
-            ## selected year), but don't count rows with less than two
-            ## trees
-            min.offset <-
-                max(0, min(good.rows) - (window.length - min.corr.overlap) - 1)
-            max.offset <-
-                min(min.offset + window.advance - 1, n.years - window.length)
-            offsets <- min.offset:max.offset
-            n.offsets <- length(offsets)
-            n.data <- rep.int(NA_real_, n.offsets)
-            for (i in seq_len(n.offsets)) {
-                offset <- offsets[i]
-                n.windows.minusone <-
-                    (n.years - offset - window.length) %/% window.advance
-                max.idx <-
-                    offset + window.length + n.windows.minusone * window.advance
-                n.data[i] <- sum(!is.na(rwi3[intersect(good.rows,
-                                                       (1 + offset):max.idx), ]))
-            }
-            ## In case of a tie, choose large offset.
-            ## In practice, this prefers recent years.
-            first.start2 <-
-                offsets[n.offsets - which.max(rev(n.data)) + 1] + 1
-        }
-        window.start <- seq(from = first.start2,
-                            to = n.years - window.length + 1,
-                            by = window.advance)
-        window.length2 <- window.length
-    } else {
-        window.start <- 1
-        window.length2 <- n.years
-    }
-
-    all.years <- as.numeric(rownames(rwi3))
-
-    loop.body <- function(s.idx) {
-        rbar.tot <- NA_real_
-        rbar.wt <- NA_real_
-        rbar.bt <- NA_real_
-
-        ## Location of window
-        start.year <- all.years[s.idx]
-        e.idx <- s.idx + window.length2 - 1
-        end.year <- all.years[e.idx]
-        mid.year <- floor((start.year + end.year) / 2)
-        year.idx <- s.idx:e.idx
-
-        ## See p 138 in C&K
-        ## Sum of all correlations among different cores (between trees)
-        rsum.bt <- 0
-        n.bt <- 0
-        good.flag <- rep.int(FALSE, n.trees)
-        for (i in seq_len(n.trees - 1)) {
-            i.data <- rwi3[year.idx, cores.of.tree[[i]], drop=FALSE]
-            for (j in (i + 1):n.trees) {
-                j.data <- rwi3[year.idx, cores.of.tree[[j]], drop=FALSE]
-                bt.r.mat <- cor.with.limit(min.corr.overlap, i.data, j.data)
-                bt.r.mat <- bt.r.mat[!is.na(bt.r.mat)]
-                n.bt.temp <- length(bt.r.mat)
-                if (n.bt.temp > 0) {
-                    rsum.bt <- rsum.bt + sum(bt.r.mat)
-                    n.bt <- n.bt + n.bt.temp
-                    good.flag[c(i, j)] <- TRUE
-                }
-            }
-        }
-
-        ## Sum of all correlations among different cores (within trees)
-        good.trees <- which(good.flag)
-        rsum.wt <- 0
-        n.wt <- 0
-        n.cores.tree <- rep.int(NA_real_, n.trees)
-        for (i in good.trees) {
-            these.cores <- cores.of.tree[[i]]
-            if (length(these.cores)==1) { # make simple case fast
-                n.cores.tree[i] <- 1
-            } else {
-                these.data <- rwi3[year.idx, these.cores, drop=FALSE]
-                wt.r.vec <- cor.with.limit.upper(min.corr.overlap, these.data)
-                wt.r.vec <- wt.r.vec[!is.na(wt.r.vec)]
-                n.wt.temp <- length(wt.r.vec)
-                if (n.wt.temp > 0) {
-                    rsum.wt <- rsum.wt + sum(wt.r.vec)
-                    n.wt <- n.wt + n.wt.temp
-                    ## Solving c (> 0) in the formula n = 0.5 * c * (c-1)
-                    ## leads to c = 0.5 + sqrt(0.25+2*n)
-                    n.cores.tree[i] <- 0.5 + sqrt(0.25 + 2 * n.wt.temp)
-                } else {
-                    n.cores.tree[i] <- 1
-                }
-            }
-        }
-
-        ## Mean correlations
-        n.tot <- n.wt + n.bt
-        if (n.tot > 0) {
-            rbar.tot <- (rsum.wt + rsum.bt) / n.tot
-        }
-        if (n.wt > 0) {
-            rbar.wt <- rsum.wt / n.wt
-        }
-        if (n.bt > 0) {
-            rbar.bt <- rsum.bt / n.bt
-        }
-
-        if (period.common) {
-            ## If period is "common", we are only looking at the rows
-            ## with no missing values.
-            n <- n.trees
-        } else {
-            ## Number of trees averaged over the years in the window.
-            ## We keep this number separate of the correlation
-            ## estimates, i.e. the data from some tree / year may
-            ## contribute to n without taking part in the correlation
-            ## estimates.
-            n <- mean(n.trees.by.year[year.idx], na.rm=TRUE)
-        }
-
-        ## Expressed population signal
-        if (n.wt == 0) {
-            c.eff <- 1
-            rbar.eff <- rbar.bt
-        } else {
-            c.eff.rproc <- mean(1 / n.cores.tree, na.rm=TRUE)
-            c.eff <- 1 / c.eff.rproc # bookkeeping only
-            rbar.eff <- rbar.bt / (rbar.wt + (1 - rbar.wt) * c.eff.rproc)
-        }
-        ## EPS is on page 146 of C&K.
-        ## In our interpretation of EPS, we use the average number of trees.
-        eps <- n * rbar.eff / ((n - 1) * rbar.eff + 1)
-
-        if (running.window) {
-            c(start.year = start.year, mid.year = mid.year, end.year = end.year,
-              n.tot = n.tot, n.wt = n.wt, n.bt = n.bt, rbar.tot = rbar.tot,
-              rbar.wt = rbar.wt, rbar.bt = rbar.bt, c.eff = c.eff,
-              rbar.eff = rbar.eff, eps = eps, n = n)
-        } else {
-            c(n.tot = n.tot, n.wt = n.wt, n.bt = n.bt, rbar.tot = rbar.tot,
-              rbar.wt = rbar.wt, rbar.bt = rbar.bt, c.eff = c.eff,
-              rbar.eff = rbar.eff, eps = eps, n = n)
-        }
-    }
-
-    ## Iterate over all windows
-    if (running.window &&
-        !inherits(try(suppressWarnings(req.fe <-
-                                       requireNamespace("foreach",
-                                                        quietly=TRUE)),
-                      silent = TRUE),
-                  "try-error") && req.fe) {
-
-        exportFun <- c("<-", "+", "-", "floor", ":", "rep.int", "for",
-                       "seq_len", "[", "[[", "cor.with.limit", "!",
-                       "is.na", "length", "if", ">", "sum", "c",
-                       "[<-", "which", "==", "cor.with.limit.upper",
-                       "sqrt", "*", "/", "(", "{", "mean")
-
-        compos.stats <-
-            foreach::"%dopar%"(foreach::foreach(s.idx=window.start,
-                                                .combine="rbind",
-                                                .export=exportFun),
-                               loop.body(s.idx))
-    } else {
-        compos.stats <- NULL
-        for (s.idx in window.start) {
-            compos.stats <- rbind(compos.stats, loop.body(s.idx))
-        }
-    }
-
-    rownames(compos.stats) <- NULL
-    if (is.numeric(round.decimals) && length(round.decimals) > 0 &&
-        is.finite(round.decimals[1]) && round.decimals[1] >= 0) {
-        data.frame(round(compos.stats, round.decimals[1]))
-    } else {
-        data.frame(compos.stats)
-    }
-}
+### Helper functions
+
+### Computes the correlation coefficients between columns of x and y.
+### Requires "limit" overlapping values in each pair.
+cor.with.limit <- function(limit, x, y) {
+    n.x <- ncol(x) # caller makes sure that n.x
+    n.y <- ncol(y) # and n.y >= 1
+    r.mat <- matrix(NA_real_, n.x, n.y)
+    for (i in seq_len(n.x)) {
+        this.x <- x[, i]
+        good.x <- !is.na(this.x)
+        for (j in seq_len(n.y)) {
+            this.y <- y[, j]
+            good.y <- !is.na(this.y)
+            good.both <- which(good.x & good.y)
+            n.good <- length(good.both)
+            if (n.good >= limit && n.good > 0) {
+                r.mat[i, j] <- cor(this.x[good.both], this.y[good.both])
+            }
+        }
+    }
+    r.mat
+}
+
+### Computes the correlation coefficients between different columns of x.
+cor.with.limit.upper <- function(limit, x) {
+    n.x <- ncol(x) # caller makes sure that n.x >= 2
+    r.vec <- rep.int(NA_real_, n.x * (n.x - 1) / 2)
+    good.x <- !is.na(x)
+    k <- 0
+    for (i in seq_len(n.x - 1)) {
+        good.i <- good.x[, i]
+        for (j in (i + 1):n.x) {
+            k <- k + 1
+            good.both <- which(good.i & good.x[, j])
+            if (length(good.both) >= limit) {
+                r.vec[k] <- cor(x[good.both, i], x[good.both, j])
+            }
+        }
+    }
+    r.vec
+}
+
+rwi.stats <- function(rwi, ids=NULL, period=c("max", "common"), ...) {
+    args <- list(...)
+    args[["rwi"]] <- rwi
+    args[["ids"]] <- ids
+    args[["period"]] <- period
+    args[["running.window"]] <- FALSE
+    do.call(rwi.stats.running, args)
+}
+
+### Main function, exported to user
+rwi.stats.running <- function(rwi, ids=NULL, period=c("max", "common"),
+                              prewhiten=FALSE,n=NULL,
+                              running.window=TRUE,
+                              window.length=min(50, nrow(rwi)),
+                              window.overlap=floor(window.length / 2),
+                              first.start=NULL,
+                              min.corr.overlap=min(30, window.length),
+                              round.decimals=3,
+                              zero.is.missing=TRUE) {
+    period2 <- match.arg(period)
+
+    if (running.window) {
+        if (window.length < 3) {
+            stop("minimum 'window.length' is 3")
+        }
+        window.advance <- window.length - window.overlap
+        if (window.advance < 1) {
+            stop(gettextf("'window.overlap' is too large, max value is 'window.length'-1 (%d)",
+                          window.length - 1))
+        }
+        if (window.length < min.corr.overlap) {
+            stop("'window.length' is smaller than 'min.corr.overlap'")
+        }
+    }
+    tmp <- normalize1(rwi, n, prewhiten)
+    if(!all(tmp$idx.good)) {
+        warning("after prewhitening, 'rwi' contains column(s) without at least four observations",
+                call.=FALSE)
+        cat(gettext("note that there is no error checking on column lengths if filtering is not performed\n",
+                    domain="R-dplR"))
+    }
+    rwi2 <- as.matrix(tmp$master)
+    n.cores <- ncol(rwi2)
+
+    zero.flag <- rwi2 == 0
+    if (any(zero.flag, na.rm=TRUE)) {
+        if (!zero.is.missing) {
+            warning("There are zeros in the data. Consider the option 'zero.is.missing'.")
+        } else {
+            rwi2[zero.flag] <- NA
+        }
+    }
+
+    ## If 'ids' is NULL then assume one core per tree
+    if (is.null(ids)) {
+        ids3 <- data.frame(tree=seq_len(n.cores), core=rep.int(1, n.cores))
+        rwi3 <- rwi2
+    } else {
+        ## Make error checks here
+        if (!is.data.frame(ids) || !all(c("tree", "core") %in% names(ids))) {
+            stop("'ids' must be a data.frame with columns 'tree' and 'core'")
+        }
+        if (!all(vapply(ids, is.numeric, TRUE))) {
+            stop("'ids' must have numeric columns")
+        }
+        colnames.rwi <- colnames(rwi2)
+        ## If all column names in 'rwi' are present in the set of row
+        ## names in 'ids', arrange 'ids' to matching order
+        rownames.ids <- row.names(ids)
+        if (!is.null(rownames.ids) && all(colnames.rwi %in% rownames.ids)) {
+            ids2 <- ids[colnames.rwi, c("tree", "core")]
+        } else if (nrow(ids) == n.cores) {
+            ids2 <- ids[c("tree", "core")]
+        } else {
+            stop("dimension problem: ", "'ncol(rwi)' != 'nrow(ids)'")
+        }
+        row.names(ids2) <- NULL
+        unique.ids <- unique(ids2)
+        n.unique <- nrow(unique.ids)
+        if (n.unique < n.cores) {
+            ## If more than one columns of 'rwi' share a tree/core ID pair,
+            ## the columns are averaged and treated as one core
+            ids3 <- unique.ids
+            rwi3 <- matrix(data=as.numeric(NA), nrow=nrow(rwi2), ncol=n.unique,
+                           dimnames=list(rownames(rwi2)))
+            for (i in seq_len(n.unique)) {
+                these.cols <- row.match(ids2, unique.ids[i, ])
+                rwi3[, i] <-
+                    rowMeans(rwi2[, these.cols, drop=FALSE], na.rm=TRUE)
+            }
+            message("Series with matching tree/core IDs have been averaged")
+        } else {
+            ids3 <- ids2
+            rwi3 <- rwi2
+        }
+    }
+
+    n.years <- nrow(rwi3)
+    if (running.window && window.length > n.years) {
+        stop("'window.length' is larger than the number of years in 'rwi'")
+    }
+
+    unique.trees <- unique(ids3$tree)
+    n.trees <- length(unique.trees)
+    if (n.trees < 2) {
+        stop("at least 2 trees are needed")
+    }
+    cores.of.tree <- list()
+    seq.tree <- seq_len(n.trees)
+    for (i in seq.tree) {
+        cores.of.tree[[i]] <- which(ids3$tree==unique.trees[i])
+    }
+
+    ## n.trees.by.year is recorded before setting rows with missing
+    ## data to NA
+    tree.any <- matrix(FALSE, n.years, n.trees)
+    for (i in seq.tree) {
+        tree.any[, i] <-
+            apply(!is.na(rwi3[, ids3$tree == unique.trees[i], drop=FALSE]),
+                  1, any)
+    }
+    n.trees.by.year <- rowSums(tree.any)
+    good.rows <- which(n.trees.by.year > 1)
+
+    ## Easy way to force complete overlap of data
+    if (period2 == "common") {
+        bad.rows <- which(apply(is.na(rwi3), 1, any))
+        rwi3[bad.rows, ] <- NA
+        good.rows <- setdiff(good.rows, bad.rows)
+        period.common <- TRUE
+    } else {
+        period.common <- FALSE
+    }
+
+    if (length(good.rows) < min.corr.overlap) {
+        stop("too few years with enough trees for correlation calculations")
+    }
+
+    if (running.window) {
+        if (is.numeric(first.start)) {
+            if (first.start < 1) {
+                stop("'first.start' too small, must be >= 1")
+            } else if (first.start > n.years - window.length + 1) {
+                stop("'first.start' too large")
+            }
+            first.start2 <- first.start
+        } else {
+            ## Select locations of running windows by maximizing the
+            ## number of data points (sum of number of series for each
+            ## selected year), but don't count rows with less than two
+            ## trees
+            min.offset <-
+                max(0, min(good.rows) - (window.length - min.corr.overlap) - 1)
+            max.offset <-
+                min(min.offset + window.advance - 1, n.years - window.length)
+            offsets <- min.offset:max.offset
+            n.offsets <- length(offsets)
+            n.data <- rep.int(NA_real_, n.offsets)
+            for (i in seq_len(n.offsets)) {
+                offset <- offsets[i]
+                n.windows.minusone <-
+                    (n.years - offset - window.length) %/% window.advance
+                max.idx <-
+                    offset + window.length + n.windows.minusone * window.advance
+                n.data[i] <- sum(!is.na(rwi3[intersect(good.rows,
+                                                       (1 + offset):max.idx), ]))
+            }
+            ## In case of a tie, choose large offset.
+            ## In practice, this prefers recent years.
+            first.start2 <-
+                offsets[n.offsets - which.max(rev(n.data)) + 1] + 1
+        }
+        window.start <- seq(from = first.start2,
+                            to = n.years - window.length + 1,
+                            by = window.advance)
+        window.length2 <- window.length
+    } else {
+        window.start <- 1
+        window.length2 <- n.years
+    }
+
+    all.years <- as.numeric(rownames(rwi3))
+
+    loop.body <- function(s.idx) {
+        rbar.tot <- NA_real_
+        rbar.wt <- NA_real_
+        rbar.bt <- NA_real_
+
+        ## Location of window
+        start.year <- all.years[s.idx]
+        e.idx <- s.idx + window.length2 - 1
+        end.year <- all.years[e.idx]
+        mid.year <- floor((start.year + end.year) / 2)
+        year.idx <- s.idx:e.idx
+
+        ## See p 138 in C&K
+        ## Sum of all correlations among different cores (between trees)
+        rsum.bt <- 0
+        n.bt <- 0
+        good.flag <- rep.int(FALSE, n.trees)
+        for (i in seq_len(n.trees - 1)) {
+            i.data <- rwi3[year.idx, cores.of.tree[[i]], drop=FALSE]
+            for (j in (i + 1):n.trees) {
+                j.data <- rwi3[year.idx, cores.of.tree[[j]], drop=FALSE]
+                bt.r.mat <- cor.with.limit(min.corr.overlap, i.data, j.data)
+                bt.r.mat <- bt.r.mat[!is.na(bt.r.mat)]
+                n.bt.temp <- length(bt.r.mat)
+                if (n.bt.temp > 0) {
+                    rsum.bt <- rsum.bt + sum(bt.r.mat)
+                    n.bt <- n.bt + n.bt.temp
+                    good.flag[c(i, j)] <- TRUE
+                }
+            }
+        }
+
+        ## Sum of all correlations among different cores (within trees)
+        good.trees <- which(good.flag)
+        rsum.wt <- 0
+        n.wt <- 0
+        n.cores.tree <- rep.int(NA_real_, n.trees)
+        for (i in good.trees) {
+            these.cores <- cores.of.tree[[i]]
+            if (length(these.cores)==1) { # make simple case fast
+                n.cores.tree[i] <- 1
+            } else {
+                these.data <- rwi3[year.idx, these.cores, drop=FALSE]
+                wt.r.vec <- cor.with.limit.upper(min.corr.overlap, these.data)
+                wt.r.vec <- wt.r.vec[!is.na(wt.r.vec)]
+                n.wt.temp <- length(wt.r.vec)
+                if (n.wt.temp > 0) {
+                    rsum.wt <- rsum.wt + sum(wt.r.vec)
+                    n.wt <- n.wt + n.wt.temp
+                    ## Solving c (> 0) in the formula n = 0.5 * c * (c-1)
+                    ## leads to c = 0.5 + sqrt(0.25+2*n)
+                    n.cores.tree[i] <- 0.5 + sqrt(0.25 + 2 * n.wt.temp)
+                } else {
+                    n.cores.tree[i] <- 1
+                }
+            }
+        }
+
+        ## Mean correlations
+        n.tot <- n.wt + n.bt
+        if (n.tot > 0) {
+            rbar.tot <- (rsum.wt + rsum.bt) / n.tot
+        }
+        if (n.wt > 0) {
+            rbar.wt <- rsum.wt / n.wt
+        }
+        if (n.bt > 0) {
+            rbar.bt <- rsum.bt / n.bt
+        }
+
+        if (period.common) {
+            ## If period is "common", we are only looking at the rows
+            ## with no missing values.
+            n <- n.trees
+        } else {
+            ## Number of trees averaged over the years in the window.
+            ## We keep this number separate of the correlation
+            ## estimates, i.e. the data from some tree / year may
+            ## contribute to n without taking part in the correlation
+            ## estimates.
+            n <- mean(n.trees.by.year[year.idx], na.rm=TRUE)
+        }
+
+        ## Expressed population signal
+        if (n.wt == 0) {
+            c.eff <- 1
+            rbar.eff <- rbar.bt
+        } else {
+            c.eff.rproc <- mean(1 / n.cores.tree, na.rm=TRUE)
+            c.eff <- 1 / c.eff.rproc # bookkeeping only
+            rbar.eff <- rbar.bt / (rbar.wt + (1 - rbar.wt) * c.eff.rproc)
+        }
+        ## EPS is on page 146 of C&K.
+        ## In our interpretation of EPS, we use the average number of trees.
+        eps <- n * rbar.eff / ((n - 1) * rbar.eff + 1)
+        ## SNR is on page 109 of Hughes et al. 2011
+        snr <- n * rbar.eff / (1-rbar.eff)
+
+        if (running.window) {
+            c(start.year = start.year, mid.year = mid.year, end.year = end.year,
+              n.tot = n.tot, n.wt = n.wt, n.bt = n.bt, rbar.tot = rbar.tot,
+              rbar.wt = rbar.wt, rbar.bt = rbar.bt, c.eff = c.eff,
+              rbar.eff = rbar.eff, eps = eps, snr = snr, n = n)
+        } else {
+            c(n.tot = n.tot, n.wt = n.wt, n.bt = n.bt, rbar.tot = rbar.tot,
+              rbar.wt = rbar.wt, rbar.bt = rbar.bt, c.eff = c.eff,
+              rbar.eff = rbar.eff, eps = eps, snr = snr, n = n)
+        }
+    }
+
+    ## Iterate over all windows
+    if (running.window &&
+        !inherits(try(suppressWarnings(req.fe <-
+                                       requireNamespace("foreach",
+                                                        quietly=TRUE)),
+                      silent = TRUE),
+                  "try-error") && req.fe) {
+
+        exportFun <- c("<-", "+", "-", "floor", ":", "rep.int", "for",
+                       "seq_len", "[", "[[", "cor.with.limit", "!",
+                       "is.na", "length", "if", ">", "sum", "c",
+                       "[<-", "which", "==", "cor.with.limit.upper",
+                       "sqrt", "*", "/", "(", "{", "mean")
+
+        compos.stats <-
+            foreach::"%dopar%"(foreach::foreach(s.idx=window.start,
+                                                .combine="rbind",
+                                                .export=exportFun),
+                               loop.body(s.idx))
+    } else {
+        compos.stats <- NULL
+        for (s.idx in window.start) {
+            compos.stats <- rbind(compos.stats, loop.body(s.idx))
+        }
+    }
+
+    rownames(compos.stats) <- NULL
+    if (is.numeric(round.decimals) && length(round.decimals) > 0 &&
+        is.finite(round.decimals[1]) && round.decimals[1] >= 0) {
+        data.frame(round(compos.stats, round.decimals[1]))
+    } else {
+        data.frame(compos.stats)
+    }
+}

Modified: pkg/dplR/man/rwi.stats.running.Rd
===================================================================
--- pkg/dplR/man/rwi.stats.running.Rd	2014-03-25 08:44:04 UTC (rev 734)
+++ pkg/dplR/man/rwi.stats.running.Rd	2014-03-26 00:50:21 UTC (rev 735)
@@ -1,242 +1,248 @@
-\name{rwi.stats.running}
-\alias{rwi.stats.running}
-\alias{rwi.stats}
-\alias{rwi.stats.legacy}
-\title{ (Running Window) Statistics on Detrended Ring-Width Series }
-\description{
-  These functions calculate descriptive statistics on a
-  \code{data.frame} of (usually) ring-width indices. The statistics are
-  optionally computed in a running window with adjustable length and
-  overlap. The data can be filtered so that the comparisons are made to
-  on just high-frequency data. 
-}
-\usage{
-rwi.stats.running(rwi, ids = NULL, period = c("max", "common"),
-                  prewhiten=FALSE,n=NULL,
-                  running.window = TRUE,
-                  window.length = min(50, nrow(rwi)),
-                  window.overlap = floor(window.length / 2),
-                  first.start = NULL,
-                  min.corr.overlap = min(30, window.length),
-                  round.decimals = 3,
-                  zero.is.missing = TRUE)
-
-rwi.stats(rwi, ids=NULL, period=c("max", "common"), \dots)
-
-rwi.stats.legacy(rwi, ids=NULL, period=c("max", "common"))
-}
-\arguments{
-
-  \item{rwi}{ a \code{data.frame} with detrended and standardized ring
-    width indices as columns and years as rows such as that produced by
-    \code{\link{detrend}}. }
-
-  \item{ids}{ an optional \code{data.frame} with column one named
-    \code{"tree"} giving a \code{numeric} \acronym{ID} for each tree and
-    column two named \code{"core"} giving a \code{numeric} \acronym{ID}
-    for each core. Defaults to one core per tree as
-    \code{data.frame(tree = 1:ncol(\var{rwi}), core = rep(1,
-      ncol(\var{rwi})))}. }
-  \item{period}{ a \code{character} string, either \code{"common"} or
-    \code{"max"} indicating whether correlations should be limited to
-    complete observations over the period common to all cores (i.e. rows
-    common to all samples) or the maximum pairwise overlap. Defaults
-    to \code{"max"}. }
-  \item{n}{ \code{NULL} or an integral value giving the filter length
-    for the \code{\link{hanning}} filter used for removal of low
-    frequency variation. }
-  \item{prewhiten}{ \code{logical} flag.  If \code{TRUE} each series is
-    whitened using \code{\link{ar}}. }
-  \item{running.window}{ \code{logical} flag indicating whether to use a
-    running window (\code{TRUE}, the default) or to ignore the other
-    window parameters and effectively use one window covering all years
-    (\code{FALSE}). }
-  \item{window.length}{ \code{numeric} value indicating the length of
-    the running window in years. The default is 50 years or the number
-    of years (rows) in \code{\var{rwi}}, whichever is smaller. }
-  \item{window.overlap}{ \code{numeric} value indicating the overlap of
-    consecutive window positions, i.e. the number of common years. The
-    default is half of the window length, rounded down. }
-  \item{first.start}{ an optional \code{numeric} value setting the
-    position of the first window. Must be a value between \code{1} and
-    \code{\var{n.years}-\var{window.length}+1}, where
-    \code{\var{n.years}} is the number of years in \code{\var{rwi}}. The
-    default value \code{NULL} lets the function make the decision using
-    some heuristic rules. }
-  \item{min.corr.overlap}{ \code{numeric} value setting the minimum
-    number of common years in any pair of ring-width series required for
-    their correlation to be included in the calculations. Smaller
-    overlaps are considered to yield unreliable correlation values which
-    are ignored. Defaults to the minimum of 30 and the length of the
-    window. One way to lift the restriction and include all correlations
-    is to set \code{\var{min.corr.overlap} = 0}. }
-  \item{round.decimals}{ non-negative integer \code{numeric} value
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/dplr -r 735


More information about the Dplr-commits mailing list