[Analogue-commits] r267 - in pkg: . R inst man tests/Examples
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri May 4 15:10:45 CEST 2012
Author: gsimpson
Date: 2012-05-04 15:10:44 +0200 (Fri, 04 May 2012)
New Revision: 267
Added:
pkg/R/fitted.logitreg.R
pkg/R/predict.logitreg.R
pkg/man/fitted.logitreg.Rd
pkg/man/predict.logitreg.Rd
Modified:
pkg/DESCRIPTION
pkg/NAMESPACE
pkg/R/logitreg.R
pkg/R/logitreg.analog.R
pkg/R/plot.logitreg.R
pkg/R/summary.logitreg.R
pkg/inst/ChangeLog
pkg/man/logitreg.Rd
pkg/tests/Examples/analogue-Ex.Rout.save
Log:
add predict and fitted methods for 'logitreg'. Required a number of changes to logitreg() and associated methods. Bump to 0.9-4.
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2012-04-20 14:13:14 UTC (rev 266)
+++ pkg/DESCRIPTION 2012-05-04 13:10:44 UTC (rev 267)
@@ -1,7 +1,7 @@
Package: analogue
Type: Package
Title: Analogue and weighted averaging methods for palaeoecology
-Version: 0.9-3
+Version: 0.9-4
Date: $Date$
Depends: R (>= 2.15.0), stats, graphics, vegan (>= 1.17-12), lattice, grid,
MASS, princurve, mgcv
Modified: pkg/NAMESPACE
===================================================================
--- pkg/NAMESPACE 2012-04-20 14:13:14 UTC (rev 266)
+++ pkg/NAMESPACE 2012-05-04 13:10:44 UTC (rev 267)
@@ -135,9 +135,11 @@
S3method(coef, wa)
S3method(fitted, pcr)
S3method(fitted, bootstrap.mat)
+S3method(fitted, logitreg)
S3method(fitted, mat)
S3method(fitted, timetrack)
S3method(fitted, wa)
+S3method(predict, logitreg)
S3method(predict, mat)
S3method(predict, wa)
S3method(residuals, bootstrap.mat)
Added: pkg/R/fitted.logitreg.R
===================================================================
--- pkg/R/fitted.logitreg.R (rev 0)
+++ pkg/R/fitted.logitreg.R 2012-05-04 13:10:44 UTC (rev 267)
@@ -0,0 +1,33 @@
+##' Extracts fitted values for training set samples from logistic
+##' regression models fitted to each group of samples that describe
+##' the probability two samples are analogues (from the same group) as
+##' a function of dissimilarity between the paired samples.
+##'
+##' @title Fitted values for the training set from logistic regression
+##' models
+##' @param object an object of class \code{"logitreg"} resulting from
+##' a call to \code{\link{logitreg}}.
+##' @param combined logical; should the fitted values for the overall
+##' combined analysis be returned.
+##' @param ... arguments passed to other methods
+##' @return if \code{combined == FALSE} (the default) then a matrix of
+##' fitted probabilities, where the rows are the training set samples
+##' and the columns the groupings, is returned. If
+##' \code{combined == TRUE}, then a list with components \code{"group"}
+##' and \code{"combined"}. \code{"group"} is a matrix of fitted
+##' probabilities as above. \code{"combined"} is a vector of fitted
+##' values for the entire set of pairwise comparisons considered.
+##' @author Gavin Simpson
+##' @method fitted logitreg
+##' @S3method fitted logitreg
+`fitted.logitreg` <- function(object, combined = FALSE, ...) {
+ n <- length(object$models)
+ groups <- sapply(object$models[seq_len(n - 1)], fitted)
+ if(combined) {
+ combined <- fitted(object$models[[n]])
+ out <- list(groups = groups, combined = combined)
+ } else {
+ out <- groups
+ }
+ out
+}
Modified: pkg/R/logitreg.R
===================================================================
--- pkg/R/logitreg.R 2012-04-20 14:13:14 UTC (rev 266)
+++ pkg/R/logitreg.R 2012-05-04 13:10:44 UTC (rev 267)
@@ -5,7 +5,6 @@
if(!is.factor(groups))
groups <- factor(groups)
lev <- levels(groups)
- #n.g <- length(lev)
within <- without <- vector(mode = "list", length = length(lev))
names(within) <- names(without) <- lev
models <- vector(mode = "list", length = length(lev) + 1)
@@ -19,7 +18,6 @@
function(x, k) {x[order(x)[k]]}, k = k))
analogs <- rep(c(TRUE, FALSE), times = c(length(IN), length(OUT)))
Dij <- c(IN, OUT)
- #dat <- data.frame(analogs, Dij)
models[[l]] <- glm(analogs ~ Dij, data = data.frame(analogs, Dij),
family = binomial(link = "logit"))
models[[l]]$Dij <- Dij
@@ -30,21 +28,23 @@
OUT <- do.call(c, without)
analogs <- rep(c(TRUE, FALSE), times = c(length(IN), length(OUT)))
Dij <- c(IN, OUT)
- #dat <- data.frame(analogs, Dij)
models[["Combined"]] <- glm(analogs ~ Dij,
data = data.frame(analogs, Dij),
family = binomial(link = "logit"))
models[["Combined"]]$Dij <- Dij
- #models <- lapply(models, function(x) {class(x) <- "glm"; x})
- class(models) <- "logitreg"
- if(!is.null(attr(object, "method")))
- attr(models, "method") <- attr(object, "method")
- return(models)
+ ##class(models) <- "logitreg"
+ out <- list(models = models, groups = groups, method = NULL)
+ class(out) <- "logitreg"
+ if(!is.null(attr(object, "method"))){
+ out$method <- attr(object, "method")
+ ## attr(models, "method") <- attr(object, "method")
+ }
+ out
}
print.logitreg <- function(x, ...) {
- nams <- names(x)
- N <- length(x)
+ nams <- names(x$models)
+ N <- length(x$models)
cat("\n")
writeLines(strwrap("Object of class: \"logitreg\""))
writeLines(strwrap(paste("Number of models:", N)))
Modified: pkg/R/logitreg.analog.R
===================================================================
--- pkg/R/logitreg.analog.R 2012-04-20 14:13:14 UTC (rev 266)
+++ pkg/R/logitreg.analog.R 2012-05-04 13:10:44 UTC (rev 267)
@@ -1,8 +1,10 @@
`logitreg.analog` <- function(object, groups, k = 1, ...) {
if(is.null(object$train))
stop("'object$train' missing. Refit 'object' with argument 'keep.train = TRUE'")
- models <- logitreg(object$train, groups = groups, k = k, ...)
- if(!is.null(attr(object, "method")))
- attr(models, "method") <- attr(object, "method")
- return(models)
+ out <- logitreg(object$train, groups = groups, k = k, ...)
+ if(!is.null(attr(object, "method"))) {
+ out$method <- attr(object, "method")
+ ##attr(models, "method") <- attr(object, "method")
+ }
+ out
}
Modified: pkg/R/plot.logitreg.R
===================================================================
--- pkg/R/plot.logitreg.R 2012-04-20 14:13:14 UTC (rev 266)
+++ pkg/R/plot.logitreg.R 2012-05-04 13:10:44 UTC (rev 267)
@@ -57,8 +57,8 @@
group <- group[1]
warning("More than 1 'group' specified. Using only the first\nDid you mean to use '\"all'\"")
}
- n.groups <- length(x)
- g.names <- names(x)
+ n.groups <- length(x$models)
+ g.names <- names(x$models)
GROUP <- c("all", g.names)
group <- match.arg(group, GROUP)
conf.type <- match.arg(conf.type)
@@ -74,7 +74,7 @@
layout(1)
})
min <- 0
- max <- max(x[["Combined"]]$data$Dij)
+ max <- max(x$models[["Combined"]]$data$Dij)
}
else {
n.plot <- 1
@@ -85,7 +85,7 @@
for (i in seq_len(n.groups)) {
if (group != "all" && group != g.names[i])
next
- pfun(x[[i]], min = min, max = max, npred = npred,
+ pfun(x$models[[i]], min = min, max = max, npred = npred,
col = col, lwd = lwd, shade = shade, main = g.names[i],
conf.type = conf.type, conf.int = conf.int, rug = rug,
ticksize = ticksize, ref.col = ref.col,
Added: pkg/R/predict.logitreg.R
===================================================================
--- pkg/R/predict.logitreg.R (rev 0)
+++ pkg/R/predict.logitreg.R 2012-05-04 13:10:44 UTC (rev 267)
@@ -0,0 +1,67 @@
+##' Predict the posterior probability of analogue-ness between fossil
+##' and training set samples based on logistic regression fits.
+##'
+##' @title Posterior probability of analogue-ness for fossil samples
+##' @param object object of class \code{"logitreg"}
+##' @param newdata matrix of dissimilarities between training and
+##' fossil samples. Should be an object of class \code{"distance"}.
+##' @param group The group to plot the logit model for. Can be one or
+##' more of the group labels or ‘"Combined"’ to draw the individual
+##' logit models. Alternatively, and the default, is to use ‘"all"’,
+##' which divides the plotting region into the required number of
+##' plotting regions draws all the fitted curves.
+##' @param k numeric; the number of close modern analogues to consider.
+##' Currently not to be used!
+##' @param ... additional arguments passed to other methods.
+##' @return A matrix of posterior probabilities is returned.
+##' @author Gavin Simpson
+##' @method predict logitreg
+##' @S3method predict logitreg
+##' @keywords methods
+`predict.logitreg` <- function(object, newdata, group = "all",
+ k = 1, ...) {
+ if(missing(newdata)) {
+ return(fitted(object)) ## fitted.logitreg defined
+ }
+ ## want the close modern analogues - so this all comes from]
+ ## newdata. Code is based on cma() - must refactor!
+ nsamp <- ncol(newdata)
+ nams <- colnames(newdata)
+ close <- lapply(seq_len(nsamp),
+ function(i, newdata) {
+ sort(newdata[, i])
+ }, newdata = newdata)
+ names(close) <- nams
+
+ ## minimum distance to a group
+ mindist <- t(sapply(close, .minDijGroup,
+ groups = object$groups, k = k))
+
+ ## posterior probability of analogue-ness per group
+ posterior <- .postProbGroup(mindist, object$models)
+
+ ## return object -- just return posterior for now
+ ## is a matrix, cols = groups; rows = fossil samples
+ posterior
+}
+
+## Function to extract the min Dij for each biome type
+.minDijGroup <- function(x, groups, k = 1) {
+ lev <- levels(groups)
+ out <- numeric(length = length(lev))
+ names(out) <- lev
+ for(i in seq_along(lev)) {
+ out[i] <- unname(x[which(groups[names(x)] == lev[i])[k]])
+ }
+ out
+}
+
+## Function to compute the probability of analogue for the minimum Dij
+.postProbGroup <- function(x, model) {
+ foo <- function(i, x, model) {
+ dat <- data.frame(Dij = x[,i])
+ predict(model[[i]], newdata = dat, type = "response")
+ }
+ out <- sapply(colnames(x), FUN = foo, x = x, model = model)
+ out
+}
Modified: pkg/R/summary.logitreg.R
===================================================================
--- pkg/R/summary.logitreg.R 2012-04-20 14:13:14 UTC (rev 266)
+++ pkg/R/summary.logitreg.R 2012-05-04 13:10:44 UTC (rev 267)
@@ -8,7 +8,7 @@
c(IN, OUT, coefs, unname(dose[1]),
unname(attr(dose, "SE")[,1]))
}
- DF <- t(sapply(object, FOO, p = p, USE.NAMES = FALSE))
+ DF <- t(sapply(object$models, FOO, p = p, USE.NAMES = FALSE))
DF <- data.frame(DF)
names(DF) <- c("In","Out","Est.(Dij)","Std.Err", "Z-value","p-value",
paste("Dij(p=", format(p), ")", sep = ""),
Modified: pkg/inst/ChangeLog
===================================================================
--- pkg/inst/ChangeLog 2012-04-20 14:13:14 UTC (rev 266)
+++ pkg/inst/ChangeLog 2012-05-04 13:10:44 UTC (rev 267)
@@ -1,5 +1,25 @@
analogue Change Log
+Version 0.9-4
+
+ * logitreg: the returned object has changed. The list of logistic
+ regression models is now returned as component `models`.
+
+ New methods `fitted()` and `predict()` provided for objects of
+ class "logitreg" compute the fitted probabilities for the
+ training set samples and for new (e.g. fossil) samples
+ respectively. The probabilities are in respect to the analogue-
+ ness of samples to the groups in the training set (e.g.
+ vegetation biomes in the case of pollen data).
+
+ These changes allow an analysis similar in spirit to that of
+ Gavin et al (2003, Quaternary Research 60; 356--367) in their
+ Figure 8. Here though logistic regression fits are used rather
+ than the ROC method they employ. Similar methods could be
+ provided for objects fitted by `roc()` but require a little more
+ thought about how to model the likelihood ratios derived from
+ the ROC curves.
+
Version 0.9-3
* wa: deshrinking via a monotonic cubic regression spline
Added: pkg/man/fitted.logitreg.Rd
===================================================================
--- pkg/man/fitted.logitreg.Rd (rev 0)
+++ pkg/man/fitted.logitreg.Rd 2012-05-04 13:10:44 UTC (rev 267)
@@ -0,0 +1,41 @@
+\name{fitted.logitreg}
+\alias{fitted.logitreg}
+
+\title{Fitted values for the training set from logistic regression
+ models}
+
+\description{
+ Extracts fitted values for training set samples from logistic
+ regression models fitted to each group of samples that describe the
+ probability two samples are analogues (from the same group) as a
+ function of dissimilarity between the paired samples.
+}
+\usage{
+\method{fitted}{logitreg}(object, combined = FALSE, ...)
+}
+
+\arguments{
+ \item{object}{an object of class \code{"logitreg"} resulting from a
+ call to \code{\link{logitreg}}.}
+ \item{combined}{logical; should the fitted values for the overall
+ combined analysis be returned.}
+ \item{\dots}{arguments passed to other methods.}
+}
+
+\value{
+ If \code{combined == FALSE} (the default) then a matrix of fitted
+ probabilities, where the rows are the training set samples and the
+ columns the groupings, is returned. If \code{combined == TRUE}, then a
+ list with components \code{"group"} and
+ \code{"combined"}. \code{"group"} is a matrix of fitted probabilities
+ as above. \code{"combined"} is a vector of fitted values for the
+ entire set of pairwise comparisons considered.
+}
+
+\author{Gavin L. Simpson}
+
+\seealso{
+ See \code{\link{logitreg}} for example usage.
+}
+
+\keyword{methods}
Modified: pkg/man/logitreg.Rd
===================================================================
--- pkg/man/logitreg.Rd 2012-04-20 14:13:14 UTC (rev 266)
+++ pkg/man/logitreg.Rd 2012-05-04 13:10:44 UTC (rev 267)
@@ -98,7 +98,7 @@
## fit the ROC curve to the SWAP diatom data using the AM results
## Generate a grouping for the SWAP lakes
clust <- hclust(as.dist(swap.ana$train), method = "ward")
-grps <- cutree(clust, 12)
+grps <- cutree(clust, 6)
## fit the logit models to the analog object
swap.lrm <- logitreg(swap.ana, grps)
@@ -109,6 +109,18 @@
## plot the fitted logit curves
plot(swap.lrm, conf.type = "polygon")
+
+## extract fitted posterior probabilities for training samples
+## for the individual groups
+fit <- fitted(swap.lrm)
+head(fit)
+
+## compute posterior probabilities of analogue-ness for the rlgh
+## samples. Here we take the dissimilarities between fossil and
+## training samples from the `swap.ana` object rather than re-
+## compute them
+pred <- predict(swap.lrm, newdata = swap.ana$analogs)
+head(pred)
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
Added: pkg/man/predict.logitreg.Rd
===================================================================
--- pkg/man/predict.logitreg.Rd (rev 0)
+++ pkg/man/predict.logitreg.Rd 2012-05-04 13:10:44 UTC (rev 267)
@@ -0,0 +1,59 @@
+\name{predict.logitreg}
+\alias{predict.logitreg}
+
+\title{Posterior probability of analogue-ness for fossil samples}
+
+\description{
+ Predict the posterior probability of analogue-ness between fossil and
+ training set samples based on logistic regression fits.
+}
+
+\usage{
+
+\method{predict}{logitreg}(object, newdata, group = "all", k = 1, ...)
+
+}
+
+\arguments{
+ \item{object}{an object of class \code{"logitreg"}, resulting from a
+ call to \code{\link{logitreg}}.}
+ \item{newdata}{an object of class \code{"distance"} representing the
+ dissimilarity between fossil samples and those from the training
+ set.}
+ \item{group}{character; for which group(s) are predictions
+ sought. \code{"all"}, the default, indicates that predictions for
+ all groups and the combined analysis are produced. Currently
+ ignored.}
+ \item{k}{numeric; the number of close modern analogues to each fossil
+ sample to consider. Not currently used and may be removed from the
+ method as varying \code{k} should require refitting the logistic
+ regression model with that number of close modern analogues
+ considered.}
+ \item{\dots}{additional arguments passed to other methods.}
+}
+% \details{
+
+% }
+
+\value{
+ A matrix of posterior probabilities of analogue-ness is returned. The
+ rows represent the fossil samples and the columns the groupings. There
+ are \eqn{g+1} columns where \eqn{g} is the number of groups. The
+ \eqn{g+1}th column represents the Combined analysis which is the
+ overall posterior probability that a fossil sample is an analogue to a
+ training set sample (i.e. to any one of the groups).
+}
+
+\author{Gavin L. Simpson}
+
+\seealso{
+ See \code{\link{logitreg}} for example usage. \code{\link{cma}} for
+ extraction of close modern analogue information and
+ \code{\link{analog}} for the underlying computations.
+}
+
+% \examples{
+
+% }
+
+\keyword{methods}
Modified: pkg/tests/Examples/analogue-Ex.Rout.save
===================================================================
--- pkg/tests/Examples/analogue-Ex.Rout.save 2012-04-20 14:13:14 UTC (rev 266)
+++ pkg/tests/Examples/analogue-Ex.Rout.save 2012-05-04 13:10:44 UTC (rev 267)
@@ -1,5 +1,5 @@
-R version 2.15.0 Patched (2012-04-16 r59049) -- "Easter Beagle"
+R version 2.15.0 Patched (2012-04-18 r59085) -- "Easter Beagle"
Copyright (C) 2012 The R Foundation for Statistical Computing
ISBN 3-900051-07-0
Platform: x86_64-unknown-linux-gnu (64-bit)
@@ -31,7 +31,7 @@
Loading required package: princurve
Loading required package: mgcv
This is mgcv 1.7-13. For overview type 'help("mgcv-package")'.
-This is analogue 0.9-3
+This is analogue 0.9-4
>
> assign(".oldSearch", search(), pos = 'CheckExEnv')
> cleanEx()
@@ -3253,25 +3253,19 @@
> ## fit the ROC curve to the SWAP diatom data using the AM results
> ## Generate a grouping for the SWAP lakes
> clust <- hclust(as.dist(swap.ana$train), method = "ward")
-> grps <- cutree(clust, 12)
+> grps <- cutree(clust, 6)
>
> ## fit the logit models to the analog object
> swap.lrm <- logitreg(swap.ana, grps)
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
-Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
-Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
-Warning: glm.fit: algorithm did not converge
-Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
> swap.lrm
Object of class: "logitreg"
-Number of models: 13
+Number of models: 7
For groups:
- [1] "1" "2" "3" "4" "5" "6"
- [7] "7" "8" "9" "10" "11" "12"
-[13] "Combined"
+[1] "1" "2" "3" "4" "5" "6" "Combined"
>
> ## summary statistics
@@ -3279,41 +3273,47 @@
Logit regression models
- In Out Est.(Dij) Std.Err Z-value p-value Dij(p=0.9)
-1 13 154 -18.8 4.80e+00 -3.9047 9.4353e-05 0.304
-2 15 152 -36.2 1.02e+01 -3.5454 0.00039206 0.429
-3 22 145 -34.1 1.02e+01 -3.3479 0.00081417 0.366
-4 15 152 -24.1 7.15e+00 -3.3699 0.00075189 0.473
-5 13 154 -21.0 7.97e+00 -2.6325 0.00847487 0.395
-6 17 150 -21.5 4.77e+00 -4.4932 7.0156e-06 0.335
-7 15 152 -11.0 2.71e+00 -4.0649 4.8045e-05 0.283
-8 6 161 -40.1 2.59e+01 -1.5477 0.12170414 0.440
-9 16 151 -10.8 2.49e+00 -4.3522 1.3475e-05 0.200
-10 20 147 -47.0 1.60e+01 -2.9408 0.00327391 0.422
-11 9 158 -1280.9 1.26e+05 -0.0102 0.99189657 0.601
-12 6 161 -21.4 8.79e+00 -2.4401 0.01468365 0.599
-Combined 167 1837 -16.6 1.25e+00 -13.2395 < 2.22e-16 0.335
- Std.Err(Dij)
-1 0.0494
-2 0.0300
-3 0.0264
-4 0.0400
-5 0.0613
-6 0.0371
-7 0.0794
-8 0.0431
-9 0.0795
-10 0.0203
-11 1.3037
-12 0.0702
-Combined 0.0157
+ In Out Est.(Dij) Std.Err Z-value p-value Dij(p=0.9) Std.Err(Dij)
+1 46 121 -15.8 2.80 -5.63 1.8454e-08 0.338 0.0332
+2 35 132 -32.2 6.87 -4.68 2.8597e-06 0.409 0.0200
+3 22 145 -34.1 10.19 -3.35 0.00081417 0.366 0.0264
+4 24 143 -27.2 6.74 -4.03 5.6098e-05 0.495 0.0274
+5 25 142 -16.2 3.93 -4.12 3.7464e-05 0.461 0.0494
+6 15 152 -11.0 2.71 -4.06 4.8045e-05 0.283 0.0794
+Combined 167 835 -16.2 1.41 -11.51 < 2.22e-16 0.359 0.0166
>
> ## plot the fitted logit curves
> plot(swap.lrm, conf.type = "polygon")
>
+> ## extract fitted posterior probabilities for training samples
+> ## for the individual groups
+> fit <- fitted(swap.lrm)
+> head(fit)
+ 1 2 3 4 5 6
+1 0.7704741 0.992926550 0.9705455 0.9967696 0.9977100 0.8004397
+2 0.9480710 0.008663829 0.9961969 0.7215776 0.9573624 0.4823301
+3 0.9740472 0.847497770 0.9714598 0.6919208 0.9965144 0.2744509
+4 0.7168113 0.847497770 0.9535765 0.3007374 0.9990662 0.2870750
+5 0.9661203 0.997014448 0.9970177 0.9946234 0.9178795 0.4975797
+6 0.9740472 0.445288611 0.3826365 0.7858915 0.9990662 0.2870750
>
+> ## compute posterior probabilities of analogue-ness for the rlgh
+> ## samples. Here we take the dissimilarities between fossil and
+> ## training samples from the `swap.ana` object rather than re-
+> ## compute them
+> pred <- predict(swap.lrm, newdata = swap.ana$analogs)
+> head(pred)
+ 1 2 3 4 5 6
+000.3 0.8888987 0.08607136 0.0541194 0.005482089 8.536340e-05 0.03915959
+000.8 0.9508126 0.01931803 0.4638425 0.070408351 1.349192e-05 0.02206208
+001.3 0.9423900 0.04554104 0.7782695 0.043191558 1.474048e-05 0.01984818
+001.8 0.9523237 0.03181413 0.3485667 0.010448649 2.326359e-05 0.02003420
+002.3 0.9372000 0.02182155 0.3034852 0.031319684 1.133939e-05 0.01383880
+002.8 0.9732644 0.05464832 0.5834611 0.027163318 4.119295e-05 0.03144971
>
+>
+>
> cleanEx()
> nameEx("mat")
> ### * mat
@@ -7412,7 +7412,7 @@
> ### * <FOOTER>
> ###
> cat("Time elapsed: ", proc.time() - get("ptime", pos = 'CheckExEnv'),"\n")
-Time elapsed: 16.957 0.457 17.764 0 0
+Time elapsed: 30.227 0.536 30.769 0 0
> grDevices::dev.off()
null device
1
More information about the Analogue-commits
mailing list