[Analogue-commits] r146 - in pkg: . R inst man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sun Aug 16 13:52:48 CEST 2009
Author: gsimpson
Date: 2009-08-16 13:52:47 +0200 (Sun, 16 Aug 2009)
New Revision: 146
Modified:
pkg/DESCRIPTION
pkg/R/join.R
pkg/inst/ChangeLog
pkg/man/join.Rd
Log:
adds left join to 'join' and new argument to supply the value using in replacing missing values.
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2009-08-15 21:25:23 UTC (rev 145)
+++ pkg/DESCRIPTION 2009-08-16 11:52:47 UTC (rev 146)
@@ -1,7 +1,7 @@
Package: analogue
Type: Package
Title: Analogue and weighted averaging methods for palaeoecology
-Version: 0.6-17
+Version: 0.6-18
Date: $Date$
Depends: R (>= 2.5.0), stats, graphics, vegan, lattice, MASS
Author: Gavin L. Simpson, Jari Oksanen
Modified: pkg/R/join.R
===================================================================
--- pkg/R/join.R 2009-08-15 21:25:23 UTC (rev 145)
+++ pkg/R/join.R 2009-08-16 11:52:47 UTC (rev 146)
@@ -4,8 +4,8 @@
## ##
## Created : 17-Apr-2006 ##
## Author : Gavin Simpson ##
-## Version : 0.2-0 ##
-## Last modified : 23-Jul-2007 ##
+## Version : 0.5-0 ##
+## Last modified : 16-Aug-2009 ##
## ##
## ARGUMENTS: ##
## ... - the data frames to be merged ##
@@ -35,37 +35,62 @@
## class data.frame, not that it is that ##
## class. This allows join to work with the ##
## results of join(..., split = FALSE ##
+## 16-Aug-2009 - GLS - 0.5-0 * now has left outer as well as outer join ##
+## * replacement value can now be specified ##
## ##
###########################################################################
-join <- function(..., verbose = FALSE, na.replace = TRUE, split = TRUE)
- {
+join <- function(..., verbose = FALSE, na.replace = TRUE, split = TRUE,
+ value = 0, type = c("outer","left"))
+{
+ outerJoin <- function(X) {
+ ## From code provided by Sundar Dorai-Raj in R-Help posting:
+ ## http://article.gmane.org/gmane.comp.lang.r.general/63042/
+ cn <- unique(unlist(lapply(X, colnames)))
+ for(i in seq(along = X)) {
+ if(any(m <- !cn %in% colnames(X[[i]]))) {
+ na <- matrix(NA, nrow(X[[i]]), sum(m))
+ dimnames(na) <- list(rownames(X[[i]]), cn[m])
+ X[[i]] <- cbind(X[[i]], na)
+ }
+ }
+ joined <- do.call(rbind, X)
+ colnames(joined) <- cn
+ return(joined)
+ }
+ leftJoin <- function(X) {
+ cn <- unique(unlist(lapply(X, colnames)[[1]]))
+ ## if more than 2 df in X, merge all bar first
+ if(length(X) > 2)
+ dfs <- outerJoin(X[-1])
+ else
+ dfs <- X[[2]]
+ ## matched column names
+ mcn <- match(colnames(dfs), cn)
+ mcn <- mcn[!is.na(mcn)]
+ joined <- matrix(NA, ncol = dims[1,2], nrow = sum(dims[,1]))
+ joined[1:dims[1,1], ] <- data.matrix(X[[1]])
+ joined[(dims[1,1]+1):NROW(joined), mcn] <- data.matrix(dfs[, mcn])
+ colnames(joined) <- cn
+ return(joined)
+ }
x <- list(...)
- ##if(any(sapply(x, class) != "data.frame"))
if(any(!sapply(x, inherits, "data.frame")))
- stop("\nall objects to be merged must be data frames.")
+ stop("\nall objects to be merged must be data frames.")
dims <- do.call(rbind, lapply(x, dim))
n.joined <- nrow(dims)
- ## From code provided by Sundar Dorai-Raj in R-Help posting:
- ## http://article.gmane.org/gmane.comp.lang.r.general/63042/match=merging
- cn <- unique(unlist(lapply(x, colnames)))
- for(i in seq(along = x)) {
- if(any(m <- !cn %in% colnames(x[[i]]))) {
- na <- matrix(NA, nrow(x[[i]]), sum(m))
- dimnames(na) <- list(rownames(x[[i]]), cn[m])
- x[[i]] <- cbind(x[[i]], na)
- }
+ if(missing(type))
+ type <- "outer"
+ type <- match.arg(type)
+ if(type == "outer") {
+ joined <- outerJoin(x)
+ } else if(type == "left") {
+ joined <- leftJoin(x)
}
- joined <- do.call(rbind, x)
- ## End Sundar code
if(na.replace) {
- dim.names <- dimnames(joined)
- ##joined <- sapply(joined, function(x) {x[is.na(x)] <- 0; x})
- joined <- data.frame(lapply(joined,
- function(x) {x[is.na(x)] <- 0; x}))
- dimnames(joined) <- dim.names
+ joined[is.na(joined)] <- value
}
- if(verbose)
- {
+ rn <- lapply(x, rownames)
+ if(verbose) {
stats <- rbind(dims, dim(joined))
rownames(stats) <- c(paste("Data set ", c(1:n.joined), ":", sep = ""),
"Merged:")
@@ -74,20 +99,21 @@
printCoefmat(stats, digits = max(3, getOption("digits") - 3),
na.print = "")
cat("\n")
- }
+ }
if(split) {
- retval <- vector(mode = "list", length = n.joined)
- ends<- cumsum(dims[,1])
- start <- c(1, ends[-n.joined] + 1)
- for(i in 1:n.joined) {
- retval[[i]] <- as.data.frame(joined[start[i]:ends[i], ])
- names(retval) <- as.character(match.call())[c(2, 2 + (n.joined-1))]
- }
- class(retval) <- "join"
- return(retval)
+ retval <- vector(mode = "list", length = n.joined)
+ ends<- cumsum(dims[,1])
+ start <- c(1, ends[-n.joined] + 1)
+ for(i in 1:n.joined) {
+ retval[[i]] <- as.data.frame(joined[start[i]:ends[i], ])
+ rownames(retval[[i]]) <- rn[[i]]
+ }
+ names(retval) <- as.character(match.call())[2:(n.joined+1)]
+ class(retval) <- "join"
} else {
- retval <- as.data.frame(joined, row.names = rownames(joined))
- class(retval) <- c("join", class(retval))
- return(retval)
+ retval <- as.data.frame(joined, row.names = rownames(joined))
+ class(retval) <- c("join", class(retval))
}
- }
+ attr(retval, "type") <- type
+ retval
+}
Modified: pkg/inst/ChangeLog
===================================================================
--- pkg/inst/ChangeLog 2009-08-15 21:25:23 UTC (rev 145)
+++ pkg/inst/ChangeLog 2009-08-16 11:52:47 UTC (rev 146)
@@ -1,5 +1,19 @@
analogue Change Log
+Version 0.6-18
+
+ * join: new arguments 'type' and 'value'.
+
+ 'type' controls which join is performed. Options are (currently)
+ "outer" (default) and "left". The left join is used to prepare two
+ or more data sets for ordinating the first and subsequently
+ passively projecting the other data sets into this ordination.
+ The outer join is used to prepare data for transfer functions such
+ as MAT and WA.
+
+ 'value' allows the user to supply a numeric value to be used to
+ replace 'NA's.
+
Version 0.6-17
* predict.wa: deshrinking method was not being honoured when
Modified: pkg/man/join.Rd
===================================================================
--- pkg/man/join.Rd 2009-08-15 21:25:23 UTC (rev 145)
+++ pkg/man/join.Rd 2009-08-16 11:52:47 UTC (rev 146)
@@ -11,7 +11,8 @@
data sets with respect to training set samples.
}
\usage{
-join(\dots, verbose = FALSE, na.replace = TRUE, split = TRUE)
+join(\dots, verbose = FALSE, na.replace = TRUE, split = TRUE, value = 0,
+ type = c("outer", "left"))
\method{head}{join}(x, \dots)
@@ -37,9 +38,33 @@
\item{split}{logical; should the merged data sets samples be split
back into individual data frames, but now with common columns
(i.e. species)?}
+ \item{value}{numeric; value to replace \code{NA} with if
+ \code{na.replace} is \code{TRUE}.}
+ \item{type}{logical; type of join to perform. \code{"outer"} returns
+ the \emph{union} of the variables in data frames to be merged, such
+ that the resulting objects have columns for all variables found
+ across all the data frames to be merged. \code{"left"} returns the
+ left outer (or the left) join, such that the merged data frames
+ contain the set of variables found in the first supplied data
+ frame. See Details.}
\item{x}{an object of class \code{"join"}, usually the result of a
call to \code{\link{join}}.}
}
+\details{
+ When merging multiple data frames the set of variables in the merged
+ data can be determined via a number of routes. \code{join} provides
+ for two (currently) join types; the \emph{outer} join and the
+ \emph{left outer} (or simply the \emph{left}) join. Which type of join
+ is performed is determined by the argument \code{type}.
+
+ The \emph{outer} join returns the union of the set of variables found
+ in the data frames to be merged. This means that the resulting data
+ frame(s) contain columns for all the variable observed across all the
+ data frames supplied for merging.
+
+ With the \emph{left outer} join the resulting data frame(s) contain
+ only the set of variables found in the first data frame provided.
+}
\value{
If \code{split = TRUE}, an object of class \code{"join"}, a list of
data frames, with as many components as the number of data frames
@@ -84,6 +109,13 @@
## show just the last few lines of each data set
tail(dat, n = 4)
+
+## merge training and test set using left join
+head(join(ImbrieKipp, V12.122, verbose = TRUE, type = "left"))
+
+## merge training and test set using outer join and replace
+## NA with -99.9
+head(join(ImbrieKipp, V12.122, verbose = TRUE, value = -99.9))
}
\keyword{multivariate}% at least one, from doc/KEYWORDS
\keyword{manip}% __ONLY ONE__ keyword per line
More information about the Analogue-commits
mailing list