[Coxflexboost-commits] r12 - in pkg: . R inst man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Jul 6 18:49:54 CEST 2009
Author: hofner
Date: 2009-07-06 18:49:50 +0200 (Mon, 06 Jul 2009)
New Revision: 12
Modified:
pkg/NAMESPACE
pkg/R/bols.R
pkg/R/crossvalidation.R
pkg/R/helpers.R
pkg/inst/CHANGES
pkg/man/bols.Rd
pkg/man/boost_control.Rd
pkg/man/crossvalidation.Rd
Log:
added support for package multicore for crossvalidation function "cv"
added "center" argument and possibility to handle categorical covariates to bols()
minor bugfixes
Modified: pkg/NAMESPACE
===================================================================
--- pkg/NAMESPACE 2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/NAMESPACE 2009-07-06 16:49:50 UTC (rev 12)
@@ -22,6 +22,7 @@
S3method(plot, oobag)
S3method(print, cv)
S3method(plot, cv)
+S3method(mstop, cv)
S3method(predict, baselearner)
Modified: pkg/R/bols.R
===================================================================
--- pkg/R/bols.R 2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/R/bols.R 2009-07-06 16:49:50 UTC (rev 12)
@@ -6,20 +6,30 @@
bols(..., timedep=TRUE)
}
-bols <- function(x, z = NULL, xname = NULL, zname = NULL, timedep=FALSE) {
+bols <- function(x, z = NULL, xname = NULL, zname = NULL, center = FALSE, df = NULL,
+ timedep=FALSE, contrasts.arg = "contr.treatment") {
if (is.null(xname)) xname = deparse(substitute(x))
if (is.null(zname)) zname = deparse(substitute(z))
cc <- complete_cases(x = x, z = z)
- newX <- function(x, z = NULL, na.rm = TRUE) {
+ newX <- function(x, z = NULL, na.rm = TRUE){
if (na.rm) {
x <- x[cc]
if (!is.null(z))
z <- z[cc]
}
- X <- model.matrix(~ x)
+
+ if (is.factor(x)) {
+ X <- model.matrix(~ x, contrasts.arg = list(x = contrasts.arg))
+ } else {
+ X <- model.matrix(~ x)
+ }
+
+ if (center)
+ X <- X[, -1, drop = FALSE]
+
if (any(!cc) & !na.rm) {
Xtmp <- matrix(NA, ncol = ncol(X), nrow = length(cc))
Xtmp[cc,] <- X
Modified: pkg/R/crossvalidation.R
===================================================================
--- pkg/R/crossvalidation.R 2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/R/crossvalidation.R 2009-07-06 16:49:50 UTC (rev 12)
@@ -4,9 +4,7 @@
cv.cfboost <- function(object, folds, grid = c(1:mstop(object, opt=FALSE)), ...){
oobrisk <- matrix(0, nrow = ncol(folds), ncol = length(grid))
-
ctrl <- object$control
-
ctrl$risk <- "oobag"
# fehlt noch: ctrl$savedata <- FALSE
# fehlt noch: ctrl$saveensss <- FALSE
@@ -18,6 +16,17 @@
data <- object$data$data
formula <- object$data$formula
+ myapply <- lapply
+ if (ctrl$parallel && require("multicore")) {
+ if (!multicore:::isChild()) {
+ myapply <- mclapply
+ if (ctrl$trace) {
+ ctrl$trace <- FALSE
+ cat("\n Running in parallel with `trace = FALSE'\n")
+ }
+ }
+ }
+
## free memory
rm("object")
@@ -25,15 +34,17 @@
dummyfct <- function(weights, control, data, formula, grid){
i <<- i + 1
- cat("\n>>> Fold ", i, "\n\n")
+ if (ctrl$trace) cat("\n>>> Fold ", i, "\n\n")
model <- cfboost(formula, data = data, control = control, weights = weights)
ret <- risk(model)[grid]
rm("model")
ret
}
- oobrisk <- apply(folds, 2, dummyfct, control = ctrl, data = data, formula = formula, grid = grid)
- oobrisk <- t(oobrisk)
+ oobrisk <- myapply(1:ncol(folds), function(i)
+ dummyfct(folds[,i], control = ctrl, data = data, formula = formula, grid = grid),
+ ...)
+ oobrisk <- t(as.data.frame(oobrisk))
oobrisk <- oobrisk/colSums(folds == 0)
colnames(oobrisk) <- grid
rownames(oobrisk) <- 1:nrow(oobrisk)
Modified: pkg/R/helpers.R
===================================================================
--- pkg/R/helpers.R 2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/R/helpers.R 2009-07-06 16:49:50 UTC (rev 12)
@@ -82,13 +82,14 @@
## (adapted version from mboost)
boost_control <- function(mstop = 100, nu = 0.1, maxit = 30000, risk = c("inbag", "oobag", "none"),
which.offset = c("mle", "zero"), savedata = TRUE,
- trace = TRUE, hardStop = TRUE) {
+ trace = TRUE, parallel = require("multicore"), hardStop = TRUE) {
which.offset <- match.arg(which.offset)
risk <- match.arg(risk)
RET <- list(mstop = mstop, nu = nu, maxit = maxit,
risk = risk, which.offset = which.offset,
- savedata = savedata, trace = trace, hardStop = hardStop)
+ savedata = savedata, trace = trace, parallel = parallel,
+ hardStop = hardStop)
class(RET) <- c("boost_control")
RET
}
Modified: pkg/inst/CHANGES
===================================================================
--- pkg/inst/CHANGES 2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/inst/CHANGES 2009-07-06 16:49:50 UTC (rev 12)
@@ -1,13 +1,17 @@
- CHANGES in `CoxFlexBoost' VERSION 0.7-0 (2009-03-XX)
+ CHANGES in `CoxFlexBoost' VERSION 0.7-0 (2009-07-XX)
- o changed assignment and storrage of lambda and penalty in bbs objects
+ o added support for package multicore for crossvalidation function "cv"
- o changed computations (of upper boundary) in df2lambda
+ o added "center" argument and possibility to handle categorical covariates to bols()
+ o changed assignment and storrage of lambda and penalty in bbs() objects
+
+ o changed computations (of upper boundary) in df2lambda()
+
o changed dependencies: droped mboost (as it depends on many other packages)
and added modeltools instead
- o improved plot.cfboost (for time-varying effects)
+ o improved plot.cfboost() (for time-varying effects)
o TODO: change integration
Modified: pkg/man/bols.Rd
===================================================================
--- pkg/man/bols.Rd 2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/man/bols.Rd 2009-07-06 16:49:50 UTC (rev 12)
@@ -9,7 +9,8 @@
effects (and thus the baseline hazard) can be specified.
}
\usage{
-bols(x, z = NULL, xname = NULL, zname = NULL, timedep = FALSE)
+bols(x, z = NULL, xname = NULL, zname = NULL, center = FALSE,
+ df = NULL, timedep = FALSE, contrasts.arg = "contr.treatment")
bolsTime(\dots)
}
@@ -18,9 +19,17 @@
\item{z}{ factor or numeric. A vector containing data. }
\item{xname}{ optional. Name of the variable given in \code{x}. }
\item{zname}{ optional. Name of the variable given in \code{z}. }
+ \item{center}{ logical. If \code{center=TRUE} the intercept in the
+ linear model is omitted.}
+ \item{df}{ Used to specify the degrees of freedom via the trace of the
+ hat matrix. Ridge penalization is used for categorical base-learners
+ to obtain df that are smaller than the number of categories minor
+ one.}
\item{timedep}{ logic. If \code{timedep = TRUE} the base-learner is
taken to be time-dependent, i.e., \code{x} must be the time. For more
details see below. }
+ \item{contrasts.arg}{ a character string suitable for input to the
+ \code{\link{contrasts}} replacement function.}
\item{\dots}{ further arguments passed to \code{bols}. }
}
\details{
Modified: pkg/man/boost_control.Rd
===================================================================
--- pkg/man/boost_control.Rd 2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/man/boost_control.Rd 2009-07-06 16:49:50 UTC (rev 12)
@@ -6,7 +6,7 @@
}
\usage{
boost_control(mstop = 100, nu = 0.1, maxit = 30000, risk = c("inbag","oobag", "none"),
- which.offset = c("mle", "zero"), savedata = TRUE, trace = TRUE, hardStop = TRUE)
+ which.offset = c("mle", "zero"), savedata = TRUE, trace = TRUE, parallel = require("multicore"), hardStop = TRUE)
}
\arguments{
\item{mstop}{ integer. Initial number of boosting iterations }
@@ -23,7 +23,10 @@
\item{savedata}{ logic. Indicating whether the data should be saved in
the returned \code{cfboost} object. }
\item{trace}{ logic. Determining if status information should be
- printed during estimation of the model. }
+ printed during estimation of the model. }
+\item{parallel}{logic. enable parallelization using \pkg{multicore} technology
+ for crossvalidation (in function \code{\link{cv}}). If \pkg{multicore} is available
+ parallelization is used per default but can be switched off. }
\item{hardStop}{ logic. Indicates if the initial value of
\code{mstop} is the maximal number of iterations (\code{mstop =
TRUE}) or if it should be increased if the algorithm did not
Modified: pkg/man/crossvalidation.Rd
===================================================================
--- pkg/man/crossvalidation.Rd 2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/man/crossvalidation.Rd 2009-07-06 16:49:50 UTC (rev 12)
@@ -45,6 +45,12 @@
sample which consists of observations with \code{weights == 1} and
and an out-of-bag sample with \code{weights == 0}. The latter
is used to determine the empirical risk (negative log likelihood).
+
+ If package \pkg{multicore} is available, \code{cv}
+ runs in parallel on cores/processors available. The scheduling
+ can be changed by the corresponding arguments of
+ \code{\link[multicore]{mclapply}} (via the dot arguments).
+ No trace output is given when running in parallel.
}
\value{
\code{cv} returns an object of class \code{cv}, which consists of
More information about the Coxflexboost-commits
mailing list