[Coxflexboost-commits] r13 - in pkg: . R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Jul 8 14:06:20 CEST 2009
Author: hofner
Date: 2009-07-08 14:06:20 +0200 (Wed, 08 Jul 2009)
New Revision: 13
Modified:
pkg/DESCRIPTION
pkg/R/bols.R
pkg/R/cfboost.R
pkg/R/crossvalidation.R
pkg/R/helpers.R
pkg/man/bols.Rd
pkg/man/boost_control.Rd
Log:
small changes/updates
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2009-07-06 16:49:50 UTC (rev 12)
+++ pkg/DESCRIPTION 2009-07-08 12:06:20 UTC (rev 13)
@@ -2,7 +2,7 @@
Type: Package
Title: Boosting Flexible Cox Models (with Time-Varying Effects)
Version: 0.7-0
-Date: 2009-03-XX
+Date: 2009-07-XX
Author: Benjamin Hofner
Maintainer: Benjamin Hofner <benjamin.hofner at imbe.med.uni-erlangen.de>
Description: Likelihood-based boosting approach to fit flexible,
@@ -10,5 +10,6 @@
base-learners. Variable selection and model choice are built in
features.
Depends: methods, survival, modeltools
+Suggests: multicore
License: GPL-2
LazyLoad: yes
Modified: pkg/R/bols.R
===================================================================
--- pkg/R/bols.R 2009-07-06 16:49:50 UTC (rev 12)
+++ pkg/R/bols.R 2009-07-08 12:06:20 UTC (rev 13)
@@ -6,7 +6,7 @@
bols(..., timedep=TRUE)
}
-bols <- function(x, z = NULL, xname = NULL, zname = NULL, center = FALSE, df = NULL,
+bols <- function(x, z = NULL, xname = NULL, zname = NULL, center = FALSE,
timedep=FALSE, contrasts.arg = "contr.treatment") {
if (is.null(xname)) xname = deparse(substitute(x))
Modified: pkg/R/cfboost.R
===================================================================
--- pkg/R/cfboost.R 2009-07-06 16:49:50 UTC (rev 12)
+++ pkg/R/cfboost.R 2009-07-08 12:06:20 UTC (rev 13)
@@ -34,6 +34,10 @@
y <- object$y
if (!inherits(y, "Surv")) stop("response is not an object of class ", sQuote("Surv"))
+ if (!control$savedata){ ## free memory
+ rm("object")
+ }
+
## hyper parameters
mstop <- control$mstop
risk <- control$risk
@@ -55,7 +59,10 @@
## the ensemble
ens <- rep(NA, mstop)
- ensss <- vector(mode = "list", length = mstop)
+ if (control$saveensss)
+ ensss <- vector(mode = "list", length = mstop)
+ else
+ ensss <- NULL
## vector of empirical risks for all boosting iterations
mrisk <- numeric(mstop)
@@ -120,7 +127,8 @@
## save the model, i.e., the selected coefficient and base-learner
ens[m] <- xselect
- ensss[[m]] <- coefs[[xselect]]
+ if (control$saveensss)
+ ensss[[m]] <- coefs[[xselect]]
## save updated parameters in x[[xselect]]
x[[xselect]] <- updatecoefs(x[[xselect]], coefs[[xselect]])
@@ -163,8 +171,7 @@
class(mrisk) <- risk
- RET <- list(data = object, ### original object
- ensemble = ens, ### selected base-learners
+ RET <- list(ensemble = ens, ### selected base-learners
ensembless = ensss, ### list of coefficients in each iteration
fit = fit, ### vector of fitted values
offset = offset, ### offset
@@ -175,6 +182,8 @@
df = df_est, ### estimated degrees of freedom for smooth base-learners
coefs = lapply(x[1:length(x)], getcoefs, nu = nu) ### coefficients
)
+ ### save learning sample
+ if (control$savedata) RET$data <- object
RET$predict <- function(newdata = NULL, mstop = mstop, ...) {
if (!is.null(newdata)) {
Modified: pkg/R/crossvalidation.R
===================================================================
--- pkg/R/crossvalidation.R 2009-07-06 16:49:50 UTC (rev 12)
+++ pkg/R/crossvalidation.R 2009-07-08 12:06:20 UTC (rev 13)
@@ -6,8 +6,8 @@
oobrisk <- matrix(0, nrow = ncol(folds), ncol = length(grid))
ctrl <- object$control
ctrl$risk <- "oobag"
- # fehlt noch: ctrl$savedata <- FALSE
- # fehlt noch: ctrl$saveensss <- FALSE
+ ctrl$savedata <- FALSE
+ ctrl$saveensss <- FALSE
if (is.null(object$data))
stop(sQuote("object"), " does not contain data. Estimate model with option ", sQuote("savedata = TRUE"))
@@ -30,20 +30,19 @@
## free memory
rm("object")
- i <- 0
-
dummyfct <- function(weights, control, data, formula, grid){
- i <<- i + 1
- if (ctrl$trace) cat("\n>>> Fold ", i, "\n\n")
model <- cfboost(formula, data = data, control = control, weights = weights)
ret <- risk(model)[grid]
rm("model")
ret
}
- oobrisk <- myapply(1:ncol(folds), function(i)
- dummyfct(folds[,i], control = ctrl, data = data, formula = formula, grid = grid),
- ...)
+ oobrisk <- myapply(1:ncol(folds),
+ function(i){
+ cat("\n>>> Fold ", i, "started. \n\n")
+ dummyfct(folds[,i], control = ctrl, data = data, formula = formula, grid = grid)
+ }
+ , ...)
oobrisk <- t(as.data.frame(oobrisk))
oobrisk <- oobrisk/colSums(folds == 0)
colnames(oobrisk) <- grid
Modified: pkg/R/helpers.R
===================================================================
--- pkg/R/helpers.R 2009-07-06 16:49:50 UTC (rev 12)
+++ pkg/R/helpers.R 2009-07-08 12:06:20 UTC (rev 13)
@@ -82,14 +82,15 @@
## (adapted version from mboost)
boost_control <- function(mstop = 100, nu = 0.1, maxit = 30000, risk = c("inbag", "oobag", "none"),
which.offset = c("mle", "zero"), savedata = TRUE,
- trace = TRUE, parallel = require("multicore"), hardStop = TRUE) {
+ trace = TRUE, parallel = require("multicore"), hardStop = TRUE,
+ save_ensembless=TRUE) {
which.offset <- match.arg(which.offset)
risk <- match.arg(risk)
RET <- list(mstop = mstop, nu = nu, maxit = maxit,
risk = risk, which.offset = which.offset,
savedata = savedata, trace = trace, parallel = parallel,
- hardStop = hardStop)
+ hardStop = hardStop, saveensss=save_ensembless)
class(RET) <- c("boost_control")
RET
}
Modified: pkg/man/bols.Rd
===================================================================
--- pkg/man/bols.Rd 2009-07-06 16:49:50 UTC (rev 12)
+++ pkg/man/bols.Rd 2009-07-08 12:06:20 UTC (rev 13)
@@ -10,7 +10,7 @@
}
\usage{
bols(x, z = NULL, xname = NULL, zname = NULL, center = FALSE,
- df = NULL, timedep = FALSE, contrasts.arg = "contr.treatment")
+ timedep = FALSE, contrasts.arg = "contr.treatment")
bolsTime(\dots)
}
@@ -21,10 +21,10 @@
\item{zname}{ optional. Name of the variable given in \code{z}. }
\item{center}{ logical. If \code{center=TRUE} the intercept in the
linear model is omitted.}
- \item{df}{ Used to specify the degrees of freedom via the trace of the
- hat matrix. Ridge penalization is used for categorical base-learners
- to obtain df that are smaller than the number of categories minor
- one.}
+% \item{df}{ Used to specify the degrees of freedom via the trace of the
+% hat matrix. Ridge penalization is used for categorical base-learners
+% to obtain df that are smaller than the number of categories minor
+% one.}
\item{timedep}{ logic. If \code{timedep = TRUE} the base-learner is
taken to be time-dependent, i.e., \code{x} must be the time. For more
details see below. }
Modified: pkg/man/boost_control.Rd
===================================================================
--- pkg/man/boost_control.Rd 2009-07-06 16:49:50 UTC (rev 12)
+++ pkg/man/boost_control.Rd 2009-07-08 12:06:20 UTC (rev 13)
@@ -6,7 +6,8 @@
}
\usage{
boost_control(mstop = 100, nu = 0.1, maxit = 30000, risk = c("inbag","oobag", "none"),
- which.offset = c("mle", "zero"), savedata = TRUE, trace = TRUE, parallel = require("multicore"), hardStop = TRUE)
+ which.offset = c("mle", "zero"), savedata = TRUE, trace = TRUE,
+ parallel = require("multicore"), hardStop = TRUE, save_ensembless=TRUE)
}
\arguments{
\item{mstop}{ integer. Initial number of boosting iterations }
@@ -23,15 +24,19 @@
\item{savedata}{ logic. Indicating whether the data should be saved in
the returned \code{cfboost} object. }
\item{trace}{ logic. Determining if status information should be
- printed during estimation of the model. }
-\item{parallel}{logic. enable parallelization using \pkg{multicore} technology
+ printed during estimation of the model. }
+ \item{parallel}{logic. enable parallelization using \pkg{multicore} technology
for crossvalidation (in function \code{\link{cv}}). If \pkg{multicore} is available
parallelization is used per default but can be switched off. }
\item{hardStop}{ logic. Indicates if the initial value of
- \code{mstop} is the maximal number of iterations (\code{mstop =
- TRUE}) or if it should be increased if the algorithm did not
+ \code{mstop} is the maximal number of iterations (\code{mstop = TRUE})
+ or if it should be increased if the algorithm did not
converge until \code{mstop}. The latter is only possible with
- \code{risk = "oobag"}. }
+ \code{risk = "oobag"}.}
+ \item{save_ensembless}{ logic. Indicating if the list of
+ coefficients for the selected base-learners should be saved and
+ returned. This list is generally needed but can be suppressed to
+ reduce memory usage (not recommended).}
}
\value{
An object of class \code{boost_control} is returned (as a list).
More information about the Coxflexboost-commits
mailing list