[Coxflexboost-commits] r13 - in pkg: . R man

Wed Jul 8 14:06:20 CEST 2009

Author: hofner
Date: 2009-07-08 14:06:20 +0200 (Wed, 08 Jul 2009)
New Revision: 13

Modified:
   pkg/DESCRIPTION
   pkg/R/bols.R
   pkg/R/cfboost.R
   pkg/R/crossvalidation.R
   pkg/R/helpers.R
   pkg/man/bols.Rd
   pkg/man/boost_control.Rd
Log:
small changes/updates

Modified: pkg/DESCRIPTION
===================================================================

--- pkg/DESCRIPTION	2009-07-06 16:49:50 UTC (rev 12)
+++ pkg/DESCRIPTION	2009-07-08 12:06:20 UTC (rev 13)
@@ -2,7 +2,7 @@
 Type: Package
 Title: Boosting Flexible Cox Models (with Time-Varying Effects)
 Version: 0.7-0
-Date: 2009-03-XX
+Date: 2009-07-XX
 Author: Benjamin Hofner
 Maintainer: Benjamin Hofner <benjamin.hofner at imbe.med.uni-erlangen.de>
 Description: Likelihood-based boosting approach to fit flexible,
@@ -10,5 +10,6 @@
   base-learners. Variable selection and model choice are built in
   features.
 Depends: methods, survival, modeltools
+Suggests: multicore
 License: GPL-2
 LazyLoad: yes

Modified: pkg/R/bols.R
===================================================================
--- pkg/R/bols.R	2009-07-06 16:49:50 UTC (rev 12)
+++ pkg/R/bols.R	2009-07-08 12:06:20 UTC (rev 13)
@@ -6,7 +6,7 @@
     bols(..., timedep=TRUE)
 }
 
-bols <- function(x, z = NULL, xname = NULL, zname = NULL, center = FALSE, df = NULL,
+bols <- function(x, z = NULL, xname = NULL, zname = NULL, center = FALSE,
                  timedep=FALSE, contrasts.arg = "contr.treatment") {
 
     if (is.null(xname)) xname = deparse(substitute(x))

Modified: pkg/R/cfboost.R
===================================================================
--- pkg/R/cfboost.R	2009-07-06 16:49:50 UTC (rev 12)
+++ pkg/R/cfboost.R	2009-07-08 12:06:20 UTC (rev 13)
@@ -34,6 +34,10 @@
     y <- object$y
     if (!inherits(y, "Surv")) stop("response is not an object of class ", sQuote("Surv"))
 
+    if (!control$savedata){ ## free memory
+        rm("object")
+    }
+
     ## hyper parameters
     mstop <- control$mstop
     risk <- control$risk
@@ -55,7 +59,10 @@
 
     ## the ensemble
     ens <- rep(NA, mstop)
-    ensss <- vector(mode = "list", length = mstop)
+    if (control$saveensss)
+        ensss <- vector(mode = "list", length = mstop)
+    else
+        ensss <- NULL
 
     ## vector of empirical risks for all boosting iterations
     mrisk <- numeric(mstop)
@@ -120,7 +127,8 @@
 
         ## save the model, i.e., the selected coefficient and base-learner
         ens[m] <- xselect
-        ensss[[m]] <- coefs[[xselect]]
+        if (control$saveensss)
+            ensss[[m]] <- coefs[[xselect]]
 
         ## save updated parameters in x[[xselect]]
         x[[xselect]] <- updatecoefs(x[[xselect]], coefs[[xselect]])
@@ -163,8 +171,7 @@
 
     class(mrisk) <- risk
 
-    RET <- list(data = object,          ### original object
-                ensemble = ens,         ### selected base-learners
+    RET <- list(ensemble = ens,         ### selected base-learners
                 ensembless = ensss,     ### list of coefficients in each iteration
                 fit = fit,              ### vector of fitted values
                 offset = offset,        ### offset
@@ -175,6 +182,8 @@
                 df = df_est,            ### estimated degrees of freedom for smooth base-learners
                 coefs = lapply(x[1:length(x)], getcoefs, nu = nu)  ### coefficients
     )
+    ### save learning sample
+    if (control$savedata) RET$data <- object
 
     RET$predict <- function(newdata = NULL, mstop = mstop, ...) {
         if (!is.null(newdata)) {

Modified: pkg/R/crossvalidation.R
===================================================================
--- pkg/R/crossvalidation.R	2009-07-06 16:49:50 UTC (rev 12)
+++ pkg/R/crossvalidation.R	2009-07-08 12:06:20 UTC (rev 13)
@@ -6,8 +6,8 @@
     oobrisk <- matrix(0, nrow = ncol(folds), ncol = length(grid))
     ctrl <- object$control
     ctrl$risk <- "oobag"
-    # fehlt noch: ctrl$savedata <- FALSE
-    # fehlt noch: ctrl$saveensss <- FALSE
+    ctrl$savedata <- FALSE
+    ctrl$saveensss <- FALSE
 
     if (is.null(object$data))
         stop(sQuote("object"), " does not contain data. Estimate model with option ", sQuote("savedata = TRUE"))
@@ -30,20 +30,19 @@
     ## free memory
     rm("object")
 
-    i <- 0
-
     dummyfct <- function(weights, control, data, formula, grid){
-        i <<- i + 1
-        if (ctrl$trace) cat("\n>>> Fold ", i, "\n\n")
         model <- cfboost(formula, data = data, control = control, weights = weights)
         ret <- risk(model)[grid]
         rm("model")
         ret
     }
 
-    oobrisk <- myapply(1:ncol(folds), function(i)
-                       dummyfct(folds[,i], control = ctrl, data = data, formula = formula, grid = grid),
-                       ...)
+    oobrisk <- myapply(1:ncol(folds),
+                       function(i){
+                           cat("\n>>> Fold ", i, "started. \n\n")
+                           dummyfct(folds[,i], control = ctrl, data = data, formula = formula, grid = grid)
+                       }
+                       , ...)
     oobrisk <- t(as.data.frame(oobrisk))
     oobrisk <- oobrisk/colSums(folds == 0)
     colnames(oobrisk) <- grid

Modified: pkg/R/helpers.R
===================================================================
--- pkg/R/helpers.R	2009-07-06 16:49:50 UTC (rev 12)
+++ pkg/R/helpers.R	2009-07-08 12:06:20 UTC (rev 13)
@@ -82,14 +82,15 @@
 ## (adapted version from mboost)
 boost_control <- function(mstop = 100, nu = 0.1, maxit = 30000, risk = c("inbag", "oobag", "none"),
                           which.offset = c("mle", "zero"), savedata = TRUE,
-                          trace = TRUE,  parallel = require("multicore"), hardStop = TRUE) {
+                          trace = TRUE,  parallel = require("multicore"), hardStop = TRUE,
+                          save_ensembless=TRUE) {
 
     which.offset <- match.arg(which.offset)
     risk <- match.arg(risk)
     RET <- list(mstop = mstop, nu = nu, maxit = maxit,
                 risk = risk, which.offset = which.offset,
                 savedata = savedata, trace = trace, parallel = parallel,
-                hardStop = hardStop)
+                hardStop = hardStop, saveensss=save_ensembless)
     class(RET) <- c("boost_control")
     RET
 }

Modified: pkg/man/bols.Rd
===================================================================
--- pkg/man/bols.Rd	2009-07-06 16:49:50 UTC (rev 12)
+++ pkg/man/bols.Rd	2009-07-08 12:06:20 UTC (rev 13)
@@ -10,7 +10,7 @@
 }
 \usage{
 bols(x, z = NULL, xname = NULL, zname = NULL,  center = FALSE,
-     df = NULL, timedep = FALSE, contrasts.arg = "contr.treatment")
+     timedep = FALSE, contrasts.arg = "contr.treatment")
 bolsTime(\dots)
 }
 
@@ -21,10 +21,10 @@
   \item{zname}{ optional. Name of the variable given in \code{z}. }
   \item{center}{ logical. If \code{center=TRUE} the intercept in the
     linear model is omitted.}
-  \item{df}{ Used to specify the degrees of freedom via the trace of the
-    hat matrix. Ridge penalization is used for categorical base-learners
-    to obtain df that are smaller than the number of categories minor
-    one.}
+%  \item{df}{ Used to specify the degrees of freedom via the trace of the
+%    hat matrix. Ridge penalization is used for categorical base-learners
+%    to obtain df that are smaller than the number of categories minus
+%    one.}
   \item{timedep}{ logic. If \code{timedep = TRUE} the base-learner is
     taken to be time-dependent, i.e., \code{x} must be the time. For more
     details see below. }

Modified: pkg/man/boost_control.Rd
===================================================================
--- pkg/man/boost_control.Rd	2009-07-06 16:49:50 UTC (rev 12)
+++ pkg/man/boost_control.Rd	2009-07-08 12:06:20 UTC (rev 13)
@@ -6,7 +6,8 @@
 }
 \usage{
 boost_control(mstop = 100, nu = 0.1, maxit = 30000, risk = c("inbag","oobag", "none"),
-    which.offset = c("mle", "zero"), savedata = TRUE, trace = TRUE,  parallel = require("multicore"), hardStop = TRUE)
+    which.offset = c("mle", "zero"), savedata = TRUE, trace = TRUE,
+    parallel = require("multicore"), hardStop = TRUE, save_ensembless=TRUE)
 }
 \arguments{
   \item{mstop}{ integer. Initial number of boosting iterations }
@@ -23,15 +24,19 @@
   \item{savedata}{ logic. Indicating whether the data should be saved in
     the returned \code{cfboost} object. }
   \item{trace}{ logic. Determining if status information should be
-    printed during estimation of the model. } 
-\item{parallel}{logic. enable parallelization using \pkg{multicore} technology
+    printed during estimation of the model. }
+  \item{parallel}{ logic. Enable parallelization using \pkg{multicore} technology
     for crossvalidation (in function \code{\link{cv}}). If \pkg{multicore} is available
     parallelization is used per default but can be switched off. }
   \item{hardStop}{ logic. Indicates if the initial value of
-    \code{mstop} is the maximal number of iterations (\code{mstop =
-      TRUE}) or if it should be increased if the algorithm did not
+    \code{mstop} is the maximal number of iterations (\code{hardStop = TRUE})
+    or if it should be increased if the algorithm did not
     converge until \code{mstop}. The latter is only possible with
-    \code{risk = "oobag"}. }
+    \code{risk = "oobag"}.}
+  \item{save_ensembless}{ logic. Indicating if the list of
+    coefficients for the selected base-learners should be saved and
+    returned. This list is generally needed but can be suppressed to
+    reduce memory usage (not recommended).}
 }
 \value{
   An object of class \code{boost_control} is returned (as a list).