[Coxflexboost-commits] r12 - in pkg: . R inst man

Mon Jul 6 18:49:54 CEST 2009

Author: hofner
Date: 2009-07-06 18:49:50 +0200 (Mon, 06 Jul 2009)
New Revision: 12

Modified:
   pkg/NAMESPACE
   pkg/R/bols.R
   pkg/R/crossvalidation.R
   pkg/R/helpers.R
   pkg/inst/CHANGES
   pkg/man/bols.Rd
   pkg/man/boost_control.Rd
   pkg/man/crossvalidation.Rd
Log:
added support for package multicore for crossvalidation function "cv"
added "center" argument and possibility to handle categorical covariates to bols()
minor bugfixes

Modified: pkg/NAMESPACE
===================================================================

--- pkg/NAMESPACE	2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/NAMESPACE	2009-07-06 16:49:50 UTC (rev 12)
@@ -22,6 +22,7 @@
 S3method(plot, oobag)
 S3method(print, cv)
 S3method(plot, cv)
+S3method(mstop, cv)
 
 S3method(predict, baselearner)
 

Modified: pkg/R/bols.R
===================================================================
--- pkg/R/bols.R	2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/R/bols.R	2009-07-06 16:49:50 UTC (rev 12)
@@ -6,20 +6,30 @@
     bols(..., timedep=TRUE)
 }
 
-bols <- function(x, z = NULL, xname = NULL, zname = NULL, timedep=FALSE) {
+bols <- function(x, z = NULL, xname = NULL, zname = NULL, center = FALSE, df = NULL,
+                 timedep=FALSE, contrasts.arg = "contr.treatment") {
 
     if (is.null(xname)) xname = deparse(substitute(x))
     if (is.null(zname)) zname = deparse(substitute(z))
 
     cc <- complete_cases(x = x, z = z)
 
-    newX <- function(x, z = NULL, na.rm = TRUE) {
+    newX <- function(x, z = NULL, na.rm = TRUE){
         if (na.rm) {
             x <- x[cc]
             if (!is.null(z))
                 z <- z[cc]
         }
-        X <- model.matrix(~ x)
+
+        if (is.factor(x)) {
+            X <- model.matrix(~ x, contrasts.arg = list(x = contrasts.arg))
+        } else {
+            X <- model.matrix(~ x)
+        }
+
+        if (center)
+            X <- X[, -1, drop = FALSE]
+
         if (any(!cc) & !na.rm) {
             Xtmp <- matrix(NA, ncol = ncol(X), nrow = length(cc))
             Xtmp[cc,] <- X

Modified: pkg/R/crossvalidation.R
===================================================================
--- pkg/R/crossvalidation.R	2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/R/crossvalidation.R	2009-07-06 16:49:50 UTC (rev 12)
@@ -4,9 +4,7 @@
 cv.cfboost <- function(object, folds, grid = c(1:mstop(object, opt=FALSE)), ...){
 
     oobrisk <- matrix(0, nrow = ncol(folds), ncol = length(grid))
-
     ctrl <- object$control
-
     ctrl$risk <- "oobag"
     # fehlt noch: ctrl$savedata <- FALSE
     # fehlt noch: ctrl$saveensss <- FALSE
@@ -18,6 +16,17 @@
     data <- object$data$data
     formula <- object$data$formula
 
+    myapply <- lapply
+    if (ctrl$parallel && require("multicore")) {
+        if (!multicore:::isChild()) {
+            myapply <- mclapply
+            if (ctrl$trace) {
+                ctrl$trace <- FALSE
+                cat("\n Running in parallel with `trace = FALSE'\n")
+            }
+        }
+    }
+
     ## free memory
     rm("object")
 
@@ -25,15 +34,17 @@
 
     dummyfct <- function(weights, control, data, formula, grid){
         i <<- i + 1
-        cat("\n>>> Fold ", i, "\n\n")
+        if (ctrl$trace) cat("\n>>> Fold ", i, "\n\n")
         model <- cfboost(formula, data = data, control = control, weights = weights)
         ret <- risk(model)[grid]
         rm("model")
         ret
     }
 
-    oobrisk <- apply(folds, 2, dummyfct, control = ctrl, data = data, formula = formula, grid = grid)
-    oobrisk <- t(oobrisk)
+    oobrisk <- myapply(1:ncol(folds), function(i)
+                       dummyfct(folds[,i], control = ctrl, data = data, formula = formula, grid = grid),
+                       ...)
+    oobrisk <- t(as.data.frame(oobrisk))
     oobrisk <- oobrisk/colSums(folds == 0)
     colnames(oobrisk) <- grid
     rownames(oobrisk) <- 1:nrow(oobrisk)

Modified: pkg/R/helpers.R
===================================================================
--- pkg/R/helpers.R	2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/R/helpers.R	2009-07-06 16:49:50 UTC (rev 12)
@@ -82,13 +82,14 @@
 ## (adapted version from mboost)
 boost_control <- function(mstop = 100, nu = 0.1, maxit = 30000, risk = c("inbag", "oobag", "none"),
                           which.offset = c("mle", "zero"), savedata = TRUE,
-                          trace = TRUE, hardStop = TRUE) {
+                          trace = TRUE,  parallel = require("multicore"), hardStop = TRUE) {
 
     which.offset <- match.arg(which.offset)
     risk <- match.arg(risk)
     RET <- list(mstop = mstop, nu = nu, maxit = maxit,
                 risk = risk, which.offset = which.offset,
-                savedata = savedata, trace = trace, hardStop = hardStop)
+                savedata = savedata, trace = trace, parallel = parallel,
+                hardStop = hardStop)
     class(RET) <- c("boost_control")
     RET
 }

Modified: pkg/inst/CHANGES
===================================================================
--- pkg/inst/CHANGES	2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/inst/CHANGES	2009-07-06 16:49:50 UTC (rev 12)
@@ -1,13 +1,17 @@
-		CHANGES in `CoxFlexBoost' VERSION 0.7-0 (2009-03-XX)
+		CHANGES in `CoxFlexBoost' VERSION 0.7-0 (2009-07-XX)
 
-  o  changed assignment and storrage of lambda and penalty in bbs objects
+  o  added support for package multicore for crossvalidation function "cv"
 
-  o  changed computations (of upper boundary) in df2lambda
+  o  added "center" argument and possibility to handle categorical covariates to bols()
 
+  o  changed assignment and storage of lambda and penalty in bbs() objects
+
+  o  changed computations (of upper boundary) in df2lambda()
+
   o  changed dependencies: droped mboost (as it depends on many other packages)
      and added modeltools instead
 
-  o  improved plot.cfboost (for time-varying effects)
+  o  improved plot.cfboost() (for time-varying effects)
 
   o  TODO: change integration
 

Modified: pkg/man/bols.Rd
===================================================================
--- pkg/man/bols.Rd	2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/man/bols.Rd	2009-07-06 16:49:50 UTC (rev 12)
@@ -9,7 +9,8 @@
   effects (and thus the baseline hazard) can be specified.
 }
 \usage{
-bols(x, z = NULL, xname = NULL, zname = NULL, timedep = FALSE)
+bols(x, z = NULL, xname = NULL, zname = NULL,  center = FALSE,
+     df = NULL, timedep = FALSE, contrasts.arg = "contr.treatment")
 bolsTime(\dots)
 }
 
@@ -18,9 +19,17 @@
   \item{z}{ factor or numeric. A vector containing data. }
   \item{xname}{ optional. Name of the variable given in \code{x}. }
   \item{zname}{ optional. Name of the variable given in \code{z}. }
+  \item{center}{ logical. If \code{center=TRUE} the intercept in the
+    linear model is omitted.}
+  \item{df}{ Used to specify the degrees of freedom via the trace of the
+    hat matrix. Ridge penalization is used for categorical base-learners
+    to obtain df that are smaller than the number of categories minus
+    one.}
   \item{timedep}{ logic. If \code{timedep = TRUE} the base-learner is
     taken to be time-dependent, i.e., \code{x} must be the time. For more
     details see below. }
+  \item{contrasts.arg}{ a character string suitable for input to the
+    \code{\link{contrasts}} replacement function.}
   \item{\dots}{ further arguments passed to \code{bols}. }
 }
 \details{

Modified: pkg/man/boost_control.Rd
===================================================================
--- pkg/man/boost_control.Rd	2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/man/boost_control.Rd	2009-07-06 16:49:50 UTC (rev 12)
@@ -6,7 +6,7 @@
 }
 \usage{
 boost_control(mstop = 100, nu = 0.1, maxit = 30000, risk = c("inbag","oobag", "none"),
-    which.offset = c("mle", "zero"), savedata = TRUE, trace = TRUE, hardStop = TRUE)
+    which.offset = c("mle", "zero"), savedata = TRUE, trace = TRUE,  parallel = require("multicore"), hardStop = TRUE)
 }
 \arguments{
   \item{mstop}{ integer. Initial number of boosting iterations }
@@ -23,7 +23,10 @@
   \item{savedata}{ logic. Indicating whether the data should be saved in
     the returned \code{cfboost} object. }
   \item{trace}{ logic. Determining if status information should be
-    printed during estimation of the model. }
+    printed during estimation of the model. } 
+  \item{parallel}{ logic. Enables parallelization using \pkg{multicore} technology
+    for crossvalidation (in function \code{\link{cv}}). If \pkg{multicore} is available,
+    parallelization is used by default but can be switched off. }
   \item{hardStop}{ logic. Indicates if the initial value of
     \code{mstop} is the maximal number of iterations (\code{mstop =
       TRUE}) or if it should be increased if the algorithm did not

Modified: pkg/man/crossvalidation.Rd
===================================================================
--- pkg/man/crossvalidation.Rd	2009-03-15 18:47:14 UTC (rev 11)
+++ pkg/man/crossvalidation.Rd	2009-07-06 16:49:50 UTC (rev 12)
@@ -45,6 +45,12 @@
   sample which consists of observations with \code{weights == 1} and
   and an out-of-bag sample with \code{weights == 0}. The latter
   is used to determine the empirical risk (negative log likelihood).
+
+  If package \pkg{multicore} is available, \code{cv}
+  runs in parallel on the available cores/processors. The scheduling
+  can be changed by the corresponding arguments of
+  \code{\link[multicore]{mclapply}} (via the dot arguments).
+  No trace output is given when running in parallel.
 }
 \value{
   \code{cv} returns an object of class \code{cv}, which consists of