[Coxflexboost-commits] r8 - in pkg: . R inst man

noreply at r-forge.r-project.org
Mon Mar 2 18:24:19 CET 2009


Author: hofner
Date: 2009-03-02 18:24:18 +0100 (Mon, 02 Mar 2009)
New Revision: 8

Added:
   pkg/R/PMLE.R
   pkg/R/bbs.R
   pkg/R/bols.R
   pkg/R/cfboost.R
   pkg/R/helpers.R
   pkg/R/integr.R
   pkg/R/integr_fisher.R
   pkg/R/integr_score.R
   pkg/R/methods.R
   pkg/R/mstop.R
   pkg/R/rSurvTime.R
   pkg/R/risk.R
Removed:
   pkg/R/PMLE.r
   pkg/R/bbs.r
   pkg/R/bols.r
   pkg/R/cfboost.r
   pkg/R/helpers.r
   pkg/R/integr.r
   pkg/R/integr_fisher.r
   pkg/R/integr_score.r
   pkg/R/methods.r
   pkg/R/mstop.r
   pkg/R/rSurvTime.r
   pkg/R/risk.r
Modified:
   pkg/DESCRIPTION
   pkg/NAMESPACE
   pkg/inst/CHANGES
   pkg/man/CoxFlexBoost-package.Rd
Log:
- changed assignment and storage of lambda and penalty in bbs objects
- changed computation of the upper boundary in df2lambda
- changed dependencies: dropped mboost (as it depends on many other packages) and added modeltools instead
- renamed *.r to *.R

Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION	2008-12-09 15:36:18 UTC (rev 7)
+++ pkg/DESCRIPTION	2009-03-02 17:24:18 UTC (rev 8)
@@ -1,14 +1,14 @@
 Package: CoxFlexBoost
 Type: Package
 Title: Boosting Flexible Cox Models (with Time-Varying Effects)
-Version: 0.6-0
-Date: 2008-12-09
+Version: 0.7-0
+Date: 2009-03-XX
 Author: Benjamin Hofner
 Maintainer: Benjamin Hofner <benjamin.hofner at imbe.med.uni-erlangen.de>
 Description: Likelihood-based boosting approach to fit flexible,
   structured survival models with component-wise linear or P-spline
   base-learners. Variable selection and model choice are built-in
   features.
-Depends: methods, mboost, survival
+Depends: methods, survival, modeltools
 License: GPL-2
 LazyLoad: yes

Modified: pkg/NAMESPACE
===================================================================
--- pkg/NAMESPACE	2008-12-09 15:36:18 UTC (rev 7)
+++ pkg/NAMESPACE	2009-03-02 17:24:18 UTC (rev 8)
@@ -1,4 +1,5 @@
-import(mboost)
+#import(mboost)
+import(modeltools)
 
 export(cfboost, boost_control, mstop, risk, freq.sel,
        bbs, bbsTime, bols, bolsTime, rSurvTime)

Added: pkg/R/PMLE.R
===================================================================
--- pkg/R/PMLE.R	                        (rev 0)
+++ pkg/R/PMLE.R	2009-03-02 17:24:18 UTC (rev 8)
@@ -0,0 +1,148 @@
+###################
+## Penalized MLE ##
+###################
+
+## Function for penalized ML estimation
+PMLE <- function(y, x, offset, fit, ens, nu, maxit, subdivisions = 100, estimation = TRUE){
+
+    time <- y[,1]
+    delta <- y[,2]
+
+    ## currently added base-learner
+    added_bl <- ens[length(ens)]
+    ## number of coefficients to estimate
+    coefs <- rep(0, ncol(x[[added_bl]]))
+    ## get penalty for currently added base-learner
+    if (estimation)
+        pen <- penalty(x[[added_bl]])
+    ## get selected base-learners (uniquely)
+    ens_uni <- unique(ens)
+
+    if(any(sapply(x[ens_uni], attr, which = "timedep"))){
+        ## make grid and compute grid-width if there is ANY time-dependent base-learner
+        sub = subdivisions      ## subdivisions of time
+        n = length(time)        ## number of observations
+
+        grid <- function(upper, length){
+            ## helper function to compute grid
+            seq(from = 0, to = upper, length = length)
+        }
+
+        ## make grid
+        grid <- lapply(time, grid, length = sub)
+
+        trapezoid_width <- rep(NA, n)
+        for (i in 1:n)
+            trapezoid_width[i] <- grid[[i]][2]  # second element: the first grid point is 0 and the grid is equidistant for each i
+    } else {
+        grid = NULL
+        trapezoid_width = NULL
+    }
+
+    ## remove the most recently added base-learner
+    ens_uni <- ens_uni[ens_uni != added_bl]
+
+    ## initialization (needed if either no time-dependent or no time-constant base-learner present)
+    predictions_tconst = 0
+    predictions_td = 0
+
+    ## data pre-processing
+    if(length(ens_uni) > 0){
+        ## extract time-dependency
+        timedep <- sapply(x[ens_uni], attr, which = "timedep")
+        if (any(!timedep)){
+            ## predictions for time-constant base-learners
+            predictions_tconst <- sapply(x[ens_uni[!timedep]], predict)
+            predictions_tconst <- apply(predictions_tconst, 1, sum)
+        }
+        if (any(timedep)){
+            ## prediction for time-dependent base-learners
+            predictions_td <- sapply(x[ens_uni[timedep]], predict_td, time_grid = grid)
+            predictions_td <- apply(predictions_td, 1, sum)
+            predictions_td <- matrix(predictions_td, nrow = n, ncol = sub, byrow = TRUE)
+        }
+    }
+
+    ## build design matrix for currently added base-learner
+    if (attr(x[[added_bl]], "timedep")){
+        xd <- unlist(grid)
+        xname <- get("xname", environment(attr(x[[added_bl]], "predict")))
+        zd <- get("z", environment(attr(x[[added_bl]], "predict")))
+        if (!is.null(zd)){
+            zname <- get("zname", environment(attr(x[[added_bl]], "predict")))
+            zd <- rep(zd, each = length(grid[[1]]))
+            newdata <- data.frame(cbind(xd, zd))
+            names(newdata) <- c(xname, zname)
+        } else {
+            newdata <- data.frame(xd)
+            names(newdata) <- c(xname)
+        }
+        desMat <- attr(x[[added_bl]],"designMat")(newdata = newdata)
+    } else {
+        desMat <- attr(x[[added_bl]],"designMat")()
+    }
+
+    exp_offset <- exp(offset)
+    exp_pred_tconst <- exp(predictions_tconst * nu)
+    exp_pred_td <- exp(predictions_td * nu)
+
+    logLH_pen <- function(coefs){
+        log_lik <- sum(delta * (fit + x[[added_bl]] %*% coefs)
+                       - integr(x[[added_bl]], coefs, desMat,
+                                predictions = list(offset = exp_offset, tconst = exp_pred_tconst, td = exp_pred_td),
+                                controls = list(grid = grid, trapezoid_width = trapezoid_width, upper = time, nu = nu)))
+        if (is.null(pen)) pen <- 0 else pen <- 0.5 * (coefs %*% pen %*% coefs)
+        return(log_lik - pen)
+    }
+
+    s_pen <- function(coefs){
+        int <- integr_score(x[[added_bl]], coefs, desMat,
+                            predictions = list(offset = exp_offset, tconst = exp_pred_tconst, td = exp_pred_td),
+                            controls = list(grid = grid, trapezoid_width = trapezoid_width, upper = time, nu = nu))
+        score_vec <- t(delta) %*% x[[added_bl]] - int
+        if (is.null(pen)) pen <- 0 else pen <- (pen %*% coefs)
+        return(score_vec - as.vector(pen))
+    }
+
+    F_pen <- function(coefs){
+        Fisher_mat <- integr_fisher(x[[added_bl]], coefs, desMat,
+                                    predictions = list(offset = exp_offset, tconst = exp_pred_tconst, td = exp_pred_td),
+                                    controls = list(grid = grid, trapezoid_width = trapezoid_width, upper = time, nu = nu, which = "fisher"))
+        if (is.null(pen)) pen <- 0
+        return(list(F = Fisher_mat, F_pen = Fisher_mat + pen))
+    }
+
+    if (estimation){
+        repeat{
+            ## optimize (penalized) log-likelihood
+            result <- optim(par = coefs, fn = logLH_pen, method = "BFGS", control = list(fnscale = -1, maxit = maxit))
+
+            if (result$convergence == 0){
+                if (!is.null(pen)){
+                    ## compute (penalized and unpenalized) _observed_ Fisher matrix
+                    result$Fisher <- F_pen(result$par)
+                    if (is.null(pen)) pen <- 0
+                    ## compute estimated degrees of freedom
+                    m <- result$Fisher$F  %*% solve(result$Fisher$F_pen)
+                    result$df <- sum(diag(m))  # trace of m
+                    ## set penalty matrix NULL
+                    pen <- NULL
+                }
+                ## calculate criterion value (i.e., unpenalized log-likelihood)
+                result$maxll <- logLH_pen(result$par)
+                ## return function for (unpenalized) log-likelihood
+                result$logLH <- logLH_pen
+                return(result)
+            }
+            maxit <- maxit * 10
+            warning(sQuote("optim"), " did not converge. ",
+                    sQuote("maxit"), " was increased. For faster computation please increase ", sQuote("boost_control(maxit)"))
+            if (maxit > 5000000) stop(sQuote("optim"), " did not converge.")
+        }
+    } else {
+        pen <- NULL
+        ## return function for (unpenalized) log-likelihood
+        return(list(logLH = logLH_pen))
+    }
+}
+


Property changes on: pkg/R/PMLE.R
___________________________________________________________________
Name: svn:executable
   + *
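
The grid/trapezoid_width block in PMLE() above prepares a simple trapezoidal rule for the integral over [0, t_i] that appears in the full log-likelihood. A self-contained sketch of the same quadrature idea (f is just a placeholder integrand; the actual integrand, handled by integr(), is exp() of the current predictor):

    ## Sketch: trapezoidal approximation of int_0^t f(s) ds on an
    ## equidistant grid from 0 to t, mirroring the grid construction in PMLE()
    trapezoid <- function(f, upper, subdivisions = 100) {
        grid <- seq(from = 0, to = upper, length.out = subdivisions)
        width <- grid[2]    # first grid point is 0 and the grid is equidistant
        vals <- f(grid)
        ## trapezoidal rule: half weights at both ends of the grid
        width * (sum(vals) - 0.5 * (vals[1] + vals[subdivisions]))
    }

    trapezoid(exp, upper = 2)   # approx. 6.389 = exp(2) - 1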

Deleted: pkg/R/PMLE.r
===================================================================
--- pkg/R/PMLE.r	2008-12-09 15:36:18 UTC (rev 7)
+++ pkg/R/PMLE.r	2009-03-02 17:24:18 UTC (rev 8)
@@ -1,147 +0,0 @@
-###################
-## Penalized MLE ##
-###################
-
-## Function for penalized ML estimation
-PMLE <- function(y, x, offset, fit, ens, nu, maxit, subdivisions = 100, estimation = TRUE){
-
-    time <- y[,1]
-    delta <- y[,2]
-
-    ## currently added base-learner
-    added_bl <- ens[length(ens)]
-    ## number of coefficients to estimate
-    coefs <- rep(0, ncol(x[[added_bl]]))
-    ## get penalty for currently added base-learner
-    pen <- attr(x[[added_bl]], "pen")
-    ## get selected base-learners (uniquely)
-    ens_uni <- unique(ens)
-
-    if(any(sapply(x[ens_uni], attr, which = "timedep"))){
-        ## make grid and compute grid-width if there is ANY time-dependent base-learner
-        sub = subdivisions      ## subdivisions of time
-        n = length(time)        ## number of observations
-
-        grid <- function(upper, length){
-            ## helper function to compute grid
-            seq(from = 0, to = upper, length = length)
-        }
-
-        ## make grid
-        grid <- lapply(time, grid, length = sub)
-
-        trapezoid_width <- rep(NA, n)
-        for (i in 1:n)
-            trapezoid_width[i] <- grid[[i]][2]  # = second element, as first element == 0 and the grid equidistant for every i
-    } else {
-        grid = NULL
-        trapezoid_width = NULL
-    }
-
-    ## remove the most recently added base-learner
-    ens_uni <- ens_uni[ens_uni != added_bl]
-
-    ## initialization (needed if either no time-dependent or no time-constant base-learner present)
-    predictions_tconst = 0
-    predictions_td = 0
-
-    ## data pre-processing
-    if(length(ens_uni) > 0){
-        ## extract time-dependency
-        timedep <- sapply(x[ens_uni], attr, which = "timedep")
-        if (any(!timedep)){
-            ## predictions for time-constant base-learners
-            predictions_tconst <- sapply(x[ens_uni[!timedep]], predict)
-            predictions_tconst <- apply(predictions_tconst, 1, sum)
-        }
-        if (any(timedep)){
-            ## prediction for time-dependent base-learners
-            predictions_td <- sapply(x[ens_uni[timedep]], predict_td, time_grid = grid)
-            predictions_td <- apply(predictions_td, 1, sum)
-            predictions_td <- matrix(predictions_td, nrow = n, ncol = sub, byrow = TRUE)
-        }
-    }
-
-    ## build design matrix for currently added base-learner
-    if (attr(x[[added_bl]], "timedep")){
-        xd <- unlist(grid)
-        xname <- get("xname", environment(attr(x[[added_bl]], "predict")))
-        zd <- get("z", environment(attr(x[[added_bl]], "predict")))
-        if (!is.null(zd)){
-            zname <- get("zname", environment(attr(x[[added_bl]], "predict")))
-            zd <- rep(zd, each = length(grid[[1]]))
-            newdata <- data.frame(cbind(xd, zd))
-            names(newdata) <- c(xname, zname)
-        } else {
-            newdata <- data.frame(xd)
-            names(newdata) <- c(xname)
-        }
-        desMat <- attr(x[[added_bl]],"designMat")(newdata = newdata)
-    } else {
-        desMat <- attr(x[[added_bl]],"designMat")()
-    }
-
-    exp_offset <- exp(offset)
-    exp_pred_tconst <- exp(predictions_tconst * nu)
-    exp_pred_td <- exp(predictions_td * nu)
-
-    logLH_pen <- function(coefs){
-        log_lik <- sum(delta * (fit + x[[added_bl]] %*% coefs)
-                       - integr(x[[added_bl]], coefs, desMat,
-                                predictions = list(offset = exp_offset, tconst = exp_pred_tconst, td = exp_pred_td),
-                                controls = list(grid = grid, trapezoid_width = trapezoid_width, upper = time, nu = nu)))
-        if (is.null(pen)) pen <- 0 else pen <- 0.5 * (coefs %*% pen %*% coefs)
-        return(log_lik - pen)
-    }
-
-    s_pen <- function(coefs){
-        int <- integr_score(x[[added_bl]], coefs, desMat,
-                            predictions = list(offset = exp_offset, tconst = exp_pred_tconst, td = exp_pred_td),
-                            controls = list(grid = grid, trapezoid_width = trapezoid_width, upper = time, nu = nu))
-        score_vec <- t(delta) %*% x[[added_bl]] - int
-        if (is.null(pen)) pen <- 0 else pen <- (pen %*% coefs)
-        return(score_vec - as.vector(pen))
-    }
-
-    F_pen <- function(coefs){
-        Fisher_mat <- integr_fisher(x[[added_bl]], coefs, desMat,
-                                    predictions = list(offset = exp_offset, tconst = exp_pred_tconst, td = exp_pred_td),
-                                    controls = list(grid = grid, trapezoid_width = trapezoid_width, upper = time, nu = nu, which = "fisher"))
-        if (is.null(pen)) pen <- 0
-        return(list(F = Fisher_mat, F_pen = Fisher_mat + pen))
-    }
-
-    if (estimation){
-        repeat{
-            ## optimize (penalized) log-likelihood
-            result <- optim(par = coefs, fn = logLH_pen, method = "BFGS", control = list(fnscale = -1, maxit = maxit))
-
-            if (result$convergence == 0){
-                if (!is.null(pen)){
-                    ## compute (penalized and unpenalized) _observed_ Fisher matrix
-                    result$Fisher <- F_pen(result$par)
-                    if (is.null(pen)) pen <- 0
-                    ## compute estimated degrees of freedom
-                    m <- result$Fisher$F  %*% solve(result$Fisher$F_pen)
-                    result$df <- sum(diag(m))  # trace of m
-                    ## set penalty matrix NULL
-                    pen <- NULL
-                }
-                ## calculate criterion value (i.e., unpenalized log-likelihood)
-                result$maxll <- logLH_pen(result$par)
-                ## return function for (unpenalized) log-likelihood
-                result$logLH <- logLH_pen
-                return(result)
-            }
-            maxit <- maxit * 10
-            warning(sQuote("optim"), " did not converge. ",
-                    sQuote("maxit"), "was increased. For faster computation please increase ", sQuote("boost_control(maxit)"))
-            if (maxit > 5000000) stop(sQuote("optim"), " did not converge.")
-        }
-    } else {
-        pen <- NULL
-        ## return function for (unpenalized) log-likelihood
-        return(list(logLH = logLH_pen))
-    }
-}
-

Added: pkg/R/bbs.R
===================================================================
--- pkg/R/bbs.R	                        (rev 0)
+++ pkg/R/bbs.R	2009-03-02 17:24:18 UTC (rev 8)
@@ -0,0 +1,137 @@
+###########################
+## P-Spline Base-Learner ##
+###########################
+
+bbsTime <- function(...){
+    bbs(..., timedep=TRUE)
+}
+
+bbs <- function(x, z = NULL, knots = 20, degree = 3, differences = 2, df = 4,
+                center = FALSE, xname = NULL, zname = NULL, timedep = FALSE) {
+    cc <- complete_cases(x = x, z = z)
+
+    if (is.null(xname)) xname <- deparse(substitute(x))
+    if (is.null(zname)) zname <- deparse(substitute(z))
+
+    if (is.factor(x) || (df <= 2 && !center))
+        return(bols(x = x, z = z, xname = xname, zname = zname))
+
+    if (!differences %in% 1:3)
+        stop(sQuote("differences"), " is not in 1:3")
+    if ((!center) && (df < differences))
+        stop(sQuote("df"), " is less than ", sQuote("differences"))
+    if (center && (degree < (differences-1)))
+        stop(sQuote("degree"), " is less than ", sQuote("differences"), "-1")
+    if (length(unique(x)) < 6)
+        stop(sQuote(xname), " has fewer than 6 unique values")
+
+    n.kn <- function(n) {
+        ## Number of inner knots
+        if(n < 40) n
+        else 40
+    }
+
+    if (is.null(knots)) {
+        n <- length(x)
+        nk <- n.kn(n)
+        ### ADDED: maximal 20 knots (to reduce computational burden)
+        if (nk > 20){
+            warning("Number of (inner) ", sQuote("knots"), " should not exceed 20 to keep the computational burden low.")
+            nk <- 20
+        }
+        knots <- seq(from = min(x, na.rm = TRUE),
+                     to = max(x, na.rm = TRUE), length = nk)
+        knots <- knots[2:(length(knots) - 1)]
+    } else {
+        if (length(unique(diff(knots))) > 1)
+            warning("non-equidistant ", sQuote("knots"),
+                    " might be inappropriate")
+    }
+
+    if (length(knots) == 1) {
+        ### ADDED: maximal 20 knots (to reduce computational burden)
+        if (knots > 20){
+            warning("Number of (inner) ", sQuote("knots"), " should not exceed 20 to keep the computational burden low.")
+            knots <- 20
+        }
+        knots <- seq(from = min(x, na.rm = TRUE),
+                     to = max(x, na.rm = TRUE), length = knots+2)
+        knots <- knots[2:(length(knots) - 1)]
+    }
+    boundary.knots <- range(x, na.rm = TRUE)
+
+    newX <- function(x, z = NULL, na.rm = TRUE) {
+        if (na.rm) {
+            x <- x[cc]
+            if (!is.null(z))
+                z <- z[cc]
+        }
+        X <- bs(x, knots = knots, degree = degree, intercept = TRUE,
+                Boundary.knots = boundary.knots)
+        if (!is.null(z))
+            X <- X * z
+        if (center) {
+            K <- diff(diag(ncol(X)), differences = differences)
+            X <- tcrossprod(X, K) %*% solve(tcrossprod(K))
+        }
+        return(X)
+    }
+
+    X <- newX(x, z)
+    Xna <- X
+    if (any(!cc))
+        Xna <- newX(x, z, na.rm = FALSE)
+
+    if (center) {
+        K <- diag(ncol(X))
+    } else {
+        K <- diff(diag(ncol(X)), differences = differences)
+        K <- crossprod(K, K)
+    }
+
+    predictfun <- function(coefs, newdata = NULL) {
+        if (is.null(newdata)) return(Xna %*% coefs)
+        nX <- newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
+        nX %*% coefs
+    }
+
+    designMat <- function(newdata = NULL){
+        if (is.null(newdata)) return(Xna)
+        newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
+    }
+
+    lambda <- NULL
+    pen <- NULL
+
+    df2lambda <- function(y, offset){
+        ## FIXME: what about nu and maxit? ##
+        dummy <- helper_fct(y, X, offset, pen = K)
+        df2l <- function(lambda, df){
+            tmp <- dummy(lambda)$F %*% solve(dummy(lambda)$F_pen)
+            sum(diag(tmp)) - df
+        }
+        upper <- 1000
+        while (TRUE){
+            result <- try(uniroot(f = df2l, interval = c(1, upper), df = df), silent = TRUE)
+            if (!inherits(result, "try-error")) break
+            upper <- upper * 100
+            if (upper == Inf) stop("could not solve ", sQuote("df2lambda()"))
+        }
+        lambda <<- result$root
+        pen <<- lambda * K
+        result$root
+    }
+
+    attr(X, "designMat") <- designMat
+    attr(X, "df") <- df
+    attr(X, "df2lambda") <- df2lambda
+    attr(X, "lambda") <- getlambda <- function() {
+        if(!is.null(lambda)){ return(lambda) } else { stop("Call ", sQuote('attr( ,"df2lambda")'), " for ", sQuote("bbs()"), " base-learner first") }}
+    attr(X, "pen") <- getpen <- function() {
+        if(!is.null(pen)){ return(pen) } else { stop("Call ", sQuote('attr( ,"df2lambda")'), " for ", sQuote("bbs()"), " base-learner first") }}
+    attr(X, "timedep") <- timedep
+    attr(X, "coefs") <- rep(0, ncol(X))
+    attr(X, "predict") <- predictfun
+    class(X) <- c("baselearner", "bbs")
+    return(X)
+}
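
This implements the first two log items: lambda and the penalty matrix are no longer fixed when the base-learner is constructed, but are computed once attr(X, "df2lambda") has been called with the response and offset; the accessors attr(X, "lambda") and attr(X, "pen") stop() before that. A rough sketch of the intended calling sequence (assumes this revision of CoxFlexBoost is loaded; y is a Surv object and offset is obtained as in cfboost_fit()):

    x <- runif(100)
    X <- bbs(x, df = 4)               # lambda and penalty still NULL here
    ## attr(X, "lambda")()            # would stop(): df2lambda not called yet
    attr(X, "df2lambda")(y, offset)   # solves df(lambda) = 4 via uniroot()
    attr(X, "lambda")()               # smoothing parameter, now stored
    attr(X, "pen")()                  # penalty matrix, i.e., lambda * K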

Deleted: pkg/R/bbs.r
===================================================================
--- pkg/R/bbs.r	2008-12-09 15:36:18 UTC (rev 7)
+++ pkg/R/bbs.r	2009-03-02 17:24:18 UTC (rev 8)
@@ -1,131 +0,0 @@
-###########################
-## P-Spline Base-Learner ##
-###########################
-
-bbsTime <- function(...){
-    bbs(..., timedep=TRUE)
-}
-
-bbs <- function(x, z = NULL, knots = 20, degree = 3, differences = 2, df = 4,
-                center = FALSE, xname = NULL, zname = NULL, timedep = FALSE) {
-    cc <- mboost:::complete_cases(x = x, z = z)
-
-    if (is.null(xname)) xname <- deparse(substitute(x))
-    if (is.null(zname)) zname <- deparse(substitute(z))
-
-    if (is.factor(x) || (df <= 2 && !center))
-        return(bols(x = x, z = z, xname = xname, zname = zname))
-
-    if (!differences %in% 1:3)
-        stop(sQuote("differences"), " are not in 1:3")
-    if ((!center) && (df < differences))
-        stop(sQuote("df"), " is less than ", sQuote("differences"))
-    if(center && (degree < (differences-1)))
-        stop(sQuote("degree"), " is less than ", sQuote("differences"), "-1")
-    if (length(unique(x)) < 6)
-        stop(sQuote(xname), " has less than 6 unique values")
-
-    n.kn <- function(n) {
-        ## Number of inner knots
-        if(n < 40) n
-        else 40
-    }
-
-    if (is.null(knots)) {
-        n <- length(x)
-        nk <- n.kn(n)
-        ### ADDED: maximal 20 knots (to reduce computational burden)
-        if (nk > 20){
-            warning("Number of (inner) ", sQuote("knots"), " should not exceed 20 to keep the computational burden low.")
-            nk <- 20
-        }
-        knots <- seq(from = min(x, na.rm = TRUE),
-                     to = max(x, na.rm = TRUE), length = nk)
-        knots <- knots[2:(length(knots) - 1)]
-    } else {
-        if (length(unique(diff(knots))) > 1)
-            warning("non-equidistant ", sQuote("knots"),
-                    " might be inappropriate")
-    }
-
-    if (length(knots) == 1) {
-        ### ADDED: maximal 20 knots (to reduce computational burden)
-        if (knots > 20){
-            warning("Number of (inner) ", sQuote("knots"), " should not exceed 20 to keep the computational burden low.")
-            knots <- 20
-        }
-        knots <- seq(from = min(x, na.rm = TRUE),
-                     to = max(x, na.rm = TRUE), length = knots+2)
-        knots <- knots[2:(length(knots) - 1)]
-    }
-    boundary.knots <- range(x, na.rm = TRUE)
-
-    newX <- function(x, z = NULL, na.rm = TRUE) {
-        if (na.rm) {
-            x <- x[cc]
-            if (!is.null(z))
-                z <- z[cc]
-        }
-        X <- bs(x, knots = knots, degree = degree, intercept = TRUE,
-                Boundary.knots = boundary.knots)
-        if (!is.null(z))
-            X <- X * z
-        if (center) {
-            K <- diff(diag(ncol(X)), differences = differences)
-            X <- tcrossprod(X, K) %*% solve(tcrossprod(K))
-        }
-        return(X)
-    }
-
-    X <- newX(x, z)
-    Xna <- X
-    if (any(!cc))
-        Xna <- newX(x, z, na.rm = FALSE)
-
-    if (center) {
-        K <- diag(ncol(X))
-    } else {
-        K <- diff(diag(ncol(X)), differences = differences)
-        K <- crossprod(K, K)
-    }
-
-    predictfun <- function(coefs, newdata = NULL) {
-        if (is.null(newdata)) return(Xna %*% coefs)
-        nX <- newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
-        nX %*% coefs
-    }
-
-    designMat <- function(newdata = NULL){
-        if (is.null(newdata)) return(Xna)
-        newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
-    }
-
-    lambda <- mboost:::df2lambda(X, df = df, dmat = K, weights =rep(1,nrow(X)))
-
-    df2lambda <- function(y, offset){
-        ## FIXME: what about nu and maxit? ##
-        ## FIXME: find a better function name
-        dummy <- helper_fct(y, X, offset, pen = K)
-        df2l <- function(lambda, df){
-            tmp <- dummy(lambda)$F %*% solve(dummy(lambda)$F_pen)
-            sum(diag(tmp)) - df
-        }
-        ## FIXME: lambda will no longer exist later on - what should the upper interval boundary be? ##
-        result <- uniroot(f= df2l, interval = c(0,lambda + 100), df=df)
-        result$root
-    }
-
-    penalty <- function(lambda){
-        lambda * K
-    }
-
-    attr(X, "designMat") <- designMat
-    attr(X, "df") <- df
-    attr(X, "lambda") <- df2lambda
-    attr(X, "pen") <- penalty
-    attr(X, "timedep") <- timedep
-    attr(X, "coefs") <- rep(0, ncol(X))
-    attr(X, "predict") <- predictfun
-    class(X) <- c("baselearner", "bbs")
-    return(X)
-}

Added: pkg/R/bols.R
===================================================================
--- pkg/R/bols.R	                        (rev 0)
+++ pkg/R/bols.R	2009-03-02 17:24:18 UTC (rev 8)
@@ -0,0 +1,55 @@
+#########################
+## Linear Base-Learner ##
+#########################
+
+bolsTime <- function(...){
+    bols(..., timedep=TRUE)
+}
+
+bols <- function(x, z = NULL, xname = NULL, zname = NULL, timedep=FALSE) {
+
+    if (is.null(xname)) xname = deparse(substitute(x))
+    if (is.null(zname)) zname = deparse(substitute(z))
+
+    cc <- complete_cases(x = x, z = z)
+
+    newX <- function(x, z = NULL, na.rm = TRUE) {
+        if (na.rm) {
+            x <- x[cc]
+            if (!is.null(z))
+                z <- z[cc]
+        }
+        X <- model.matrix(~ x)
+        if (any(!cc) & !na.rm) {
+            Xtmp <- matrix(NA, ncol = ncol(X), nrow = length(cc))
+            Xtmp[cc,] <- X
+            X <- Xtmp
+        }
+        if (!is.null(z)) X <- X * z
+        X
+    }
+    X <- newX(x, z)
+    Xna <- X
+    if (any(!cc))
+        Xna <- newX(x, z, na.rm = FALSE)
+
+    predictfun <- function(coefs, newdata = NULL) {
+        if (is.null(newdata)) return(Xna %*% coefs)
+        nX <- newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
+        nX %*% coefs
+    }
+
+    designMat <- function(newdata = NULL){
+        if (is.null(newdata)) return(Xna)
+        newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
+    }
+
+    attr(X, "designMat") <- designMat
+    attr(X, "timedep") <- timedep
+    attr(X, "coefs") <- rep(0, ncol(X))
+    attr(X, "predict") <- predictfun
+    class(X) <- c("baselearner", "bols")
+    return(X)
+}
+
+
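
Note that bols() simply operates on model.matrix(~ x), optionally multiplied row-wise by a second covariate z; this is also how bolsTime() realizes time-varying effects, with time entering as x. In plain base R, independent of the package:

    x <- c(1.2, 0.7, 3.1)
    z <- c(0, 1, 1)
    X  <- model.matrix(~ x)   # intercept column plus x
    Xz <- X * z               # row i scaled by z[i]; rows with z == 0 drop out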

Deleted: pkg/R/bols.r
===================================================================
--- pkg/R/bols.r	2008-12-09 15:36:18 UTC (rev 7)
+++ pkg/R/bols.r	2009-03-02 17:24:18 UTC (rev 8)
@@ -1,55 +0,0 @@
-#########################
-## Linear Base-Learner ##
-#########################
-
-bolsTime <- function(...){
-    bols(..., timedep=TRUE)
-}
-
-bols <- function(x, z = NULL, xname = NULL, zname = NULL, timedep=FALSE) {
-
-    if (is.null(xname)) xname = deparse(substitute(x))
-    if (is.null(zname)) zname = deparse(substitute(z))
-
-    cc <- mboost:::complete_cases(x = x, z = z)
-
-    newX <- function(x, z = NULL, na.rm = TRUE) {
-        if (na.rm) {
-            x <- x[cc]
-            if (!is.null(z))
-                z <- z[cc]
-        }
-        X <- model.matrix(~ x)
-        if (any(!cc) & !na.rm) {
-            Xtmp <- matrix(NA, ncol = ncol(X), nrow = length(cc))
-            Xtmp[cc,] <- X
-            X <- Xtmp
-        }
-        if (!is.null(z)) X <- X * z
-        X
-    }
-    X <- newX(x, z)
-    Xna <- X
-    if (any(!cc))
-        Xna <- newX(x, z, na.rm = FALSE)
-
-    predictfun <- function(coefs, newdata = NULL) {
-        if (is.null(newdata)) return(Xna %*% coefs)
-        nX <- newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
-        nX %*% coefs
-    }
-
-    designMat <- function(newdata = NULL){
-        if (is.null(newdata)) return(Xna)
-        newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
-    }
-
-    attr(X, "designMat") <- designMat
-    attr(X, "timedep") <- timedep
-    attr(X, "coefs") <- rep(0, ncol(X))
-    attr(X, "predict") <- predictfun
-    class(X) <- c("baselearner", "bols")
-    return(X)
-}
-
-

Added: pkg/R/cfboost.R
===================================================================
--- pkg/R/cfboost.R	                        (rev 0)
+++ pkg/R/cfboost.R	2009-03-02 17:24:18 UTC (rev 8)
@@ -0,0 +1,186 @@
+########################
+## Boosting Algorithm ##
+########################
+
+### generic method for likelihood-based boosting with component-wise P-splines
+### for fitting Cox-type additive models (flexible Cox boosting)
+cfboost <- function(x, ...)
+    UseMethod("cfboost")
+
+### formula interface
+cfboost.formula <- function(formula, data = list(), weights = NULL, na.action = na.omit,  control = boost_control(), ...) {
+    ## construct design matrix etc.
+    object <- boost_dpp(formula, data, weights, na.action)
+    ## fit the ensemble
+    object$input <- object$menv@get("input")
+    if (!is.null(weights))
+        object$oob$input <- object$oob$menv@get("input")
+    RET <- cfboost_fit(object, control = control, data = data, weights = weights, ...)
+    RET$call <- match.call()
+    return(RET)
+}
+
+### fitting-function
+cfboost_fit <- function(object, control = boost_control(), data, weights = NULL, ...) {
+
+    ## data and base-learner
+    x <- object$input
+    class(x) <- "list"
+
+    oob <- list(x = object$oob$input , y = object$oob$y)
+    if (!is.null(oob$x)) class(oob$x) <- "list"
+
+    y <- object$y
+    if (!inherits(y, "Surv")) stop("response is not an object of class ", sQuote("Surv"))
+
+    ## hyper parameters
+    mstop <- control$mstop
+    risk <- control$risk
+    nu <- control$nu
+    trace <- control$trace
+    tracestep <- options("width")$width / 2
+    maxit <- control$maxit # maximum iterations in optim (see PMLE())
+    which.offset <- control$which.offset
+    hardStop <- control$hardStop # option: "continue boosting if minimum not reached" or "stop"
+
+    ## check if (enough) oob-data is present if risk="oobag"
+    if (risk == "oobag"){
+        checksum <- sum(weights==0)
+        if(checksum == 0)
+            stop("All observations are used for estimation.\nSpecify some weights equal to zero for ", sQuote("risk = oobag"))
+        if (checksum < length(weights)/10)
+            warning("Less than 1/10 of the data used for out-of-bag risk.\n", sQuote("object$risk"), " might be misleading.")
+    }
+
+    ## the ensemble
+    ens <- rep(NA, mstop)
+    ensss <- vector(mode = "list", length = mstop)
+
+    ## vector of empirical risks for all boosting iterations
+    mrisk <- numeric(mstop)
+    mrisk[1:mstop] <- NA
+
+    maxll <- numeric(length(x))
+    coefs <- logLH <- vector(mode = "list", length = length(x))
+
+    fit <- fit_oob <- offset <- getoffset(y, which.offset)
+    if (trace)
+        cat("Offset: ", offset, "\n")
+
+    mstart <- 1
+    hSi <- 1     # number of iterations in the repeat loop
+    df_est <- matrix(NA, nrow = mstop, ncol = length(x)) # matrix of estimated degrees of freedom
+
+    ## compute df2lambda which depends on the offset and on y
+    for (i in 1:length(x)){
+        if (!is.null( attr(x[[i]], "df"))){
+            attr(x[[i]], "df2lambda")(y, offset)
+        }
+    }
+
+    ##################################
+    #### start boosting iteration ####
+    ##################################
+    repeat{
+      for (m in mstart:mstop) {
+        if (trace)
+          cat("Step ", m, "\n")
+
+        ## fit MLE component-wise
+        for (i in 1:length(x)) {
+            maxll[i] <- NA
+            dummy_ens <- ens[1:m]   # copy the first m-1 selected base-learners (entry m is still NA)
+            dummy_ens[m] <- i       # and set the m-th base-learner temporarily to i
+            ## try to compute the (component-wise) penalized MLE
+            dummy <- try(PMLE(y, x, offset, fit, dummy_ens, nu, maxit))
+            if (inherits(dummy, "try-error")) next
+            coefs[[i]] <- dummy$par
+            maxll[i] <- dummy$maxll
+            logLH[[i]] <- dummy$logLH
+            if (!is.null(dummy$df)) df_est[m,i] <- dummy$df
+        }
+        if (all(is.na(maxll)))
+            stop("could not fit base learner in boosting iteration ", m)
+
+        ## select base-learner
+        xselect <- which.max(maxll)
+
+        ## output for debugging
+        if (trace)
+            cat("\tSelected: ", xselect, "   with log LH ", maxll[xselect],"\n", sep = "")
+        ## update step
+        fit <- fit + nu * x[[xselect]] %*% coefs[[xselect]]
+
+        ## save the model, i.e., the selected coefficient and base-learner
+        ens[m] <- xselect
+        ensss[[m]] <- coefs[[xselect]]
+
+        ## save updated parameters in x[[xselect]]
+        x[[xselect]] <- updatecoefs(x[[xselect]], coefs[[xselect]])
+
+        if (risk == "inbag"){
+            mrisk[m] <- - logLH[[xselect]](coefs[[xselect]] * nu)
+            if (trace)
+                cat("\trisk (inbag) = ", mrisk[m], "\n")
+        }
+        if (risk == "oobag"){
+            ## make a call to the PMLE function to build the matrices and get the logLH function (without estimating coefficients)
+            dummy <- PMLE(oob$y, oob$x, offset, fit_oob, ens[1:m], nu, maxit, estimation = FALSE)
+            mrisk[m] <- - dummy$logLH(coefs[[xselect]] * nu)
+            if (trace)
+                cat("\trisk (oobag) = ", mrisk[m], "\n")
+
+            ## update step for oob data
+            oob$x[[xselect]] <- updatecoefs(oob$x[[xselect]], coefs[[xselect]])
+            fit_oob <- fit_oob + nu *  oob$x[[xselect]] %*% coefs[[xselect]]
+        }
+      }
+      if (hardStop | risk != "oobag" | which.min(mrisk) < (mstop - mstop * 0.2/hSi) | hSi == 5) break
+
+      ## else if minimum is at the end of the boosting algorithm,
+      ## don't stop but proceed with boosting: Therefore, increase mstop
+      ## and the length of the arrays and vectors for storage of results
+      warning("risk in test sample seems not to be minimal. ", sQuote("mstop"), " increased.")
+
+      hSi <- hSi + 1
+      increase <- 100
+      ens <-  c(ens, rep(NA, increase))
+      dummy <- vector("list", length = (mstop + increase))
+      dummy[1:mstop] <- ensss
+      ensss <- dummy
+      mrisk <- c(mrisk, rep(NA, increase))
+      df_est <- rbind(df_est, matrix(NA, nrow = increase, ncol = ncol(df_est)))# matrix of estimated degrees of freedom
+      mstart <- mstop + 1
[TRUNCATED]
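
Although truncated, the loop shows the core of cfboost_fit(): in each iteration every candidate base-learner is refitted by penalized MLE around the current fit, the one with the largest penalized log-likelihood is selected, and only the damped contribution nu * x[[xselect]] %*% coefs enters the fit. Schematically (pmle_fit is a placeholder, not the real PMLE() signature):

    ## Schematic sketch of one boosting iteration (placeholder code):
    ## x: list of base-learner design matrices, fit: current predictor,
    ## nu: step-size, pmle_fit(i, fit): returns list(par = ..., maxll = ...)
    boost_step <- function(x, fit, nu, pmle_fit) {
        fits <- lapply(seq_along(x), pmle_fit, fit = fit)
        maxll <- sapply(fits, `[[`, "maxll")
        xselect <- which.max(maxll)          # best (penalized) log-likelihood
        fit + nu * (x[[xselect]] %*% fits[[xselect]]$par)
    }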

To get the complete diff run:
    svnlook diff /svnroot/coxflexboost -r 8

