[Coxflexboost-commits] r8 - in pkg: . R inst man
noreply at r-forge.r-project.org
Mon Mar 2 18:24:19 CET 2009
Author: hofner
Date: 2009-03-02 18:24:18 +0100 (Mon, 02 Mar 2009)
New Revision: 8
Added:
pkg/R/PMLE.R
pkg/R/bbs.R
pkg/R/bols.R
pkg/R/cfboost.R
pkg/R/helpers.R
pkg/R/integr.R
pkg/R/integr_fisher.R
pkg/R/integr_score.R
pkg/R/methods.R
pkg/R/mstop.R
pkg/R/rSurvTime.R
pkg/R/risk.R
Removed:
pkg/R/PMLE.r
pkg/R/bbs.r
pkg/R/bols.r
pkg/R/cfboost.r
pkg/R/helpers.r
pkg/R/integr.r
pkg/R/integr_fisher.r
pkg/R/integr_score.r
pkg/R/methods.r
pkg/R/mstop.r
pkg/R/rSurvTime.r
pkg/R/risk.r
Modified:
pkg/DESCRIPTION
pkg/NAMESPACE
pkg/inst/CHANGES
pkg/man/CoxFlexBoost-package.Rd
Log:
- changed assignment and storage of lambda and penalty in bbs objects (see the sketch below)
- changed the computation of the upper interval boundary in df2lambda
- changed dependencies: dropped mboost (it pulls in many other packages) and added modeltools instead
- renamed *.r to *.R
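
The first two log items concern how a bbs() base-learner stores its smoothing parameter: lambda and the penalty matrix now start out as NULL in the closure of the base-learner and are filled in only once df2lambda() has been called with the response and offset. A minimal sketch of this closure pattern (simplified names, not the package code verbatim):

    make_bl <- function(K) {
        lambda <- NULL
        pen <- NULL
        df2lambda <- function(l) {    # stands in for the real root finding
            lambda <<- l
            pen <<- lambda * K
            invisible(lambda)
        }
        getlambda <- function() {
            if (is.null(lambda))
                stop("call df2lambda() first")
            lambda
        }
        list(df2lambda = df2lambda, lambda = getlambda)
    }
    bl <- make_bl(K = diag(3))
    bl$df2lambda(10)
    bl$lambda()    # 10; calling bl$lambda() before df2lambda() is an error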
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2008-12-09 15:36:18 UTC (rev 7)
+++ pkg/DESCRIPTION 2009-03-02 17:24:18 UTC (rev 8)
@@ -1,14 +1,14 @@
Package: CoxFlexBoost
Type: Package
Title: Boosting Flexible Cox Models (with Time-Varying Effects)
-Version: 0.6-0
-Date: 2008-12-09
+Version: 0.7-0
+Date: 2009-03-XX
Author: Benjamin Hofner
Maintainer: Benjamin Hofner <benjamin.hofner@imbe.med.uni-erlangen.de>
Description: Likelihood-based boosting approach to fit flexible,
structured survival models with component-wise linear or P-spline
base-learners. Variable selection and model choice are built in
features.
-Depends: methods, mboost, survival
+Depends: methods, survival, modeltools
License: GPL-2
LazyLoad: yes
Modified: pkg/NAMESPACE
===================================================================
--- pkg/NAMESPACE 2008-12-09 15:36:18 UTC (rev 7)
+++ pkg/NAMESPACE 2009-03-02 17:24:18 UTC (rev 8)
@@ -1,4 +1,5 @@
-import(mboost)
+#import(mboost)
+import(modeltools)
export(cfboost, boost_control, mstop, risk, freq.sel,
bbs, bbsTime, bols, bolsTime, rSurvTime)
Added: pkg/R/PMLE.R
===================================================================
--- pkg/R/PMLE.R (rev 0)
+++ pkg/R/PMLE.R 2009-03-02 17:24:18 UTC (rev 8)
@@ -0,0 +1,148 @@
+###################
+## Penalized MLE ##
+###################
+
+## Function for penalized ML estimation
+PMLE <- function(y, x, offset, fit, ens, nu, maxit, subdivisions = 100, estimation = TRUE){
+
+ time <- y[,1]
+ delta <- y[,2]
+
+ ## currently added base-learner
+ added_bl <- ens[length(ens)]
+ ## number of coefficients to estimate
+ coefs <- rep(0, ncol(x[[added_bl]]))
+ ## get penalty for currently added base-learner
+ if (estimation)
+ pen <- penalty(x[[added_bl]])
+ ## get selected base-learners (uniquely)
+ ens_uni <- unique(ens)
+
+ if(any(sapply(x[ens_uni], attr, which = "timedep"))){
+ ## make grid and compute grid-width if there is ANY time-dependent base-learner
+ sub = subdivisions ## subdivisions of time
+ n = length(time) ## number of observations
+
+ grid <- function(upper, length){
+ ## helper function to compute grid
+ seq(from = 0, to = upper, length = length)
+ }
+
+ ## make grid
+ grid <- lapply(time, grid, length = sub)
+
+ trapezoid_width <- rep(NA, n)
+ for (i in 1:n)
+ trapezoid_width[i] <- grid[[i]][2] # second element, since the first element is 0 and the grid is equidistant for every i
+ } else {
+ grid = NULL
+ trapezoid_width = NULL
+ }
+
+ ## remove the most recently added base-learner
+ ens_uni <- ens_uni[ens_uni != added_bl]
+
+ ## initialization (needed if either no time-dependent or no time-constant base-learner present)
+ predictions_tconst = 0
+ predictions_td = 0
+
+ ## data pre-processing
+ if(length(ens_uni) > 0){
+ ## extract time-dependency
+ timedep <- sapply(x[ens_uni], attr, which = "timedep")
+ if (any(!timedep)){
+ ## predictions for time-constant base-learners
+ predictions_tconst <- sapply(x[ens_uni[!timedep]], predict)
+ predictions_tconst <- apply(predictions_tconst, 1, sum)
+ }
+ if (any(timedep)){
+ ## prediction for time-dependent base-learners
+ predictions_td <- sapply(x[ens_uni[timedep]], predict_td, time_grid = grid)
+ predictions_td <- apply(predictions_td, 1, sum)
+ predictions_td <- matrix(predictions_td, nrow = n, ncol = sub, byrow = TRUE)
+ }
+ }
+
+ ## build design matrix for currently added base-learner
+ if (attr(x[[added_bl]], "timedep")){
+ xd <- unlist(grid)
+ xname <- get("xname", environment(attr(x[[added_bl]], "predict")))
+ zd <- get("z", environment(attr(x[[added_bl]], "predict")))
+ if (!is.null(zd)){
+ zname <- get("zname", environment(attr(x[[added_bl]], "predict")))
+ zd <- rep(zd, each = length(grid[[1]]))
+ newdata <- data.frame(cbind(xd, zd))
+ names(newdata) <- c(xname, zname)
+ } else {
+ newdata <- data.frame(xd)
+ names(newdata) <- c(xname)
+ }
+ desMat <- attr(x[[added_bl]],"designMat")(newdata = newdata)
+ } else {
+ desMat <- attr(x[[added_bl]],"designMat")()
+ }
+
+ exp_offset <- exp(offset)
+ exp_pred_tconst <- exp(predictions_tconst * nu)
+ exp_pred_td <- exp(predictions_td * nu)
+
+ logLH_pen <- function(coefs){
+ log_lik <- sum(delta * (fit + x[[added_bl]] %*% coefs)
+ - integr(x[[added_bl]], coefs, desMat,
+ predictions = list(offset = exp_offset, tconst = exp_pred_tconst, td = exp_pred_td),
+ controls = list(grid = grid, trapezoid_width = trapezoid_width, upper = time, nu = nu)))
+ if (is.null(pen)) pen <- 0 else pen <- 0.5 * (coefs %*% pen %*% coefs)
+ return(log_lik - pen)
+ }
+
+ s_pen <- function(coefs){
+ int <- integr_score(x[[added_bl]], coefs, desMat,
+ predictions = list(offset = exp_offset, tconst = exp_pred_tconst, td = exp_pred_td),
+ controls = list(grid = grid, trapezoid_width = trapezoid_width, upper = time, nu = nu))
+ score_vec <- t(delta) %*% x[[added_bl]] - int
+ if (is.null(pen)) pen <- 0 else pen <- (pen %*% coefs)
+ return(score_vec - as.vector(pen))
+ }
+
+ F_pen <- function(coefs){
+ Fisher_mat <- integr_fisher(x[[added_bl]], coefs, desMat,
+ predictions = list(offset = exp_offset, tconst = exp_pred_tconst, td = exp_pred_td),
+ controls = list(grid = grid, trapezoid_width = trapezoid_width, upper = time, nu = nu, which = "fisher"))
+ if (is.null(pen)) pen <- 0
+ return(list(F = Fisher_mat, F_pen = Fisher_mat + pen))
+ }
+
+ if (estimation){
+ repeat{
+ ## optimize (penalized) log-likelihood
+ result <- optim(par = coefs, fn = logLH_pen, method = "BFGS", control = list(fnscale = -1, maxit = maxit))
+
+ if (result$convergence == 0){
+ if (!is.null(pen)){
+ ## compute (penalized and unpenalized) _observed_ Fisher matrix
+ result$Fisher <- F_pen(result$par)
+ if (is.null(pen)) pen <- 0
+ ## compute estimated degrees of freedom
+ m <- result$Fisher$F %*% solve(result$Fisher$F_pen)
+ result$df <- sum(diag(m)) # trace of m
+ ## set penalty matrix NULL
+ pen <- NULL
+ }
+ ## calculate criterion value (i.e., unpenalized log-likelihood)
+ result$maxll <- logLH_pen(result$par)
+ ## return function for (unpenalized) log-likelihood
+ result$logLH <- logLH_pen
+ return(result)
+ }
+ maxit <- maxit * 10
+ warning(sQuote("optim"), " did not converge. ",
+ sQuote("maxit"), " was increased. For faster computation please increase ", sQuote("boost_control(maxit)"))
+ if (maxit > 5000000) stop(sQuote("optim"), " did not converge.")
+ }
+ } else {
+ pen <- NULL
+ ## return function for (unpenalized) log-likelihood
+ return(list(logLH = logLH_pen))
+ }
+}
+
Property changes on: pkg/R/PMLE.R
___________________________________________________________________
Name: svn:executable
+ *
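
For time-dependent base-learners, PMLE() approximates the integral of the hazard over [0, t_i] with the trapezoid rule on an equidistant grid of `subdivisions` points per observation; since each grid starts at 0 and is equidistant, the trapezoid width is simply the second grid point. A toy sketch with a known integrand (an assumed simplification; the package integrates predicted hazards instead):

    f <- function(u) exp(0.1 * u)            # toy integrand
    time <- c(2, 5)                          # observed times
    sub <- 100                               # subdivisions, as in PMLE()
    grid <- lapply(time, function(t) seq(from = 0, to = t, length = sub))
    width <- sapply(grid, `[`, 2)            # first grid point is 0
    int <- mapply(function(g, h) {
        fx <- f(g)
        h * (sum(fx) - 0.5 * (fx[1] + fx[sub]))   # trapezoid rule
    }, grid, width)
    int    # close to the exact (exp(0.1 * time) - 1) / 0.1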
Deleted: pkg/R/PMLE.r
===================================================================
--- pkg/R/PMLE.r 2008-12-09 15:36:18 UTC (rev 7)
+++ pkg/R/PMLE.r 2009-03-02 17:24:18 UTC (rev 8)
@@ -1,147 +0,0 @@
-###################
-## Penalized MLE ##
-###################
-
-## Function for penalized ML estimation
-PMLE <- function(y, x, offset, fit, ens, nu, maxit, subdivisions = 100, estimation = TRUE){
-
- time <- y[,1]
- delta <- y[,2]
-
- ## currently added base-learner
- added_bl <- ens[length(ens)]
- ## number of coefficients to estimate
- coefs <- rep(0, ncol(x[[added_bl]]))
- ## get penalty for currently added base-learner
- pen <- attr(x[[added_bl]], "pen")
- ## get selected base-learners (uniquely)
- ens_uni <- unique(ens)
-
- if(any(sapply(x[ens_uni], attr, which = "timedep"))){
- ## make grid and compute grid-width if there is ANY time-dependent base-learner
- sub = subdivisions ## subdivisions of time
- n = length(time) ## number of observations
-
- grid <- function(upper, length){
- ## helper function to compute grid
- seq(from = 0, to = upper, length = length)
- }
-
- ## make grid
- grid <- lapply(time, grid, length = sub)
-
- trapezoid_width <- rep(NA, n)
- for (i in 1:n)
- trapezoid_width[i] <- grid[[i]][2] # = second element, as first element == 0 and the grid equidistant for every i
- } else {
- grid = NULL
- trapezoid_width = NULL
- }
-
- ## remove the most recently added base-learner
- ens_uni <- ens_uni[ens_uni != added_bl]
-
- ## initialization (needed if either no time-dependent or no time-constant base-learner present)
- predictions_tconst = 0
- predictions_td = 0
-
- ## data pre-processing
- if(length(ens_uni) > 0){
- ## extract time-dependency
- timedep <- sapply(x[ens_uni], attr, which = "timedep")
- if (any(!timedep)){
- ## predictions for time-constant base-learners
- predictions_tconst <- sapply(x[ens_uni[!timedep]], predict)
- predictions_tconst <- apply(predictions_tconst, 1, sum)
- }
- if (any(timedep)){
- ## prediction for time-dependent base-learners
- predictions_td <- sapply(x[ens_uni[timedep]], predict_td, time_grid = grid)
- predictions_td <- apply(predictions_td, 1, sum)
- predictions_td <- matrix(predictions_td, nrow = n, ncol = sub, byrow = TRUE)
- }
- }
-
- ## build design matrix for currently added base-learner
- if (attr(x[[added_bl]], "timedep")){
- xd <- unlist(grid)
- xname <- get("xname", environment(attr(x[[added_bl]], "predict")))
- zd <- get("z", environment(attr(x[[added_bl]], "predict")))
- if (!is.null(zd)){
- zname <- get("zname", environment(attr(x[[added_bl]], "predict")))
- zd <- rep(zd, each = length(grid[[1]]))
- newdata <- data.frame(cbind(xd, zd))
- names(newdata) <- c(xname, zname)
- } else {
- newdata <- data.frame(xd)
- names(newdata) <- c(xname)
- }
- desMat <- attr(x[[added_bl]],"designMat")(newdata = newdata)
- } else {
- desMat <- attr(x[[added_bl]],"designMat")()
- }
-
- exp_offset <- exp(offset)
- exp_pred_tconst <- exp(predictions_tconst * nu)
- exp_pred_td <- exp(predictions_td * nu)
-
- logLH_pen <- function(coefs){
- log_lik <- sum(delta * (fit + x[[added_bl]] %*% coefs)
- - integr(x[[added_bl]], coefs, desMat,
- predictions = list(offset = exp_offset, tconst = exp_pred_tconst, td = exp_pred_td),
- controls = list(grid = grid, trapezoid_width = trapezoid_width, upper = time, nu = nu)))
- if (is.null(pen)) pen <- 0 else pen <- 0.5 * (coefs %*% pen %*% coefs)
- return(log_lik - pen)
- }
-
- s_pen <- function(coefs){
- int <- integr_score(x[[added_bl]], coefs, desMat,
- predictions = list(offset = exp_offset, tconst = exp_pred_tconst, td = exp_pred_td),
- controls = list(grid = grid, trapezoid_width = trapezoid_width, upper = time, nu = nu))
- score_vec <- t(delta) %*% x[[added_bl]] - int
- if (is.null(pen)) pen <- 0 else pen <- (pen %*% coefs)
- return(score_vec - as.vector(pen))
- }
-
- F_pen <- function(coefs){
- Fisher_mat <- integr_fisher(x[[added_bl]], coefs, desMat,
- predictions = list(offset = exp_offset, tconst = exp_pred_tconst, td = exp_pred_td),
- controls = list(grid = grid, trapezoid_width = trapezoid_width, upper = time, nu = nu, which = "fisher"))
- if (is.null(pen)) pen <- 0
- return(list(F = Fisher_mat, F_pen = Fisher_mat + pen))
- }
-
- if (estimation){
- repeat{
- ## optimize (penalized) log-likelihood
- result <- optim(par = coefs, fn = logLH_pen, method = "BFGS", control = list(fnscale = -1, maxit = maxit))
-
- if (result$convergence == 0){
- if (!is.null(pen)){
- ## compute (penalized and unpenalized) _observed_ Fisher matrix
- result$Fisher <- F_pen(result$par)
- if (is.null(pen)) pen <- 0
- ## compute estimated degrees of freedom
- m <- result$Fisher$F %*% solve(result$Fisher$F_pen)
- result$df <- sum(diag(m)) # trace of m
- ## set penalty matrix NULL
- pen <- NULL
- }
- ## calculate criterion value (i.e., unpenalized log-likelihood)
- result$maxll <- logLH_pen(result$par)
- ## return function for (unpenalized) log-likelihood
- result$logLH <- logLH_pen
- return(result)
- }
- maxit <- maxit * 10
- warning(sQuote("optim"), " did not converge. ",
- sQuote("maxit"), "was increased. For faster computation please increase ", sQuote("boost_control(maxit)"))
- if (maxit > 5000000) stop(sQuote("optim"), " did not converge.")
- }
- } else {
- pen <- NULL
- ## return function for (unpenalized) log-likelihood
- return(list(logLH = logLH_pen))
- }
-}
-
Added: pkg/R/bbs.R
===================================================================
--- pkg/R/bbs.R (rev 0)
+++ pkg/R/bbs.R 2009-03-02 17:24:18 UTC (rev 8)
@@ -0,0 +1,137 @@
+###########################
+## P-Spline Base-Learner ##
+###########################
+
+bbsTime <- function(...){
+ bbs(..., timedep=TRUE)
+}
+
+bbs <- function(x, z = NULL, knots = 20, degree = 3, differences = 2, df = 4,
+ center = FALSE, xname = NULL, zname = NULL, timedep = FALSE) {
+ cc <- complete_cases(x = x, z = z)
+
+ if (is.null(xname)) xname <- deparse(substitute(x))
+ if (is.null(zname)) zname <- deparse(substitute(z))
+
+ if (is.factor(x) || (df <= 2 && !center))
+ return(bols(x = x, z = z, xname = xname, zname = zname))
+
+ if (!differences %in% 1:3)
+ stop(sQuote("differences"), " is not in 1:3")
+ if ((!center) && (df < differences))
+ stop(sQuote("df"), " is less than ", sQuote("differences"))
+ if(center && (degree < (differences-1)))
+ stop(sQuote("degree"), " is less than ", sQuote("differences"), "-1")
+ if (length(unique(x)) < 6)
+ stop(sQuote(xname), " has less than 6 unique values")
+
+ n.kn <- function(n) {
+ ## Number of inner knots
+ if(n < 40) n
+ else 40
+ }
+
+ if (is.null(knots)) {
+ n <- length(x)
+ nk <- n.kn(n)
+ ### ADDED: maximal 20 knots (to reduce computational burden)
+ if (nk > 20){
+ warning("Number of (inner) ", sQuote("knots"), " should not exceed 20 to keep the computational burden low.")
+ nk <- 20
+ }
+ knots <- seq(from = min(x, na.rm = TRUE),
+ to = max(x, na.rm = TRUE), length = nk)
+ knots <- knots[2:(length(knots) - 1)]
+ } else {
+ if (length(unique(diff(knots))) > 1)
+ warning("non-equidistant ", sQuote("knots"),
+ " might be inappropriate")
+ }
+
+ if (length(knots) == 1) {
+ ### ADDED: maximal 20 knots (to reduce computational burden)
+ if (knots > 20){
+ warning("Number of (inner) ", sQuote("knots"), " should not exceed 20 to keep the computational burden low.")
+ knots <- 20
+ }
+ knots <- seq(from = min(x, na.rm = TRUE),
+ to = max(x, na.rm = TRUE), length = knots+2)
+ knots <- knots[2:(length(knots) - 1)]
+ }
+ boundary.knots <- range(x, na.rm = TRUE)
+
+ newX <- function(x, z = NULL, na.rm = TRUE) {
+ if (na.rm) {
+ x <- x[cc]
+ if (!is.null(z))
+ z <- z[cc]
+ }
+ X <- bs(x, knots = knots, degree = degree, intercept = TRUE,
+ Boundary.knots = boundary.knots)
+ if (!is.null(z))
+ X <- X * z
+ if (center) {
+ K <- diff(diag(ncol(X)), differences = differences)
+ X <- tcrossprod(X, K) %*% solve(tcrossprod(K))
+ }
+ return(X)
+ }
+
+ X <- newX(x, z)
+ Xna <- X
+ if (any(!cc))
+ Xna <- newX(x, z, na.rm = FALSE)
+
+ if (center) {
+ K <- diag(ncol(X))
+ } else {
+ K <- diff(diag(ncol(X)), differences = differences)
+ K <- crossprod(K, K)
+ }
+
+ predictfun <- function(coefs, newdata = NULL) {
+ if (is.null(newdata)) return(Xna %*% coefs)
+ nX <- newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
+ nX %*% coefs
+ }
+
+ designMat <- function(newdata = NULL){
+ if (is.null(newdata)) return(Xna)
+ newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
+ }
+
+ lambda <- NULL
+ pen <- NULL
+
+ df2lambda <- function(y, offset){
+ ## FIXME: what about nu and maxit? ##
+ dummy <- helper_fct(y, X, offset, pen = K)
+ df2l <- function(lambda, df){
+ tmp <- dummy(lambda)$F %*% solve(dummy(lambda)$F_pen)
+ sum(diag(tmp)) - df
+ }
+ upper <- 1000
+ while (TRUE){
+ result <- try(uniroot(f= df2l, interval = c(1,upper), df=df), silent=TRUE)
+ if (!inherits(result, "try-error")) break
+ upper <- upper * 100
+ if (upper == Inf) stop("could not solve ", sQuote("df2lambda()"))
+ }
+ lambda <<- result$root
+ pen <<- lambda * K
+ result$root
+ }
+
+ attr(X, "designMat") <- designMat
+ attr(X, "df") <- df
+ attr(X, "df2lambda") <- df2lambda
+ attr(X, "lambda") <- getlambda <- function() {
+ if(!is.null(lambda)){ return(lambda) } else { stop("Call ", sQuote('attr( ,"df2lambda")'), " for ", sQuote("bbs()"), " base-learner first") }}
+ attr(X, "pen") <- getpen <- function() {
+ if(!is.null(pen)){ return(pen) } else { stop("Call ", sQuote('attr( ,"df2lambda")'), " for ", sQuote("bbs()"), " base-learner first") }}
+ attr(X, "timedep") <- timedep
+ attr(X, "coefs") <- rep(0, ncol(X))
+ attr(X, "predict") <- predictfun
+ class(X) <- c("baselearner", "bbs")
+ return(X)
+}
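
The df2lambda() change mentioned in the log replaces the old fixed search interval (which depended on mboost's df2lambda) with an upper boundary that starts at 1000 and is widened by a factor of 100 until uniroot() succeeds. A self-contained sketch of this strategy, using a ridge-type effective degrees of freedom, tr(X (X'X + lambda K)^{-1} X'), in place of the package's Fisher-matrix version (helper_fct() is internal):

    set.seed(1)
    X <- cbind(1, rnorm(50), rnorm(50))
    K <- diag(ncol(X))
    df_fun <- function(lambda)
        sum(diag(X %*% solve(crossprod(X) + lambda * K, t(X))))
    df2l <- function(lambda, df) df_fun(lambda) - df
    upper <- 1000
    repeat {
        result <- try(uniroot(df2l, interval = c(1, upper), df = 2),
                      silent = TRUE)
        if (!inherits(result, "try-error")) break
        upper <- upper * 100             # widen the interval, as in the new code
        if (upper == Inf) stop("could not solve for lambda")
    }
    result$root    # lambda giving roughly 2 effective degrees of freedom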
Deleted: pkg/R/bbs.r
===================================================================
--- pkg/R/bbs.r 2008-12-09 15:36:18 UTC (rev 7)
+++ pkg/R/bbs.r 2009-03-02 17:24:18 UTC (rev 8)
@@ -1,131 +0,0 @@
-###########################
-## P-Spline Base-Learner ##
-###########################
-
-bbsTime <- function(...){
- bbs(..., timedep=TRUE)
-}
-
-bbs <- function(x, z = NULL, knots = 20, degree = 3, differences = 2, df = 4,
- center = FALSE, xname = NULL, zname = NULL, timedep = FALSE) {
- cc <- mboost:::complete_cases(x = x, z = z)
-
- if (is.null(xname)) xname <- deparse(substitute(x))
- if (is.null(zname)) zname <- deparse(substitute(z))
-
- if (is.factor(x) || (df <= 2 && !center))
- return(bols(x = x, z = z, xname = xname, zname = zname))
-
- if (!differences %in% 1:3)
- stop(sQuote("differences"), " are not in 1:3")
- if ((!center) && (df < differences))
- stop(sQuote("df"), " is less than ", sQuote("differences"))
- if(center && (degree < (differences-1)))
- stop(sQuote("degree"), " is less than ", sQuote("differences"), "-1")
- if (length(unique(x)) < 6)
- stop(sQuote(xname), " has less than 6 unique values")
-
- n.kn <- function(n) {
- ## Number of inner knots
- if(n < 40) n
- else 40
- }
-
- if (is.null(knots)) {
- n <- length(x)
- nk <- n.kn(n)
- ### ADDED: maximal 20 knots (to reduce computational burden)
- if (nk > 20){
- warning("Number of (inner) ", sQuote("knots"), " should not exceed 20 to keep the computational burden low.")
- nk <- 20
- }
- knots <- seq(from = min(x, na.rm = TRUE),
- to = max(x, na.rm = TRUE), length = nk)
- knots <- knots[2:(length(knots) - 1)]
- } else {
- if (length(unique(diff(knots))) > 1)
- warning("non-equidistant ", sQuote("knots"),
- " might be inappropriate")
- }
-
- if (length(knots) == 1) {
- ### ADDED: maximal 20 knots (to reduce computational burden)
- if (knots > 20){
- warning("Number of (inner) ", sQuote("knots"), " should not exceed 20 to keep the computational burden low.")
- knots <- 20
- }
- knots <- seq(from = min(x, na.rm = TRUE),
- to = max(x, na.rm = TRUE), length = knots+2)
- knots <- knots[2:(length(knots) - 1)]
- }
- boundary.knots <- range(x, na.rm = TRUE)
-
- newX <- function(x, z = NULL, na.rm = TRUE) {
- if (na.rm) {
- x <- x[cc]
- if (!is.null(z))
- z <- z[cc]
- }
- X <- bs(x, knots = knots, degree = degree, intercept = TRUE,
- Boundary.knots = boundary.knots)
- if (!is.null(z))
- X <- X * z
- if (center) {
- K <- diff(diag(ncol(X)), differences = differences)
- X <- tcrossprod(X, K) %*% solve(tcrossprod(K))
- }
- return(X)
- }
-
- X <- newX(x, z)
- Xna <- X
- if (any(!cc))
- Xna <- newX(x, z, na.rm = FALSE)
-
- if (center) {
- K <- diag(ncol(X))
- } else {
- K <- diff(diag(ncol(X)), differences = differences)
- K <- crossprod(K, K)
- }
-
- predictfun <- function(coefs, newdata = NULL) {
- if (is.null(newdata)) return(Xna %*% coefs)
- nX <- newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
- nX %*% coefs
- }
-
- designMat <- function(newdata = NULL){
- if (is.null(newdata)) return(Xna)
- newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
- }
-
- lambda <- mboost:::df2lambda(X, df = df, dmat = K, weights =rep(1,nrow(X)))
-
- df2lambda <- function(y, offset){
- ## FIXME: what about nu and maxit? ##
- ## FIXME: better function name
- dummy <- helper_fct(y, X, offset, pen = K)
- df2l <- function(lambda, df){
- tmp <- dummy(lambda)$F %*% solve(dummy(lambda)$F_pen)
- sum(diag(tmp)) - df
- }
- ## FIXME: lambda will no longer exist later on - what should the upper interval boundary be? ##
- result <- uniroot(f= df2l, interval = c(0,lambda + 100), df=df)
- result$root
- }
-
- penalty <- function(lambda){
- lambda * K
- }
-
- attr(X, "designMat") <- designMat
- attr(X, "df") <- df
- attr(X, "lambda") <- df2lambda
- attr(X, "pen") <- penalty
- attr(X, "timedep") <- timedep
- attr(X, "coefs") <- rep(0, ncol(X))
- attr(X, "predict") <- predictfun
- class(X) <- c("baselearner", "bbs")
- return(X)
-}
Added: pkg/R/bols.R
===================================================================
--- pkg/R/bols.R (rev 0)
+++ pkg/R/bols.R 2009-03-02 17:24:18 UTC (rev 8)
@@ -0,0 +1,55 @@
+#########################
+## Linear Base-Learner ##
+#########################
+
+bolsTime <- function(...){
+ bols(..., timedep=TRUE)
+}
+
+bols <- function(x, z = NULL, xname = NULL, zname = NULL, timedep=FALSE) {
+
+ if (is.null(xname)) xname = deparse(substitute(x))
+ if (is.null(zname)) zname = deparse(substitute(z))
+
+ cc <- complete_cases(x = x, z = z)
+
+ newX <- function(x, z = NULL, na.rm = TRUE) {
+ if (na.rm) {
+ x <- x[cc]
+ if (!is.null(z))
+ z <- z[cc]
+ }
+ X <- model.matrix(~ x)
+ if (any(!cc) & !na.rm) {
+ Xtmp <- matrix(NA, ncol = ncol(X), nrow = length(cc))
+ Xtmp[cc,] <- X
+ X <- Xtmp
+ }
+ if (!is.null(z)) X <- X * z
+ X
+ }
+ X <- newX(x, z)
+ Xna <- X
+ if (any(!cc))
+ Xna <- newX(x, z, na.rm = FALSE)
+
+ predictfun <- function(coefs, newdata = NULL) {
+ if (is.null(newdata)) return(Xna %*% coefs)
+ nX <- newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
+ nX %*% coefs
+ }
+
+ designMat <- function(newdata = NULL){
+ if (is.null(newdata)) return(Xna)
+ newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
+ }
+
+ attr(X, "designMat") <- designMat
+ attr(X, "timedep") <- timedep
+ attr(X, "coefs") <- rep(0, ncol(X))
+ attr(X, "predict") <- predictfun
+ class(X) <- c("baselearner", "bols")
+ return(X)
+}
+
+
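
For reference, a short usage sketch of the linear base-learner (hypothetical data; assumes the package is loaded, since bols() now calls an unqualified complete_cases() instead of mboost's internal one):

    x <- rnorm(10)
    z <- rbinom(10, 1, 0.5)
    X <- bols(x, z)                          # model.matrix(~ x), multiplied by z
    attr(X, "timedep")                       # FALSE; bolsTime() sets it to TRUE
    attr(X, "predict")(coefs = c(0.5, 2))    # fitted values z * (0.5 + 2 * x)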
Deleted: pkg/R/bols.r
===================================================================
--- pkg/R/bols.r 2008-12-09 15:36:18 UTC (rev 7)
+++ pkg/R/bols.r 2009-03-02 17:24:18 UTC (rev 8)
@@ -1,55 +0,0 @@
-#########################
-## Linear Base-Learner ##
-#########################
-
-bolsTime <- function(...){
- bols(..., timedep=TRUE)
-}
-
-bols <- function(x, z = NULL, xname = NULL, zname = NULL, timedep=FALSE) {
-
- if (is.null(xname)) xname = deparse(substitute(x))
- if (is.null(zname)) zname = deparse(substitute(z))
-
- cc <- mboost:::complete_cases(x = x, z = z)
-
- newX <- function(x, z = NULL, na.rm = TRUE) {
- if (na.rm) {
- x <- x[cc]
- if (!is.null(z))
- z <- z[cc]
- }
- X <- model.matrix(~ x)
- if (any(!cc) & !na.rm) {
- Xtmp <- matrix(NA, ncol = ncol(X), nrow = length(cc))
- Xtmp[cc,] <- X
- X <- Xtmp
- }
- if (!is.null(z)) X <- X * z
- X
- }
- X <- newX(x, z)
- Xna <- X
- if (any(!cc))
- Xna <- newX(x, z, na.rm = FALSE)
-
- predictfun <- function(coefs, newdata = NULL) {
- if (is.null(newdata)) return(Xna %*% coefs)
- nX <- newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
- nX %*% coefs
- }
-
- designMat <- function(newdata = NULL){
- if (is.null(newdata)) return(Xna)
- newX(x = newdata[[xname]], z = newdata[[zname]], na.rm = FALSE)
- }
-
- attr(X, "designMat") <- designMat
- attr(X, "timedep") <- timedep
- attr(X, "coefs") <- rep(0, ncol(X))
- attr(X, "predict") <- predictfun
- class(X) <- c("baselearner", "bols")
- return(X)
-}
-
-
Added: pkg/R/cfboost.R
===================================================================
--- pkg/R/cfboost.R (rev 0)
+++ pkg/R/cfboost.R 2009-03-02 17:24:18 UTC (rev 8)
@@ -0,0 +1,186 @@
+########################
+## Boosting Algorithm ##
+########################
+
+### generic method for likelihood-based boosting with component-wise P-splines
+### for fitting Cox-type additive models (flexible Cox boosting)
+cfboost <- function(x, ...)
+ UseMethod("cfboost")
+
+### formula interface
+cfboost.formula <- function(formula, data = list(), weights = NULL, na.action = na.omit, control = boost_control(), ...) {
+ ## construct design matrix etc.
+ object <- boost_dpp(formula, data, weights, na.action)
+ ## fit the ensemble
+ object$input <- object$menv@get("input")
+ if (!is.null(weights))
+ object$oob$input <- object$oob$menv@get("input")
+ RET <- cfboost_fit(object, control = control, data = data, weights = weights, ...)
+ RET$call <- match.call()
+ return(RET)
+}
+
+### fitting-function
+cfboost_fit <- function(object, control = boost_control(), data, weights = NULL, ...) {
+
+ ## data and base-learner
+ x <- object$input
+ class(x) <- "list"
+
+ oob <- list(x = object$oob$input , y = object$oob$y)
+ if (!is.null(oob$x)) class(oob$x) <- "list"
+
+ y <- object$y
+ if (!inherits(y, "Surv")) stop("response is not an object of class ", sQuote("Surv"))
+
+ ## hyper parameters
+ mstop <- control$mstop
+ risk <- control$risk
+ nu <- control$nu
+ trace <- control$trace
+ tracestep <- options("width")$width / 2
+ maxit <- control$maxit # maximum iterations in optim (see PMLE())
+ which.offset <- control$which.offset
+ hardStop <- control$hardStop # option: "continue boosting if minimum not reached" or "stop"
+
+ ## check if (enough) oob-data is present if risk="oobag"
+ if (risk == "oobag"){
+ checksum <- sum(weights==0)
+ if(checksum == 0)
+ stop("All observations are used for estimation.\nSpecify some weights equal to zero for ", sQuote("risk = oobag"))
+ if (checksum < length(weights)/10)
+ warning("Less than 1/10 of the data used for out-of-bag risk.\n", sQuote("object$risk"), " might be misleading.")
+ }
+
+ ## the ensemble
+ ens <- rep(NA, mstop)
+ ensss <- vector(mode = "list", length = mstop)
+
+ ## vector of empirical risks for all boosting iterations
+ mrisk <- numeric(mstop)
+ mrisk[1:mstop] <- NA
+
+ maxll <- numeric(length(x))
+ coefs <- logLH <- vector(mode = "list", length = length(x))
+
+ fit <- fit_oob <- offset <- getoffset(y, which.offset)
+ if (trace)
+ cat("Offset: ", offset, "\n")
+
+ mstart <- 1
+ hSi <- 1 # number of iterations in the repeat loop
+ df_est <- matrix(NA, nrow = mstop, ncol = length(x)) # matrix of estimated degrees of freedom
+
+ ## compute df2lambda which depends on the offset and on y
+ for (i in 1:length(x)){
+ if (!is.null( attr(x[[i]], "df"))){
+ attr(x[[i]], "df2lambda")(y, offset)
+ }
+ }
+
+ ##################################
+ #### start boosting iteration ####
+ ##################################
+ repeat{
+ for (m in mstart:mstop) {
+ if (trace)
+ cat("Step ", m, "\n")
+
+ ## fit MLE component-wise
+ for (i in 1:length(x)) {
+ maxll[i] <- NA
+ dummy_ens <- ens[1:m] # get the first m-1 selected base-learners
+ dummy_ens[m] <- i # and set the m-th base-learner temporarily to i
+ ## try to compute the (component-wise) penalized MLE
+ dummy <- try(PMLE(y, x, offset, fit, dummy_ens, nu, maxit))
+ if (inherits(dummy, "try-error")) next
+ coefs[[i]] <- dummy$par
+ maxll[i] <- dummy$maxll
+ logLH[[i]] <- dummy$logLH
+ if (!is.null(dummy$df)) df_est[m,i] <- dummy$df
+ }
+ if (all(is.na(maxll)))
+ stop("could not fit base learner in boosting iteration ", m)
+
+ ## select base-learner
+ xselect <- which.max(maxll)
+
+ ## output for debugging
+ if (trace)
+ cat("\tSelected: ", xselect, " with log LH ", maxll[xselect],"\n", sep = "")
+ ## update step
+ fit <- fit + nu * x[[xselect]] %*% coefs[[xselect]]
+
+ ## save the model, i.e., the selected coefficient and base-learner
+ ens[m] <- xselect
+ ensss[[m]] <- coefs[[xselect]]
+
+ ## save updated parameters in x[[xselect]]
+ x[[xselect]] <- updatecoefs(x[[xselect]], coefs[[xselect]])
+
+ if (risk == "inbag"){
+ mrisk[m] <- - logLH[[xselect]](coefs[[xselect]] * nu)
+ if (trace)
+ cat("\trisk (inbag) = ", mrisk[m], "\n")
+ }
+ if (risk == "oobag"){
+ ## call the PMLE function to build the matrices and get the logLH function (without estimating coefficients)
+ dummy <- PMLE(oob$y, oob$x, offset, fit_oob, ens[1:m], nu, maxit, estimation = FALSE)
+ mrisk[m] <- - dummy$logLH(coefs[[xselect]] * nu)
+ if (trace)
+ cat("\trisk (oobag) = ", mrisk[m], "\n")
+
+ ## update step for oob data
+ oob$x[[xselect]] <- updatecoefs(oob$x[[xselect]], coefs[[xselect]])
+ fit_oob <- fit_oob + nu * oob$x[[xselect]] %*% coefs[[xselect]]
+ }
+ }
+ if (hardStop | risk != "oobag" | which.min(mrisk) < (mstop - mstop * 0.2/hSi) | hSi == 5) break
+
+ ## else if minimum is at the end of the boosting algorithm,
+ ## don't stop but proceed with boosting: Therefore, increase mstop
+ ## and the length of the arrays and vectors for storage of results
+ warning("risk in test sample seems not to be minimal. ", sQuote("mstop"), " increased.")
+
+ hSi <- hSi + 1
+ increase <- 100
+ ens <- c(ens, rep(NA, increase))
+ dummy <- vector("list", length = (mstop + increase))
+ dummy[1:mstop] <- ensss
+ ensss <- dummy
+ mrisk <- c(mrisk, rep(NA, increase))
+ df_est <- rbind(df_est, matrix(NA, nrow = increase, ncol = ncol(df_est)))# matrix of estimated degrees of freedom
+ mstart <- mstop + 1
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/coxflexboost -r 8
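
A hedged usage sketch of the out-of-bag risk option checked in cfboost_fit(): observations with weight zero form the test sample (the data set, formula, and variable names here are hypothetical, and the exact boost_control() arguments are assumed):

    w <- rbinom(nrow(mydata), 1, 0.75)       # ~25% of observations get weight 0
    mod <- cfboost(Surv(time, status) ~ bols(x) + bbsTime(time),
                   data = mydata, weights = w,
                   control = boost_control(risk = "oobag"))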