[Ipmpack-users] annual survival from survival data over longer time spans

Mon Apr 1 09:46:53 CEST 2013

Dear Ivan,

You asked whether IPMpack includes functions for estimating annual 
survival from survival data over longer time spans. At the moment it 
isn't, but Jess has written some code that you could use (see below). We 
might include it in later versions of IPMpack.

best wishes,
Eelke

# Functions for variable time intervals

## Function to fit survival with VARYING TIME INCREMENTS using optim
## and then write parameters - over onto a classic glm framework for use 
with IPMpack
#
# !! MAY BE VERY SENSITIVE TO STARTING VALUES...TEST SEVERAL !!
#
# parameters - dataf - a dataframe including columns
#                  size, surv, exatDate, and exactDatel, the latter two 
in days
#            - nyrs - desired number of years in a time-step (here 
choose 4)
#            - par - starting values for the parameters
#                     (must be of length one longer than covNames,
#                       given intercept)
#            - covNames - names of chosen covariates
#            - method - method for optim ("Nelder-Mead", "SANN", etc.)
#
# Returns  - output of optim, see R help
#
fitSurvVaryingTimeIntervals <- function(dataf,
                                         nyrs=4,
                                         par=c(0.01,0.01,0.01,0.01),
covNames=c("size","size2","size3"),
                                         method="Nelder-Mead") {

     #warnings
     if ((length(par)-1)!=length(covNames)) {
         print("starting values does not match length of covariate names")
     }

     #run optim
     tmp <- optim(par,likeSurvVaryingTimeIntervals,
                  covNames=covNames,nyrs=nyrs,dataf=dataf,
                  method=method)

     #try re-running if hasn't converged
     if (tmp$convergence!=0) {
         tmp <- optim(tmp$par,likeSurvVaryingTimeIntervals,
                      covNames=covNames,nyrs=nyrs,dataf=dataf,
                      method=method)
         if (tmp$convergence!=0)  print("not converged")
     }

     #write dummy template and then over-write with parameters estimated 
here
     if (length(covNames)>0) formula <- 
as.character(paste("surv~",paste(covNames,collapse="+"),collapse="")) 
else formula <- as.character("surv~1")
     fit <- glm(formula,data=dataf,family=binomial)
     #print(fit)
     fit$coefficients <- tmp$par

     sv1 <- new("survObj")
     sv1 at fit <- fit

     tmp$sv1 <- sv1

     return(tmp)

}

#Function to calculate the likelihood for variable time-intervals for 
survival
#
# parameters
#            - par - starting values for the parameters
#                     (must be of length one longer than covNames,
#                       given intercept)
#            - covNames - names of chosen covariates
#            - nyrs - desired number of years in a time-step
#            - dataf - a dataframe including columns in covNames, as well as
#                  size, surv, exactDate, and exactDatel, the latter two 
in days
#
# Returns  - numeric equal to minus the log likelihood for use with optim
#
likeSurvVaryingTimeIntervals <- function(par=c(0.01,0.01,0.01,0.01),
covNames=c("size","size2","size3"),
                                          nyrs=4,
                                          dataf) {

     #define time change
     tdiff <- (dataf$exactDatel-dataf$exactDate)/(nyrs*365)

     #build predictor
     if (length(covNames)>0) {
         pred <-  t(t(dataf[,covNames])*par[2:length(par)])
     }else { pred <- matrix(0,nrow(dataf),1)}
     pred <- par[1] + rowSums(pred)
     pred[pred>16] <- 16  #don't want bigger than exp(16) - returns 1

     pred <- (exp(pred)/(1+exp(pred)))^(tdiff)

     #estimate likelihood
     like <- dbinom(dataf$surv,1,pred, log=TRUE)
     #like[like==-Inf] <- -exp(500)
     #like[like==Inf] <- exp(500)

     sumRows <- rep(0,nrow(dataf))
     if (length(covNames)>1) sumRows <- rowSums(dataf[,covNames])
     if (length(covNames)==1) sumRows <- dataf[,covNames]

     return(-sum(like[!is.na(sumRows) & !is.na(dataf$surv)],na.rm=TRUE))

}

## Function to fit survival models and compare liklelihoods
# works with maximizing likelihoods. It will help the function if the models
# are ordered in increasing nestedness, since R will use parameters 
estimated for
# first as starting values for next. QUITE SENSITIVE TO STARTING VALUES!
#
# ANALOGUE OF FUNCTION survModelComp in main IPMpack - see that help 
file for arguments

survModelCompVaryInterval <- function(dataf,
                                       expVars = c("1", "size", "size + 
size2"),
                                       testType = "AIC",
                                       makePlot = FALSE,
                                       mainTitle = "",
                                       nyrs=4, method="Nelder-Mead",...) {
     require(stringr)

     varN <- length(expVars)
     treatN <- varN
     summaryTable <- data.frame()
     svObj <- vector("list", length = treatN)

         parStart <- 0.01
         newd <- dataf

     i <- 1
     for(v in 1:varN) {
             #print(expVars[v])

             if (expVars[v]=="1") {
                 covNames <- c() } else {
                     covNames <- 
str_trim(strsplit(as.character(expVars[v]),"[\\+]")[[1]])
                 }
             parStart <- 
c(parStart,rep(1e-6,length(covNames)-length(parStart)+1))
             #print(parStart)

             #make sure you have all the right covariates
             if(length(grep("size2",expVars[v]))==1)
             dataf$size2=(dataf$size)^2
             if(length(grep("size3",expVars[v]))==1)
             dataf$size3=(dataf$size)^3
             if(length(grep("logsize",expVars[v]))==1)
             dataf$logsize=log(dataf$size)
             if(length(grep("logsize2",expVars[v]))==1)
             dataf$logsize=(log(dataf$size))^2

             #print("starting value")
             #print(likeSurvVaryingTimeIntervals(par=parStart,
             #                             covNames=covNames,
             #                             nyrs=4,
             #                             dataf=dataf))

             tmp <- 
fitSurvVaryingTimeIntervals(dataf=dataf,nyrs=nyrs,par=parStart,covNames=covNames, 
method=method)

             parStart <- tmp$par

             svObj[[i]] <- tmp$sv1

             if (testType=="AIC") val <- 
(2*(length(covNames)+1))-2*(-tmp$value)
             if (testType=="logLik") val <- -tmp$value
             if (testType!="AIC" & testType!="logLik") { print("Unknown 
test; chose AIC or logLik"); val <- NA}

             summaryTable <- rbind(summaryTable, cbind(expVars[v], val))
             i <- i + 1
     }
     summaryTable <- as.data.frame(summaryTable)
     names(summaryTable) <- c("Exp. Vars", testType)
     outputList <- list(summaryTable = summaryTable, survObjects = svObj)

     # redefine formulas for the plot...
     Formulas <- list()
     for (k in 1:length(expVars)) Formulas[[k]] <- 
as.formula(paste("incr","~",expVars[k], sep=""))
     #print(Formulas)

     # PLOT SECTION #
     if(makePlot == TRUE) {
         ## this is the surv picture
         plotSurvModelComp(svObj = svObj,
                           summaryTable = summaryTable,
                           dataf = dataf,
                           expVars = Formulas,#expVars,
                           testType = testType, plotLegend = TRUE, 
mainTitle = mainTitle, ncuts=20,...)
     }
     return(outputList)

}