[Ipmpack-users] annual survival from survival data over longer time spans
Eelke Jongejans
e.jongejans at science.ru.nl
Mon Apr 1 09:46:53 CEST 2013
Dear Ivan,
You asked whether IPMpack includes functions for estimating annual
survival from survival data over longer time spans. At the moment it
isn't, but Jess has written some code that you could use (see below). We
might include it in later versions of IPMpack.
best wishes,
Eelke
# Functions for variable time intervals
## Function to fit survival with VARYING TIME INCREMENTS using optim
## and then write parameters - over onto a classic glm framework for use
with IPMpack
#
# !! MAY BE VERY SENSITIVE TO STARTING VALUES...TEST SEVERAL !!
#
# parameters - dataf - a dataframe including columns
# size, surv, exatDate, and exactDatel, the latter two
in days
# - nyrs - desired number of years in a time-step (here
choose 4)
# - par - starting values for the parameters
# (must be of length one longer than covNames,
# given intercept)
# - covNames - names of chosen covariates
# - method - method for optim ("Nelder-Mead", "SANN", etc.)
#
# Returns - output of optim, see R help
#
fitSurvVaryingTimeIntervals <- function(dataf,
nyrs=4,
par=c(0.01,0.01,0.01,0.01),
covNames=c("size","size2","size3"),
method="Nelder-Mead") {
#warnings
if ((length(par)-1)!=length(covNames)) {
print("starting values does not match length of covariate names")
}
#run optim
tmp <- optim(par,likeSurvVaryingTimeIntervals,
covNames=covNames,nyrs=nyrs,dataf=dataf,
method=method)
#try re-running if hasn't converged
if (tmp$convergence!=0) {
tmp <- optim(tmp$par,likeSurvVaryingTimeIntervals,
covNames=covNames,nyrs=nyrs,dataf=dataf,
method=method)
if (tmp$convergence!=0) print("not converged")
}
#write dummy template and then over-write with parameters estimated
here
if (length(covNames)>0) formula <-
as.character(paste("surv~",paste(covNames,collapse="+"),collapse=""))
else formula <- as.character("surv~1")
fit <- glm(formula,data=dataf,family=binomial)
#print(fit)
fit$coefficients <- tmp$par
sv1 <- new("survObj")
sv1 at fit <- fit
tmp$sv1 <- sv1
return(tmp)
}
#Function to calculate the likelihood for variable time-intervals for
survival
#
# parameters
# - par - starting values for the parameters
# (must be of length one longer than covNames,
# given intercept)
# - covNames - names of chosen covariates
# - nyrs - desired number of years in a time-step
# - dataf - a dataframe including columns in covNames, as well as
# size, surv, exactDate, and exactDatel, the latter two
in days
#
# Returns - numeric equal to minus the log likelihood for use with optim
#
likeSurvVaryingTimeIntervals <- function(par=c(0.01,0.01,0.01,0.01),
covNames=c("size","size2","size3"),
nyrs=4,
dataf) {
#define time change
tdiff <- (dataf$exactDatel-dataf$exactDate)/(nyrs*365)
#build predictor
if (length(covNames)>0) {
pred <- t(t(dataf[,covNames])*par[2:length(par)])
}else { pred <- matrix(0,nrow(dataf),1)}
pred <- par[1] + rowSums(pred)
pred[pred>16] <- 16 #don't want bigger than exp(16) - returns 1
pred <- (exp(pred)/(1+exp(pred)))^(tdiff)
#estimate likelihood
like <- dbinom(dataf$surv,1,pred, log=TRUE)
#like[like==-Inf] <- -exp(500)
#like[like==Inf] <- exp(500)
sumRows <- rep(0,nrow(dataf))
if (length(covNames)>1) sumRows <- rowSums(dataf[,covNames])
if (length(covNames)==1) sumRows <- dataf[,covNames]
return(-sum(like[!is.na(sumRows) & !is.na(dataf$surv)],na.rm=TRUE))
}
## Function to fit survival models and compare liklelihoods
# works with maximizing likelihoods. It will help the function if the models
# are ordered in increasing nestedness, since R will use parameters
estimated for
# first as starting values for next. QUITE SENSITIVE TO STARTING VALUES!
#
# ANALOGUE OF FUNCTION survModelComp in main IPMpack - see that help
file for arguments
survModelCompVaryInterval <- function(dataf,
expVars = c("1", "size", "size +
size2"),
testType = "AIC",
makePlot = FALSE,
mainTitle = "",
nyrs=4, method="Nelder-Mead",...) {
require(stringr)
varN <- length(expVars)
treatN <- varN
summaryTable <- data.frame()
svObj <- vector("list", length = treatN)
parStart <- 0.01
newd <- dataf
i <- 1
for(v in 1:varN) {
#print(expVars[v])
if (expVars[v]=="1") {
covNames <- c() } else {
covNames <-
str_trim(strsplit(as.character(expVars[v]),"[\\+]")[[1]])
}
parStart <-
c(parStart,rep(1e-6,length(covNames)-length(parStart)+1))
#print(parStart)
#make sure you have all the right covariates
if(length(grep("size2",expVars[v]))==1)
dataf$size2=(dataf$size)^2
if(length(grep("size3",expVars[v]))==1)
dataf$size3=(dataf$size)^3
if(length(grep("logsize",expVars[v]))==1)
dataf$logsize=log(dataf$size)
if(length(grep("logsize2",expVars[v]))==1)
dataf$logsize=(log(dataf$size))^2
#print("starting value")
#print(likeSurvVaryingTimeIntervals(par=parStart,
# covNames=covNames,
# nyrs=4,
# dataf=dataf))
tmp <-
fitSurvVaryingTimeIntervals(dataf=dataf,nyrs=nyrs,par=parStart,covNames=covNames,
method=method)
parStart <- tmp$par
svObj[[i]] <- tmp$sv1
if (testType=="AIC") val <-
(2*(length(covNames)+1))-2*(-tmp$value)
if (testType=="logLik") val <- -tmp$value
if (testType!="AIC" & testType!="logLik") { print("Unknown
test; chose AIC or logLik"); val <- NA}
summaryTable <- rbind(summaryTable, cbind(expVars[v], val))
i <- i + 1
}
summaryTable <- as.data.frame(summaryTable)
names(summaryTable) <- c("Exp. Vars", testType)
outputList <- list(summaryTable = summaryTable, survObjects = svObj)
# redefine formulas for the plot...
Formulas <- list()
for (k in 1:length(expVars)) Formulas[[k]] <-
as.formula(paste("incr","~",expVars[k], sep=""))
#print(Formulas)
# PLOT SECTION #
if(makePlot == TRUE) {
## this is the surv picture
plotSurvModelComp(svObj = svObj,
summaryTable = summaryTable,
dataf = dataf,
expVars = Formulas,#expVars,
testType = testType, plotLegend = TRUE,
mainTitle = mainTitle, ncuts=20,...)
}
return(outputList)
}
More information about the IPMpack-users
mailing list