[Lme4-commits] r1469 - / testing testing/lme4Eigen

Thu Dec 1 15:51:14 CET 2011

Author: bbolker
Date: 2011-12-01 15:51:14 +0100 (Thu, 01 Dec 2011)
New Revision: 1469

Added:
   testing/
   testing/lme4Eigen/
   testing/lme4Eigen/Makefile
   testing/lme4Eigen/README
   testing/lme4Eigen/anal1.R
   testing/lme4Eigen/bobyqa1.R
   testing/lme4Eigen/bobyqa1.RData
   testing/lme4Eigen/bobyqa1_plot.pdf
   testing/lme4Eigen/bobyqa1_prof.RData
   testing/lme4Eigen/bobyqa_problem1.R
   testing/lme4Eigen/bobyqa_prof.R
   testing/lme4Eigen/coalition2.R
   testing/lme4Eigen/testcbpp.R
Log:
   first round of testing code for lme4Eigen/bobyqa



Added: testing/lme4Eigen/Makefile
===================================================================

--- testing/lme4Eigen/Makefile	                        (rev 0)
+++ testing/lme4Eigen/Makefile	2011-12-01 14:51:14 UTC (rev 1469)
@@ -0,0 +1,2 @@
+bobyqa1.RData: bobyqa1.R
+	R CMD BATCH --vanilla bobyqa1.R
\ No newline at end of file

Added: testing/lme4Eigen/README
===================================================================
--- testing/lme4Eigen/README	                        (rev 0)
+++ testing/lme4Eigen/README	2011-12-01 14:51:14 UTC (rev 1469)
@@ -0,0 +1,33 @@
+This directory is intended for additional testing of lme4Eigen
+(perhaps including comparisons with other package versions/packages),
+with the specific goal of documenting the results of different
+optimizers and combinations of tuning parameters on the results
+of GLMMs.
+
+Data structure for test results (subject to change) a list with
+elements
+
+ *  problem: data set (character: 'Contraception', 'cbpp', etc.)
+ *  method: optimization f'n (character: 'bobyqa', 'nlminb', ...)
+ *  optimx: is optimization run through optimx? (logical)
+ *  options: optimization options (list: may be coerced to character)
+ *  time: 
+ *  parameters: numeric vector (varying length: may be coerced to character)
+ *  deviance: numeric
+ *  KKT
+ *  bad: logical
+ *  result: description (intended to be relatively free-form)
+
+Files:
+
+bobyqa1: Contraception data tests with bobyqa, varying rhobeg/rhoend/tolPwrss
+
+bobyqa_problem1: demonstrating some kind of environment modification
+  that screws up a second, identical bobyqa run after a preliminary
+  failed one
+
+coalition2: preliminary tests on a Gamma GLMM on the 'coalition2' data set from Zelig
+
+testcbpp.R: preliminary tests on the CBPP data set
+
+anal1.R: analysis code

Added: testing/lme4Eigen/anal1.R
===================================================================
--- testing/lme4Eigen/anal1.R	                        (rev 0)
+++ testing/lme4Eigen/anal1.R	2011-12-01 14:51:14 UTC (rev 1469)
@@ -0,0 +1,51 @@
+## utility functions for collecting bits of results
+
+Sapply <- function(x,fun,...) {
+  r <- lapply(x,fun,...)
+  r0 <- r[[1]]
+  sapply(r,function(x) if (is.null(x) || is.null(x[[1]]) || is.na(x)) rep(NA,length(r0)) else x)
+}
+
+getf <- function(x,n) {
+  lapply(x,"[[",n)
+}
+
+pfun <- function(x) {
+  paste(names(x),x,sep="=",collapse=",")
+}
+  
+load("bobyqa1.RData")
+
+## assemble data frame from results
+opts <- getf(results,"options")
+opts <- data.frame(rhobeg=Sapply(opts,"[[","rhobeg"),
+                   rhoend=Sapply(opts,"[[","rhoend"),
+                   tolPwrss=Sapply(opts,"[[","tolPwrss"))
+deviance <- Sapply(results,"[[","deviance")
+KKT <- t(Sapply(results,"[[","KKT"))
+time <- unlist(getf(results,"time"))
+pars <- do.call(rbind,getf(results,"parameters"))
+dd <- data.frame(opts,deviance,KKT,pars,time)
+dd$devOK <- with(dd,!is.na(deviance) &  (deviance-min(deviance))<1e-2)
+dd$OKval <- with(dd,ifelse(devOK & isTRUE(KKT1) & isTRUE(KKT2),
+                           "good",
+                           ifelse(devOK,"OK","bad")))
+
+library(ggplot2)
+
+zmargin <- opts(panel.margin=unit(0,"lines"))
+g0 <- ggplot(dd,aes(x=factor(log10(rhobeg/2)),
+              y=factor(log10(rhoend/2)),colour=OKval))+
+  geom_point(aes(size=time),alpha=0.8)+
+  facet_grid(.~tolPwrss,labeller=label_both)+theme_bw()+zmargin+
+  scale_size_continuous(limits=c(0.01,20),to=c(0.5,6))+
+  labs(x=expression(paste("Start tol ",(log[10](rho/2)))),
+       y=expression(paste("End tol ",(log[10](rho/2)))))
+
+pdf("bobyqa1_plot.pdf",height=3,width=8)
+print(g0)
+dev.off()
+
+with(dd,table(unlist(KKT1),unlist(KKT2),devOK))
+
+

Added: testing/lme4Eigen/bobyqa1.R
===================================================================
--- testing/lme4Eigen/bobyqa1.R	                        (rev 0)
+++ testing/lme4Eigen/bobyqa1.R	2011-12-01 14:51:14 UTC (rev 1469)
@@ -0,0 +1,60 @@
+fn <- "bobyqa1.RData"
+
+library(lme4Eigen)
+library(optimx)
+sessinfo <- sessionInfo()
+
+data(Contraception, package="mlmRev")
+Contraception <- within(Contraception,
+                        ch <- factor(ifelse(livch != 0, "Y", "N")))
+
+form <- use ~ age + I(age^2) + ch + (1|district:urban)
+## extract the objective function for the second stage optimization
+ff <- glmer(form, Contraception, binomial, devFunOnly=2L)
+
+val <- c(get("pp", environment(ff))$theta, get("pp", environment(ff))$beta(0))
+
+d0 <- list(problem="Contraception",method="bobyqa",optimx=TRUE)
+dbad <- c(d0,as.list(c(options=NA,time=NA,parameters=NA,deviance=NA,KKT=NA,bad=NA,result=NA)))
+          
+rhobegvec <- 2*10^(-8:0)
+begendvec <- 10^(-3:-6)
+tolPwrssvec <- 10^seq(-9,-5)
+
+results <- vector("list",
+                  length(rhobegvec)*length(begendvec)*length(tolPwrssvec))
+  ctr <- 0
+  for (i in seq_along(rhobegvec)) {
+    for (j in seq_along(begendvec)) {
+      for (k in seq_along(tolPwrssvec)) {
+        ## reset ff each time ... just in case it gets corrupted
+        ff <- glmer(form, Contraception, binomial, devFunOnly=2L)
+        ctr <- ctr+1
+        cat("*", i,j,k,ctr,"\n")
+        control <- list(rhobeg=rhobegvec[i],rhoend=rhobegvec[i]*begendvec[j])
+        ## tolPwrss is read from ff's environment
+        tolPwrss0 <- get("tolPwrss",envir=environment(ff))
+        assign("tolPwrss",tolPwrssvec[k],envir=environment(ff))
+        t0 <- system.time(cur.opt <- try(optimx(val, ff,
+                                                lower=c(0, rep(-Inf, 4L)),
+                                                control=control,
+                                                method="bobyqa")))
+        assign("tolPwrss",tolPwrss0,envir=environment(ff))
+        if (inherits(cur.opt,"try-error")) {
+          d <- dbad
+          d$result <- attr(cur.opt,"condition")$message
+        } else {
+          d <- d0
+          d <- c(d,list(options=c(control,list(tolPwrss=tolPwrssvec[k])),
+                        time=sum(t0[1:2]), ## user +system
+                        parameters=cur.opt$par$par,           ## ??? why nested
+                        deviance=c(cur.opt$fvalues$fvalues),  ## ??? why nested
+                        KKT=c(cur.opt$KKT1,cur.opt$KKT2),
+                        bad=NA,
+                        result=NA_character_))
+        }
+        results[[ctr]] <- d
+        save("results","sessinfo",file=fn)
+      }
+    }
+  }

Added: testing/lme4Eigen/bobyqa1.RData
===================================================================
(Binary files differ)


Property changes on: testing/lme4Eigen/bobyqa1.RData
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: testing/lme4Eigen/bobyqa1_plot.pdf
===================================================================
(Binary files differ)


Property changes on: testing/lme4Eigen/bobyqa1_plot.pdf
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: testing/lme4Eigen/bobyqa1_prof.RData
===================================================================
(Binary files differ)


Property changes on: testing/lme4Eigen/bobyqa1_prof.RData
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: testing/lme4Eigen/bobyqa_problem1.R
===================================================================
--- testing/lme4Eigen/bobyqa_problem1.R	                        (rev 0)
+++ testing/lme4Eigen/bobyqa_problem1.R	2011-12-01 14:51:14 UTC (rev 1469)
@@ -0,0 +1,32 @@
+## test file: demonstrates that something is getting changed inside the environment of 'ff'
+##  during the course of an unsuccessful bobyqa fit.  When we attempt the same fit
+##  a second time we get an error about an inadmissible starting value.
+
+## How do we (1a) make a 'deep copy' of ff (replicating the environment and breaking
+## the association with the environment) and (1b) check the contents of the environment
+## to see what has changed?  Alternately, can (2) we lock the environment and prevent its
+## contents from being changed (hopefully this would lead to some kind of meaningful
+## error, if the change in the environment is necessary to the operation, OR would
+## allow us to complete the process without modifying/mangling the environment ...)
+## Option (3) [ouch] is simply to delve down through levels of the code to see directly
+## what's happening ...
+
+library(lme4Eigen)
+
+data(Contraception, package="mlmRev")
+Contraception <- within(Contraception, ch <- factor(ifelse(livch != 0, "Y", "N")))
+
+## extract the objective function for the second stage optimization
+(ff <- glmer(use ~ age + I(age^2) + ch + (1|district:urban), Contraception, binomial, devFunOnly=2L))
+val <- c(get("pp", environment(ff))$theta, get("pp", environment(ff))$beta(0))
+
+## lockEnvironment(environment(ff),bindings=TRUE)  ## attempt to lock environment (doesn't seem to matter)
+
+require(optimx)
+## can't run bobyqa twice in a row -- something goes haywire
+
+lo <- c(0, rep(-Inf, 4L)
+opt1 <-optimx(val, ff, lower=lo, method="bobyqa", control=list(trace=2L, kkt=FALSE))
+
+## second try
+opt1 <-optimx(val, ff, lower=lo, method="bobyqa", control=list(trace=2L, kkt=FALSE))

Added: testing/lme4Eigen/bobyqa_prof.R
===================================================================
--- testing/lme4Eigen/bobyqa_prof.R	                        (rev 0)
+++ testing/lme4Eigen/bobyqa_prof.R	2011-12-01 14:51:14 UTC (rev 1469)
@@ -0,0 +1,22 @@
+prof2 <- function(theta) {
+  pp <- get("pp",environment(ff))
+  ff2 <- function(beta) {
+    ff(c(theta,beta))
+  }
+  optval <- optimx(pp$beta(0),
+                   ff2, method="bobyqa", control=list(rhobeg=2e-4,rhoend=2e-7))
+  optval$fvalues[[1]]
+}
+
+prof2(val[1])
+
+fn <- "bobyqa1_prof.RData"
+vvec <- seq(0,1.2,by=0.025)
+pp <- numeric(length(vvec))
+for (i in seq_along(pp)) {
+  cat(i,vvec[i],"\n")
+  pp[i] <- prof2(vvec[i])
+}
+save("pp","vvec",file=fn)
+
+

Added: testing/lme4Eigen/coalition2.R
===================================================================
--- testing/lme4Eigen/coalition2.R	                        (rev 0)
+++ testing/lme4Eigen/coalition2.R	2011-12-01 14:51:14 UTC (rev 1469)
@@ -0,0 +1,74 @@
+do.plots <- FALSE
+data(coalition2,package="Zelig")
+
+form <- duration ~ invest + fract + polar + numst2 + crisis + (1 | country) 
+
+tmpf <- function(m) {
+  list(fixef=fixef(m),ranef=c(ranef(m)[[1]]),
+       vc=c(unlist(VarCorr(m))),LL=logLik(m))
+}
+
+if (do.plots) {
+pairs(coalition2[,1:7],pch=".",gap=0)
+## ggplot2(coalition2,aes(x=fract,y=duration,
+##                       colour=fract,shape=invest))+
+##  geom_point()+facet_grid(cut_number())
+}
+
+## are there any other packages that will do Gamma?
+require(lme4)
+m_lme4 <- glmer(form, data   = coalition2, family = Gamma(link = log))
+rlist <- list(lme4=tmpf(m_lme4))
+packageVersion('lme4')
+detach("package:lme4")
+
+require(lme4a)
+m_lme4a <- glmer(form, data   = coalition2, family = Gamma(link = log))
+rlist <- c(rlist,list(lme4a=tmpf(m_lme4a)))
+detach("package:lme4a")
+
+require(lme4Eigen)
+m_lme4e <- glmer(form, data   = coalition2, family = Gamma(link = log),
+                 control=list(rhobeg=0.2, rhoend=2e-7),
+                 tolPwrss=1e-8)
+rlist <- c(rlist,list(lme4e=tmpf(m_lme4e)))
+detach("package:lme4Eigen")
+
+library(MASS)
+m_glmmPQL <- glmmPQL(duration ~ invest + 
+                        fract + 
+                        polar + 
+                        numst2 +
+                        crisis,
+                random = ~1 | country,
+             data   = coalition2, 
+             family = Gamma(link = log))
+tt <- tmpf(m_glmmPQL)
+## n.b. variance components don't match up exactly
+tt$vc <- sum(diag(matrix(as.numeric(VarCorr(m_glmmPQL)),nrow=2)))
+rlist <- c(rlist,list(glmmPQL=tt))
+
+## detach("package:MASS")
+
+rbind(sapply(rlist,"[[","fixef"),
+      logLik=sapply(rlist,"[[","LL"),
+      herdvar=sapply(rlist,"[[","vc"))
+
+if (FALSE) {
+  ## not working yet ...
+  library(glmmADMB)
+
+  f3 <- glmmadmb(duration ~ invest + 
+                 fract + 
+               polar + 
+               numst2 +
+               crisis,
+               random = ~1 | country,
+               data   = coalition2, 
+               family = "Gamma", link="log",
+               extra.args="-ams 160000000",
+               save.dir="~/R/misc",
+               verbose=TRUE)
+}
+
+## ./glmmadmb -maxfn 500 -noinit -shess -ams 120000000

Added: testing/lme4Eigen/testcbpp.R
===================================================================
--- testing/lme4Eigen/testcbpp.R	                        (rev 0)
+++ testing/lme4Eigen/testcbpp.R	2011-12-01 14:51:14 UTC (rev 1469)
@@ -0,0 +1,64 @@
+library(lme4)
+cbpp <- cbpp
+form <- cbind(incidence, size - incidence) ~ period + (1 | herd)
+
+## utility for extracting *mer info
+
+tmpf <- function(m) {
+  list(fixef=fixef(m),ranef=c(ranef(m)[[1]]),vc=c(unlist(VarCorr(m))),LL=logLik(m))
+}
+
+## 1. lme4
+m_lme4 <- glmer(form, family = binomial, data = cbpp)
+rlist <- list(lme4=tmpf(m_lme4))
+detach("package:lme4")
+
+## 2. lme4a
+library(lme4a)
+m_lme4a <- glmer(form, family = binomial, data = cbpp)
+rlist <- c(rlist,list(lme4a=tmpf(m_lme4a)))
+detach("package:lme4a")
+
+## 3. lme4eigen
+library(lme4Eigen)
+
+## 3A. default settings
+m_lme4e <- glmer(form, family = binomial, data = cbpp)
+rlist <- c(rlist,list(lme4e=tmpf(m_lme4e)))
+
+## 3B. change rhobeg/rhoend settings back to lme4a defaults
+m_lme4eB <- glmer(form,family = binomial, data = cbpp,
+                  control=list(rhobeg=0.2, rhoend=2e-7))
+rlist <- c(rlist,list(lme4eB=tmpf(m_lme4eB)))
+
+## 3C. rhobeg/rhoend settings plus decreased value of tolPwrss
+m_lme4eC <- glmer(form,
+            family = binomial, data = cbpp, control=list(rhobeg=0.2, rhoend=2e-7),
+                  tolPwrss=1e-8)
+rlist <- c(rlist,list(lme4eC=tmpf(m_lme4eC)))
+
+detach("package:lme4Eigen")
+
+## 4. glmmADMB
+## n.b. glmmADMB needs to be installed from r-forge -- not on CRAN
+library(glmmADMB)
+
+m_glmmADMB <- glmmadmb(cbind(incidence, size - incidence) ~ period + (1 | herd),
+            family = "binomial", data = cbpp)
+rlist <- c(rlist,list(glmmADMB=tmpf(m_glmmADMB)))
+detach("package:glmmADMB")
+
+## 5. glmmML
+library(glmmML)
+m_glmmML <- glmmML(cbind(incidence, size - incidence) ~ period,
+                   cluster=herd, family="binomial", data=cbpp)
+rlist <- c(rlist,list(glmmML=list(fixef=coef(m_glmmML),ranef=m_glmmML$posterior.modes,
+                        LL=-m_glmmML$deviance/2,vc=m_glmmML$sigma^2)))
+
+rbind(sapply(rlist,"[[","fixef"),
+      logLik=sapply(rlist,"[[","LL"),
+      herdvar=sapply(rlist,"[[","vc"))
+## sapply(rlist,"[[","ranef")
+
+
+