[Lme4-commits] r1649 - in pkg/lme4Eigen: . R man tests

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Mar 13 17:42:14 CET 2012


Author: bbolker
Date: 2012-03-13 17:42:14 +0100 (Tue, 13 Mar 2012)
New Revision: 1649

Added:
   pkg/lme4Eigen/R/vcconv.R
   pkg/lme4Eigen/tests/optimizer.R
Modified:
   pkg/lme4Eigen/DESCRIPTION
   pkg/lme4Eigen/R/profile.R
   pkg/lme4Eigen/man/profile-methods.Rd
Log:

   preliminary methods (vcconv.R) for conversion among var-cov formats
   profiling: robustness, more options for verbose/debugging output
              fix to work with multi-response grouping variables (i.e.
                correlation terms); add upper bounds where necessary
   tests for alternative optimizers
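
For orientation, a rough usage sketch of the pieces added in this revision
(the profile() argument handling is still subject to the FIXMEs noted in the
code and man page below, so treat this as illustrative rather than a stable
interface):

    library(lme4Eigen)
    fm1 <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy)
    ## new profiling controls: verbosity, deviance tolerance, restart strategy
    pr1 <- profile(fm1, alphamax = 0.01, maxpts = 100,
                   verbose = 1, devtol = 1e-9, startval = "prev")
    ## alternative bounds-constrained optimizer (see tests/optimizer.R)
    fm1B <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy,
                 optimizer = "bobyqa")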



Modified: pkg/lme4Eigen/DESCRIPTION
===================================================================
--- pkg/lme4Eigen/DESCRIPTION	2012-03-13 16:16:49 UTC (rev 1648)
+++ pkg/lme4Eigen/DESCRIPTION	2012-03-13 16:42:14 UTC (rev 1649)
@@ -1,5 +1,5 @@
 Package: lme4Eigen
-Version: 0.9996875-13
+Version: 0.9996875-14
 Date: $Date$
 Revision: $Revision$
 Title: Linear mixed-effects models using Eigen and S4
@@ -56,4 +56,5 @@
     'nbinom.R'
     'mcmcsamp.R'
     'plot.R'
+    'vcconv.R'
 BuildVignettes: no

Modified: pkg/lme4Eigen/R/profile.R
===================================================================
--- pkg/lme4Eigen/R/profile.R	2012-03-13 16:16:49 UTC (rev 1648)
+++ pkg/lme4Eigen/R/profile.R	2012-03-13 16:42:14 UTC (rev 1649)
@@ -8,11 +8,11 @@
 ##' @aliases profile-methods profile.merMod
 ##' @docType methods
 ##' @param fitted a fitted model, e.g., the result of \code{\link{lmer}(..)}.
-##' @param alphamax used when \code{delta} is unspecified, as probability ... to
-##' compute \code{delta} ...
-##' @param maxpts ...
-##' @param delta ...
-##' @param tr ...
+##' @param alphamax maximum alpha value for likelihood ratio confidence regions; used to establish the range of values to be profiled
+##' @param maxpts maximum number of points (in each direction, for each parameter) to evaluate in attempting to construct the profile
+##' @param delta stepping scale for deciding on next point to profile
+##' @param verbose level of output from internal calculations
+##' @param devtol tolerance for fitted deviances less than baseline (supposedly minimum) deviance
 ##' @param \dots potential further arguments for \code{profile} methods.
 ##' @section Methods: FIXME: These (signatures) will change soon --- document
 ##' \bold{after} change!
@@ -32,9 +32,19 @@
 ##' @method profile merMod
 ##' @export
 profile.merMod <- function(fitted, alphamax = 0.01, maxpts = 100, delta = cutoff/8,
-                           tr = 0, ...) {
+                           ##  tr = 0,  ## FIXME:  remove if not doing anything ...
+                           verbose=0, devtol=1e-9,
+                           startval = "prev", ...) {
+
+  ## FIXME: allow choice of optimizer? choice of nextstep/nextstart algorithm?
+  ## FIXME: allow selection of individual variables to profile (by location and?? name)
+  ## FIXME: allow for failure of bounds (non-pos-definite correlation matrices) when >1 cor parameter
+  ## FIXME: generalize to GLMMs
+  ## (use different devfun;
+  ##  be careful with scale parameter;
+  ##  profile all parameters at once rather than RE first and then fixed)
+  
     dd <- devfun2(fitted)
-    
     base <- attr(dd, "basedev")
     thopt <- attr(dd, "thopt")
     stderr <- attr(dd, "stderr")
@@ -52,6 +62,9 @@
     res <- c(.zeta = 0, opt)
     res <- matrix(res, nrow = maxpts, ncol = length(res),
                   dimnames = list(NULL, names(res)), byrow = TRUE)
+    ## FIXME: why is cutoff based on nptot (i.e. boundary of simultaneous LRT conf region for nptot values)
+    ##  when we are computing (at most) 2-parameter profiles here?
+    
     cutoff <- sqrt(qchisq(1 - alphamax, nptot))
     
     ## helper functions
@@ -59,17 +72,30 @@
     ## nextpar calculates the next value of the parameter being
     ## profiled based on the desired step in the profile zeta
     ## (absstep) and the values of zeta and column cc for rows
-    ## r-1 and r.  The parameter may not be below lower
-    nextpar <- function(mat, cc, r, absstep, lower = -Inf) {
+    ## r-1 and r.  The parameter may not be below lower (or above upper)
+    nextpar <- function(mat, cc, r, absstep, lower = -Inf, upper = Inf, minstep=1e-6) {
         rows <- r - (1:0)         # previous two row numbers
         pvals <- mat[rows, cc]
         zeta <- mat[rows, ".zeta"]
-        if (!(denom <- diff(zeta)))
-            stop("Last two rows have identical .zeta values")
         num <- diff(pvals)
-        max(lower, pvals[2] + sign(num) * absstep * num / denom)
+        if (is.na(denom <- diff(zeta)) || denom==0) {
+          ## this may happen due to numerical fuzz ...
+          warning("Last two rows have identical or NA .zeta values: using minstep")
+          step <- minstep
+        } else {
+          step <- absstep*num/denom
+        }
+        min(upper, max(lower, pvals[2] + sign(num) * step))
+      }
+
+    nextstart <- function(mat, pind, r, method="global") {
+      ## FIXME: indexing may need to be checked (e.g. for fixed-effect parameters)
+      switch(method,
+             global=opt[seqnvp][-pind],  ## address opt, no zeta column
+             prev=mat[r,1+seqnvp][-pind],
+             extrap=stop("stub")) ## do something with mat[r-(1:0),1+seqnvp])[-pind] ...
     }
-    
+       
     ## mkpar generates the parameter vector of theta and
     ## sigma from the values being profiled in position w
     mkpar <- function(np, w, pw, pmw) {
@@ -83,59 +109,91 @@
     ## using nextpar and zeta
 ### FIXME:  add code to evaluate more rows near the minimum if that
 ###        constraint was active.
-    fillmat <- function(mat, lowcut, zetafun, cc) {
+    fillmat <- function(mat, lowcut, upcut, zetafun, cc) {
         nr <- nrow(mat)
         i <- 2L
-        while (i < nr && abs(mat[i, ".zeta"]) <= cutoff &&
-               mat[i, cc] > lowcut) {
-            mat[i + 1L, ] <- zetafun(nextpar(mat, cc, i, delta, lowcut))
+        while (i < nr && mat[i, cc] > lowcut && mat[i,cc] < upcut &&
+               (is.na(curzeta <- abs(mat[i, ".zeta"])) || curzeta <= cutoff)) {
+             np <- nextpar(mat, cc, i, delta, lowcut, upcut)
+             ns <- nextstart(mat, cc-1, i, startval)
+             mat[i + 1L, ] <- zetafun(np,ns)
+            if (verbose>0) {
+              cat(i,cc,np,mat[i+1L,],"\n")
+            }
             i <- i + 1L
         }
         mat
     }
     
-    lower <- c(fitted@lower, 0, rep.int(-Inf, p))
+    lower <- c(pmax(fitted@lower,-1.0), 0, rep.int(-Inf, p))
+    upper <- c(ifelse(fitted@lower==0,Inf,1.0), Inf, rep.int(Inf, p))
     seqnvp <- seq_len(nvp)
     lowvp <- lower[seqnvp]
+    upvp <- upper[seqnvp]
     form <- .zeta ~ foo           # pattern for interpSpline formula
     
     for (w in seqnvp) {
+       if (verbose) cat("var-cov parameter ",w,":\n",sep="")
+
         wp1 <- w + 1L
         start <- opt[seqnvp][-w]
         pw <- opt[w]
         lowcut <- lower[w]
-        zeta <- function(xx) {
+        upcut <- upper[w]
+        zeta <- function(xx,start) {
+          ## FIXME: use Nelder_Mead, or (for GLMMs) optimizer choice??
             ores <- bobyqa(start,
                            function(x) dd(mkpar(nvp, w, xx, x)),
-                           lower = lowvp[-w])
-            zz <- sign(xx - pw) * sqrt(ores$fval - base)
+                           lower = lowvp[-w],
+                           upper = upvp[-w])
+            ores2 <- Nelder_Mead(par=start,
+                                fn=function(x) dd(mkpar(nvp, w, xx, x)),
+                                lower = lowvp[-w],
+                                upper = upvp[-w])
+
+            devdiff <- ores$fval-base
+            if (is.na(ores$fval)) {
+              ## NA/NaN detected
+              warning("NAs detected in profiling")
+            } else if (devdiff < (-devtol))
+              stop("profiling detected new, lower deviance")
+            if (devdiff<0) warning(paste("slightly lower deviances (diff=",devdiff,") detected",sep=""))
+            devdiff <- max(0,devdiff)
+            zz <- sign(xx - pw) * sqrt(devdiff)
             c(zz, mkpar(nvp, w, xx, ores$par), pp$beta(1))
         }
         
 ### FIXME: The starting values for the conditional optimization should
 ### be determined from recent starting values, not always the global
 ### optimum values.
-        
-### Can do this by taking the change in the other parameter values at
+
+### Can do this most easily by taking the change in the other parameter values at
 ### the two last points and extrapolating.
+
         
         ## intermediate storage for pos. and neg. increments
         pres <- nres <- res
         ## assign one row, determined by inc. sign, from a small shift
-        nres[1, ] <- pres[2, ] <- zeta(pw * 1.01)
+        nres[1, ] <- pres[2, ] <- zeta(pw * 1.01, start=opt[seqnvp][-w])
         ## fill in the rest of the arrays and collapse them
         bres <-
-            as.data.frame(unique(rbind2(fillmat(pres,lowcut, zeta, wp1),
-                                        fillmat(nres,lowcut, zeta, wp1))))
+            as.data.frame(unique(rbind2(fillmat(pres,lowcut, upcut, zeta, wp1),
+                                        fillmat(nres,lowcut, upcut, zeta, wp1))))
         bres$.par <- names(opt)[w]
         ans[[w]] <- bres[order(bres[, wp1]), ]
         form[[3]] <- as.name(names(opt)[w])
-        
-        bakspl[[w]] <- backSpline(forspl[[w]] <- interpSpline(form, bres))
+       
+       ## FIXME: test for nearly-vertical slopes here ...
+        bakspl[[w]] <- try(backSpline(forspl[[w]] <- interpSpline(form, bres)),silent=TRUE)
+       if (inherits(bakspl[[w]],"try-error")) {
+         warning("non-monotonic profile")
+       }
+
     }
 
     offset.orig <- fitted@resp$offset
     for (j in seq_len(p)) {
+      if (verbose) cat("fixed-effect parameter ",j,":\n",sep="")
         pres <-            # intermediate results for pos. incr.
             nres <- res    # and negative increments
         est <- opt[nvp + j]
@@ -153,27 +211,32 @@
         ### FIXME Change this to use the deep copy and setWeights, setOffset, etc.
         rr <- new(Class=class(fitted@resp), y=fitted@resp$y)
         rr$setWeights(fitted@resp$weights)
-        fe.zeta <- function(fw) {
+        fe.zeta <- function(fw, start) {
+          ## (start parameter ignored)
             rr$setOffset(Xw * fw + offset.orig)
             rho <- as.environment(list(pp=pp1, resp=rr))
             parent.env(rho) <- parent.frame()
-            ores <- bobyqa(thopt, mkdevfun(rho, 0L), lower = fitted@lower)
+            ores <- bobyqa(thopt, mkdevfun(rho, 0L), lower = pmax(fitted@lower, -1.0),
+                           upper =  ifelse(fitted@lower==0,Inf,1.0))
             fv <- ores$fval
             sig <- sqrt((rr$wrss() + pp1$sqrL(1))/n)
             c(sign(fw - est) * sqrt(fv - base),
-              ores$par * sig, sig, mkpar(p, j, fw, pp1$beta(1)))
+              Cv_to_Sv(ores$par, sapply(fitted@cnms,length),s=sig),
+              ## ores$par * sig, sig,
+              mkpar(p, j, fw, pp1$beta(1)))
         }
         nres[1, ] <- pres[2, ] <- fe.zeta(est + delta * std)
         poff <- nvp + 1L + j
         bres <-
-            as.data.frame(unique(rbind2(fillmat(pres,-Inf, fe.zeta, poff),
-                                        fillmat(nres,-Inf, fe.zeta, poff))))
+            as.data.frame(unique(rbind2(fillmat(pres,-Inf, Inf, fe.zeta, poff),
+                                        fillmat(nres,-Inf, Inf, fe.zeta, poff))))
         thisnm <- names(fe.orig)[j]
         bres$.par <- thisnm
         ans[[thisnm]] <- bres[order(bres[, poff]), ]
         form[[3]] <- as.name(thisnm)
         bakspl[[thisnm]] <-
-            backSpline(forspl[[thisnm]] <- interpSpline(form, bres))
+            try(backSpline(forspl[[thisnm]] <- interpSpline(form, bres)),silent=TRUE)
+      if (inherits(bakspl[[thisnm]],"try-error")) warning("non-monotonic profile")
     }
     
     ans <- do.call(rbind, ans)
@@ -222,7 +285,9 @@
     sig <- sigma(fm)
     stdErr <- unname(coef(summary(fm))[,2])
     pp <- fm@pp$copy()
-    opt <- c(sig * pp$theta, sig)
+    ## opt <- c(pp$theta*sig, sig)
+    opt <- Cv_to_Sv(pp$theta, n=sapply(fm@cnms,length), s=sig)
+    ## FIXME: alternatively use/allow names as in getME(.,"theta") ?
     names(opt) <- c(sprintf(".sig%02d", seq_along(pp$theta)), ".sigma")
     opt <- c(opt, fixef(fm))
     resp <- fm@resp$copy()
@@ -233,10 +298,13 @@
         stopifnot(is.numeric(pars), length(pars) == np)
         ## Assumption:  all parameters, including the residual SD on SD-scale
         sigma <- pars[np]
-        .Call(lmer_Deviance, pp$ptr(), resp$ptr(), pars[-np]/sigma)
+        ## .Call(lmer_Deviance, pp$ptr(), resp$ptr(), pars[-np]/sigma)
+        ## convert from sdcor vector back to 'unscaled theta' 
+        thpars <- Sv_to_Cv(pars,n=sapply(fm@cnms,length),s=sigma)
+        .Call(lmer_Deviance, pp$ptr(), resp$ptr(), thpars)
         sigsq <- sigma^2
         pp$ldL2() + (resp$wrss() + pp$sqrL(1))/sigsq + n * log(2 * pi * sigsq)
-    }
+      }
     attr(ans, "optimum") <- opt         # w/ names()
     attr(ans, "basedev") <- basedev
     attr(ans, "thopt") <- pp$theta
@@ -315,6 +383,7 @@
                  lsegments(x, y, x, 0, ...)
                  lims <- current.panel.limits()$xlim
                  myspl <- spl[[panel.number()]]
+                 if (inherits(myspl,"try-error")) browser()
                  krange <- range(myspl$knots)
                  pr <- predict(myspl,
                                seq(max(lims[1], krange[1]),
@@ -596,7 +665,7 @@
             attr(x, "forward")[[nm]] <- interpSpline(form, fr)
             attr(x, "backward")[[nm]] <- backSpline(attr(x, "forward")[[nm]])
         }
-        ## eliminate rows the produced non-finite logs
+        ## eliminate rows that produced non-finite logs
         x <- x[apply(is.finite(as.matrix(x[, sigs])), 1, all),]
     }
     x
@@ -704,3 +773,4 @@
     }
     ans
 }
+
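
The core of the profiling loop above is the signed square-root ("zeta")
transform of the deviance difference, compared against
cutoff = sqrt(qchisq(1 - alphamax, nptot)).  A minimal standalone sketch of
just that step (the function name zeta_of is made up here for illustration;
the real code also re-optimizes the remaining parameters at each point):

    ## signed square-root scale, with the same clamping/tolerance logic as above
    zeta_of <- function(dev, base, par, opt_par, devtol = 1e-9) {
        devdiff <- dev - base
        if (!is.na(devdiff) && devdiff < -devtol)
            stop("profiling detected new, lower deviance")
        sign(par - opt_par) * sqrt(max(0, devdiff))
    }
    cutoff <- sqrt(qchisq(1 - 0.01, df = 5))  # e.g. alphamax = 0.01, nptot = 5
    ## stepping in either direction stops once abs(zeta) exceeds cutoff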

Added: pkg/lme4Eigen/R/vcconv.R
===================================================================
--- pkg/lme4Eigen/R/vcconv.R	                        (rev 0)
+++ pkg/lme4Eigen/R/vcconv.R	2012-03-13 16:42:14 UTC (rev 1649)
@@ -0,0 +1,117 @@
+## convert matrix list to concatenated vector of lower triangles
+mlist2vec <- function(L) {
+  n <- sapply(L,nrow)
+  r <- unlist(lapply(L,function(x) x[lower.tri(x,diag=TRUE)]))
+  attr(r,"clen") <- n
+  r
+}
+
+get_clen <- function(v,n=NULL) {
+  if (is.null(n)) {
+    if (is.null(n <- attr(v,"clen"))) {
+      ## single component
+      n <- (sqrt(8*length(v)+1)-1)/2
+    }
+  }
+  n
+}
+  
+## convert concatenated vector to matrix list
+## (lower triangle or symmetric)
+vec2mlist <- function(v,n=NULL,symm=TRUE) {
+  n <- get_clen(v,n)
+  s <- split(v,rep.int(seq_along(n),n*(n+1)/2))
+  m <- mapply(function(x,n0) {
+    m0 <- diag(nrow=n0)
+    m0[lower.tri(m0,diag=TRUE)] <- x
+    if (symm) m0[upper.tri(m0)] <- t(m0)[upper.tri(m0)]
+    m0
+  },s,n,SIMPLIFY=FALSE)
+  m
+}
+
+## convert 'sdcor' format -- diagonal = std dev, off-diag=cor
+##  to and from variance-covariance matrix
+sdcor2cov  <- function(m) {
+  sd <- diag(m)
+  diag(m) <- 1
+  m * outer(sd,sd)
+}
+
+cov2sdcor  <- function(m) {
+  v <- diag(m)
+  m1 <- cov2cor(m)
+  diag(m1) <- sqrt(v)
+  m1
+}
+
+dmult <- function(m,s) {
+  diag(m) <- diag(m)*s
+  m
+}
+
+safe_chol <- function(m) {
+  if (all(m==0)) return(m)
+  if (nrow(m)==1) return(sqrt(m))
+  if (all(dmult(m,0)==0)) {  ## diagonal
+    return(diag(sqrt(diag(m))))
+  }
+  cc <- suppressWarnings(chol(m,pivot=TRUE))
+  cc[,order(attr(cc,"pivot"))]
+  ## FIXME: pivot is here to deal with semidefinite cases: do we have
+  ##   to be more careful?  test!
+  ## does pivoting carry a performance cost?
+  ## FIXME: we 
+}
+
+Vv_to_Cv <- function(v,n=NULL,s=1) {
+  if (!missing(s)) {
+    v <- v[-length(v)]
+  }
+  r <- mlist2vec(lapply(vec2mlist(v,n,symm=TRUE),
+                        function(m) t(safe_chol(m/s^2))))
+  attr(r,"clen") <- get_clen(v,n)
+  r
+}
+Sv_to_Cv <- function(v,n=NULL,s=1) {
+  if (!missing(s)) {
+    v <- v[-length(v)]
+  }
+  r <-  mlist2vec(lapply(vec2mlist(v,n,symm=TRUE),
+                   function(m) t(safe_chol(sdcor2cov(m)/s^2))))
+  attr(r,"clen") <- get_clen(v,n)
+  r
+}
+
+Cv_to_Vv <- function(v,n=NULL,s=1) {
+  r <- mlist2vec(lapply(vec2mlist(v,n,symm=FALSE),
+                        function(m) tcrossprod(m)*s^2))
+  if (!missing(s)) r <- c(r,s^2)
+    attr(r,"clen") <- get_clen(v,n)
+  r
+}
+
+Cv_to_Sv <- function(v,n=NULL,s=1) {
+  r <- mlist2vec(lapply(vec2mlist(v,n,symm=FALSE),
+                   function(m) cov2sdcor(tcrossprod(m)*s^2)))
+  if (!missing(s)) r <- c(r,s)
+  attr(r,"clen") <- get_clen(v,n)
+  r
+}
+
+if (FALSE) {
+  cvec1 <- 1:6
+  Cv_to_Vv(cvec1)
+  Vv_to_Cv(Cv_to_Vv(0))
+  Cv_to_Vv(cvec1,s=2)
+  Sv_to_Cv(Cv_to_Sv(cvec1))
+  Vv_to_Cv(Cv_to_Vv(cvec1))
+  ## for length-1 matrices, Cv_to_Sv should be equivalent
+  ##   to multiplying Cv by sigma and appending sigma ....
+  clist2 <- list(matrix(1),matrix(2),matrix(3))
+  cvec2 <- mlist2vec(clist2)
+  all((cvec3 <- Cv_to_Sv(cvec2,s=2))==c(cvec2*2,2))
+  all(Sv_to_Cv(cvec3,n=rep(1,3),s=2)==
+      cvec3[-length(cvec3)]/cvec3[length(cvec3)])
+}
+
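
A small hand-worked example of the converters above, for a single 2x2 term
(assuming the functions in vcconv.R are sourced; "Cv" = vectorized Cholesky
factor, "Vv" = variance-covariance, "Sv" = standard-deviation/correlation):

    ## lower Cholesky factor [1 0; 0.5 2], stored as its lower triangle
    theta <- c(1, 0.5, 2)
    Cv_to_Vv(theta)            # variance-covariance scale: c(1, 0.5, 4.25)
    Cv_to_Sv(theta)            # sd/cor scale: c(1, 0.5/sqrt(4.25), sqrt(4.25))
    Sv_to_Cv(Cv_to_Sv(theta))  # round trip recovers c(1, 0.5, 2)
    Cv_to_Vv(theta, s = 2)     # covariances scaled by s^2, with sigma^2 appended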

Modified: pkg/lme4Eigen/man/profile-methods.Rd
===================================================================
--- pkg/lme4Eigen/man/profile-methods.Rd	2012-03-13 16:16:49 UTC (rev 1648)
+++ pkg/lme4Eigen/man/profile-methods.Rd	2012-03-13 16:42:14 UTC (rev 1649)
@@ -5,22 +5,21 @@
 \title{Methods for profile() of [ng]lmer fitted models}
 \usage{
   \method{profile}{merMod} (fitted, alphamax = 0.01,
-    maxpts = 100, delta = cutoff/8, tr = 0, ...)
+    maxpts = 100, delta = cutoff/8, verbose=0, devtol=1e-9,
+startval="prev", ...)
 }
+%% FIXME: update from Roxygen!
 \arguments{
   \item{fitted}{a fitted model, e.g., the result of
   \code{\link{lmer}(..)}.}
-
   \item{alphamax}{used when \code{delta} is unspecified, as
   probability ... to compute \code{delta} ...}
-
   \item{maxpts}{...}
-
   \item{delta}{...}
-
-  \item{tr}{...}
-
-  \item{\dots}{potential further arguments for
+  \item{verbose}{...}
+  \item{devtol}{...}
+  \item{startval}{...}
+   \item{\dots}{potential further arguments for
   \code{profile} methods.}
 }
 \description{

Added: pkg/lme4Eigen/tests/optimizer.R
===================================================================
--- pkg/lme4Eigen/tests/optimizer.R	                        (rev 0)
+++ pkg/lme4Eigen/tests/optimizer.R	2012-03-13 16:42:14 UTC (rev 1649)
@@ -0,0 +1,37 @@
+library(lme4Eigen)
+## should be able to run any example with any bounds-constrained optimizer ...
+
+## these are the only ones we know of
+fm1 <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy)  ## Nelder_Mead
+fm1B <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy,
+            optimizer="bobyqa") 
+require(optimx)
+fm1C <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy,
+             optimizer="optimx", control=list(method="nlminb"))
+fm1D <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy,
+             optimizer="optimx", control=list(method="L-BFGS-B"))
+all.equal(fixef(fm1),fixef(fm1B)); all.equal(fixef(fm1B),fixef(fm1C)); all.equal(fixef(fm1C),fixef(fm1D))
+
+gm1 <- glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
+                   data = cbpp, family = binomial, tolPwrss=1e-13)
+gm1B <- update(gm1,optimizer="bobyqa")
+
+if (FALSE) {
+  gm1C <- update(gm1,optimizer="optimx",control=list(method="nlminb"))
+  gm1D <- update(gm1,optimizer="optimx",control=list(method="L-BFGS-B"))
+
+  ##  these fail because the initial optimization step finds the (true)
+  ## maximum of the nAGQ-0 deviance function at zero; then nlminb and L-BFGS-B
+  ## can't get off the boundary ... don't know how well they would do even if
+  ## they could
+
+  gm1fun <- update(gm1,devFunOnly=TRUE)
+  gm1fun0 <- update(gm1,devFunOnly=TRUE,nAGQ=0)
+  svec <- seq(0.01,3,by=0.01)
+  dvec <- sapply(svec,gm1fun0)
+  plot(svec,dvec,type="l")
+  abline(v=1)
+  n1 <- nlminb(start=1,objective=gm1fun0,control=list(),lower=0)  ## false convergence=code 1
+  b1 <- bobyqa(par=1,fn=gm1fun0,control=list(),lower=0)
+}
+       
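
A possible extension of the checks above (not part of the committed test):
comparing the variance-component parameters as well as the fixed effects,
with an explicitly chosen tolerance since the optimizers stop at slightly
different points:

    ## pairwise comparisons; the 1e-4 tolerance is an arbitrary choice here
    all.equal(getME(fm1, "theta"), getME(fm1B, "theta"), tolerance = 1e-4)
    all.equal(fixef(fm1),          fixef(fm1B),          tolerance = 1e-4)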


