[Genabel-commits] r728 - in pkg/GenABEL: . R inst/unitTests man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed May 18 19:59:52 CEST 2011
Author: yurii
Date: 2011-05-18 19:59:52 +0200 (Wed, 18 May 2011)
New Revision: 728
Added:
pkg/GenABEL/inst/unitTests/runit.polylik.R
Modified:
pkg/GenABEL/CHANGES.LOG
pkg/GenABEL/DESCRIPTION
pkg/GenABEL/R/polygenic.R
pkg/GenABEL/R/polygenic_hglm.R
pkg/GenABEL/R/polylik.R
pkg/GenABEL/R/zzz.R
pkg/GenABEL/man/polygenic.Rd
Log:
improved 'polygenic' based on the idea of G. Svischeva (also RUnit test)
Modified: pkg/GenABEL/CHANGES.LOG
===================================================================
--- pkg/GenABEL/CHANGES.LOG 2011-05-18 10:57:21 UTC (rev 727)
+++ pkg/GenABEL/CHANGES.LOG 2011-05-18 17:59:52 UTC (rev 728)
@@ -1,3 +1,18 @@
+*** v. 1.6-8 (2011.05.18)
+
+Updated 'polygenic' with use of 'polylik_eigen' developed by
+Gulnara Svischeva. Now 'polygenic' works MUCH faster. The
+main advantage of Gulnara's method is that time to compute
+the likelihood function is approximately linear with number
+of subjects. In that, relative speed-up grows with sample
+size growth, e.g. for 100 IDs, it is x15, x40 for 200,
+x60 for 400, and xX for 800 individuals (using two fixed effect
+covariates, see runit.polylik.R). Also added RUnit test to check
+consistency of results based on old 'polylik' and new
+'polylik_eigen'.
+
+Upgrade version number
+
*** v. 1.6-7 (2011.05.17)
Submitted 1.6-7, based on r727, to CRAN
@@ -4,8 +19,6 @@
Deleting some pdf from 'doc' and compressing 'data'
-Submitted 1.6-7, based on r718, to CRAN
-
Fixed 'technical' bug [1398] (related to changes in R 2.14):
https://r-forge.r-project.org/tracker/index.php?func=detail&aid=1398&group_id=505&atid=2058
Modified: pkg/GenABEL/DESCRIPTION
===================================================================
--- pkg/GenABEL/DESCRIPTION 2011-05-18 10:57:21 UTC (rev 727)
+++ pkg/GenABEL/DESCRIPTION 2011-05-18 17:59:52 UTC (rev 728)
@@ -1,8 +1,8 @@
Package: GenABEL
Type: Package
Title: genome-wide SNP association analysis
-Version: 1.6-7
-Date: 2011-05-17
+Version: 1.6-8
+Date: 2011-05-18
Author: GenABEL developers
Maintainer: GenABEL developers <genabel-devel at r-forge.r-project.org>
Depends: R (>= 2.10.0), methods, MASS
Modified: pkg/GenABEL/R/polygenic.R
===================================================================
--- pkg/GenABEL/R/polygenic.R 2011-05-18 10:57:21 UTC (rev 727)
+++ pkg/GenABEL/R/polygenic.R 2011-05-18 17:59:52 UTC (rev 728)
@@ -1,4 +1,4 @@
-#' Estimation of polygenic model
+#' Estimation of polygenic model
#'
#' This function maximises the likelihood of the data under polygenic
#' model with covariates an reports twice negative maximum likelihood estimates
@@ -81,6 +81,8 @@
#' @param maxdiffgls max difference allowed in fgls checks
#' @param patchBasedOnFGLS if FGLS checks not passed, 'patch' fixed
#' effect estimates based on FGLS expectation
+#' @param llfun function to compute likelihood (default 'polylik_eigen', also
+#' avalable -- but not recommeneded -- 'polylik')
#' @param ... Optional arguments to be passed to \code{\link{nlm}} or (\code{\link{optim}})
#' minimisation function
#'
@@ -119,7 +121,7 @@
#' Amin N, van Duijn CM, Aulchenko YS. A genomic background based method for
#' association analysis in related individuals. PLoS ONE. 2007 Dec 5;2(12):e1274.
#'
-#' @author Yurii Aulchenko
+#' @author Yurii Aulchenko, Gulnara Svischeva
#'
#' @note
#' Presence of twins may complicate your analysis. Check kinship matrix for
@@ -164,7 +166,8 @@
function(formula,kinship.matrix,data,fixh2,starth2=0.3,trait.type="gaussian",
opt.method="nlm",scaleh2=1,quiet=FALSE,
steptol=1e-8, gradtol = 1e-8, optimbou = 8,
- fglschecks=TRUE,maxnfgls=8,maxdiffgls=1e-4, patchBasedOnFGLS = TRUE, ...) {
+ fglschecks=TRUE,maxnfgls=8,maxdiffgls=1e-4, patchBasedOnFGLS = TRUE,
+ llfun = "polylik_eigen", ...) {
if (!missing(data)) if (is(data,"gwaa.data"))
{
checkphengen(data)
@@ -184,6 +187,9 @@
out <- paste("opt.method argument should be one of",optargs,"\n")
stop(out)
}
+ if (llfun == "polylik_eigen") llFUN <- polylik_eigen
+ else if (llfun == "polylik") llFUN <- polylik
+ else stop("llfun should be 'polylik' or 'polylik_eigen'")
if (!missing(data)) attach(data,pos=2,warn.conflicts=FALSE)
if (is(formula,"formula")) {
@@ -237,7 +243,14 @@
tmp <- t(relmat)
relmat[upper.tri(relmat)] <- tmp[upper.tri(tmp)]
rm(tmp);gc()
- eigres <- eigen(ginv(relmat),symm=TRUE)
+ if (llfun=="polylik") eigres <- eigen(ginv(relmat),symm=TRUE)
+ else if (llfun=="polylik_eigen") {
+ eigres <- eigen(relmat,symm=TRUE)
+ if (any(eigres$values<0)) {
+ #eigres$values <- abs(eigres$values)
+ warning("some eigenvalues <=0, taking ABS for det; try option llfun='polylik'",immediate.=TRUE)
+ }
+ } else stop("cannot be here...")
if (!quiet) {
cat("LM estimates of fixed parameters:\n")
print(iniest);
@@ -249,21 +262,21 @@
convFGLS <- NULL;
if (!missing(fixh2)) {
- startlik <- polylik(c(iniest,tvar),y=y,desmat=desmat,relmat=relmat,
- ervec=eigres$vec,fixh2=(fixh2/scaleh2),trait.type=trait.type,
+ startlik <- llFUN(c(iniest,tvar),y=y,desmat=desmat,relmat=relmat,
+ eigenRes=eigres,fixh2=(fixh2/scaleh2),trait.type=trait.type,
scaleh2=scaleh2)
if (opt.method=="nlm") {
prnlev <- 0; if (!quiet) prnlev <- 2;
- h2an <- nlm(polylik,p=c(iniest,tvar),y=y,desmat=desmat,relmat=relmat,ervec=eigres$vec,
+ h2an <- nlm(llFUN,p=c(iniest,tvar),y=y,desmat=desmat,relmat=relmat,eigenRes=eigres,
fixh2=(fixh2/scaleh2),trait.type=trait.type,startlik=startlik,scaleh2=scaleh2,
print.level=prnlev,steptol=steptol,gradtol=gradtol,...)
-# h2an <- nlm(polylik,p=c(iniest,tvar),y=y,desmat=desmat,relmat=relmat,ervec=eigres$vec,fixh2=(fixh2/scaleh2),trait.type=trait.type,startlik=startlik,scaleh2=scaleh2,...)
+# h2an <- nlm(llFUN,p=c(iniest,tvar),y=y,desmat=desmat,relmat=relmat,eigenRes=eigres,fixh2=(fixh2/scaleh2),trait.type=trait.type,startlik=startlik,scaleh2=scaleh2,...)
} else {
lower <- c(iniest-inierr*optimbou,1.e-4)
upper <- c(iniest+inierr*optimbou,1)
cntrl <- list(); if (!quiet) cntrl <- list(trace=6,REPORT=1)
- h2an <- optim(fn=polylik,par=c(iniest,tvar),method="L-BFGS-B",lower=lower,upper=upper,
- y=y,desmat=desmat,relmat=relmat,ervec=eigres$vec,fixh2=(fixh2),trait.type=trait.type,
+ h2an <- optim(fn=llFUN,par=c(iniest,tvar),method="L-BFGS-B",lower=lower,upper=upper,
+ y=y,desmat=desmat,relmat=relmat,eigenRes=eigres,fixh2=(fixh2),trait.type=trait.type,
control=cntrl,scaleh2=1,...)
}
} else {
@@ -280,15 +293,15 @@
if (opt.method=="nlm") parsave <- c(iniest,starth2/scaleh2,tvar)
else parsave <- c(iniest,starth2,tvar)
}
- startlik<-polylik(parsave,y=y,desmat=desmat,relmat=relmat,ervec=eigres$vec,
+ startlik<-llFUN(parsave,y=y,desmat=desmat,relmat=relmat,eigenRes=eigres,
trait.type=trait.type,scaleh2=scaleh2)
if (opt.method=="nlm") {
#print(parsave)
prnlev <- 0; if (!quiet) prnlev <- 2;
- h2an <- nlm(polylik,p=parsave,y=y,desmat=desmat,relmat=relmat,ervec=eigres$vec,
+ h2an <- nlm(llFUN,p=parsave,y=y,desmat=desmat,relmat=relmat,eigenRes=eigres,
trait.type=trait.type,startlik=startlik,scaleh2=scaleh2,
print.level=prnlev,steptol=steptol,gradtol=gradtol,...)
-# h2an <- nlm(polylik,p=c(iniest,(starth2/scaleh2),tvar),y=y,desmat=desmat,relmat=relmat,ervec=eigres$vec,trait.type=trait.type,startlik=startlik,scaleh2=scaleh2,...)
+# h2an <- nlm(llFUN,p=c(iniest,(starth2/scaleh2),tvar),y=y,desmat=desmat,relmat=relmat,eigenRes=eigres,trait.type=trait.type,startlik=startlik,scaleh2=scaleh2,...)
parsave <- h2an$estimate
} else {
#print(parsave)
@@ -298,7 +311,9 @@
#print(lower)
#print(upper)
cntrl <- list(); if (!quiet) cntrl <- list(trace=6,REPORT=1)
- h2an <- optim(fn=polylik,par=parsave,method="L-BFGS-B",lower=lower,upper=upper,y=y,desmat=desmat,relmat=relmat,ervec=eigres$vec,trait.type=trait.type,control=cntrl,scaleh2=1,...)
+ h2an <- optim(fn=llFUN,par=parsave,method="L-BFGS-B",lower=lower,upper=upper,
+ y=y,desmat=desmat,relmat=relmat,eigenRes=eigres,trait.type=trait.type,
+ control=cntrl,scaleh2=1,...)
parsave <- h2an$par
}
#print(parsave)
Modified: pkg/GenABEL/R/polygenic_hglm.R
===================================================================
--- pkg/GenABEL/R/polygenic_hglm.R 2011-05-18 10:57:21 UTC (rev 727)
+++ pkg/GenABEL/R/polygenic_hglm.R 2011-05-18 17:59:52 UTC (rev 728)
@@ -98,26 +98,39 @@
#'
#' @keywords htest
#'
-"polygenic_hglm" <- function(formula, kinship.matrix, data, family = gaussian(), conv = 1e-6, maxit = 100, ...)
+"polygenic_hglm" <- function(formula, kinship.matrix, data = NULL,
+ family = gaussian(), conv = 1e-6, maxit = 100, ...)
{
if (!require(hglm))
stop("this function requires 'hglm' package to be installed")
- if (!missing(data)) if (is(data,"gwaa.data"))
+ if (!is.null(data)) {
+ if (is(data,"gwaa.data"))
{
-# checkphengen(data)
+ checkphengen(data)
data <- phdata(data)
}
- if (!missing(data))
if (!is(data,"data.frame"))
stop("data should be of gwaa.data or data.frame class")
- allids <- data$id
+ }
relmat <- kinship.matrix
relmat[upper.tri(relmat)] <- t(relmat)[upper.tri(relmat)]
mf <- model.frame(formula,data,na.action=na.omit,drop.unused.levels=TRUE)
y <- model.response(mf)
desmat <- model.matrix(formula,mf)
- phids <- rownames(data)[rownames(data) %in% rownames(mf)]
+
+ if (is.null(data)) {
+ if (!is.null(rownames(kinship.matrix))) {
+ allids <- rownames(kinship.matrix)
+ } else {
+ allids <- as.character(c(1:length(y)))
+ warning("no ID names identified, using integers")
+ }
+ } else {
+ allids <- data$id
+ }
+
+ phids <- allids[allids %in% rownames(mf)]
mids <- (allids %in% phids)
relmat <- relmat[mids,mids]
relmat <- relmat*2.0
Modified: pkg/GenABEL/R/polylik.R
===================================================================
--- pkg/GenABEL/R/polylik.R 2011-05-18 10:57:21 UTC (rev 727)
+++ pkg/GenABEL/R/polylik.R 2011-05-18 17:59:52 UTC (rev 728)
@@ -1,4 +1,4 @@
-"polylik" <- function(coeffs,y,desmat,relmat,ervec,fixh2,trait.type,
+"polylik" <- function(coeffs,y,desmat,relmat,eigenRes,fixh2,trait.type,
startlik=.Machine$double.xmax-2,scaleh2=1) {
#print(coeffs)
if (!missing(fixh2)) {
@@ -19,12 +19,34 @@
} else {
qt <- y - desmat %*% fixeff
}
- es <- 1./diag(t(ervec) %*% (sigma) %*% ervec)
- ginvsig <- ervec %*% diag(es,ncol=length(qt)) %*% t(ervec)
+ es <- 1./diag(t(eigenRes$vectors) %*% (sigma) %*% eigenRes$vectors)
+ ginvsig <- eigenRes$vectors %*% diag(es,ncol=length(qt)) %*% t(eigenRes$vectors)
a <- determinant(sigma,logarithm=T)
a <- a$modulus * a$sign
b <- (t(qt) %*% ginvsig %*% qt)
loglik <- a+b
+ loglik[1,1]
+}
+"polylik_eigen" <- function(coeffs,y,desmat,relmat,eigenRes,fixh2,trait.type,
+ startlik=.Machine$double.xmax-2,scaleh2=1) {
+ if (!missing(fixh2)) {
+ h2 <- fixh2
+ fixeff <- coeffs[1:(length(coeffs)-1)]
+ } else {
+ h2 <- coeffs[(length(coeffs)-1)] # coeffs[(length(coeffs))]
+ fixeff <- coeffs[1:(length(coeffs)-2)]
+ }
+ h2 <- h2*scaleh2
+ tvar <- coeffs[(length(coeffs))]
+ if (h2<1e-8 || h2>(1-1e-8) || tvar<1e-6) return(startlik+1)
+ if (trait.type=="binomial") {
+ expb <- exp(desmat %*% fixeff)
+ qt <- y - expb/(1.+expb)
+ } else {
+ qt <- y - desmat %*% fixeff
+ }
+ y_0 <- as.vector(t(eigenRes$vectors) %*% qt)
+ eigenvalueCOV <- tvar*(eigenRes$values*h2+1.-h2)
+ loglik <- sum(log(abs(eigenvalueCOV))) + sum(y_0^2/eigenvalueCOV)
loglik
}
-
Modified: pkg/GenABEL/R/zzz.R
===================================================================
--- pkg/GenABEL/R/zzz.R 2011-05-18 10:57:21 UTC (rev 727)
+++ pkg/GenABEL/R/zzz.R 2011-05-18 17:59:52 UTC (rev 728)
@@ -1,6 +1,6 @@
.onLoad <- function(lib, pkg) {
- GenABEL.version <- "1.6-7"
- cat("GenABEL v.",GenABEL.version,"(May 17, 2011) loaded\n")
+ GenABEL.version <- "1.6-8"
+ cat("GenABEL v.",GenABEL.version,"(May 18, 2011) loaded\n")
# check for updates and news
address <- c(
Added: pkg/GenABEL/inst/unitTests/runit.polylik.R
===================================================================
--- pkg/GenABEL/inst/unitTests/runit.polylik.R (rev 0)
+++ pkg/GenABEL/inst/unitTests/runit.polylik.R 2011-05-18 17:59:52 UTC (rev 728)
@@ -0,0 +1,50 @@
+### --- Test setup ---
+#
+# regression tests
+#
+
+if(FALSE) {
+ ## Not really needed, but can be handy when writing tests
+ library(RUnit)
+ library(GenABEL)
+}
+
+### do not run
+#stop("SKIP THIS TEST")
+###
+
+### ---- common functions and data -----
+
+#source(paste("../inst/unitTests/shared_functions.R"))
+#source(paste(path,"/shared_functions.R",sep=""))
+
+### --- Test functions ---
+
+test.polylik <- function()
+{
+ data(ge03d2.clean)
+ df <- ge03d2.clean[1:150,autosomal(ge03d2.clean)]
+ gkin <- ibs(df,w="freq")
+ set.seed(1)
+ phdata(df)$height <- phdata(df)$height+rnorm(nids(df),sd=.1*sd(phdata(df)$height,na.rm=TRUE))
+ formula <- height ~ sex + age
+ mf <- model.frame(formula,phdata(df),na.action=na.omit,drop.unused.levels=TRUE)
+ y <- model.response(mf)
+ desmat <- model.matrix(formula,mf)
+ phids <- rownames(phdata(df))[rownames(phdata(df)) %in% rownames(mf)]
+ relmat <- gkin; relmat[upper.tri(relmat)] <- t(relmat)[upper.tri(relmat)];
+ relmat <- 2.*relmat; relmat <- relmat[phids,phids]
+ eigenRO <- eigen(ginv(relmat),symm=TRUE)
+ eigenRes <- eigen(relmat,symm=TRUE)
+ tO <- system.time(h2htOld <- polygenic(formula,kin=gkin,df,llfun="polylik"))
+ tN <- system.time(h2htNew <- polygenic(formula,kin=gkin,df,llfun="polylik_eigen"))
+ tO; tN; tO/tN;
+ print(h2htNew$h2an)
+ print(h2htOld$h2an)
+ checkEquals(h2htNew$h2an$est,h2htOld$h2an$est)
+ checkEquals(h2htNew$h2an$min,h2htOld$h2an$min)
+ checkEquals(h2htNew$residualY,h2htOld$residualY)
+ checkEquals(h2htNew$pgresidualY,h2htOld$pgresidualY)
+ checkEquals(h2htNew$InvSigma,h2htOld$InvSigma)
+ checkEquals(h2htNew$measuredIDs,h2htOld$measuredIDs)
+}
\ No newline at end of file
Property changes on: pkg/GenABEL/inst/unitTests/runit.polylik.R
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Modified: pkg/GenABEL/man/polygenic.Rd
===================================================================
--- pkg/GenABEL/man/polygenic.Rd 2011-05-18 10:57:21 UTC (rev 727)
+++ pkg/GenABEL/man/polygenic.Rd 2011-05-18 17:59:52 UTC (rev 728)
@@ -1,10 +1,11 @@
-\name{polygenic}
+\name{polygenic}
\alias{polygenic}
\title{Estimation of polygenic model...}
\usage{polygenic(formula, kinship.matrix, data, fixh2, starth2=0.3,
trait.type="gaussian", opt.method="nlm", scaleh2=1, quiet=FALSE,
steptol=1e-08, gradtol=1e-08, optimbou=8, fglschecks=TRUE,
- maxnfgls=8, maxdiffgls=1e-04, patchBasedOnFGLS=TRUE, ...)}
+ maxnfgls=8, maxdiffgls=1e-04, patchBasedOnFGLS=TRUE,
+ llfun="polylik_eigen", ...)}
\description{Estimation of polygenic model}
\details{This function maximises the likelihood of the data under polygenic
model with covariates an reports twice negative maximum likelihood estimates
@@ -81,7 +82,7 @@
Amin N, van Duijn CM, Aulchenko YS. A genomic background based method for
association analysis in related individuals. PLoS ONE. 2007 Dec 5;2(12):e1274.}
-\author{Yurii Aulchenko}
+\author{Yurii Aulchenko, Gulnara Svischeva}
\note{Presence of twins may complicate your analysis. Check kinship matrix for
singularities, or rather use \code{\link{check.marker}} for identification
of twin samples. Take special care in interpretation.
@@ -130,6 +131,8 @@
\item{maxdiffgls}{max difference allowed in fgls checks}
\item{patchBasedOnFGLS}{if FGLS checks not passed, 'patch' fixed
effect estimates based on FGLS expectation}
+\item{llfun}{function to compute likelihood (default 'polylik_eigen', also
+avalable -- but not recommeneded -- 'polylik')}
\item{...}{Optional arguments to be passed to \code{\link{nlm}} or (\code{\link{optim}})
minimisation function}}
\examples{# note that procedure runs on CLEAN data
More information about the Genabel-commits
mailing list