[Genabel-commits] r709 - in pkg/GenABEL: . R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Apr 18 11:12:32 CEST 2011
Author: yurii
Date: 2011-04-18 11:12:31 +0200 (Mon, 18 Apr 2011)
New Revision: 709
Modified:
pkg/GenABEL/CHANGES.LOG
pkg/GenABEL/R/check.marker.R
pkg/GenABEL/R/polygenic_hglm.R
Log:
update Xia's polygenic_hglm; updated check.marker, added warning if no. Y-markers is < 10
Modified: pkg/GenABEL/CHANGES.LOG
===================================================================
--- pkg/GenABEL/CHANGES.LOG 2011-04-14 22:40:26 UTC (rev 708)
+++ pkg/GenABEL/CHANGES.LOG 2011-04-18 09:12:31 UTC (rev 709)
@@ -1,5 +1,10 @@
-*** v. 1.6-6 (2011.03.31)
+*** v. 1.6-6 (2011.04.18)
+Added Xia Shen's procedure 'polygenic_hglm'. Features: quick
+convergence, standard errors for fixed effects.
+
+Updated check.marker, added warning if no. Y-markers is < 10
+
Applied the patch of Nicola Pirastu
http://lists.r-forge.r-project.org/pipermail/genabel-devel/2011-March/000182.html
to descriptives.trait. Added RUnit regression tests, updated
@@ -23,9 +28,6 @@
option 'mapHasHeaderLine' to convert.snp.ped and updated documentation
(resolving feature request #1317).
-With suggestion and guidance from Xia Shen, added procedure 'polygenic_hglm'.
-At the moment experimental.
-
***** v. 1.6-5 (2011.02.07)
Added '#include <cstdarg>' to iterator.cpp to solve
Modified: pkg/GenABEL/R/check.marker.R
===================================================================
--- pkg/GenABEL/R/check.marker.R 2011-04-14 22:40:26 UTC (rev 708)
+++ pkg/GenABEL/R/check.marker.R 2011-04-18 09:12:31 UTC (rev 709)
@@ -1,12 +1,12 @@
"check.marker" <-
-function(data, snpsubset, idsubset,
- callrate=0.95,perid.call=0.95, extr.call = 0.1, extr.perid.call = 0.1,
- het.fdr=0.01, ibs.threshold = 0.95, ibs.mrk = 2000, ibs.exclude="lower",
- maf, p.level=-1,
- fdrate = 0.2, odds = 1000, hweidsubset, redundant="no", minconcordance = 2.0,
- qoption="bh95",imphetasmissing=TRUE,XXY.call=0.8,
- intermediateXF = c(0.5,0.5)) {
-
+ function(data, snpsubset, idsubset,
+ callrate=0.95,perid.call=0.95, extr.call = 0.1, extr.perid.call = 0.1,
+ het.fdr=0.01, ibs.threshold = 0.95, ibs.mrk = 2000, ibs.exclude="lower",
+ maf, p.level=-1,
+ fdrate = 0.2, odds = 1000, hweidsubset, redundant="no", minconcordance = 2.0,
+ qoption="bh95",imphetasmissing=TRUE,XXY.call=0.8,
+ intermediateXF = c(0.5,0.5)) {
+
if (is(data,"gwaa.data")) data <- data at gtdata
if (!is(data,"snp.data")) stop("data argument should be of type gwaa.data or snp.data");
if (!missing(snpsubset)) data <- data[,snpsubset]
@@ -21,18 +21,18 @@
snam <- data at snpnames
sids <- data at idnames
smap <- data at map
- schr <- data at chromosome
-
+ schr <- chromosome(data)
+
cat("Excluding people/markers with extremely low call rate...\n")
- cat(data at nsnps,"markers and",data at nids,"people in total\n")
+ cat(nsnps(data),"markers and",nids(data),"people in total\n")
ts <- perid.summary(data)
- out$idnocall <- rownames(ts[ts[,"CallPP"]<extr.perid.call,])
+ out$idnocall <- rownames(ts[which(ts[,"CallPP"]<extr.perid.call),])
out$idok <- data at idnames[!(data at idnames %in% out$idnocall)]
cat(length(out$idnocall),"people excluded because of call rate <",extr.perid.call,"\n")
ts <- summary(data)
- if (any(as.character(data at chromosome) == "Y") && any(data at male==1)) {
- tsY <- summary(data[which(data at male==1),(which(as.character(data at chromosome) == "Y"))])
- ts[(which(as.character(data at chromosome) == "Y")),] <- tsY
+ if (any(chromosome(data) == "Y") && any(male(data)==1)) {
+ tsY <- summary(data[which(male(data)==1),(which(chromosome(data) == "Y"))])
+ ts[(which(chromosome(data) == "Y")),] <- tsY
}
out$nocall <- rownames(ts[ts[,"CallRate"]<extr.call,])
out$snpok <- data at snpnames[!(data at snpnames %in% out$nocall)]
@@ -43,7 +43,7 @@
if (!missing(hweidsubset)) hweidsubset <- hweidsubset[hweidsubset %in% out$idok]
updat <- 0
- if (any(data at chromosome=="X")) {
+ if (any(chromosome(data)=="X")) {
cat("\nRunning sex chromosome checks...\n")
#maleInd <- male(data)
@@ -72,7 +72,7 @@
# cat("cut off Fmale =",Fmale,"\n")
# stop("Fmale not in [0,1]")
#}
- Ffemale <- intermediateXF[1]
+ Ffemale <- intermediateXF[1]
Fmale <- intermediateXF[2]
if (Ffemale>Fmale) stop("Ffemale>Fmale")
if (Ffemale > 1 | Ffemale < 0) {
@@ -84,7 +84,7 @@
stop("Fmale not in [0,1]")
}
- out.nxt <- Xcheck(data[,data at chromosome=="X"],Pgte=0.001,Pssw=0.01,
+ out.nxt <- Xcheck(data[,chromosome(data)=="X"],Pgte=0.001,Pssw=0.01,
Pmsw=0.01,odds=odds,Ffemale=Ffemale,Fmale=Fmale,tabonly=F)
nxerr <- dim(out.nxt$Xerrtab)[1]
if (!length(nxerr)) nxerr <- 0
@@ -95,18 +95,24 @@
cat(length(out.nxt$otherSexErr)," people have intermediate inbreeding (",Ffemale," > F > ",Fmale,")\n",sep="")
out <- update.check.marker(out,out.nxt)
updat <- 1
- out.nxt <- Xcheck(data[out$idok,out$snpok[out$snpok %in% data at snpnames[data at chromosome=="X"]]],Pgte=0.001,Pssw=0.01,Pmsw=0.01,odds=odds,tabonly=T)
+ out.nxt <- Xcheck(data[out$idok,out$snpok[out$snpok %in% data at snpnames[chromosome(data)=="X"]]],Pgte=0.001,Pssw=0.01,Pmsw=0.01,odds=odds,tabonly=T)
nxerr <- dim(out.nxt$Xerrtab)[1]
if (!length(nxerr)) nxerr <- 0
cat("If these people/markers are removed,",nxerr,"heterozygous male genotypes are left\n")
if (nxerr & imphetasmissing) cat("... these will be considered missing in analysis.\n")
if (nxerr) cat("... Use Xfix() to fix these problems.\n")
}
-
+
# XXY checks
- if (any(data at chromosome=="Y")) {
- totest <- unique(c(out$isfemale,intersect(data at idnames[data at male==0],out$idok)))
- Ytab <- perid.summary(data[totest,(as.character(data at chromosome) == "Y")])
+ if (any(chromosome(data)=="Y")) {
+ nYsnps <- length(which(chromosome(data)=="Y"))
+ if (nYsnps<10) {
+ message <- paste("The number of Y-chromosome SNPs is low (",nYsnps,
+ "). Consider ignoring Y-checks by setting 'XXY.call' to 2",sep="")
+ warning(message,immediate. = TRUE)
+ }
+ totest <- unique(c(out$isfemale,intersect(data at idnames[male(data)==0],out$idok)))
+ Ytab <- perid.summary(data[totest,(chromosome(data) == "Y")])
Ytab <- Ytab[Ytab$NoMeasured>0,]
cat("\n",sum(Ytab$NoMeasured),"possibly female Y genotypes identified")
Ytabexc <- Ytab[Ytab$CallPP>=XXY.call,]
@@ -125,7 +131,7 @@
cat("\nNone of these people excluded based on Y-threshold of",XXY.call,"\n")
}
}
-
+
if (updat) {
data <- data[out$idok,out$snpok]
gc(verbose=FALSE)
@@ -133,25 +139,25 @@
}
# out<-list(qc=out,data=data)
# return(out)
-
+
ts <- summary(data)
- if (any(data at chromosome=="Y")) {
+ if (any(chromosome(data)=="Y")) {
cat("\nChecking Y-chromsome heterozygous genotypes... ");
- sumYerr <- sum(ts[data at chromosome=="Y","P.12"],na.rm=T)
- cat(sumYerr,"(",100*sumYerr/sum(ts[data at chromosome=="Y","NoMeasured"],na.rm=T),"%) found.\n");
+ sumYerr <- sum(ts[chromosome(data)=="Y","P.12"],na.rm=T)
+ cat(sumYerr,"(",100*sumYerr/sum(ts[chromosome(data)=="Y","NoMeasured"],na.rm=T),"%) found.\n");
if (sumYerr & imphetasmissing) cat("... these will be considered missing in analysis.\n")
if (sumYerr) cat("... Use Xfix() to fix these problems.\n")
}
- if (any(data at chromosome=="mt")) {
+ if (any(chromosome(data)=="mt")) {
cat("\nChecking mtDNA heterozygous genotypes... ");
- sumMTerr <- sum(ts[data at chromosome=="mt","P.12"],na.rm=T)
- cat(sumMTerr,"(",100*sumMTerr/sum(ts[data at chromosome=="mt","NoMeasured"],na.rm=T),"%) found.\n");
+ sumMTerr <- sum(ts[chromosome(data)=="mt","P.12"],na.rm=T)
+ cat(sumMTerr,"(",100*sumMTerr/sum(ts[chromosome(data)=="mt","NoMeasured"],na.rm=T),"%) found.\n");
if (sumMTerr & imphetasmissing) cat("... these will be considered missing in analysis.\n")
if (sumMTerr) cat("... Use Xfix() to fix these problems.\n")
}
rm(ts)
gc(verbose=FALSE)
-
+
if (imphetasmissing & (data at nsnps != length(autosomal(data)))) {
cat("\n")
data <- Xfix(data)
@@ -159,18 +165,18 @@
cat("\n")
}
-
+
# first round -- redundancy
run <- 1
passed <- 0
while (!passed) {
cat("\nRUN",run,"\n");
out.nxt <- check.marker.internal(data=data,
- callrate=callrate,perid.call=perid.call,
- het.fdr=het.fdr, ibs.threshold = ibs.threshold, ibs.mrk = ibs.mrk, ibs.exclude=ibs.exclude,
- maf = maf, p.level= p.level,
- fdrate = fdrate, hweidsubset = hweidsubset, redundant = "no", minconcordance = 2.0,
- qoption = qoption, imphetasmissing = imphetasmissing)
+ callrate=callrate,perid.call=perid.call,
+ het.fdr=het.fdr, ibs.threshold = ibs.threshold, ibs.mrk = ibs.mrk, ibs.exclude=ibs.exclude,
+ maf = maf, p.level= p.level,
+ fdrate = fdrate, hweidsubset = hweidsubset, redundant = "no", minconcordance = 2.0,
+ qoption = qoption, imphetasmissing = imphetasmissing)
if (length(out$snpok) == length(out.nxt$snpok) && length(out$idok) == length(out.nxt$idok))
if (all(out$snpok == out.nxt$snpok) && all(out$idok == out.nxt$idok))
passed <- 1
@@ -185,11 +191,11 @@
if (redundant != "no") {
cat("\nCHECK REDUNDANCY\n");
out.nxt <- check.marker.internal(data=data,
- callrate=callrate,perid.call=perid.call,
- het.fdr=het.fdr, ibs.threshold = ibs.threshold, ibs.mrk = ibs.mrk, ibs.exclude=ibs.exclude,
- maf = maf, p.level= p.level,
- fdrate = fdrate, hweidsubset = hweidsubset, redundant = redundant, minconcordance = minconcordance,
- qoption = qoption, imphetasmissing = imphetasmissing)
+ callrate=callrate,perid.call=perid.call,
+ het.fdr=het.fdr, ibs.threshold = ibs.threshold, ibs.mrk = ibs.mrk, ibs.exclude=ibs.exclude,
+ maf = maf, p.level= p.level,
+ fdrate = fdrate, hweidsubset = hweidsubset, redundant = redundant, minconcordance = minconcordance,
+ qoption = qoption, imphetasmissing = imphetasmissing)
out <- update.check.marker(out,out.nxt)
}
out$call$name <- snam
Modified: pkg/GenABEL/R/polygenic_hglm.R
===================================================================
--- pkg/GenABEL/R/polygenic_hglm.R 2011-04-14 22:40:26 UTC (rev 708)
+++ pkg/GenABEL/R/polygenic_hglm.R 2011-04-18 09:12:31 UTC (rev 709)
@@ -1,9 +1,7 @@
#' Estimation of polygenic model
#'
-#' Estimation of polygenic model using hierarchical
-#' GLM (hglm package)
-#' Tell when REML is used, what otherwise;
-#' what it is doing, how to do tests, add more examples, references, etc.
+#' Estimation of polygenic model using a hierarchical generalized linear model
+#' (HGLM; Lee and Nelder 1996. \code{hglm} package; Ronnegard et al. 2010).
#'
#' @param formula Formula describing fixed effects to be used in analysis, e.g.
#' y ~ a + b means that outcome (y) depends on two covariates, a and b.
@@ -12,18 +10,36 @@
#' @param kinship.matrix Kinship matrix, as provided by e.g. ibs(,weight="freq"),
#' or estimated outside of GenABEL from pedigree data.
#' @param data An (optional) object of \code{\link{gwaa.data-class}} or a data frame with
-#' outcome and covariates
+#' outcome and covariates.
#' @param family a description of the error distribution and link function to be
#' used in the mean part of the model (see \code{\link{family}} for details of
-#' family functions)
-#' @param conv 'conv' parameter of 'hglm' (stricter then default)
-#' @param maxit 'maxit' parameter of 'hglm' (stricter then default)
-#' @param ... other parameters passed to 'hglm' call
+#' family functions).
+#' @param conv 'conv' parameter of \code{hglm} (stricter than default, for great precision, use 1e-8).
+#' @param maxit 'maxit' parameter of \code{hglm} (stricter than default).
+#' @param ... other parameters passed to \code{hglm} call
#'
#' @author Xia Shen, Yurii Aulchenko
#'
+#' @details
+#'
+#' The algorithm gives extended quasi-likelihood (EQL) estimates
+#' for restricted maximum likelihood (REML) (Ronnegard et al. 2010).
+#' Implemented is the inter-connected GLM interpretation of HGLM (Lee and Nelder 2001).
+#' Testing on fixed and random effects estimates are directly done using inter-connected
+#' GLMs. Testing on dispersion parameters (variance components) can be done by extracting
+#' the profile likelihood function value of REML.
+#'
#' @references
-#' need to put reference here
+#'
+#' Ronnegard, L, Shen, X and Alam, M (2010). hglm: A Package For Fitting
+#' Hierarchical Generalized Linear Models. \emph{The R Journal}, \bold{2}(2), 20-28.
+#'
+#' Lee, Y and Nelder JA (2001). Hierarchical generalised linear models:
+#' A synthesis of generalised linear models, random-effect models
+#' and structured dispersions. \emph{Biometrika}, \bold{88}(4), 987-1006.
+#'
+#' Lee, Y and Nelder JA (1996). Hierarchical Generalized Linear Models.
+#' \emph{Journal of the Royal Statistical Society. Series B}, \bold{58}(4), 619-678.
#'
#' @seealso
#' \code{\link{polygenic}},
@@ -35,22 +51,56 @@
#' df <- ge03d2ex.clean[,autosomal(ge03d2ex.clean)]
#' gkin <- ibs(df,w="freq")
#'
-#' # for quantitative traits
-#' h2ht <- polygenic_hglm(height ~ sex + age,kin=gkin,df)
-#' # estimate of heritability
+#' # ----- for quantitative traits
+#' h2ht <- polygenic_hglm(height ~ sex + age, kin = gkin, df)
+#' # ----- estimate of heritability
#' h2ht$esth2
-#' # other parameters
+#' # ----- other parameters
#' h2ht$h2an
#'
-#' # for binary traits
-#' h2dm <- polygenic_hglm(dm2 ~ sex + age,kin=gkin,df,family=binomial(link = 'logit'))
-#' # estimated parameters
+#' # ----- test the significance of fixed effects
+#' # ----- (e.g. quantitative trait)
+#' h2ht <- polygenic_hglm(height ~ sex + age, kin = gkin, df)
+#' # ----- summary with standard errors and p-values
+#' summary(h2ht$hglm)
+#' # ----- output for the fixed effects part
+#' # ...
+#' # MEAN MODEL
+#' # Summary of the fixed effects estimates
+#' # Estimate Std. Error t value Pr(>|t|)
+#' # (Intercept) 169.53525 2.57624 65.807 < 2e-16 ***
+#' # sex 14.10694 1.41163 9.993 4.8e-15 ***
+#' # age -0.15332 0.05208 -2.944 0.00441 **
+#' # ---
+#' # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#' # Note: P-values are based on 69 degrees of freedom
+#' # ...
+#' # ----- extract fixed effects estimates and standard errors
+#' h2ht$h2an
+#'
+#' # test the significance of polygenic effect
+#' h2ht <- polygenic_hglm(height ~ sex + age, kin = gkin, df, method = 'REML')
+#' nullht <- lm(height ~ sex + age, df)
+#' l1 <- h2ht$ProfLogLik
+#' l0 <- as.numeric(logLik(nullht))
+#' # the likelihood ratio test (LRT) statistic
+#' (S <- -2*(l0 - l1))
+#' # 5% right-tailed significance cutoff
+#' # for 50:50 mixture distribution between point mass 0 and \chi^{2}(1)
+#' # Self, SG and Liang KY (1987) Journal of the American Statistical Association.
+#' qchisq(((1 - .05) - .50)/.50, 1)
+#' # p-value
+#' pchisq(S, 1, lower.tail = FALSE)*2
+#'
+#' #' # ----- for binary traits
+#' h2dm <- polygenic_hglm(dm2 ~ sex + age, kin = gkin, df, family = binomial(link = 'logit'))
+#' # ----- estimated parameters
#' h2dm$h2an
#'
#'
#' @keywords htest
#'
-"polygenic_hglm" <- function(formula,kinship.matrix,data,family=gaussian(), conv=1e-8, maxit=100, ... )
+"polygenic_hglm" <- function(formula, kinship.matrix, data, family = gaussian(), conv = 1e-6, maxit = 100, ...)
{
if (!require(hglm))
stop("this function requires 'hglm' package to be installed")
@@ -88,12 +138,16 @@
out$h2an$estimate <- c(res_hglm$fixef,out$esth2,tVar)
names(out$h2an$estimate)[(length(out$h2an$estimate)-1):(length(out$h2an$estimate))] <-
c("h2","totalVar")
+ out$h2an$se <- c(res_hglm$SeFe,NA,NA)
+ names(out$h2an$se) <-
+ c(names(res_hglm$fixef), "h2","totalVar")
out$pgresidualY <- rep(NA,length(mids))
out$pgresidualY[mids] <- y-res_hglm$fv
out$residualY <- rep(NA,length(mids))
out$residualY[mids] <- y - desmat %*% res_hglm$fixef
out$InvSigma <- ginv(tVar*out$esth2*relmat +
diag(tVar*(1-out$esth2),ncol=length(y),nrow=length(y)))
+ out$ProfLogLik <- res_hglm$ProfLogLik
class(out) <- "polygenic"
out
More information about the Genabel-commits
mailing list