From noreply at r-forge.r-project.org Tue Dec 7 20:21:33 2010 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 7 Dec 2010 20:21:33 +0100 (CET) Subject: [Gtdb-commits] r59 - in pkg/gt.db: . R man Message-ID: <20101207192133.CF9219D31D@r-forge.r-project.org> Author: dahinds Date: 2010-12-07 20:21:33 +0100 (Tue, 07 Dec 2010) New Revision: 59 Added: pkg/gt.db/man/ramToChar.Rd pkg/gt.db/man/score.coxph.Rd pkg/gt.db/man/score.glm.scoretest.Rd Modified: pkg/gt.db/DESCRIPTION pkg/gt.db/R/genotype.R pkg/gt.db/R/gplot.R pkg/gt.db/R/hwe.R pkg/gt.db/R/score.R pkg/gt.db/R/test.R pkg/gt.db/R/tracks.R pkg/gt.db/man/manhattan.plot.Rd pkg/gt.db/man/score.glm.Rd pkg/gt.db/man/score.glm.general.Rd pkg/gt.db/man/score.gt.data.Rd Log: - fix to association tests based on dosage - minor improvements to manhattan.plot, new 'scale' argument - performance improvement for exact HWE tests - automatic promotion of factors to scores in linear models - added 'score.glm.scoretest' for faster GLM scoring - added 'score.coxph' and 'score.survreg' scoring functions - made model-parsing in .null.model, .flat.model more robust Modified: pkg/gt.db/DESCRIPTION =================================================================== --- pkg/gt.db/DESCRIPTION 2010-09-10 20:56:58 UTC (rev 58) +++ pkg/gt.db/DESCRIPTION 2010-12-07 19:21:33 UTC (rev 59) @@ -7,6 +7,6 @@ Description: Framework for storing and manipulating genotype data, phenotype data, and association study results License: GPL -Depends: R (>= 2.7.0), lattice, grid, methods, DBI -Suggests: nlme, cluster, mda, RMySQL, RSQLite +Depends: R (>= 2.7.0), survival, lattice, grid, methods, DBI +Suggests: statmod, nlme, cluster, mda, RMySQL, RSQLite Packaged: 2010-08-12 20:47:05 UTC; dhinds Modified: pkg/gt.db/R/genotype.R =================================================================== --- pkg/gt.db/R/genotype.R 2010-09-10 20:56:58 UTC (rev 58) +++ pkg/gt.db/R/genotype.R 2010-12-07 19:21:33 UTC (rev 59) @@ -202,7 +202,7 @@ } if (dosage) { fn <- function(x) unpack.raw.data(hexToRaw(x),'dosage')$dosage - g <- lapply(gt.data$raw.data) + g <- lapply(gt.data$raw.data, fn) } else { a <- strsplit(gt.data$alleles, split='/', fixed=TRUE) g <- mapply(gt.split, gt.data$genotype, ..., Modified: pkg/gt.db/R/gplot.R =================================================================== --- pkg/gt.db/R/gplot.R 2010-09-10 20:56:58 UTC (rev 58) +++ pkg/gt.db/R/gplot.R 2010-12-07 19:21:33 UTC (rev 59) @@ -51,34 +51,43 @@ manhattan.plot <- function(y, data, gap=0, threshold=-log10(5e-8), around=0, xticks=c(1:12,14,16,18,20,22,'X','Y'), cex=0.25, - xlab=NULL, ylab=deparse(substitute(y)), yrange, ylim, + xlab, ylab=expression(-log[10](pvalue)), yrange, ylim, + scale=c('Mb','Kb','bp'), col=c('#d0d0d0','#e0e0e0','#ff0000'), ...) { val <- eval(substitute(y), data, parent.frame()) - len <- with(data, tapply(position, .sort.levels(scaffold), + scale <- match.arg(scale) + div <- switch(scale, Mb=1e6, Kb=1000, 1) + len <- with(data, tapply(position/div, .sort.levels(scaffold), max, na.rm=TRUE)) ofs <- cumsum(c(0,as.numeric(len+gap))) mid <- (ofs[-1] + ofs[-length(ofs)])/2 - pos <- data$position + ofs[match(data$scaffold, names(len))] + pos <- data$position/div + ofs[match(data$scaffold, names(len))] set <- list(superpose.symbol=list(col=col)) grp <- (match(data$scaffold, names(len)) %% 2) grp <- ifelse(val > threshold, 2, grp) if (around > 0) { hits <- pos[which(val > threshold)] - keep <- (diff(hits) > around) + keep <- (diff(hits) > around/div) hits <- hits[c(TRUE,keep) | c(keep,TRUE)] for (hit in hits) { - grp[abs(pos - hit) < around] <- 2 + grp[abs(pos - hit) < around/div] <- 2 } } xlim <- range(pos,na.rm=TRUE) xlim <- xlim + (xlim[2]-xlim[1]) * c(-0.02,0.02) if (missing(ylim)) { if (missing(yrange)) - yrange <- range(c(val,0,1.1*threshold)) + yrange <- range(c(val,0,1.1*threshold), na.rm=TRUE) padding <- lattice.options()$axis.padding$numeric ylim <- yrange + diff(yrange) * padding * c(-1,1) } + if (missing(xlab)) { + if (length(len) == 1) + xlab <- paste('position,', scale) + else + xlab <- 'Chromosome' + } panel.fn <- function(...) { panel.xyplot(...) ; panel.refline(h=threshold) } if (length(len) == 1) { Modified: pkg/gt.db/R/hwe.R =================================================================== --- pkg/gt.db/R/hwe.R 2010-09-10 20:56:58 UTC (rev 58) +++ pkg/gt.db/R/hwe.R 2010-12-07 19:21:33 UTC (rev 59) @@ -23,38 +23,29 @@ .hwe.exact <- function(aa, ab, bb, tail) { - # use integer constants to avoid implicit casts - m <- ab %/% 2L - xx <- aa + m + 1L - xy <- ab %% 2L + 1L - yy <- bb + m + 1L - n <- pmin(xx,yy) - 1L + nr <- pmin(aa,bb)*2+ab + nn <- aa+ab+bb + xy <- as.integer(nr*(2*nn-nr) / (2*nn)) + xy <- xy + ((nr+xy) %% 2) + xx <- (nr-xy) %/% 2 + yy <- (nn-xy-xx) - # precompute lookup table of log gamma values - # note that xx, xy, yy are offset by 1 for indexing - LG <- lgamma(1:(max(xx,yy,2L*n)+2)) - v <- log(2)*ab - LG[aa+1L] - LG[ab+1L] - LG[bb+1L] - log.p <- function(xx, xy, yy, i) - log(2)*(xy+2L*i-1L) - LG[xx-i] - LG[xy+2L*i] - LG[yy-i] - - fn.lower <- function(v, m, n, ...) + p.fn <- function(xx, xy, yy) { - p <- exp(log.p(...,0:n)-v) - sum(p[0:m+1L])/sum(p) + if (is.na(xx+xy+yy)) return(NA) + i <- 1:(xy %/% 2) + lt <- cumprod((xy-2*i+2)*(xy-2*i+1)/(4*(xx+i)*(yy+i))) + i <- 1:min(xx,yy) + ut <- cumprod(4*(xx-i+1)*(yy-i+1)/((xy+2*i)*(xy+2*i-1))) + c(rev(lt),1,ut) } - fn.upper <- function(v, m, n, ...) - { - p <- exp(log.p(...,0:n)-v) - sum(p[m:n+1L])/sum(p) - } - fn.both <- function(v, m, n, ...) - { - p <- exp(log.p(...,0:n)-v) - sum(p[p<=1])/sum(p) - } - mapply(switch(tail, lower=fn.lower, upper=fn.upper, fn.both), - v, m, n, xx, xy, yy) + fn.lower <- function(brk, p) sum(p[1:brk]) / sum(p) + fn.upper <- function(brk, p) sum(p[brk:length(p)]) / sum(p) + fn.both <- function(brk, p) sum(p[p <= p[brk]]) / sum(p) + fn <- switch(tail, lower=fn.lower, upper=fn.upper, fn.both) + mapply(function(brk,xx,xy,yy) fn(brk, p.fn(xx,xy,yy)), + (ab %/% 2)+1, xx, xy, yy) } hwe.test <- Modified: pkg/gt.db/R/score.R =================================================================== --- pkg/gt.db/R/score.R 2010-09-10 20:56:58 UTC (rev 58) +++ pkg/gt.db/R/score.R 2010-12-07 19:21:33 UTC (rev 59) @@ -170,6 +170,9 @@ mode=c('additive','recessive','dominant','general')) { data$genotype <- .recode.gt(data$genotype, match.arg(mode)) + lhs <- .lhs.formula(formula) + if (lhs %in% names(data) && is.factor(data[,lhs])) + data[,lhs] <- as.numeric(data[,lhs]) m <- lm(formula, data) nf <- .null.model(formula, term=drop) pvalue <- anova(update(m,nf), m)[2,6] @@ -187,6 +190,9 @@ return(keep.attr(cbind(term=NA, r), fit='lm')) } data$genotype <- factor(data$genotype) + lhs <- .lhs.formula(formula) + if (lhs %in% names(data) && is.factor(data[,lhs])) + data[,lhs] <- as.numeric(data[,lhs]) contrasts <- list(genotype=matrix(c(0,1,2,0,1,0),3,2)) m <- lm(formula, data, contrasts=contrasts) nf <- .null.model(formula) @@ -250,6 +256,16 @@ ), fit='glm') } +score.glm.scoretest <- + function (formula, data, ploidy, + precalc=glm(.null.model(formula),binomial,data)) +{ + require(statmod) + pvalue <- 2 * pnorm(-abs(glm.scoretest(precalc,data$genotype))) + keep.attr(data.frame(pvalue, effect = NA, stderr = NA), + fit = "glm.scoretest") +} + score.glm.general <- function(formula, data, ploidy) { if (length(unique(data$genotype)) < 3) { @@ -348,3 +364,52 @@ ), fit='gls') } +#----------------------------------------------------------------------- + +score.coxph <- +function(formula, data, ploidy, test=c('LR','Wald'), + mode=c('additive','recessive','dominant','general'), ...) +{ + lhs <- .lhs.formula(formula) + if (!is.Surv(data[,lhs])) + data[,lhs] <- Surv(abs(data[,lhs]),data[,lhs]>0) + data$genotype <- .recode.gt(data$genotype, match.arg(mode)) + m <- coxph(formula, data, ...) + x <- unname(coef(summary(m))['genotype',]) + test <- match.arg(test) + if (test == "LR") { + # work-around for anova() evaluation problems + LL <- function(m) tail(m$loglik,1) + m0 <- update(m, .null.model(formula, term='genotype')) + pvalue <- pchisq(2*(LL(m)-LL(m0)), 1, lower=FALSE) + } else { + pvalue <- pnorm(-abs(x[4]))*2 + } + keep.attr(data.frame( + pvalue, effect=x[1], stderr=x[3] + ), fit='coxph') +} + +score.survreg <- +function(formula, data, ploidy, test=c('LR','Wald'), + mode=c('additive','recessive','dominant','general'), ...) +{ + lhs <- .lhs.formula(formula) + if (!is.Surv(data[,lhs])) + data[,lhs] <- Surv(abs(data[,lhs]),data[,lhs]>0) + data$genotype <- .recode.gt(data$genotype, match.arg(mode)) + m <- survreg(formula, data, ...) + x <- unname(summary(m)$table['genotype',]) + test <- match.arg(test) + if (test == "LR") { + # work-around for anova() evaluation problems + LL <- function(m) tail(m$loglik,1) + m0 <- update(m, .null.model(formula, term='genotype')) + pvalue <- pchisq(2*(LL(m)-LL(m0)), 1, lower=FALSE) + } else { + pvalue <- x[4] + } + keep.attr(data.frame( + pvalue, effect=x[1], stderr=x[2] + ), fit='survreg') +} Modified: pkg/gt.db/R/test.R =================================================================== --- pkg/gt.db/R/test.R 2010-09-10 20:56:58 UTC (rev 58) +++ pkg/gt.db/R/test.R 2010-12-07 19:21:33 UTC (rev 59) @@ -27,14 +27,23 @@ .null.model <- function(formula, term='genotype') { - txt <- gsub(paste('[*+:]',term), '', .as.text(formula)) - eval(parse(text=gsub(term, '1', txt))) + drop <- grep(paste('\\b',term,'\\b',sep=''), + labels(terms(formula)), value=TRUE) + if (length(drop) == 0) + stop('no term(s) to drop') + drop <- paste('~ . -', paste(drop,collapse=' - ')) + update.formula(formula, as.formula(drop)) } .flat.model <- function(formula, term='genotype') { - txt <- .as.text(.null.model(formula, term)) - eval(parse(text=paste(txt, '+', term))) + drop <- grep(paste('\\b',term,'\\b',sep=''), + labels(terms(formula)), value=TRUE) + drop <- setdiff(drop, term) + if (length(drop) == 0) + stop('no term(s) to drop') + drop <- paste('~ . -', paste(drop,collapse=' - ')) + update.formula(formula, as.formula(drop)) } .lhs.formula <- function(formula) as.character(formula[2]) @@ -76,10 +85,14 @@ dsub$gender <- pt.data$gender # guess scoring function based on model and outcome + if (is.character(score.fn)) + score.fn <- eval(parse(text=paste('score.', score.fn, sep=''))) if (is.null(score.fn)) { binary <- (length(table(pt.data[,col[1]])) == 2) groups <- (length(grep(':genotype',dimnames(ft)[[2]]))) - fn <- if (!groups && binary && !dosage && (length(col)==2)) + fn <- if (is.Surv(pt.data[,col[1]])) + 'score.coxph' + else if (!groups && binary && !dosage && (length(col)==2)) 'score.trend' else paste('score', c('.lm','.glm')[binary+1], c('','.groups')[groups+1], sep='') @@ -87,7 +100,18 @@ score.fn <- eval(parse(text=fn)) } + pt.mask <- if.na(pt.filter,FALSE) & complete.cases(dsub) + dsub <- dsub[pt.mask,,drop=FALSE] + + # if score function has a precalc argument, evaluate it now + precalc <- eval(formals(score.fn)$precalc, + envir=list(formula=formula, data=dsub)) + new.score.fn <- score.fn + if (!is.null(precalc)) + new.score.fn <- function(...) score.fn(..., precalc=precalc) + id.cols <- which(names(gt.data) %in% c('assay.data.id','assay.name')) nr <- nrow(gt.data) + wrap.score.fn <- function(n) { gn <- gt.data[n,] my.error <- function(e) { @@ -98,15 +122,19 @@ my.error(e) invokeRestart('muffleWarning') } - d <- cbind(dsub, unpack.gt.matrix(gn, 'genotype', dosage=dosage)) - keep <- if.na(pt.filter,FALSE) & complete.cases(d) + dsub$genotype <- unpack.gt.matrix(gn, dosage=dosage)[pt.mask,1] + if (!is.null(precalc)) { + dsub$genotype <- if.na(dsub$genotype, + mean(dsub$genotype,na.rm=TRUE)) + keep <- TRUE + } else + keep <- !is.na(dsub$genotype) r <- tryCatch(withCallingHandlers( - score.fn(formula, d[keep,], gn$ploidy, ...), + new.score.fn(formula, dsub[keep,], gn$ploidy, ...), warning=my.warn), error=my.error) if (progress) progress.bar(n, nr) if (!is.null(r)) { - id <- gn[c('assay.data.id','assay.name')] - keep.attr(merge(id,r), .Attr=kept.attr(r)) + keep.attr(cbind(gn[id.cols],r), .Attr=kept.attr(r)) } } if (progress) progress.bar(0, nr) Modified: pkg/gt.db/R/tracks.R =================================================================== --- pkg/gt.db/R/tracks.R 2010-09-10 20:56:58 UTC (rev 58) +++ pkg/gt.db/R/tracks.R 2010-12-07 19:21:33 UTC (rev 59) @@ -31,18 +31,19 @@ tck <- function(x) { p <- pretty(x) ; p[p >= x[1] & p <= x[2]] } xscale <- current.viewport()$xscale scale <- match.arg(scale) - mul <- switch(match.arg(scale), Mb=1e6, Kb=1e3, 1) - grid.xaxis(tck(xscale), format(tck(xscale/mul))) + div <- switch(scale, Mb=1e6, Kb=1e3, 1) + grid.xaxis(tck(xscale), format(tck(xscale/div))) if (missing(xlab)) xlab <- paste(gt$scaffold[1], 'position,', scale) if (!is.null(xlab)) grid.text(xlab, y=unit(-3,'lines')) y <- unit(0,'npc') for (tn in 1:length(tracks)) { + tr <- tracks[[tn]] + if (is.null(tr)) next name <- names(tracks)[tn] if (is.null(name) || name=='') name <- sprintf("track%d", tn) - tr <- tracks[[tn]] args <- list(name=name, y=y, just='bottom', xscale=xscale) vp <- do.call('viewport', c(args, tr$vp)) pushViewport(vp) @@ -55,7 +56,7 @@ } pushViewport(viewport(name='border', y=unit(0,'npc'), height=y, just='bottom')) - grid.rect() + grid.rect(gp=gpar(fill='transparent')) upViewport() } Modified: pkg/gt.db/man/manhattan.plot.Rd =================================================================== --- pkg/gt.db/man/manhattan.plot.Rd 2010-09-10 20:56:58 UTC (rev 58) +++ pkg/gt.db/man/manhattan.plot.Rd 2010-12-07 19:21:33 UTC (rev 59) @@ -26,7 +26,8 @@ \usage{ manhattan.plot(y, data, gap=0, threshold=-log10(5e-8), around=0, xticks=c(1:12,14,16,18,20,22,'X','Y'), cex=0.25, - xlab=NULL, ylab=deparse(substitute(y)), yrange, ylim, + xlab=, ylab=expression(-log[10](pvalue)), yrange, ylim, + scale=c('Mb','Kb','bp'), col=c('#d0d0d0','#e0e0e0','#ff0000'), ...) } \arguments{ @@ -41,6 +42,8 @@ \item{cex, xlab, ylab, ylim}{see \code{\link[lattice:xyplot]{xyplot}}.} \item{yrange}{similar to \code{ylim}, except that this range will be padded as if it were the range of actual data points.} + \item{scale}{determines the scaling of tick labels on the X axis, if + the data all falls on a single chromosome.} \item{col}{colors to use for odd and even chromosomes, and hits.} \item{\dots}{additional arguments passed to \code{\link[lattice:xyplot]{xyplot}}.} Added: pkg/gt.db/man/ramToChar.Rd =================================================================== --- pkg/gt.db/man/ramToChar.Rd (rev 0) +++ pkg/gt.db/man/ramToChar.Rd 2010-12-07 19:21:33 UTC (rev 59) @@ -0,0 +1,52 @@ +% +% Copyright (C) 2009, Perlegen Sciences, Inc. +% Copyright (C) 2010, 23andMe, Inc. +% +% Written by David A. Hinds +% +% This is free software; you can redistribute it and/or modify it +% under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the license, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see +% +\name{charToRam} +\alias{charToRam} +\alias{ramToChar} +\title{Convert between Raw Matrices and Character Vectors} +\description{ + Convert between raw matrices and character vectors. +} +\usage{ +charToRam(str) +ramToChar(raw) +} +\arguments{ + \item{str}{a character vector.} + \item{raw}{a raw matrix.} +} +\value{ + For \code{charToRam}, a raw matrix with one column per string in + \code{char}. + + For \code{ramToChar}, a character vector with one element per column + in \code{raw}. +} +\seealso{ + \code{\link{hexToRam}}, \code{\link{ramToHex}}, + \code{\link{charToRaw}}, \code{\link{rawToChar}}. +} +\examples{ +charToRam(c('1234','5678','abcd')) +m <- matrix(charToRaw('abcd'),4,3) +print(m) +ramToChar(m) +} +\keyword{classes} Added: pkg/gt.db/man/score.coxph.Rd =================================================================== --- pkg/gt.db/man/score.coxph.Rd (rev 0) +++ pkg/gt.db/man/score.coxph.Rd 2010-12-07 19:21:33 UTC (rev 59) @@ -0,0 +1,84 @@ +% +% Copyright (C) 2010, 23andMe, Inc. +% +% Written by David A. Hinds +% +% This is free software; you can redistribute it and/or modify it +% under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the license, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see +% +\name{score.coxph} +\alias{score.coxph} +\alias{score.survreg} +\title{Test for Association using Survival Analysis} +\description{ + Test for association using either a semi-parametric Cox proportional + hazards model, or a parametric survival regression model. +} +\usage{ +score.coxph(formula, data, ploidy, test=c('LR','Wald'), + mode=c('additive','recessive','dominant','general'), ...) +score.survreg(formula, data, ploidy, test=c('LR','Wald'), + mode=c('additive','recessive','dominant','general'), ...) +} +\arguments{ + \item{formula}{a symbolic description of the model to be fit.} + \item{data}{a data frame containing the variables in the model.} + \item{ploidy}{see \code{\link{fetch.gt.data}}. Ignored.} + \item{test}{the test to use for computing P values.} + \item{mode}{genetic mode of action to test.} + \item{\dots}{additional arguments passed to + \code{\link[survival:coxph]{coxph}} or + \code{\link[survival:survreg]{survreg}}.} +} +\details{ + These functions perform association tests using survival regression + models. The model formula may include covariates and should contain a + single genotype term without interactions. The left-hand-side of the + model formula can either identify a survival object, or a numeric + vector. If it is numeric, then a survival object will be created + using the absolute magnitude of the value as the followup time and + the sign of the value as the event indicator. + + At sex linked loci, haploid males are treated the same as the + corresponding diploid female homozygotes. +} +\value{ + A data frame with one row and three columns. An effect size is not + reported if the mode of action is \code{'general'}. + \item{pvalue}{P value for the specified \code{test}.} + \item{effect}{the regression coefficient (log hazard) for the genotype + term.} + \item{stderr}{the standard error of the effect size estimate.} +} +\seealso{ + \code{\link[survival:Surv]{Surv}}, + \code{\link[survival:coxph]{coxph}}, + \code{\link[survival:survreg]{survreg}}, + \code{\link{score.gt.data}}. +} +\examples{ +gt.demo.check() +library(survival) +pt <- fetch.pt.data('Demo_2') +gt <- fetch.gt.data('Demo_2')[1:10,] +pt$age <- 50+10*rnorm(nrow(pt)) +pt$event <- rbinom(nrow(pt),1,pt$age/100) +pt$survival <- Surv(pt$age,pt$event) +score.gt.data(survival~genotype, pt, gt, score.coxph) +score.gt.data(survival~genotype, pt, gt, score.survreg) +# use numeric coding for outcome +pt$survival <- pt$age * ifelse(pt$event,1,-1) +score.gt.data(survival~genotype, pt, gt, score.survreg) +} +\keyword{htest} +\keyword{regression} Modified: pkg/gt.db/man/score.glm.Rd =================================================================== --- pkg/gt.db/man/score.glm.Rd 2010-09-10 20:56:58 UTC (rev 58) +++ pkg/gt.db/man/score.glm.Rd 2010-12-07 19:21:33 UTC (rev 59) @@ -1,7 +1,8 @@ % % Copyright (C) 2009, Perlegen Sciences, Inc. +% Copyright (C) 2010, 23andMe, Inc. % -% Written by David A. Hinds +% Written by David A. Hinds % % This is free software; you can redistribute it and/or modify it % under the terms of the GNU General Public License as published by @@ -51,16 +52,17 @@ \value{ A data frame with one row and three columns. An effect size is not reported if the mode of action is \code{'general'}. - \item{pvalue}{P value for an F test comparing the full and null models.} + \item{pvalue}{P value for the specified \code{test}.} \item{effect}{the regression coefficient (log odds) for \code{term}.} \item{stderr}{the standard error of the effect size estimate.} } \seealso{ - \code{\link{lm}}, + \code{\link{glm}}, \code{\link{score.trend}}, \code{\link{score.chisq}}, \code{\link{score.glm.general}}, \code{\link{score.glm.groups}}, + \code{\link{score.glm.scoretest}}, \code{\link{score.gt.data}}. } \examples{ Modified: pkg/gt.db/man/score.glm.general.Rd =================================================================== --- pkg/gt.db/man/score.glm.general.Rd 2010-09-10 20:56:58 UTC (rev 58) +++ pkg/gt.db/man/score.glm.general.Rd 2010-12-07 19:21:33 UTC (rev 59) @@ -45,7 +45,7 @@ the second describes the fitted additive effect; and the third describes the fitted dominance effect. \item{term}{\code{NA}, \code{'additive'}, or \code{'dominance'}.} - \item{pvalue}{For the overall test, the ANOVA F test result. + \item{pvalue}{For the overall test, the ANOVA test result. For additive and dominance effects, Wald test results.} \item{effect}{the regression coefficient, for the additive or dominant effect.} Added: pkg/gt.db/man/score.glm.scoretest.Rd =================================================================== --- pkg/gt.db/man/score.glm.scoretest.Rd (rev 0) +++ pkg/gt.db/man/score.glm.scoretest.Rd 2010-12-07 19:21:33 UTC (rev 59) @@ -0,0 +1,69 @@ +% +% Copyright (C) 2010, 23andMe, Inc. +% +% Written by David A. Hinds +% +% This is free software; you can redistribute it and/or modify it +% under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the license, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see +% +\name{score.glm.scoretest} +\alias{score.glm.scoretest} +\title{Fast Score Test for Association using Logistic Regression} +\description{ + A score test for association with a binary outcome based on logistic + regression, using a precalculated null model. +} +\usage{ +score.glm.scoretest(formula, data, ploidy, + precalc=glm(.null.model(formula),binomial,data)) +} +\arguments{ + \item{formula}{a symbolic description of the model to be fit.} + \item{data}{a data frame containing the variables in the model.} + \item{ploidy}{see \code{\link{fetch.gt.data}}. Ignored.} + \item{precalc}{a null model to be precalculated once.} +} +\details{ + This is a variation on \code{\link{score.glm}} that calculates P + values based on Rao's score test. It precalculates the base model + (which can contain covariates). In order to use this precalculated + model, there can be no missing genotype data, so missing observations + are replaced by the mean. A consequence is that only the additive + mode of action is supported. It is much faster than + \code{\link{score.glm}}. + + At sex linked loci, haploid males are treated the same as the + corresponding diploid female homozygotes. +} +\value{ + A data frame with one row and three columns: + \item{pvalue}{P value for the score test.} + \item{effect}{always \code{NA}.} + \item{stderr}{always \code{NA}.} +} +\seealso{ + \code{\link{glm}}, \code{\link[statmod:glm.scoretest]{glm.scoretest}}, + \code{\link{score.glm}}, \code{\link{score.trend}}, + \code{\link{score.gt.data}}. +} +\examples{ +gt.demo.check() +pt <- fetch.pt.data('Demo_2') +gt <- fetch.gt.data('Demo_2')[1:10,] +pt$status <- as.logical(rbinom(nrow(pt), 1, 0.5)) +score.gt.data(status~genotype, pt, gt, score.glm, test='Rao') +score.gt.data(status~genotype, pt, gt, score.trend) +score.gt.data(status~genotype, pt, gt, score.glm.scoretest) +} +\keyword{htest} +\keyword{regression} Modified: pkg/gt.db/man/score.gt.data.Rd =================================================================== --- pkg/gt.db/man/score.gt.data.Rd 2010-09-10 20:56:58 UTC (rev 58) +++ pkg/gt.db/man/score.gt.data.Rd 2010-12-07 19:21:33 UTC (rev 59) @@ -32,7 +32,8 @@ \item{formula}{a symbolic description of the model to be fit.} \item{pt.data}{a data frame of phenotypes from \code{\link{fetch.pt.data}}.} \item{gt.data}{a data frame of genotypes from \code{\link{fetch.gt.data}}.} - \item{score.fn}{the scoring function to be applied to each SNP.} + \item{score.fn}{the scoring function to be applied to each SNP, or a + short name like \code{'lm'}, \code{'glm'}, etc.} \item{pt.filter}{an expression to use for subsetting on the phenotype table.} \item{gt.filter}{an expression to use for subsetting on the @@ -53,8 +54,8 @@ the form of the model formula. A trend test will be used for simple binary-outcome models of the form "status~genotype". Logistic regression will be used for binary outcomes with more complicated - model functions. And linear regression will be used for - quantitative outcomes. + model functions. Cox proportional hazards will be used for a survival + outcome. And linear regression will be used for quantitative outcomes. The filters \code{pt.filter} and \code{gt.filter} are evaluated in the context of the \code{pt.data} and \code{gt.data} tables, @@ -89,6 +90,8 @@ \code{\link{score.glm}}, \code{\link{score.glm.general}}, \code{\link{score.glm.groups}}, + \code{\link{score.coxph}}, + \code{\link{score.survreg}}, \code{\link{score.and.store}}. } \examples{