[Gtdb-commits] r59 - in pkg/gt.db: . R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Dec 7 20:21:33 CET 2010
Author: dahinds
Date: 2010-12-07 20:21:33 +0100 (Tue, 07 Dec 2010)
New Revision: 59
Added:
pkg/gt.db/man/ramToChar.Rd
pkg/gt.db/man/score.coxph.Rd
pkg/gt.db/man/score.glm.scoretest.Rd
Modified:
pkg/gt.db/DESCRIPTION
pkg/gt.db/R/genotype.R
pkg/gt.db/R/gplot.R
pkg/gt.db/R/hwe.R
pkg/gt.db/R/score.R
pkg/gt.db/R/test.R
pkg/gt.db/R/tracks.R
pkg/gt.db/man/manhattan.plot.Rd
pkg/gt.db/man/score.glm.Rd
pkg/gt.db/man/score.glm.general.Rd
pkg/gt.db/man/score.gt.data.Rd
Log:
- fix to association tests based on dosage
- minor improvements to manhattan.plot, new 'scale' argument
- performance improvement for exact HWE tests
- automatic promotion of factors to scores in linear models
- added 'score.glm.scoretest' for faster GLM scoring
- added 'score.coxph' and 'score.survreg' scoring functions
- made model-parsing in .null.model, .flat.model more robust
Modified: pkg/gt.db/DESCRIPTION
===================================================================
--- pkg/gt.db/DESCRIPTION 2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/DESCRIPTION 2010-12-07 19:21:33 UTC (rev 59)
@@ -7,6 +7,6 @@
Description: Framework for storing and manipulating genotype data,
phenotype data, and association study results
License: GPL
-Depends: R (>= 2.7.0), lattice, grid, methods, DBI
-Suggests: nlme, cluster, mda, RMySQL, RSQLite
+Depends: R (>= 2.7.0), survival, lattice, grid, methods, DBI
+Suggests: statmod, nlme, cluster, mda, RMySQL, RSQLite
Packaged: 2010-08-12 20:47:05 UTC; dhinds
Modified: pkg/gt.db/R/genotype.R
===================================================================
--- pkg/gt.db/R/genotype.R 2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/R/genotype.R 2010-12-07 19:21:33 UTC (rev 59)
@@ -202,7 +202,7 @@
}
if (dosage) {
fn <- function(x) unpack.raw.data(hexToRaw(x),'dosage')$dosage
- g <- lapply(gt.data$raw.data)
+ g <- lapply(gt.data$raw.data, fn)
} else {
a <- strsplit(gt.data$alleles, split='/', fixed=TRUE)
g <- mapply(gt.split, gt.data$genotype, ...,
Modified: pkg/gt.db/R/gplot.R
===================================================================
--- pkg/gt.db/R/gplot.R 2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/R/gplot.R 2010-12-07 19:21:33 UTC (rev 59)
@@ -51,34 +51,43 @@
manhattan.plot <-
function(y, data, gap=0, threshold=-log10(5e-8), around=0,
xticks=c(1:12,14,16,18,20,22,'X','Y'), cex=0.25,
- xlab=NULL, ylab=deparse(substitute(y)), yrange, ylim,
+ xlab, ylab=expression(-log[10](pvalue)), yrange, ylim,
+ scale=c('Mb','Kb','bp'),
col=c('#d0d0d0','#e0e0e0','#ff0000'), ...)
{
val <- eval(substitute(y), data, parent.frame())
- len <- with(data, tapply(position, .sort.levels(scaffold),
+ scale <- match.arg(scale)
+ div <- switch(scale, Mb=1e6, Kb=1000, 1)
+ len <- with(data, tapply(position/div, .sort.levels(scaffold),
max, na.rm=TRUE))
ofs <- cumsum(c(0,as.numeric(len+gap)))
mid <- (ofs[-1] + ofs[-length(ofs)])/2
- pos <- data$position + ofs[match(data$scaffold, names(len))]
+ pos <- data$position/div + ofs[match(data$scaffold, names(len))]
set <- list(superpose.symbol=list(col=col))
grp <- (match(data$scaffold, names(len)) %% 2)
grp <- ifelse(val > threshold, 2, grp)
if (around > 0) {
hits <- pos[which(val > threshold)]
- keep <- (diff(hits) > around)
+ keep <- (diff(hits) > around/div)
hits <- hits[c(TRUE,keep) | c(keep,TRUE)]
for (hit in hits) {
- grp[abs(pos - hit) < around] <- 2
+ grp[abs(pos - hit) < around/div] <- 2
}
}
xlim <- range(pos,na.rm=TRUE)
xlim <- xlim + (xlim[2]-xlim[1]) * c(-0.02,0.02)
if (missing(ylim)) {
if (missing(yrange))
- yrange <- range(c(val,0,1.1*threshold))
+ yrange <- range(c(val,0,1.1*threshold), na.rm=TRUE)
padding <- lattice.options()$axis.padding$numeric
ylim <- yrange + diff(yrange) * padding * c(-1,1)
}
+ if (missing(xlab)) {
+ if (length(len) == 1)
+ xlab <- paste('position,', scale)
+ else
+ xlab <- 'Chromosome'
+ }
panel.fn <- function(...)
{ panel.xyplot(...) ; panel.refline(h=threshold) }
if (length(len) == 1) {
Modified: pkg/gt.db/R/hwe.R
===================================================================
--- pkg/gt.db/R/hwe.R 2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/R/hwe.R 2010-12-07 19:21:33 UTC (rev 59)
@@ -23,38 +23,29 @@
.hwe.exact <- function(aa, ab, bb, tail)
{
- # use integer constants to avoid implicit casts
- m <- ab %/% 2L
- xx <- aa + m + 1L
- xy <- ab %% 2L + 1L
- yy <- bb + m + 1L
- n <- pmin(xx,yy) - 1L
+ nr <- pmin(aa,bb)*2+ab
+ nn <- aa+ab+bb
+ xy <- as.integer(nr*(2*nn-nr) / (2*nn))
+ xy <- xy + ((nr+xy) %% 2)
+ xx <- (nr-xy) %/% 2
+ yy <- (nn-xy-xx)
- # precompute lookup table of log gamma values
- # note that xx, xy, yy are offset by 1 for indexing
- LG <- lgamma(1:(max(xx,yy,2L*n)+2))
- v <- log(2)*ab - LG[aa+1L] - LG[ab+1L] - LG[bb+1L]
- log.p <- function(xx, xy, yy, i)
- log(2)*(xy+2L*i-1L) - LG[xx-i] - LG[xy+2L*i] - LG[yy-i]
-
- fn.lower <- function(v, m, n, ...)
+ p.fn <- function(xx, xy, yy)
{
- p <- exp(log.p(...,0:n)-v)
- sum(p[0:m+1L])/sum(p)
+ if (is.na(xx+xy+yy)) return(NA)
+ i <- 1:(xy %/% 2)
+ lt <- cumprod((xy-2*i+2)*(xy-2*i+1)/(4*(xx+i)*(yy+i)))
+ i <- 1:min(xx,yy)
+ ut <- cumprod(4*(xx-i+1)*(yy-i+1)/((xy+2*i)*(xy+2*i-1)))
+ c(rev(lt),1,ut)
}
- fn.upper <- function(v, m, n, ...)
- {
- p <- exp(log.p(...,0:n)-v)
- sum(p[m:n+1L])/sum(p)
- }
- fn.both <- function(v, m, n, ...)
- {
- p <- exp(log.p(...,0:n)-v)
- sum(p[p<=1])/sum(p)
- }
- mapply(switch(tail, lower=fn.lower, upper=fn.upper, fn.both),
- v, m, n, xx, xy, yy)
+ fn.lower <- function(brk, p) sum(p[1:brk]) / sum(p)
+ fn.upper <- function(brk, p) sum(p[brk:length(p)]) / sum(p)
+ fn.both <- function(brk, p) sum(p[p <= p[brk]]) / sum(p)
+ fn <- switch(tail, lower=fn.lower, upper=fn.upper, fn.both)
+ mapply(function(brk,xx,xy,yy) fn(brk, p.fn(xx,xy,yy)),
+ (ab %/% 2)+1, xx, xy, yy)
}
hwe.test <-
Modified: pkg/gt.db/R/score.R
===================================================================
--- pkg/gt.db/R/score.R 2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/R/score.R 2010-12-07 19:21:33 UTC (rev 59)
@@ -170,6 +170,9 @@
mode=c('additive','recessive','dominant','general'))
{
data$genotype <- .recode.gt(data$genotype, match.arg(mode))
+ lhs <- .lhs.formula(formula)
+ if (lhs %in% names(data) && is.factor(data[,lhs]))
+ data[,lhs] <- as.numeric(data[,lhs])
m <- lm(formula, data)
nf <- .null.model(formula, term=drop)
pvalue <- anova(update(m,nf), m)[2,6]
@@ -187,6 +190,9 @@
return(keep.attr(cbind(term=NA, r), fit='lm'))
}
data$genotype <- factor(data$genotype)
+ lhs <- .lhs.formula(formula)
+ if (lhs %in% names(data) && is.factor(data[,lhs]))
+ data[,lhs] <- as.numeric(data[,lhs])
contrasts <- list(genotype=matrix(c(0,1,2,0,1,0),3,2))
m <- lm(formula, data, contrasts=contrasts)
nf <- .null.model(formula)
@@ -250,6 +256,16 @@
), fit='glm')
}
+score.glm.scoretest <-
+ function (formula, data, ploidy,
+ precalc=glm(.null.model(formula),binomial,data))
+{
+ require(statmod)
+ pvalue <- 2 * pnorm(-abs(glm.scoretest(precalc,data$genotype)))
+ keep.attr(data.frame(pvalue, effect = NA, stderr = NA),
+ fit = "glm.scoretest")
+}
+
score.glm.general <- function(formula, data, ploidy)
{
if (length(unique(data$genotype)) < 3) {
@@ -348,3 +364,52 @@
), fit='gls')
}
+#-----------------------------------------------------------------------
+
+score.coxph <-
+function(formula, data, ploidy, test=c('LR','Wald'),
+ mode=c('additive','recessive','dominant','general'), ...)
+{
+ lhs <- .lhs.formula(formula)
+ if (!is.Surv(data[,lhs]))
+ data[,lhs] <- Surv(abs(data[,lhs]),data[,lhs]>0)
+ data$genotype <- .recode.gt(data$genotype, match.arg(mode))
+ m <- coxph(formula, data, ...)
+ x <- unname(coef(summary(m))['genotype',])
+ test <- match.arg(test)
+ if (test == "LR") {
+ # work-around for anova() evaluation problems
+ LL <- function(m) tail(m$loglik,1)
+ m0 <- update(m, .null.model(formula, term='genotype'))
+ pvalue <- pchisq(2*(LL(m)-LL(m0)), 1, lower=FALSE)
+ } else {
+ pvalue <- pnorm(-abs(x[4]))*2
+ }
+ keep.attr(data.frame(
+ pvalue, effect=x[1], stderr=x[3]
+ ), fit='coxph')
+}
+
+score.survreg <-
+function(formula, data, ploidy, test=c('LR','Wald'),
+ mode=c('additive','recessive','dominant','general'), ...)
+{
+ lhs <- .lhs.formula(formula)
+ if (!is.Surv(data[,lhs]))
+ data[,lhs] <- Surv(abs(data[,lhs]),data[,lhs]>0)
+ data$genotype <- .recode.gt(data$genotype, match.arg(mode))
+ m <- survreg(formula, data, ...)
+ x <- unname(summary(m)$table['genotype',])
+ test <- match.arg(test)
+ if (test == "LR") {
+ # work-around for anova() evaluation problems
+ LL <- function(m) tail(m$loglik,1)
+ m0 <- update(m, .null.model(formula, term='genotype'))
+ pvalue <- pchisq(2*(LL(m)-LL(m0)), 1, lower=FALSE)
+ } else {
+ pvalue <- x[4]
+ }
+ keep.attr(data.frame(
+ pvalue, effect=x[1], stderr=x[2]
+ ), fit='survreg')
+}
Modified: pkg/gt.db/R/test.R
===================================================================
--- pkg/gt.db/R/test.R 2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/R/test.R 2010-12-07 19:21:33 UTC (rev 59)
@@ -27,14 +27,23 @@
.null.model <- function(formula, term='genotype')
{
- txt <- gsub(paste('[*+:]',term), '', .as.text(formula))
- eval(parse(text=gsub(term, '1', txt)))
+ drop <- grep(paste('\\b',term,'\\b',sep=''),
+ labels(terms(formula)), value=TRUE)
+ if (length(drop) == 0)
+ stop('no term(s) to drop')
+ drop <- paste('~ . -', paste(drop,collapse=' - '))
+ update.formula(formula, as.formula(drop))
}
.flat.model <- function(formula, term='genotype')
{
- txt <- .as.text(.null.model(formula, term))
- eval(parse(text=paste(txt, '+', term)))
+ drop <- grep(paste('\\b',term,'\\b',sep=''),
+ labels(terms(formula)), value=TRUE)
+ drop <- setdiff(drop, term)
+ if (length(drop) == 0)
+ stop('no term(s) to drop')
+ drop <- paste('~ . -', paste(drop,collapse=' - '))
+ update.formula(formula, as.formula(drop))
}
.lhs.formula <- function(formula) as.character(formula[2])
@@ -76,10 +85,14 @@
dsub$gender <- pt.data$gender
# guess scoring function based on model and outcome
+ if (is.character(score.fn))
+ score.fn <- eval(parse(text=paste('score.', score.fn, sep='')))
if (is.null(score.fn)) {
binary <- (length(table(pt.data[,col[1]])) == 2)
groups <- (length(grep(':genotype',dimnames(ft)[[2]])))
- fn <- if (!groups && binary && !dosage && (length(col)==2))
+ fn <- if (is.Surv(pt.data[,col[1]]))
+ 'score.coxph'
+ else if (!groups && binary && !dosage && (length(col)==2))
'score.trend'
else paste('score', c('.lm','.glm')[binary+1],
c('','.groups')[groups+1], sep='')
@@ -87,7 +100,18 @@
score.fn <- eval(parse(text=fn))
}
+ pt.mask <- if.na(pt.filter,FALSE) & complete.cases(dsub)
+ dsub <- dsub[pt.mask,,drop=FALSE]
+
+ # if score function has a precalc argument, evaluate it now
+ precalc <- eval(formals(score.fn)$precalc,
+ envir=list(formula=formula, data=dsub))
+ new.score.fn <- score.fn
+ if (!is.null(precalc))
+ new.score.fn <- function(...) score.fn(..., precalc=precalc)
+ id.cols <- which(names(gt.data) %in% c('assay.data.id','assay.name'))
nr <- nrow(gt.data)
+
wrap.score.fn <- function(n) {
gn <- gt.data[n,]
my.error <- function(e) {
@@ -98,15 +122,19 @@
my.error(e)
invokeRestart('muffleWarning')
}
- d <- cbind(dsub, unpack.gt.matrix(gn, 'genotype', dosage=dosage))
- keep <- if.na(pt.filter,FALSE) & complete.cases(d)
+ dsub$genotype <- unpack.gt.matrix(gn, dosage=dosage)[pt.mask,1]
+ if (!is.null(precalc)) {
+ dsub$genotype <- if.na(dsub$genotype,
+ mean(dsub$genotype,na.rm=TRUE))
+ keep <- TRUE
+ } else
+ keep <- !is.na(dsub$genotype)
r <- tryCatch(withCallingHandlers(
- score.fn(formula, d[keep,], gn$ploidy, ...),
+ new.score.fn(formula, dsub[keep,], gn$ploidy, ...),
warning=my.warn), error=my.error)
if (progress) progress.bar(n, nr)
if (!is.null(r)) {
- id <- gn[c('assay.data.id','assay.name')]
- keep.attr(merge(id,r), .Attr=kept.attr(r))
+ keep.attr(cbind(gn[id.cols],r), .Attr=kept.attr(r))
}
}
if (progress) progress.bar(0, nr)
Modified: pkg/gt.db/R/tracks.R
===================================================================
--- pkg/gt.db/R/tracks.R 2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/R/tracks.R 2010-12-07 19:21:33 UTC (rev 59)
@@ -31,18 +31,19 @@
tck <- function(x) { p <- pretty(x) ; p[p >= x[1] & p <= x[2]] }
xscale <- current.viewport()$xscale
scale <- match.arg(scale)
- mul <- switch(match.arg(scale), Mb=1e6, Kb=1e3, 1)
- grid.xaxis(tck(xscale), format(tck(xscale/mul)))
+ div <- switch(scale, Mb=1e6, Kb=1e3, 1)
+ grid.xaxis(tck(xscale), format(tck(xscale/div)))
if (missing(xlab))
xlab <- paste(gt$scaffold[1], 'position,', scale)
if (!is.null(xlab))
grid.text(xlab, y=unit(-3,'lines'))
y <- unit(0,'npc')
for (tn in 1:length(tracks)) {
+ tr <- tracks[[tn]]
+ if (is.null(tr)) next
name <- names(tracks)[tn]
if (is.null(name) || name=='')
name <- sprintf("track%d", tn)
- tr <- tracks[[tn]]
args <- list(name=name, y=y, just='bottom', xscale=xscale)
vp <- do.call('viewport', c(args, tr$vp))
pushViewport(vp)
@@ -55,7 +56,7 @@
}
pushViewport(viewport(name='border', y=unit(0,'npc'),
height=y, just='bottom'))
- grid.rect()
+ grid.rect(gp=gpar(fill='transparent'))
upViewport()
}
Modified: pkg/gt.db/man/manhattan.plot.Rd
===================================================================
--- pkg/gt.db/man/manhattan.plot.Rd 2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/man/manhattan.plot.Rd 2010-12-07 19:21:33 UTC (rev 59)
@@ -26,7 +26,8 @@
\usage{
manhattan.plot(y, data, gap=0, threshold=-log10(5e-8), around=0,
xticks=c(1:12,14,16,18,20,22,'X','Y'), cex=0.25,
- xlab=NULL, ylab=deparse(substitute(y)), yrange, ylim,
+ xlab=, ylab=expression(-log[10](pvalue)), yrange, ylim,
+ scale=c('Mb','Kb','bp'),
col=c('#d0d0d0','#e0e0e0','#ff0000'), ...)
}
\arguments{
@@ -41,6 +42,8 @@
\item{cex, xlab, ylab, ylim}{see \code{\link[lattice:xyplot]{xyplot}}.}
\item{yrange}{similar to \code{ylim}, except that this range will be
padded as if it were the range of actual data points.}
+ \item{scale}{determines the scaling of tick labels on the X axis, if
+ the data all falls on a single chromosome.}
\item{col}{colors to use for odd and even chromosomes, and hits.}
\item{\dots}{additional arguments passed to
\code{\link[lattice:xyplot]{xyplot}}.}
Added: pkg/gt.db/man/ramToChar.Rd
===================================================================
--- pkg/gt.db/man/ramToChar.Rd (rev 0)
+++ pkg/gt.db/man/ramToChar.Rd 2010-12-07 19:21:33 UTC (rev 59)
@@ -0,0 +1,52 @@
+%
+% Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
+%
+% Written by David A. Hinds <dhinds at sonic.net>
+%
+% This is free software; you can redistribute it and/or modify it
+% under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the license, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program. If not, see <http://www.gnu.org/licenses/>
+%
+\name{charToRam}
+\alias{charToRam}
+\alias{ramToChar}
+\title{Convert between Raw Matrices and Character Vectors}
+\description{
+ Convert between raw matrices and character vectors.
+}
+\usage{
+charToRam(str)
+ramToChar(raw)
+}
+\arguments{
+ \item{str}{a character vector.}
+ \item{raw}{a raw matrix.}
+}
+\value{
+ For \code{charToRam}, a raw matrix with one column per string in
+ \code{char}.
+
+ For \code{ramToChar}, a character vector with one element per column
+ in \code{raw}.
+}
+\seealso{
+ \code{\link{hexToRam}}, \code{\link{ramToHex}},
+ \code{\link{charToRaw}}, \code{\link{rawToChar}}.
+}
+\examples{
+charToRam(c('1234','5678','abcd'))
+m <- matrix(charToRaw('abcd'),4,3)
+print(m)
+ramToChar(m)
+}
+\keyword{classes}
Added: pkg/gt.db/man/score.coxph.Rd
===================================================================
--- pkg/gt.db/man/score.coxph.Rd (rev 0)
+++ pkg/gt.db/man/score.coxph.Rd 2010-12-07 19:21:33 UTC (rev 59)
@@ -0,0 +1,84 @@
+%
+% Copyright (C) 2010, 23andMe, Inc.
+%
+% Written by David A. Hinds <dhinds at 23andMe.com>
+%
+% This is free software; you can redistribute it and/or modify it
+% under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the license, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program. If not, see <http://www.gnu.org/licenses/>
+%
+\name{score.coxph}
+\alias{score.coxph}
+\alias{score.survreg}
+\title{Test for Association using Survival Analysis}
+\description{
+ Test for association using either a semi-parametric Cox proportional
+ hazards model, or a parametric survival regression model.
+}
+\usage{
+score.coxph(formula, data, ploidy, test=c('LR','Wald'),
+ mode=c('additive','recessive','dominant','general'), ...)
+score.survreg(formula, data, ploidy, test=c('LR','Wald'),
+ mode=c('additive','recessive','dominant','general'), ...)
+}
+\arguments{
+ \item{formula}{a symbolic description of the model to be fit.}
+ \item{data}{a data frame containing the variables in the model.}
+ \item{ploidy}{see \code{\link{fetch.gt.data}}. Ignored.}
+ \item{test}{the test to use for computing P values.}
+ \item{mode}{genetic mode of action to test.}
+ \item{\dots}{additional arguments passed to
+ \code{\link[survival:coxph]{coxph}} or
+ \code{\link[survival:survreg]{survreg}}.}
+}
+\details{
+ These functions perform association tests using survival regression
+ models. The model formula may include covariates and should contain a
+ single genotype term without interactions. The left-hand-side of the
+ model formula can either identify a survival object, or a numeric
+ vector. If it is numeric, then a survival object will be created
+ using the absolute magnitude of the value as the followup time and
+ the sign of the value as the event indicator.
+
+ At sex linked loci, haploid males are treated the same as the
+ corresponding diploid female homozygotes.
+}
+\value{
+ A data frame with one row and three columns. An effect size is not
+ reported if the mode of action is \code{'general'}.
+ \item{pvalue}{P value for the specified \code{test}.}
+ \item{effect}{the regression coefficient (log hazard) for the genotype
+ term.}
+ \item{stderr}{the standard error of the effect size estimate.}
+}
+\seealso{
+ \code{\link[survival:Surv]{Surv}},
+ \code{\link[survival:coxph]{coxph}},
+ \code{\link[survival:survreg]{survreg}},
+ \code{\link{score.gt.data}}.
+}
+\examples{
+gt.demo.check()
+library(survival)
+pt <- fetch.pt.data('Demo_2')
+gt <- fetch.gt.data('Demo_2')[1:10,]
+pt$age <- 50+10*rnorm(nrow(pt))
+pt$event <- rbinom(nrow(pt),1,pt$age/100)
+pt$survival <- Surv(pt$age,pt$event)
+score.gt.data(survival~genotype, pt, gt, score.coxph)
+score.gt.data(survival~genotype, pt, gt, score.survreg)
+# use numeric coding for outcome
+pt$survival <- pt$age * ifelse(pt$event,1,-1)
+score.gt.data(survival~genotype, pt, gt, score.survreg)
+}
+\keyword{htest}
+\keyword{regression}
Modified: pkg/gt.db/man/score.glm.Rd
===================================================================
--- pkg/gt.db/man/score.glm.Rd 2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/man/score.glm.Rd 2010-12-07 19:21:33 UTC (rev 59)
@@ -1,7 +1,8 @@
%
% Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
%
-% Written by David A. Hinds <dhinds at sonic.net>
+% Written by David A. Hinds <dhinds at 23andMe.com>
%
% This is free software; you can redistribute it and/or modify it
% under the terms of the GNU General Public License as published by
@@ -51,16 +52,17 @@
\value{
A data frame with one row and three columns. An effect size is not
reported if the mode of action is \code{'general'}.
- \item{pvalue}{P value for an F test comparing the full and null models.}
+ \item{pvalue}{P value for the specified \code{test}.}
\item{effect}{the regression coefficient (log odds) for \code{term}.}
\item{stderr}{the standard error of the effect size estimate.}
}
\seealso{
- \code{\link{lm}},
+ \code{\link{glm}},
\code{\link{score.trend}},
\code{\link{score.chisq}},
\code{\link{score.glm.general}},
\code{\link{score.glm.groups}},
+ \code{\link{score.glm.scoretest}},
\code{\link{score.gt.data}}.
}
\examples{
Modified: pkg/gt.db/man/score.glm.general.Rd
===================================================================
--- pkg/gt.db/man/score.glm.general.Rd 2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/man/score.glm.general.Rd 2010-12-07 19:21:33 UTC (rev 59)
@@ -45,7 +45,7 @@
the second describes the fitted additive effect; and the third
describes the fitted dominance effect.
\item{term}{\code{NA}, \code{'additive'}, or \code{'dominance'}.}
- \item{pvalue}{For the overall test, the ANOVA F test result.
+ \item{pvalue}{For the overall test, the ANOVA test result.
For additive and dominance effects, Wald test results.}
\item{effect}{the regression coefficient, for the additive or
dominant effect.}
Added: pkg/gt.db/man/score.glm.scoretest.Rd
===================================================================
--- pkg/gt.db/man/score.glm.scoretest.Rd (rev 0)
+++ pkg/gt.db/man/score.glm.scoretest.Rd 2010-12-07 19:21:33 UTC (rev 59)
@@ -0,0 +1,69 @@
+%
+% Copyright (C) 2010, 23andMe, Inc.
+%
+% Written by David A. Hinds <dhinds at 23andMe.com>
+%
+% This is free software; you can redistribute it and/or modify it
+% under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the license, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program. If not, see <http://www.gnu.org/licenses/>
+%
+\name{score.glm.scoretest}
+\alias{score.glm.scoretest}
+\title{Fast Score Test for Association using Logistic Regression}
+\description{
+ A score test for association with a binary outcome based on logistic
+ regression, using a precalculated null model.
+}
+\usage{
+score.glm.scoretest(formula, data, ploidy,
+ precalc=glm(.null.model(formula),binomial,data))
+}
+\arguments{
+ \item{formula}{a symbolic description of the model to be fit.}
+ \item{data}{a data frame containing the variables in the model.}
+ \item{ploidy}{see \code{\link{fetch.gt.data}}. Ignored.}
+ \item{precalc}{a null model to be precalculated once.}
+}
+\details{
+ This is a variation on \code{\link{score.glm}} that calculates P
+ values based on Rao's score test. It precalculates the base model
+ (which can contain covariates). In order to use this precalculated
+ model, there can be no missing genotype data, so missing observations
+ are replaced by the mean. A consequence is that only the additive
+ mode of action is supported. It is much faster than
+ \code{\link{score.glm}}.
+
+ At sex linked loci, haploid males are treated the same as the
+ corresponding diploid female homozygotes.
+}
+\value{
+ A data frame with one row and three columns:
+ \item{pvalue}{P value for the score test.}
+ \item{effect}{always \code{NA}.}
+ \item{stderr}{always \code{NA}.}
+}
+\seealso{
+ \code{\link{glm}}, \code{\link[statmod:glm.scoretest]{glm.scoretest}},
+ \code{\link{score.glm}}, \code{\link{score.trend}},
+ \code{\link{score.gt.data}}.
+}
+\examples{
+gt.demo.check()
+pt <- fetch.pt.data('Demo_2')
+gt <- fetch.gt.data('Demo_2')[1:10,]
+pt$status <- as.logical(rbinom(nrow(pt), 1, 0.5))
+score.gt.data(status~genotype, pt, gt, score.glm, test='Rao')
+score.gt.data(status~genotype, pt, gt, score.trend)
+score.gt.data(status~genotype, pt, gt, score.glm.scoretest)
+}
+\keyword{htest}
+\keyword{regression}
Modified: pkg/gt.db/man/score.gt.data.Rd
===================================================================
--- pkg/gt.db/man/score.gt.data.Rd 2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/man/score.gt.data.Rd 2010-12-07 19:21:33 UTC (rev 59)
@@ -32,7 +32,8 @@
\item{formula}{a symbolic description of the model to be fit.}
\item{pt.data}{a data frame of phenotypes from \code{\link{fetch.pt.data}}.}
\item{gt.data}{a data frame of genotypes from \code{\link{fetch.gt.data}}.}
- \item{score.fn}{the scoring function to be applied to each SNP.}
+ \item{score.fn}{the scoring function to be applied to each SNP, or a
+ short name like \code{'lm'}, \code{'glm'}, etc.}
\item{pt.filter}{an expression to use for subsetting on the
phenotype table.}
\item{gt.filter}{an expression to use for subsetting on the
@@ -53,8 +54,8 @@
the form of the model formula. A trend test will be used for simple
binary-outcome models of the form "status~genotype". Logistic
regression will be used for binary outcomes with more complicated
- model functions. And linear regression will be used for
- quantitative outcomes.
+ model functions. Cox proportional hazards will be used for a survival
+ outcome. And linear regression will be used for quantitative outcomes.
The filters \code{pt.filter} and \code{gt.filter} are evaluated
in the context of the \code{pt.data} and \code{gt.data} tables,
@@ -89,6 +90,8 @@
\code{\link{score.glm}},
\code{\link{score.glm.general}},
\code{\link{score.glm.groups}},
+ \code{\link{score.coxph}},
+ \code{\link{score.survreg}},
\code{\link{score.and.store}}.
}
\examples{
More information about the Gtdb-commits
mailing list