[Gtdb-commits] r59 - in pkg/gt.db: . R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Dec 7 20:21:33 CET 2010


Author: dahinds
Date: 2010-12-07 20:21:33 +0100 (Tue, 07 Dec 2010)
New Revision: 59

Added:
   pkg/gt.db/man/ramToChar.Rd
   pkg/gt.db/man/score.coxph.Rd
   pkg/gt.db/man/score.glm.scoretest.Rd
Modified:
   pkg/gt.db/DESCRIPTION
   pkg/gt.db/R/genotype.R
   pkg/gt.db/R/gplot.R
   pkg/gt.db/R/hwe.R
   pkg/gt.db/R/score.R
   pkg/gt.db/R/test.R
   pkg/gt.db/R/tracks.R
   pkg/gt.db/man/manhattan.plot.Rd
   pkg/gt.db/man/score.glm.Rd
   pkg/gt.db/man/score.glm.general.Rd
   pkg/gt.db/man/score.gt.data.Rd
Log:
- fix to association tests based on dosage
- minor improvements to manhattan.plot, new 'scale' argument
- performance improvement for exact HWE tests
- automatic promotion of factors to scores in linear models
- added 'score.glm.scoretest' for faster GLM scoring
- added 'score.coxph' and 'score.survreg' scoring functions
- made model-parsing in .null.model, .flat.model more robust



Modified: pkg/gt.db/DESCRIPTION
===================================================================
--- pkg/gt.db/DESCRIPTION	2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/DESCRIPTION	2010-12-07 19:21:33 UTC (rev 59)
@@ -7,6 +7,6 @@
 Description: Framework for storing and manipulating genotype data, 
   phenotype data, and association study results
 License: GPL
-Depends: R (>= 2.7.0), lattice, grid, methods, DBI
-Suggests: nlme, cluster, mda, RMySQL, RSQLite
+Depends: R (>= 2.7.0), survival, lattice, grid, methods, DBI
+Suggests: statmod, nlme, cluster, mda, RMySQL, RSQLite
 Packaged: 2010-08-12 20:47:05 UTC; dhinds

Modified: pkg/gt.db/R/genotype.R
===================================================================
--- pkg/gt.db/R/genotype.R	2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/R/genotype.R	2010-12-07 19:21:33 UTC (rev 59)
@@ -202,7 +202,7 @@
     }
     if (dosage) {
         fn <- function(x) unpack.raw.data(hexToRaw(x),'dosage')$dosage
-        g <- lapply(gt.data$raw.data)
+        g <- lapply(gt.data$raw.data, fn)
     } else {
         a <- strsplit(gt.data$alleles, split='/', fixed=TRUE)
         g <- mapply(gt.split, gt.data$genotype, ...,

Modified: pkg/gt.db/R/gplot.R
===================================================================
--- pkg/gt.db/R/gplot.R	2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/R/gplot.R	2010-12-07 19:21:33 UTC (rev 59)
@@ -51,34 +51,43 @@
 manhattan.plot <-
     function(y, data, gap=0, threshold=-log10(5e-8), around=0,
              xticks=c(1:12,14,16,18,20,22,'X','Y'), cex=0.25,
-             xlab=NULL, ylab=deparse(substitute(y)), yrange, ylim,
+             xlab, ylab=expression(-log[10](pvalue)), yrange, ylim,
+             scale=c('Mb','Kb','bp'),
              col=c('#d0d0d0','#e0e0e0','#ff0000'), ...)
 {
     val <- eval(substitute(y), data, parent.frame())
-    len <- with(data, tapply(position, .sort.levels(scaffold),
+    scale <- match.arg(scale)
+    div <- switch(scale, Mb=1e6, Kb=1000, 1)
+    len <- with(data, tapply(position/div, .sort.levels(scaffold),
                              max, na.rm=TRUE))
     ofs <- cumsum(c(0,as.numeric(len+gap)))
     mid <- (ofs[-1] + ofs[-length(ofs)])/2
-    pos <- data$position + ofs[match(data$scaffold, names(len))]
+    pos <- data$position/div + ofs[match(data$scaffold, names(len))]
     set <- list(superpose.symbol=list(col=col))
     grp <- (match(data$scaffold, names(len)) %% 2)
     grp <- ifelse(val > threshold, 2, grp)
     if (around > 0) {
         hits <- pos[which(val > threshold)]
-        keep <- (diff(hits) > around)
+        keep <- (diff(hits) > around/div)
         hits <- hits[c(TRUE,keep) | c(keep,TRUE)]
         for (hit in hits) {
-            grp[abs(pos - hit) < around] <- 2
+            grp[abs(pos - hit) < around/div] <- 2
         }
     }
     xlim <- range(pos,na.rm=TRUE)
     xlim <- xlim + (xlim[2]-xlim[1]) * c(-0.02,0.02)
     if (missing(ylim)) {
         if (missing(yrange))
-            yrange <- range(c(val,0,1.1*threshold))
+            yrange <- range(c(val,0,1.1*threshold), na.rm=TRUE)
         padding <- lattice.options()$axis.padding$numeric
         ylim <- yrange + diff(yrange) * padding * c(-1,1)
     }
+    if (missing(xlab)) {
+        if (length(len) == 1)
+            xlab <- paste('position,', scale)
+        else
+            xlab <- 'Chromosome'
+    }
     panel.fn <- function(...)
     { panel.xyplot(...) ; panel.refline(h=threshold) }
     if (length(len) == 1) {

Modified: pkg/gt.db/R/hwe.R
===================================================================
--- pkg/gt.db/R/hwe.R	2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/R/hwe.R	2010-12-07 19:21:33 UTC (rev 59)
@@ -23,38 +23,29 @@
 
 .hwe.exact <- function(aa, ab, bb, tail)
 {
-    # use integer constants to avoid implicit casts
-    m  <- ab %/% 2L
-    xx <- aa + m + 1L
-    xy <- ab %% 2L + 1L
-    yy <- bb + m + 1L
-    n  <- pmin(xx,yy) - 1L
+    nr <- pmin(aa,bb)*2+ab
+    nn <- aa+ab+bb
+    xy <- as.integer(nr*(2*nn-nr) / (2*nn))
+    xy <- xy + ((nr+xy) %% 2)
+    xx <- (nr-xy) %/% 2
+    yy <- (nn-xy-xx)
 
-    # precompute lookup table of log gamma values
-    # note that xx, xy, yy are offset by 1 for indexing
-    LG <- lgamma(1:(max(xx,yy,2L*n)+2))
-    v <- log(2)*ab - LG[aa+1L] - LG[ab+1L] - LG[bb+1L]
-    log.p <- function(xx, xy, yy, i)
-        log(2)*(xy+2L*i-1L) - LG[xx-i] - LG[xy+2L*i] - LG[yy-i]
-
-    fn.lower <- function(v, m, n, ...)
+    p.fn <- function(xx, xy, yy)
     {
-        p <- exp(log.p(...,0:n)-v)
-        sum(p[0:m+1L])/sum(p)
+        if (is.na(xx+xy+yy)) return(NA)
+        i <- 1:(xy %/% 2)
+        lt <- cumprod((xy-2*i+2)*(xy-2*i+1)/(4*(xx+i)*(yy+i)))
+        i <- 1:min(xx,yy)
+        ut <- cumprod(4*(xx-i+1)*(yy-i+1)/((xy+2*i)*(xy+2*i-1)))
+        c(rev(lt),1,ut)
     }
-    fn.upper <- function(v, m, n, ...)
-    {
-        p <- exp(log.p(...,0:n)-v)
-        sum(p[m:n+1L])/sum(p)
-    }
-    fn.both <- function(v, m, n, ...)
-    {
-        p <- exp(log.p(...,0:n)-v)
-        sum(p[p<=1])/sum(p)
-    }
 
-    mapply(switch(tail, lower=fn.lower, upper=fn.upper, fn.both),
-           v, m, n, xx, xy, yy)
+    fn.lower <- function(brk, p) sum(p[1:brk]) / sum(p)
+    fn.upper <- function(brk, p) sum(p[brk:length(p)]) / sum(p)
+    fn.both  <- function(brk, p) sum(p[p <= p[brk]]) / sum(p)
+    fn <- switch(tail, lower=fn.lower, upper=fn.upper, fn.both)
+    mapply(function(brk,xx,xy,yy) fn(brk, p.fn(xx,xy,yy)),
+           (ab %/% 2)+1, xx, xy, yy)
 }
 
 hwe.test <-

Modified: pkg/gt.db/R/score.R
===================================================================
--- pkg/gt.db/R/score.R	2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/R/score.R	2010-12-07 19:21:33 UTC (rev 59)
@@ -170,6 +170,9 @@
          mode=c('additive','recessive','dominant','general'))
 {
     data$genotype <- .recode.gt(data$genotype, match.arg(mode))
+    lhs <- .lhs.formula(formula)
+    if (lhs %in% names(data) && is.factor(data[,lhs]))
+        data[,lhs] <- as.numeric(data[,lhs])
     m <- lm(formula, data)
     nf <- .null.model(formula, term=drop)
     pvalue <- anova(update(m,nf), m)[2,6]
@@ -187,6 +190,9 @@
         return(keep.attr(cbind(term=NA, r), fit='lm'))
     }
     data$genotype <- factor(data$genotype)
+    lhs <- .lhs.formula(formula)
+    if (lhs %in% names(data) && is.factor(data[,lhs]))
+        data[,lhs] <- as.numeric(data[,lhs])
     contrasts <- list(genotype=matrix(c(0,1,2,0,1,0),3,2))
     m <- lm(formula, data, contrasts=contrasts)
     nf <- .null.model(formula)
@@ -250,6 +256,16 @@
     ), fit='glm')
 }
 
+score.glm.scoretest <-
+    function (formula, data, ploidy,
+              precalc=glm(.null.model(formula),binomial,data))
+{
+    require(statmod)
+    pvalue <- 2 * pnorm(-abs(glm.scoretest(precalc,data$genotype)))
+    keep.attr(data.frame(pvalue, effect = NA, stderr = NA),
+              fit = "glm.scoretest")
+}
+
 score.glm.general <- function(formula, data, ploidy)
 {
     if (length(unique(data$genotype)) < 3) {
@@ -348,3 +364,52 @@
     ), fit='gls')
 }
 
+#-----------------------------------------------------------------------
+
+score.coxph <-
+function(formula, data, ploidy, test=c('LR','Wald'),
+         mode=c('additive','recessive','dominant','general'), ...)
+{
+    lhs <- .lhs.formula(formula)
+    if (!is.Surv(data[,lhs]))
+        data[,lhs] <- Surv(abs(data[,lhs]),data[,lhs]>0)
+    data$genotype <- .recode.gt(data$genotype, match.arg(mode))
+    m <- coxph(formula, data, ...)
+    x <- unname(coef(summary(m))['genotype',])
+    test <- match.arg(test)
+    if (test == "LR") {
+        # work-around for anova() evaluation problems
+        LL <- function(m) tail(m$loglik,1)
+        m0 <- update(m, .null.model(formula, term='genotype'))
+        pvalue <- pchisq(2*(LL(m)-LL(m0)), 1, lower=FALSE)
+    } else {
+        pvalue <- pnorm(-abs(x[4]))*2
+    }
+    keep.attr(data.frame(
+        pvalue, effect=x[1], stderr=x[3]
+    ), fit='coxph')
+}
+
+score.survreg <-
+function(formula, data, ploidy, test=c('LR','Wald'),
+         mode=c('additive','recessive','dominant','general'), ...)
+{
+    lhs <- .lhs.formula(formula)
+    if (!is.Surv(data[,lhs]))
+        data[,lhs] <- Surv(abs(data[,lhs]),data[,lhs]>0)
+    data$genotype <- .recode.gt(data$genotype, match.arg(mode))
+    m <- survreg(formula, data, ...)
+    x <- unname(summary(m)$table['genotype',])
+    test <- match.arg(test)
+    if (test == "LR") {
+        # work-around for anova() evaluation problems
+        LL <- function(m) tail(m$loglik,1)
+        m0 <- update(m, .null.model(formula, term='genotype'))
+        pvalue <- pchisq(2*(LL(m)-LL(m0)), 1, lower=FALSE)
+    } else {
+        pvalue <- x[4]
+    }
+    keep.attr(data.frame(
+        pvalue, effect=x[1], stderr=x[2]
+    ), fit='survreg')
+}

Modified: pkg/gt.db/R/test.R
===================================================================
--- pkg/gt.db/R/test.R	2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/R/test.R	2010-12-07 19:21:33 UTC (rev 59)
@@ -27,14 +27,23 @@
 
 .null.model <- function(formula, term='genotype')
 {
-    txt <- gsub(paste('[*+:]',term), '', .as.text(formula))
-    eval(parse(text=gsub(term, '1', txt)))
+    drop <- grep(paste('\\b',term,'\\b',sep=''),
+                 labels(terms(formula)), value=TRUE)
+    if (length(drop) == 0)
+        stop('no term(s) to drop')
+    drop <- paste('~ . -', paste(drop,collapse=' - '))
+    update.formula(formula, as.formula(drop))
 }
 
 .flat.model <- function(formula, term='genotype')
 {
-    txt <- .as.text(.null.model(formula, term))
-    eval(parse(text=paste(txt, '+', term)))
+    drop <- grep(paste('\\b',term,'\\b',sep=''),
+                 labels(terms(formula)), value=TRUE)
+    drop <- setdiff(drop, term)
+    if (length(drop) == 0)
+        stop('no term(s) to drop')
+    drop <- paste('~ . -', paste(drop,collapse=' - '))
+    update.formula(formula, as.formula(drop))
 }
 
 .lhs.formula <- function(formula) as.character(formula[2])
@@ -76,10 +85,14 @@
         dsub$gender <- pt.data$gender
 
     # guess scoring function based on model and outcome
+    if (is.character(score.fn))
+        score.fn <- eval(parse(text=paste('score.', score.fn, sep='')))
     if (is.null(score.fn)) {
         binary <- (length(table(pt.data[,col[1]])) == 2)
         groups <- (length(grep(':genotype',dimnames(ft)[[2]])))
-        fn <- if (!groups && binary && !dosage && (length(col)==2))
+        fn <- if (is.Surv(pt.data[,col[1]]))
+            'score.coxph'
+        else if (!groups && binary && !dosage && (length(col)==2))
             'score.trend'
         else paste('score', c('.lm','.glm')[binary+1],
                    c('','.groups')[groups+1], sep='')
@@ -87,7 +100,18 @@
         score.fn <- eval(parse(text=fn))
     }
 
+    pt.mask <- if.na(pt.filter,FALSE) & complete.cases(dsub)
+    dsub <- dsub[pt.mask,,drop=FALSE]
+
+    # if score function has a precalc argument, evaluate it now
+    precalc <- eval(formals(score.fn)$precalc,
+                    envir=list(formula=formula, data=dsub))
+    new.score.fn <- score.fn
+    if (!is.null(precalc))
+        new.score.fn <- function(...) score.fn(..., precalc=precalc)
+    id.cols <- which(names(gt.data) %in% c('assay.data.id','assay.name'))
     nr <- nrow(gt.data)
+
     wrap.score.fn <- function(n) {
         gn <- gt.data[n,]
         my.error <- function(e) {
@@ -98,15 +122,19 @@
             my.error(e)
             invokeRestart('muffleWarning')
         }
-        d <- cbind(dsub, unpack.gt.matrix(gn, 'genotype', dosage=dosage))
-        keep <- if.na(pt.filter,FALSE) & complete.cases(d)
+        dsub$genotype <- unpack.gt.matrix(gn, dosage=dosage)[pt.mask,1]
+        if (!is.null(precalc)) {
+            dsub$genotype <- if.na(dsub$genotype,
+                                   mean(dsub$genotype,na.rm=TRUE))
+            keep <- TRUE
+        } else
+            keep <- !is.na(dsub$genotype)
         r <- tryCatch(withCallingHandlers(
-            score.fn(formula, d[keep,], gn$ploidy, ...),
+            new.score.fn(formula, dsub[keep,], gn$ploidy, ...),
             warning=my.warn), error=my.error)
         if (progress) progress.bar(n, nr)
         if (!is.null(r)) {
-            id <- gn[c('assay.data.id','assay.name')]
-            keep.attr(merge(id,r), .Attr=kept.attr(r))
+            keep.attr(cbind(gn[id.cols],r), .Attr=kept.attr(r))
         }
     }
     if (progress) progress.bar(0, nr)

Modified: pkg/gt.db/R/tracks.R
===================================================================
--- pkg/gt.db/R/tracks.R	2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/R/tracks.R	2010-12-07 19:21:33 UTC (rev 59)
@@ -31,18 +31,19 @@
     tck <- function(x) { p <- pretty(x) ; p[p >= x[1] & p <= x[2]] }
     xscale <- current.viewport()$xscale
     scale <- match.arg(scale)
-    mul <- switch(match.arg(scale), Mb=1e6, Kb=1e3, 1)
-    grid.xaxis(tck(xscale), format(tck(xscale/mul)))
+    div <- switch(scale, Mb=1e6, Kb=1e3, 1)
+    grid.xaxis(tck(xscale), format(tck(xscale/div)))
     if (missing(xlab))
         xlab <- paste(gt$scaffold[1], 'position,', scale)
     if (!is.null(xlab))
         grid.text(xlab, y=unit(-3,'lines'))
     y <- unit(0,'npc')
     for (tn in 1:length(tracks)) {
+        tr <- tracks[[tn]]
+        if (is.null(tr)) next
         name <- names(tracks)[tn]
         if (is.null(name) || name=='')
             name <- sprintf("track%d", tn)
-        tr <- tracks[[tn]]
         args <- list(name=name, y=y, just='bottom', xscale=xscale)
         vp <- do.call('viewport', c(args, tr$vp))
         pushViewport(vp)
@@ -55,7 +56,7 @@
     }
     pushViewport(viewport(name='border', y=unit(0,'npc'),
                           height=y, just='bottom'))
-    grid.rect()
+    grid.rect(gp=gpar(fill='transparent'))
     upViewport()
 }
 

Modified: pkg/gt.db/man/manhattan.plot.Rd
===================================================================
--- pkg/gt.db/man/manhattan.plot.Rd	2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/man/manhattan.plot.Rd	2010-12-07 19:21:33 UTC (rev 59)
@@ -26,7 +26,8 @@
 \usage{
 manhattan.plot(y, data, gap=0, threshold=-log10(5e-8), around=0,
                xticks=c(1:12,14,16,18,20,22,'X','Y'), cex=0.25,
-               xlab=NULL, ylab=deparse(substitute(y)), yrange, ylim,
+               xlab=, ylab=expression(-log[10](pvalue)), yrange, ylim,
+               scale=c('Mb','Kb','bp'),
                col=c('#d0d0d0','#e0e0e0','#ff0000'), ...)
 }
 \arguments{
@@ -41,6 +42,8 @@
   \item{cex, xlab, ylab, ylim}{see \code{\link[lattice:xyplot]{xyplot}}.}
   \item{yrange}{similar to \code{ylim}, except that this range will be
     padded as if it were the range of actual data points.}
+  \item{scale}{determines the scaling of tick labels on the X axis, if
+    the data all falls on a single chromosome.}
   \item{col}{colors to use for odd and even chromosomes, and hits.}
   \item{\dots}{additional arguments passed to
     \code{\link[lattice:xyplot]{xyplot}}.}

Added: pkg/gt.db/man/ramToChar.Rd
===================================================================
--- pkg/gt.db/man/ramToChar.Rd	                        (rev 0)
+++ pkg/gt.db/man/ramToChar.Rd	2010-12-07 19:21:33 UTC (rev 59)
@@ -0,0 +1,52 @@
+%
+% Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
+% 
+% Written by David A. Hinds <dhinds at sonic.net>
+% 
+% This is free software; you can redistribute it and/or modify it
+% under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the license, or
+% (at your option) any later version.
+% 
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+% GNU General Public License for more details.
+% 
+% You should have received a copy of the GNU General Public License
+% along with this program.  If not, see <http://www.gnu.org/licenses/>
+% 
+\name{charToRam}
+\alias{charToRam}
+\alias{ramToChar}
+\title{Convert between Raw Matrices and Character Vectors}
+\description{
+  Convert between raw matrices and character vectors.
+}
+\usage{
+charToRam(str)
+ramToChar(raw)
+}
+\arguments{
+  \item{str}{a character vector.}
+  \item{raw}{a raw matrix.}
+}
+\value{
+  For \code{charToRam}, a raw matrix with one column per string in
+  \code{char}.
+  
+  For \code{ramToChar}, a character vector with one element per column
+  in \code{raw}.
+}
+\seealso{
+  \code{\link{hexToRam}}, \code{\link{ramToHex}},
+  \code{\link{charToRaw}}, \code{\link{rawToChar}}.
+}
+\examples{
+charToRam(c('1234','5678','abcd'))
+m <- matrix(charToRaw('abcd'),4,3)
+print(m)
+ramToChar(m)
+}
+\keyword{classes}

Added: pkg/gt.db/man/score.coxph.Rd
===================================================================
--- pkg/gt.db/man/score.coxph.Rd	                        (rev 0)
+++ pkg/gt.db/man/score.coxph.Rd	2010-12-07 19:21:33 UTC (rev 59)
@@ -0,0 +1,84 @@
+%
+% Copyright (C) 2010, 23andMe, Inc.
+% 
+% Written by David A. Hinds <dhinds at 23andMe.com>
+% 
+% This is free software; you can redistribute it and/or modify it
+% under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the license, or
+% (at your option) any later version.
+% 
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+% GNU General Public License for more details.
+% 
+% You should have received a copy of the GNU General Public License
+% along with this program.  If not, see <http://www.gnu.org/licenses/>
+% 
+\name{score.coxph}
+\alias{score.coxph}
+\alias{score.survreg}
+\title{Test for Association using Survival Analysis}
+\description{
+  Test for association using either a semi-parametric Cox proportional
+  hazards model, or a parametric survival regression model.
+}
+\usage{
+score.coxph(formula, data, ploidy, test=c('LR','Wald'),
+            mode=c('additive','recessive','dominant','general'), ...)
+score.survreg(formula, data, ploidy, test=c('LR','Wald'),
+              mode=c('additive','recessive','dominant','general'), ...)
+}
+\arguments{
+  \item{formula}{a symbolic description of the model to be fit.}
+  \item{data}{a data frame containing the variables in the model.}
+  \item{ploidy}{see \code{\link{fetch.gt.data}}.  Ignored.}
+  \item{test}{the test to use for computing P values.}
+  \item{mode}{genetic mode of action to test.}
+  \item{\dots}{additional arguments passed to
+    \code{\link[survival:coxph]{coxph}} or
+    \code{\link[survival:survreg]{survreg}}.}
+}
+\details{
+  These functions perform association tests using survival regression
+  models.  The model formula may include covariates and should contain a
+  single genotype term without interactions.  The left-hand-side of the
+  model formula can either identify a survival object, or a numeric
+  vector.  If it is numeric, then a survival object will be created
+  using the absolute magnitude of the value as the followup time and
+  the sign of the value as the event indicator.
+
+  At sex linked loci, haploid males are treated the same as the
+  corresponding diploid female homozygotes.
+}
+\value{
+  A data frame with one row and three columns.  An effect size is not
+  reported if the mode of action is \code{'general'}.
+  \item{pvalue}{P value for the specified \code{test}.}
+  \item{effect}{the regression coefficient (log hazard) for the genotype
+    term.}
+  \item{stderr}{the standard error of the effect size estimate.}
+}
+\seealso{
+  \code{\link[survival:Surv]{Surv}},
+  \code{\link[survival:coxph]{coxph}},
+  \code{\link[survival:survreg]{survreg}},
+  \code{\link{score.gt.data}}.
+}
+\examples{
+gt.demo.check()
+library(survival)
+pt <- fetch.pt.data('Demo_2')
+gt <- fetch.gt.data('Demo_2')[1:10,]
+pt$age <- 50+10*rnorm(nrow(pt))
+pt$event <- rbinom(nrow(pt),1,pt$age/100)
+pt$survival <- Surv(pt$age,pt$event)
+score.gt.data(survival~genotype, pt, gt, score.coxph)
+score.gt.data(survival~genotype, pt, gt, score.survreg)
+# use numeric coding for outcome
+pt$survival <- pt$age * ifelse(pt$event,1,-1)
+score.gt.data(survival~genotype, pt, gt, score.survreg)
+}
+\keyword{htest}
+\keyword{regression}

Modified: pkg/gt.db/man/score.glm.Rd
===================================================================
--- pkg/gt.db/man/score.glm.Rd	2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/man/score.glm.Rd	2010-12-07 19:21:33 UTC (rev 59)
@@ -1,7 +1,8 @@
 %
 % Copyright (C) 2009, Perlegen Sciences, Inc.
+% Copyright (C) 2010, 23andMe, Inc.
 % 
-% Written by David A. Hinds <dhinds at sonic.net>
+% Written by David A. Hinds <dhinds at 23andMe.com>
 % 
 % This is free software; you can redistribute it and/or modify it
 % under the terms of the GNU General Public License as published by
@@ -51,16 +52,17 @@
 \value{
   A data frame with one row and three columns.  An effect size is not
   reported if the mode of action is \code{'general'}.
-  \item{pvalue}{P value for an F test comparing the full and null models.}
+  \item{pvalue}{P value for the specified \code{test}.}
   \item{effect}{the regression coefficient (log odds) for \code{term}.}
   \item{stderr}{the standard error of the effect size estimate.}
 }
 \seealso{
-  \code{\link{lm}},
+  \code{\link{glm}},
   \code{\link{score.trend}},
   \code{\link{score.chisq}},
   \code{\link{score.glm.general}},
   \code{\link{score.glm.groups}},
+  \code{\link{score.glm.scoretest}},
   \code{\link{score.gt.data}}.
 }
 \examples{

Modified: pkg/gt.db/man/score.glm.general.Rd
===================================================================
--- pkg/gt.db/man/score.glm.general.Rd	2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/man/score.glm.general.Rd	2010-12-07 19:21:33 UTC (rev 59)
@@ -45,7 +45,7 @@
   the second describes the fitted additive effect; and the third
   describes the fitted dominance effect.
   \item{term}{\code{NA}, \code{'additive'}, or \code{'dominance'}.}
-  \item{pvalue}{For the overall test, the ANOVA F test result.
+  \item{pvalue}{For the overall test, the ANOVA test result.
     For additive and dominance effects, Wald test results.}
   \item{effect}{the regression coefficient, for the additive or
     dominant effect.}

Added: pkg/gt.db/man/score.glm.scoretest.Rd
===================================================================
--- pkg/gt.db/man/score.glm.scoretest.Rd	                        (rev 0)
+++ pkg/gt.db/man/score.glm.scoretest.Rd	2010-12-07 19:21:33 UTC (rev 59)
@@ -0,0 +1,69 @@
+%
+% Copyright (C) 2010, 23andMe, Inc.
+% 
+% Written by David A. Hinds <dhinds at 23andMe.com>
+% 
+% This is free software; you can redistribute it and/or modify it
+% under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the license, or
+% (at your option) any later version.
+% 
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+% GNU General Public License for more details.
+% 
+% You should have received a copy of the GNU General Public License
+% along with this program.  If not, see <http://www.gnu.org/licenses/>
+% 
+\name{score.glm.scoretest}
+\alias{score.glm.scoretest}
+\title{Fast Score Test for Association using Logistic Regression}
+\description{
+  A score test for association with a binary outcome based on logistic
+  regression, using a precalculated null model.
+}
+\usage{
+score.glm.scoretest(formula, data, ploidy,
+                    precalc=glm(.null.model(formula),binomial,data))
+}
+\arguments{
+  \item{formula}{a symbolic description of the model to be fit.}
+  \item{data}{a data frame containing the variables in the model.}
+  \item{ploidy}{see \code{\link{fetch.gt.data}}.  Ignored.}
+  \item{precalc}{a null model to be precalculated once.}
+}
+\details{
+  This is a variation on \code{\link{score.glm}} that calculates P
+  values based on Rao's score test.  It precalculates the base model
+  (which can contain covariates).  In order to use this precalculated
+  model, there can be no missing genotype data, so missing observations
+  are replaced by the mean.  A consequence is that only the additive
+  mode of action is supported.  It is much faster than
+  \code{\link{score.glm}}.
+  
+  At sex linked loci, haploid males are treated the same as the
+  corresponding diploid female homozygotes.
+}
+\value{
+  A data frame with one row and three columns:
+  \item{pvalue}{P value for the score test.}
+  \item{effect}{always \code{NA}.}
+  \item{stderr}{always \code{NA}.}
+}
+\seealso{
+  \code{\link{glm}}, \code{\link[statmod:glm.scoretest]{glm.scoretest}},
+  \code{\link{score.glm}}, \code{\link{score.trend}},
+  \code{\link{score.gt.data}}.
+}
+\examples{
+gt.demo.check()
+pt <- fetch.pt.data('Demo_2')
+gt <- fetch.gt.data('Demo_2')[1:10,]
+pt$status <- as.logical(rbinom(nrow(pt), 1, 0.5))
+score.gt.data(status~genotype, pt, gt, score.glm, test='Rao')
+score.gt.data(status~genotype, pt, gt, score.trend)
+score.gt.data(status~genotype, pt, gt, score.glm.scoretest)
+}
+\keyword{htest}
+\keyword{regression}

Modified: pkg/gt.db/man/score.gt.data.Rd
===================================================================
--- pkg/gt.db/man/score.gt.data.Rd	2010-09-10 20:56:58 UTC (rev 58)
+++ pkg/gt.db/man/score.gt.data.Rd	2010-12-07 19:21:33 UTC (rev 59)
@@ -32,7 +32,8 @@
   \item{formula}{a symbolic description of the model to be fit.}
   \item{pt.data}{a data frame of phenotypes from \code{\link{fetch.pt.data}}.}
   \item{gt.data}{a data frame of genotypes from \code{\link{fetch.gt.data}}.}
-  \item{score.fn}{the scoring function to be applied to each SNP.}
+  \item{score.fn}{the scoring function to be applied to each SNP, or a
+    short name like \code{'lm'}, \code{'glm'}, etc.}
   \item{pt.filter}{an expression to use for subsetting on the
     phenotype table.}
   \item{gt.filter}{an expression to use for subsetting on the
@@ -53,8 +54,8 @@
   the form of the model formula.  A trend test will be used for simple
   binary-outcome models of the form "status~genotype".  Logistic
   regression will be used for binary outcomes with more complicated
-  model functions.  And linear regression will be used for
-  quantitative outcomes.
+  model functions.  Cox proportional hazards will be used for a survival
+  outcome.  And linear regression will be used for quantitative outcomes.
 
   The filters \code{pt.filter} and \code{gt.filter} are evaluated
   in the context of the \code{pt.data} and \code{gt.data} tables,
@@ -89,6 +90,8 @@
   \code{\link{score.glm}},
   \code{\link{score.glm.general}},
   \code{\link{score.glm.groups}},
+  \code{\link{score.coxph}},
+  \code{\link{score.survreg}},
   \code{\link{score.and.store}}.
 }
 \examples{



More information about the Gtdb-commits mailing list