[Genabel-commits] r1244 - in pkg/GenABEL: . R

Thu Jun 6 22:20:43 CEST 2013

Author: lckarssen
Date: 2013-06-06 22:20:43 +0200 (Thu, 06 Jun 2013)
New Revision: 1244

Added:
   pkg/GenABEL/R/lilog.R
   pkg/GenABEL/R/palinear2LiLog.R
Modified:
   pkg/GenABEL/CHANGES.LOG
Log:
Initial	commit of the LiLog() and palinear2LiLog() functions. Thanks to	Nicola
Pirastu for contributing these. 


Modified: pkg/GenABEL/CHANGES.LOG
===================================================================

--- pkg/GenABEL/CHANGES.LOG	2013-06-03 13:36:16 UTC (rev 1243)
+++ pkg/GenABEL/CHANGES.LOG	2013-06-06 20:20:43 UTC (rev 1244)
@@ -1,5 +1,12 @@
 ***  v. 1.7-
 
+(2013.06.06)
+Two new functions added: LiLog() and palinear2LiLog() for the
+estimation of logistic beta's from a linear regression. Thanks to
+Nicola Pirastu for this contribution. The paper that describes these
+methods has been submitted (see the help of both functions for the
+citation). 
+
 (2013.06.03)
 Fixed bug #2664 in export.merlin() (exported files differ when using
 different settings for the stepids argument). 

Added: pkg/GenABEL/R/lilog.R
===================================================================
--- pkg/GenABEL/R/lilog.R	                        (rev 0)
+++ pkg/GenABEL/R/lilog.R	2013-06-06 20:20:43 UTC (rev 1244)
@@ -0,0 +1,120 @@
+#' LiLog (Linear to Logistic Beta estimator)
+#'
+#' Estimating Logistic Betas from a linear regression.
+#'
+#' The function transforms the betas from the linear regression of the
+#' binomial trait into its logistic counterpart.
+#' It is based on the following relationship: if we denote with
+#' \eqn{\alpha} the coefficients of the logistic regression and with
+#' \eqn{\beta} those coming from the linear regression we have that:
+#'
+#' \deqn{z=\alpha_0 + \alpha_{SNP} + \alpha_{COV1} \ldots +
+#'  \alpha_{COVi} = \ln \left( \frac{p}{1-p} \right),}{%
+#'  z = alpha0 + alphaSNP + alphaCOV1 + ... + alphaCOVi = ln(p/(1-p)),}
+#' with
+#' \deqn{p = \beta_0 + \beta_{SNP}}{%
+#'       p = beta0 + betaSNP}
+#' Covariates are disregarded and the relationship
+#' \deqn{\alpha_0 + \alpha_{SNP} = \mathrm{logit}(\beta_0 + \beta_{SNP})}{%
+#' alpha0 + \alphaSNP = logit(beta0 + betaSNP)}
+#' holds.
+#'
+#' \eqn{\beta_0} is not the coefficient coming from the linear
+#' regression but it is estimated as:
+#' \deqn{\mathrm{prev} - 2  q  (1-q)  \beta - q^2  \beta}{%
+#'       prev - 2 * q * (1-q) * beta - q^2 * beta}
+#'
+#' and \eqn{\alpha_0} is estimated as
+#' \eqn{\mathrm{logit}(\beta_0)}{logit(beta0)}.
+#' So \eqn{\alpha_{SNP}}, which is the parameter of interest, will be:
+#' \deqn{\alpha_{SNP} = \mathrm{logit}(\beta_0 + \beta_{SNP}) -
+#'            \mathrm{logit}(\beta_0).}{%
+#'       alphaSNP = logit(beta0 + betaSNP) - logit(beta0).}
+#'
+#' \eqn{SE(\alpha_{SNP})}{SE(\alphaSNP)} can be estimated considering
+#' that the following relationship must be true:
+#' \deqn{\frac{\alpha_{SNP}}{SE(\alpha_{SNP})} =
+#'       \frac{\beta_{SNP}}{SE(\beta_{SNP})},}{%
+#'       alphaSNP/SEalphaSNP = betaSNP/SEbetaSNP,} which is equivalent to
+#' \deqn{SE(\alpha_{SNP}) = \alpha_{SNP}
+#'         \frac{SE(\beta_{SNP})}{\beta_{SNP}}.}{%
+#'       SE(alphaSNP) = alphaSNP * SE(betaSNP)/betaSNP.}
+#'
+#' @param prev: Prevalence of the disease as observed from the data
+#' @param beta: \eqn{\beta_{SNP}}{beta_SNP} as estimated from a linear
+#'  regression of the binomial trait
+#' @param SEbeta: Standard Error of \eqn{\beta}{beta} coming from the
+#'  linear regression
+#' @param q: Frequency of the coded allele.
+#'
+#' @return Dataframe containing \eqn{\beta}{beta} and
+#' \eqn{SE(\beta)}{SE(beta)}.
+#'
+#' @author Nicola Pirastu, Lennart Karssen, Yurii Aulchenko
+#'
+#' @references
+#'
+#' \emph{Genome-wide feasible and meta-analysis oriented methods for
+#' analysis of binary traits using mixed models},
+#' Nicola Pirastu, Lennart C. Karssen, Pio D'adamo, Paolo Gasparini,
+#' Yurii Aulchenko (Submitted).
+#'
+#' @seealso
+#' For transforming results from \code{\link{formetascore}} see
+#' \code{\link{formetascore2lilog}}
+#' while for tranforming the output from ProbABEL use \code{\link{palinear2LiLog}}
+#'
+#' @examples
+#'
+#' # simulate a snp with q = 0.3
+#' snp <- rbinom(n=1000, size=2, prob=0.3)
+#' # estimate the probability according to a logistic model with beta 0.4
+#' probabil <- 1/(1 + exp( -(snp * 0.4 - 1) ))
+#' # simulate the binary trait
+#' bin.trait <- rbinom(n=1000, size=1, prob=probabil)
+#' # run linear regression
+#' res.lin <- glm(bin.trait~snp)
+#' # run logistic regression
+#' res.log <- glm(bin.trait~snp, family=binomial)
+#'
+#' # recover information from linear regression
+#' snp.info <- summary(res.lin)$coefficients["snp",]
+#'
+#' # transform linear regression coefficients into logistic regression scale
+#' LiLog(prev=mean(bin.trait), beta=snp.info[1], SEbeta=snp.info[2],
+#'       q=mean(snp)/2)
+#'
+#' summary(res.log)$coefficients["snp", 1:2]
+#'
+#' # Example with qtscore
+#' data(srdta)
+#'
+#' snp <- as.numeric(srdta[, 100])
+#' probabil <- 1/(1 + exp( -(snp * 0.4 - 1) ))
+#' bin.trait <- rbinom(n=nids(srdta), size=1, prob=probabil)
+#' srdta at phdata$binary <- bin.trait
+#' res <- qtscore(binary, data=srdta)
+#' sum <- summary(srdta)
+#'
+#' lilog.res <- LiLog(prev=mean(bin.trait), beta=res at results$effB,
+#'                    SEbeta=res at results$se_effB, q=sum$Q.2)
+#' summary(glm(bin.trait~snp,
+#'         family=binomial)$coefficients)["snp"1:2,] # logistic regression
+#' lilog.res[100, ] # lilog result
+#'
+#' @keywords
+#' LiLog
+#'
+
+LiLog <- function(prev, beta, SEbeta, q){
+  Icept <- (prev - 2 * q * beta)
+  alfa0 <- log(Icept / (1-Icept))
+  T1    <- (Icept + beta)
+  T1[which(T1<0 | T1>1)] <- NA
+
+  log.beta   <- binomial()$linkfun(T1) - alfa0
+  se.log     <- (log.beta * SEbeta) / beta
+  res        <- data.frame(log.beta, se.log)
+  names(res) <- c("beta", "sebeta")
+  return(res)
+}

Added: pkg/GenABEL/R/palinear2LiLog.R
===================================================================
--- pkg/GenABEL/R/palinear2LiLog.R	                        (rev 0)
+++ pkg/GenABEL/R/palinear2LiLog.R	2013-06-06 20:20:43 UTC (rev 1244)
@@ -0,0 +1,51 @@
+#' palinear2LiLog
+#'
+#' Apply LiLog to \code{palinear} output files
+#'
+#' The function tranforms betas from the \code{palinear} output files
+#' into their logistic correspondents.
+#'
+#' @param prevalence: prevalence of the studied diseases
+#' @param input.file: palinear output file that needs transformation
+#' @param output.file: name of the transformed file, if NULL
+#'  \code{input.file} will be overwritten
+#'
+#' @return Transformed file
+#'
+#' @author Nicola Pirastu, Lennart Karssen
+#'
+#' @seealso
+#' For futher details on the methods and for the basic function see
+#' \code{\link{LiLog}}. For transforming the output from
+#' \code{formetascore} use \code{\link{formetascore2LiLog}}.
+#'
+#'
+#' @examples
+#'
+#' \dontrun{
+#'  palinear2LiLog(prevalence=0.2, input.file="chr18.add.out.txt",
+#'                 output.file="chr18.logistic.add.out.txt")
+#' }
+#'
+#' @references
+#'
+#' \emph{Genome-wide feasible and meta-analysis oriented methods for
+#' analysis of binary traits using mixed models},
+#' Nicola Pirastu, Lennart C. Karssen, Pio D'adamo, Paolo Gasparini,
+#' Yurii Aulchenko (Submitted).
+
+
+palinear2LiLog <- function(prevalence, input.file, output.file=NULL){
+  if(is.null(output.file)){
+    output.file <- input.file
+  }
+
+  res  <- read.table(input.file, header=TRUE, stringsAsFactors=FALSE)
+  Lres <- LiLog(prev=prevalence, beta=res$beta_SNP_add,
+                SEbeta=res$sebeta_SNP_add, q=res$Mean_predictor_allele/2.)
+
+  res$beta_SNP_add   <- Lres$beta
+  res$sebeta_SNP_add <- Lres$sebeta
+
+  write.table(res, file=output.file, row.names=FALSE, quote=FALSE)
+}