[Genabel-commits] r2049 - in pkg/MultiABEL: . R src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Mar 2 16:04:10 CET 2016
Author: shenxia
Date: 2016-03-02 16:04:10 +0100 (Wed, 02 Mar 2016)
New Revision: 2049
Modified:
pkg/MultiABEL/ChangeLog
pkg/MultiABEL/DESCRIPTION
pkg/MultiABEL/R/MultiSummary.R
pkg/MultiABEL/R/load.summary.R
pkg/MultiABEL/src/MultiSummaryLoop.f90
Log:
T'RT implementation commit
Modified: pkg/MultiABEL/ChangeLog
===================================================================
--- pkg/MultiABEL/ChangeLog 2016-02-03 13:49:45 UTC (rev 2048)
+++ pkg/MultiABEL/ChangeLog 2016-03-02 15:04:10 UTC (rev 2049)
@@ -1,3 +1,8 @@
+2015-02-25 15:52 xia
+
+ * Updated to version 1.1-4
+ * MultiSummary() implemented using T-stats
+
2015-08-20 09:09 xia
* Updated to version 1.1-3
Modified: pkg/MultiABEL/DESCRIPTION
===================================================================
--- pkg/MultiABEL/DESCRIPTION 2016-02-03 13:49:45 UTC (rev 2048)
+++ pkg/MultiABEL/DESCRIPTION 2016-03-02 15:04:10 UTC (rev 2049)
@@ -1,8 +1,8 @@
Package: MultiABEL
Type: Package
Title: Multi-Trait Genome-Wide Association Analyses
-Version: 1.1-3
-Date: 2015-08-15
+Version: 1.1-4
+Date: 2016-02-25
Author: Xia Shen
Maintainer: Xia Shen <xia.shen at ki.se>
Description: Multivariate genome-wide association analyses. The analysis can be performed on individual-level data or multiple single-trait genome-wide summary statistics.
@@ -14,4 +14,4 @@
DatABEL
License: GPL (>= 2)
LazyLoad: yes
-Packaged: 2015-08-15 18:30:18 CET; xia
+Packaged: 2016-02-25 15:52:58 CET; xia
Modified: pkg/MultiABEL/R/MultiSummary.R
===================================================================
--- pkg/MultiABEL/R/MultiSummary.R 2016-02-03 13:49:45 UTC (rev 2048)
+++ pkg/MultiABEL/R/MultiSummary.R 2016-03-02 15:04:10 UTC (rev 2049)
@@ -3,12 +3,15 @@
#' This function performs multivariate GWA analysis using meta-GWAS summary statistics
#'
#' @param x A data object of class \code{multi.summary} loaded by the function \code{load.summary}.
+#' @param index A numeric vector that gives the indices of the traits to be analyzed jointly.
#' @param type A string gives the type of analysis. Default is \code{"outbred"}, referring to
#' general outbred populations, following Hardy-Weinberg equilibrium. \code{"inbred"} refers to
#' inbred populations, where no heterzygotes exists, namely, allele frequency = genotype frequency.
#' \code{"precise"} refers to precise test statistics, especially when the individual-level data
-#' are available, for which the argument \code{vars} has to be given.
-#' @param vars A numeric vector gives the variance of the genotypes at each SNP, coded as 0, 1 and 2.
+#' are available, for which the argument \code{vars} has to be given. \code{"direct"} refers to
+#' test statistics directly constructed from the T-statistics in univariate GWAS, this provides a
+#' scale-invariant test most similar to the direct MANOVA, but may be less powerful in some scenarios.
+#' @param vars A numeric vector gives the variance of the genotypes at each SNP, e.g. coded as 0, 1 and 2.
#' Only used when \code{type = "precise"}.
#'
#' @return The function returns a data frame containing the multi-trait GWAS results, where the row names are
@@ -20,7 +23,7 @@
#' @author Xia Shen
#'
#' @references
-#' Xia Shen, Xiao Wang, Zheng Ning, Yakov Tsepilov, Masoud Shirali,
+#' Xia Shen, Zheng Ning, Yakov Tsepilov, Masoud Shirali,
#' Generation Scotland, Blair H. Smith, Lynne J. Hocking, Sandosh Padmanabhan, Caroline Hayward,
#' David J. Porteous, Yudi Pawitan, Chris S. Haley, Yurii S. Aulchenko (2015).
#' Simple multi-trait analysis identifies novel loci
@@ -57,9 +60,9 @@
#'
`MultiSummary` <- function(x, index = NULL, type = 'outbred', vars = NULL) {
if (class(x) != 'multi.summary') {
- stop('wrong data type!')
+ stop('Wrong data type!')
}
- cat('multi-trait genome scan ... ')
+ cat('Multi-trait genome scan ... ')
if (!is.null(index)) {
m <- nrow(x$cor.pheno)
x$cor.pheno <- x$cor.pheno[index,index]
@@ -71,6 +74,8 @@
n <- x$gwa[,2*m + 2]
k <- nrow(x$gwa)
betamat <- x$gwa[,seq(1, 2*m, 2)]
+ semat <- x$gwa[,seq(2, 2*m, 2)]
+ tmat <- betamat/semat
dimnames(betamat) <- list(NULL, NULL)
res <- data.frame(Marker = rownames(x$gwa), Freq = f, N = n, Beta.S = NA, SE = NA, P = NA)
rownames(res) <- rownames(x$gwa)
@@ -81,24 +86,32 @@
pil = numeric(k), fstat = numeric(k), PACKAGE = "MultiABEL")
} else if (type == 'inbred') {
scan <- .Fortran('MultiSummaryLoopInbred', k = as.integer(k), m = as.integer(m), nn = n,
- f = f, betamat = betamat, R = x$cor.pheno, invR = solve(x$cor.pheno),
- E0 = diag(m), H = diag(m), coef = betamat,
- pil = numeric(k), fstat = numeric(k), PACKAGE = "MultiABEL")
+ f = f, betamat = betamat, R = x$cor.pheno, invR = solve(x$cor.pheno),
+ E0 = diag(m), H = diag(m), coef = betamat,
+ pil = numeric(k), fstat = numeric(k), PACKAGE = "MultiABEL")
} else if (type == 'precise' & !is.null(vars)) {
scan <- .Fortran('MultiSummaryLoopPrecise', k = as.integer(k), m = as.integer(m), nn = n,
- betamat = betamat, R = x$cor.pheno, invR = solve(x$cor.pheno),
- E0 = diag(m), H = diag(m), coef = betamat,
- pil = numeric(k), fstat = numeric(k), vg = vars, PACKAGE = "MultiABEL")
- } else {
- stop('wrong type of analysis!')
+ betamat = betamat, R = x$cor.pheno, invR = solve(x$cor.pheno),
+ E0 = diag(m), H = diag(m), coef = betamat,
+ pil = numeric(k), fstat = numeric(k), vg = vars, PACKAGE = "MultiABEL")
+ } else if (type == 'direct') {
+ scan <- .Fortran('MultiSummaryLoopDirect', k = as.integer(k), m = as.integer(m), nn = as.numeric(n),
+ tmat = tmat, invR = solve(x$cor.pheno),
+ pil = numeric(k), fstat = numeric(k), PACKAGE = "MultiABEL")
+ }else {
+ stop('Wrong type of analysis!')
}
- cat('done.\n')
+ cat('Done.\n')
res$Beta.S <- scan$pil
res$P <- pf(scan$fstat, m, n - m - 1, lower.tail = FALSE)
res$SE <- res$Beta.S/sqrt(qchisq(res$P, 1, lower.tail = FALSE))
- coef <- matrix(NA, k, m*2)
- coef[,seq(1, 2*m, 2)] <- scan$coef
- coef[,seq(2, 2*m, 2)] <- sqrt(scan$s)
- colnames(coef) <- colnames(x$gwa)[1:(2*m)]
- return(data.frame(res, coef))
+ if (type != 'direct') {
+ coef <- matrix(NA, k, m*2)
+ coef[,seq(1, 2*m, 2)] <- scan$coef
+ coef[,seq(2, 2*m, 2)] <- sqrt(scan$s)
+ colnames(coef) <- colnames(x$gwa)[1:(2*m)]
+ return(data.frame(res, coef))
+ } else {
+ return(res)
+ }
}
Modified: pkg/MultiABEL/R/load.summary.R
===================================================================
--- pkg/MultiABEL/R/load.summary.R 2016-02-03 13:49:45 UTC (rev 2048)
+++ pkg/MultiABEL/R/load.summary.R 2016-03-02 15:04:10 UTC (rev 2049)
@@ -17,6 +17,16 @@
#' recommended to be generated by LD-pruning the genotype data in a certain cohort. Typically the
#' number of SNPs should be more than 10,000 in order to obtain a good estimate of \code{cor.pheno}. If
#' \code{cor.pheno = NULL}, this argument cannot be \code{NULL}.
+#' @param est.var A logical value. If \code{FALSE}, each phenotypic variance is assumed to be known as 1.
+#' If \code{TRUE}, each phenotypic variance will be estimated to adjust the summary statistics, so that
+#' the corresponding phenoypic variance is 1.
+#' @param type A string gives the type of analysis. Default is \code{"outbred"}, referring to
+#' general outbred populations, following Hardy-Weinberg equilibrium. \code{"inbred"} refers to
+#' inbred populations, where no heterzygotes exists, namely, allele frequency = genotype frequency.
+#' \code{"precise"} refers to precise genotypic variance, especially when the individual-level data
+#' are available, for which the argument \code{vars} has to be given.
+#' @param vars A numeric vector gives the variance of the genotypes at each SNP, e.g. coded as 0, 1 and 2.
+#' Only used when \code{type = "precise"}.
#'
#' @return The function returns a list of class \code{multi.summary}, containing two elements: \code{gwa}
#' (the cleaned data to be processed in multi-trait GWAS) and \code{cor.pheno} (user input or estimated).
@@ -24,7 +34,7 @@
#' @author Xia Shen
#'
#' @references
-#' Xia Shen, Xiao Wang, Zheng Ning, Yakov Tsepilov, Masoud Shirali,
+#' Xia Shen, Zheng Ning, Yakov Tsepilov, Masoud Shirali,
#' Generation Scotland, Blair H. Smith, Lynne J. Hocking, Sandosh Padmanabhan, Caroline Hayward,
#' David J. Porteous, Yudi Pawitan, Chris S. Haley, Yurii S. Aulchenko (2015).
#' Simple multi-trait analysis identifies novel loci
@@ -57,7 +67,7 @@
#' @aliases load.summary
#' @keywords multivariate, meta-analysis
#'
-`load.summary` <- function(files, cor.pheno = NULL, indep.snps = NULL) {
+`load.summary` <- function(files, cor.pheno = NULL, indep.snps = NULL, est.var = FALSE, type = 'outbred', vars = NULL) {
if (!all(is.character(files))) {
stop('files should be given as strings!')
}
@@ -76,6 +86,7 @@
cat('loading data ...\n')
data <- c()
fn <- files # rev(files)
+ vys <- rep(1, m)
for (i in m:1) {
dd <- read.table(fn[i], header = TRUE, stringsAsFactors = FALSE)
colnames(dd) <- tolower(colnames(dd))
@@ -87,9 +98,16 @@
data[[i]] <- dd
rownames(data[[i]]) <- dd$snp
}
+ if (est.var) {
+ D <- dd$n*2*dd$freq*(1 - dd$freq)
+ vy <- D*dd$se**2 + D*dd$beta**2/(dd$n - 1)
+ dvy <- density(na.omit(vy))
+ vys[i] <- dvy$x[which.max(dvy$y)] #median(vy, na.rm = TRUE)
+ }
progress((m - i + 1)/m*100)
}
cat('\n')
+ if (est.var) cat('phenotypic variances are:', vys, '\n')
cat('checking markers ...\n')
snps <- data[[1]]$snp
for (i in 2:m) {
Modified: pkg/MultiABEL/src/MultiSummaryLoop.f90
===================================================================
--- pkg/MultiABEL/src/MultiSummaryLoop.f90 2016-02-03 13:49:45 UTC (rev 2048)
+++ pkg/MultiABEL/src/MultiSummaryLoop.f90 2016-03-02 15:04:10 UTC (rev 2049)
@@ -46,3 +46,4 @@
end subroutine MultiSummaryLoop
+
More information about the Genabel-commits
mailing list