[Genabel-commits] r2049 - in pkg/MultiABEL: . R src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Mar 2 16:04:10 CET 2016


Author: shenxia
Date: 2016-03-02 16:04:10 +0100 (Wed, 02 Mar 2016)
New Revision: 2049

Modified:
   pkg/MultiABEL/ChangeLog
   pkg/MultiABEL/DESCRIPTION
   pkg/MultiABEL/R/MultiSummary.R
   pkg/MultiABEL/R/load.summary.R
   pkg/MultiABEL/src/MultiSummaryLoop.f90
Log:
T'RT implementation commit

Modified: pkg/MultiABEL/ChangeLog
===================================================================
--- pkg/MultiABEL/ChangeLog	2016-02-03 13:49:45 UTC (rev 2048)
+++ pkg/MultiABEL/ChangeLog	2016-03-02 15:04:10 UTC (rev 2049)
@@ -1,3 +1,8 @@
+2015-02-25 15:52 xia
+
+    * Updated to version 1.1-4
+    * MultiSummary() implemented using T-stats
+
 2015-08-20 09:09 xia
 
     * Updated to version 1.1-3

Modified: pkg/MultiABEL/DESCRIPTION
===================================================================
--- pkg/MultiABEL/DESCRIPTION	2016-02-03 13:49:45 UTC (rev 2048)
+++ pkg/MultiABEL/DESCRIPTION	2016-03-02 15:04:10 UTC (rev 2049)
@@ -1,8 +1,8 @@
 Package: MultiABEL
 Type: Package
 Title: Multi-Trait Genome-Wide Association Analyses
-Version: 1.1-3
-Date: 2015-08-15
+Version: 1.1-4
+Date: 2016-02-25
 Author: Xia Shen
 Maintainer: Xia Shen <xia.shen at ki.se>
 Description: Multivariate genome-wide association analyses. The analysis can be performed on individual-level data or multiple single-trait genome-wide summary statistics. 
@@ -14,4 +14,4 @@
     DatABEL
 License: GPL (>= 2)
 LazyLoad: yes
-Packaged: 2015-08-15 18:30:18 CET; xia
+Packaged: 2016-02-25 15:52:58 CET; xia

Modified: pkg/MultiABEL/R/MultiSummary.R
===================================================================
--- pkg/MultiABEL/R/MultiSummary.R	2016-02-03 13:49:45 UTC (rev 2048)
+++ pkg/MultiABEL/R/MultiSummary.R	2016-03-02 15:04:10 UTC (rev 2049)
@@ -3,12 +3,15 @@
 #' This function performs multivariate GWA analysis using meta-GWAS summary statistics
 #' 
 #' @param x A data object of class \code{multi.summary} loaded by the function \code{load.summary}.
+#' @param index A numeric vector that gives the indices of the traits to be analyzed jointly.
 #' @param type A string gives the type of analysis. Default is \code{"outbred"}, referring to 
 #' general outbred populations, following Hardy-Weinberg equilibrium. \code{"inbred"} refers to 
 #' inbred populations, where no heterzygotes exists, namely, allele frequency = genotype frequency.
 #' \code{"precise"} refers to precise test statistics, especially when the individual-level data 
-#' are available, for which the argument \code{vars} has to be given.
-#' @param vars A numeric vector gives the variance of the genotypes at each SNP, coded as 0, 1 and 2.
+#' are available, for which the argument \code{vars} has to be given. \code{"direct"} refers to 
+#' test statistics directly constructed from the T-statistics in univariate GWAS, this provides a
+#' scale-invariant test most similar to the direct MANOVA, but may be less powerful in some scenarios.
+#' @param vars A numeric vector gives the variance of the genotypes at each SNP, e.g. coded as 0, 1 and 2.
 #' Only used when \code{type = "precise"}.
 #' 
 #' @return The function returns a data frame containing the multi-trait GWAS results, where the row names are
@@ -20,7 +23,7 @@
 #' @author Xia Shen
 #' 
 #' @references 
-#' Xia Shen, Xiao Wang, Zheng Ning, Yakov Tsepilov, Masoud Shirali, 
+#' Xia Shen, Zheng Ning, Yakov Tsepilov, Masoud Shirali,  
 #' Generation Scotland, Blair H. Smith, Lynne J. Hocking, Sandosh Padmanabhan, Caroline Hayward, 
 #' David J. Porteous, Yudi Pawitan, Chris S. Haley, Yurii S. Aulchenko (2015).
 #' Simple multi-trait analysis identifies novel loci 
@@ -57,9 +60,9 @@
 #' 
 `MultiSummary` <- function(x, index = NULL,  type = 'outbred', vars = NULL) {
 	if (class(x) != 'multi.summary') {
-		stop('wrong data type!')
+		stop('Wrong data type!')
 	}
-	cat('multi-trait genome scan ... ')
+	cat('Multi-trait genome scan ... ')
 	if (!is.null(index)) {
 		m <- nrow(x$cor.pheno)
 		x$cor.pheno <- x$cor.pheno[index,index]
@@ -71,6 +74,8 @@
 	n <- x$gwa[,2*m + 2]
 	k <- nrow(x$gwa)
 	betamat <- x$gwa[,seq(1, 2*m, 2)]
+	semat <- x$gwa[,seq(2, 2*m, 2)]
+	tmat <- betamat/semat
 	dimnames(betamat) <- list(NULL, NULL)
 	res <- data.frame(Marker = rownames(x$gwa), Freq = f, N = n, Beta.S = NA, SE = NA, P = NA)
 	rownames(res) <- rownames(x$gwa)
@@ -81,24 +86,32 @@
 			pil = numeric(k), fstat = numeric(k), PACKAGE = "MultiABEL")
 	} else if (type == 'inbred') {
 		scan <- .Fortran('MultiSummaryLoopInbred', k = as.integer(k), m = as.integer(m), nn = n, 
-				f = f, betamat = betamat, R = x$cor.pheno, invR = solve(x$cor.pheno), 
-				E0 = diag(m), H = diag(m), coef = betamat, 
-				pil = numeric(k), fstat = numeric(k), PACKAGE = "MultiABEL")
+			f = f, betamat = betamat, R = x$cor.pheno, invR = solve(x$cor.pheno), 
+			E0 = diag(m), H = diag(m), coef = betamat, 
+			pil = numeric(k), fstat = numeric(k), PACKAGE = "MultiABEL")
 	} else if (type == 'precise' & !is.null(vars)) {
 		scan <- .Fortran('MultiSummaryLoopPrecise', k = as.integer(k), m = as.integer(m), nn = n, 
-				betamat = betamat, R = x$cor.pheno, invR = solve(x$cor.pheno), 
-				E0 = diag(m), H = diag(m), coef = betamat, 
-				pil = numeric(k), fstat = numeric(k), vg = vars, PACKAGE = "MultiABEL")
-	} else {
-		stop('wrong type of analysis!')
+			betamat = betamat, R = x$cor.pheno, invR = solve(x$cor.pheno), 
+			E0 = diag(m), H = diag(m), coef = betamat, 
+			pil = numeric(k), fstat = numeric(k), vg = vars, PACKAGE = "MultiABEL")
+	} else if (type == 'direct') {
+		scan <- .Fortran('MultiSummaryLoopDirect', k = as.integer(k), m = as.integer(m), nn = as.numeric(n), 
+				tmat = tmat, invR = solve(x$cor.pheno), 
+				pil = numeric(k), fstat = numeric(k), PACKAGE = "MultiABEL")
+	}else {
+		stop('Wrong type of analysis!')
 	}
-	cat('done.\n')
+	cat('Done.\n')
 	res$Beta.S <- scan$pil
 	res$P <- pf(scan$fstat, m, n - m - 1, lower.tail = FALSE)
 	res$SE <- res$Beta.S/sqrt(qchisq(res$P, 1, lower.tail = FALSE))
-	coef <- matrix(NA, k, m*2)
-	coef[,seq(1, 2*m, 2)] <- scan$coef
-	coef[,seq(2, 2*m, 2)] <- sqrt(scan$s)
-	colnames(coef) <- colnames(x$gwa)[1:(2*m)]
-	return(data.frame(res, coef))
+	if (type != 'direct') {
+		coef <- matrix(NA, k, m*2)
+		coef[,seq(1, 2*m, 2)] <- scan$coef
+		coef[,seq(2, 2*m, 2)] <- sqrt(scan$s)
+		colnames(coef) <- colnames(x$gwa)[1:(2*m)]
+		return(data.frame(res, coef))
+	} else {
+		return(res)
+	}
 }

Modified: pkg/MultiABEL/R/load.summary.R
===================================================================
--- pkg/MultiABEL/R/load.summary.R	2016-02-03 13:49:45 UTC (rev 2048)
+++ pkg/MultiABEL/R/load.summary.R	2016-03-02 15:04:10 UTC (rev 2049)
@@ -17,6 +17,16 @@
 #' recommended to be generated by LD-pruning the genotype data in a certain cohort. Typically the 
 #' number of SNPs should be more than 10,000 in order to obtain a good estimate of \code{cor.pheno}. If
 #' \code{cor.pheno = NULL}, this argument cannot be \code{NULL}.
+#' @param est.var A logical value. If \code{FALSE}, each phenotypic variance is assumed to be known as 1. 
+#' If \code{TRUE}, each phenotypic variance will be estimated to adjust the summary statistics, so that
+#' the corresponding phenoypic variance is 1.
+#' @param type A string gives the type of analysis. Default is \code{"outbred"}, referring to 
+#' general outbred populations, following Hardy-Weinberg equilibrium. \code{"inbred"} refers to 
+#' inbred populations, where no heterzygotes exists, namely, allele frequency = genotype frequency.
+#' \code{"precise"} refers to precise genotypic variance, especially when the individual-level data 
+#' are available, for which the argument \code{vars} has to be given. 
+#' @param vars A numeric vector gives the variance of the genotypes at each SNP, e.g. coded as 0, 1 and 2.
+#' Only used when \code{type = "precise"}.
 #' 
 #' @return The function returns a list of class \code{multi.summary}, containing two elements: \code{gwa}
 #' (the cleaned data to be processed in multi-trait GWAS) and \code{cor.pheno} (user input or estimated).
@@ -24,7 +34,7 @@
 #' @author Xia Shen
 #' 
 #' @references 
-#' Xia Shen, Xiao Wang, Zheng Ning, Yakov Tsepilov, Masoud Shirali, 
+#' Xia Shen, Zheng Ning, Yakov Tsepilov, Masoud Shirali, 
 #' Generation Scotland, Blair H. Smith, Lynne J. Hocking, Sandosh Padmanabhan, Caroline Hayward, 
 #' David J. Porteous, Yudi Pawitan, Chris S. Haley, Yurii S. Aulchenko (2015).
 #' Simple multi-trait analysis identifies novel loci 
@@ -57,7 +67,7 @@
 #' @aliases load.summary
 #' @keywords multivariate, meta-analysis
 #' 
-`load.summary` <- function(files, cor.pheno = NULL, indep.snps = NULL) {
+`load.summary` <- function(files, cor.pheno = NULL, indep.snps = NULL, est.var = FALSE, type = 'outbred', vars = NULL) {
 	if (!all(is.character(files))) {
 		stop('files should be given as strings!')
 	}
@@ -76,6 +86,7 @@
 	cat('loading data ...\n')
 	data <- c()
 	fn <- files # rev(files)
+	vys <- rep(1, m)
 	for (i in m:1) {
 		dd <- read.table(fn[i], header = TRUE, stringsAsFactors = FALSE)
 		colnames(dd) <- tolower(colnames(dd))
@@ -87,9 +98,16 @@
 			data[[i]] <- dd
 			rownames(data[[i]]) <- dd$snp
 		}
+		if (est.var) {
+			D <-  dd$n*2*dd$freq*(1 - dd$freq)
+			vy <- D*dd$se**2 + D*dd$beta**2/(dd$n - 1)
+			dvy <- density(na.omit(vy))
+			vys[i] <- dvy$x[which.max(dvy$y)] #median(vy, na.rm = TRUE)
+		}
 		progress((m - i + 1)/m*100)
 	}
 	cat('\n')
+	if (est.var) cat('phenotypic variances are:', vys, '\n')
 	cat('checking markers ...\n')
 	snps <- data[[1]]$snp
 	for (i in 2:m) {

Modified: pkg/MultiABEL/src/MultiSummaryLoop.f90
===================================================================
--- pkg/MultiABEL/src/MultiSummaryLoop.f90	2016-02-03 13:49:45 UTC (rev 2048)
+++ pkg/MultiABEL/src/MultiSummaryLoop.f90	2016-03-02 15:04:10 UTC (rev 2049)
@@ -46,3 +46,4 @@
 
 end subroutine MultiSummaryLoop
 
+



More information about the Genabel-commits mailing list