[Genabel-commits] r2010 - in pkg/MultiABEL: . R man src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Jul 31 16:08:45 CEST 2015
Author: shenxia
Date: 2015-07-31 16:08:45 +0200 (Fri, 31 Jul 2015)
New Revision: 2010
Added:
pkg/MultiABEL/R/MultiLoad.R
pkg/MultiABEL/man/MultiLoad.Rd
Modified:
pkg/MultiABEL/ChangeLog
pkg/MultiABEL/DESCRIPTION
pkg/MultiABEL/NAMESPACE
pkg/MultiABEL/R/MultiSummary.R
pkg/MultiABEL/R/Multivariate.R
pkg/MultiABEL/R/load.summary.R
pkg/MultiABEL/R/misc.R
pkg/MultiABEL/man/MultiSummary.Rd
pkg/MultiABEL/man/Multivariate.Rd
pkg/MultiABEL/src/MultiSummaryLoopPrecise.f90
pkg/MultiABEL/src/symbols.rds
Log:
Updated to version 1.1-2 for efficiency.
Multivariate() updated to two procedures: MultiLoad() + Multivariate(), where the former loads individual-level data and reports detailed single-trait summary statistics, and the latter calls MultiSummary(type = 'precise') procedure.
Modified: pkg/MultiABEL/ChangeLog
===================================================================
--- pkg/MultiABEL/ChangeLog 2015-07-27 15:04:38 UTC (rev 2009)
+++ pkg/MultiABEL/ChangeLog 2015-07-31 14:08:45 UTC (rev 2010)
@@ -1,3 +1,19 @@
+2015-07-31 15:54 xia
+
+ * Updated to version 1.1-2
+ * Multivariate() updated to two procedures:
+ MultiLoad() + Multivariate()
+ where the former loads individual-level
+ data and reports detailed single-trait
+ summary statistics, and the latter calls
+ MultiSummary(type = 'precise') procedure.
+
+2015-06-05 02:01 xia
+
+ * Updated to version 1.1-1
+ * duplicated marker names bug fixed
+ for load.summary()
+
2015-06-02 02:01 xia
* Updated to version 1.1-0
Modified: pkg/MultiABEL/DESCRIPTION
===================================================================
--- pkg/MultiABEL/DESCRIPTION 2015-07-27 15:04:38 UTC (rev 2009)
+++ pkg/MultiABEL/DESCRIPTION 2015-07-31 14:08:45 UTC (rev 2010)
@@ -1,12 +1,11 @@
Package: MultiABEL
Type: Package
-Title: Multi-trait Genome-Wide Association Analyses
-Version: 1.1-0
-Date: 2015-06-02
+Title: Multi-Trait Genome-Wide Association Analyses
+Version: 1.1-2
+Date: 2015-07-31
Author: Xia Shen
Maintainer: Xia Shen <xia.shen at ki.se>
-Description: Performing multivariate
- genome-wide association (MVGWA) analyses.
+Description: Multivariate genome-wide association analyses. The analysis can be performed on individual-level data or multiple single-trait genome-wide summary statistics.
Depends:
R (>= 2.10),
svMisc
@@ -15,4 +14,4 @@
DatABEL
License: GPL (>= 2)
LazyLoad: yes
-Packaged: 2015-06-02 02:00:18 CET; xia
+Packaged: 2015-07-31 16:03:18 CET; xia
Modified: pkg/MultiABEL/NAMESPACE
===================================================================
--- pkg/MultiABEL/NAMESPACE 2015-07-27 15:04:38 UTC (rev 2009)
+++ pkg/MultiABEL/NAMESPACE 2015-07-31 14:08:45 UTC (rev 2010)
@@ -1,4 +1,4 @@
useDynLib(MultiABEL)
import("svMisc")
-export("Multivariate", "MultiRep", "MultiMeta", "load.summary", "MultiSummary")
-exportClasses("multi.summary")
\ No newline at end of file
+export("Multivariate", "MultiRep", "MultiMeta", "MultiLoad","load.summary", "MultiSummary")
+exportClasses("multi.summary", "multi.loaded")
\ No newline at end of file
Added: pkg/MultiABEL/R/MultiLoad.R
===================================================================
--- pkg/MultiABEL/R/MultiLoad.R (rev 0)
+++ pkg/MultiABEL/R/MultiLoad.R 2015-07-31 14:08:45 UTC (rev 2010)
@@ -0,0 +1,164 @@
+#' Load individual-level data for multivariate GWA analysis
+#'
+#' The function imports GenABEL (gwaa.data class) or DatABEL (.fv*) data formats
+#' to perform multivariate test for each genetic variant.
+#'
+#' @param gwaa.data An (optional) object of \code{\link{gwaa.data-class}}.
+#' @param phenofile An (optional) plain text file contains phenotypic outcomes and covariates.
+#' @param genofile An (optional) object of \code{\link{databel-class}} containing genotype data.
+#' @param trait.cols A vector (length > 1) giving the column indices of the phenotypes to be analyzed.
+#' @param covariate.cols An (optional) vector giving the column indices of the covariates to be included.
+#' @param cuts An integer telling how many pieces the genotype data matrix will be splitted for analyze.
+#' The value can be set depending on the memory size. The smaller the value is, potentially the faster
+#' the analysis will be.
+#' @param impute An (optional) logical argument telling whether missing genotypes should be imputed.
+#' @param ... not used.
+#'
+#' @note Either \code{gwaa.data} (for GenABEL data format) or the combination of
+#' \code{phenofile} and \code{genofile} (for DatABEL data format) has to be provided.
+#' If all are provided, only \code{phenofile} and \code{genofile} will be used. When using
+#' DatABEL format input, individual IDs in \code{phenofile} and \code{genofile} have to match!
+#'
+#' @return The function returns a list of cleaned statistics for subsequent, with class \code{multi.loaded}.
+#'
+#' @author Xia Shen
+#'
+#' @references
+#' Xia Shen, ..., Jim Wilson, Gordan Lauc, Yurii Aulchenko (2015).
+#' Multi-omic-variate analysis identified novel loci associated with
+#' compound N-Glycosylation of human Immunoglobulin G. \emph{Submitted}.
+#'
+#' @seealso
+#' \code{\link{Multivariate}}
+#'
+#' @examples
+#' \dontrun{
+#' ## loading example gwaa.data in GenABEL
+#' require(GenABEL)
+#' data(ge03d2ex.clean)
+#'
+#' ## running multivariate GWAS for 3 traits: height, weight, bmi
+#' loaded <- MultiLoad(gwaa.data = ge03d2ex.clean, trait.cols = c(5, 6, 8),
+#' covariate.cols = c(2, 3))
+#'
+#' ## converting the same dataset into DatABEL format files
+#' require(DatABEL)
+#' write.table(phdata(ge03d2ex.clean), 'pheno.txt', col.names = TRUE, row.names = TRUE,
+#' quote = FALSE, sep = '\t')
+#' geno <- as.double(ge03d2ex.clean)
+#' matrix2databel(geno, 'geno')
+#'
+#' ## running the multivariate GWAS again
+#' loaded <- MultiLoad(phenofile = 'pheno.txt', genofile = 'geno', trait.cols = c(5, 6, 8),
+#' covariate.cols = c(2, 3))
+#' }
+#' @aliases MultiLoad, multiload
+#' @keywords multivariate, multiload, load
+#'
+`MultiLoad` <- function(gwaa.data = NULL, phenofile = NULL, genofile = NULL,
+ trait.cols, covariate.cols = NULL, cuts = 20, impute = TRUE, ...) {
+ set.seed(911)
+ if (!require(GenABEL) | !require(DatABEL)) {
+ stop('GenABEL and DatABEL packages required!')
+ }
+ cat('loading data ...')
+ if (length(trait.cols) == 1) {
+ stop('select multiple traits to analyze!')
+ }
+ if (!is.null(phenofile) & !is.null(genofile)) {
+ pheno <- read.table(phenofile, header = TRUE)
+ geno <- databel(genofile)
+ if (nrow(pheno) != nrow(geno)) {
+ stop('sizes of phenofile and genofile do not match!')
+ }
+ GenABEL <- FALSE
+ cat(' OK\n')
+ } else if (!is.null(gwaa.data)) {
+ if (!is.null(phenofile)) {
+ pheno <- read.table(phenofile, header = TRUE)
+ } else {
+ pheno <- phdata(gwaa.data)
+ }
+ GenABEL <- TRUE
+ cat(' OK\n')
+ } else {
+ stop('insufficient data input!')
+ }
+ cat('preparing data ...')
+ Y <- as.matrix(pheno[,trait.cols])
+ okidx <- which(!is.na(rowSums(Y)))
+ Y <- Y[okidx,]
+ n <- length(okidx)
+ for (i in 1:ncol(Y)) Y[,i] <- zscore(Y[,i])
+ X <- matrix(1, length(okidx), 1)
+ if (!is.null(covariate.cols)) {
+ X <- cbind(X, as.matrix(pheno[okidx,covariate.cols]))
+ }
+ if (!GenABEL) {
+ nsnp <- ncol(geno)
+ snp <- colnames(geno)
+ } else {
+ nsnp <- ncol(gwaa.data at gtdata)
+ snp <- gwaa.data at gtdata@snpnames
+ }
+ bs <- matrix(NA, nsnp, ncol(Y)*2)
+ cvg <- numeric(nsnp)
+ if (cuts != 1) {
+ cutpoints <- round(seq(1, nsnp, length = cuts + 1))
+ starts <- cutpoints[1:cuts]
+ ends <- c(cutpoints[2:cuts] - 1, cutpoints[cuts + 1])
+ } else {
+ starts <- 1
+ ends <- nsnp
+ }
+ if (cuts != 1) {
+ cat('\n')
+ }
+ for (k in 1:cuts) {
+ idx <- starts[k]:ends[k]
+ if (!GenABEL) {
+ g <- databel2matrix(geno[,idx])[okidx,]
+ } else {
+ g <- as.double(gwaa.data at gtdata[,idx])[okidx,]
+ }
+ if (any(is.na(g))) {
+ if (impute) {
+ for (j in which(is.na(colSums(g)))) {
+ naidx <- which(is.na(g[,j]))
+ g[naidx,j] <- sample(g[-naidx,j], length(naidx), replace = TRUE)
+ }
+ }
+ }
+ cvg[idx] <- colVars(g)
+ U3 = crossprod(X, g)
+ U4 = solve(crossprod(X), U3)
+ Str = g - X %*% U4
+ Str2 = colSums(Str ^ 2)
+ for (i in 1:ncol(Y)) {
+ y <- Y[,i]
+ U1 = crossprod(X, y)
+ U2 = solve(crossprod(X), U1)
+ ytr = y - X %*% U2
+ b = as.vector(crossprod(ytr, Str) / Str2)
+ ## calculate residual error
+ sig = (sum(ytr ^ 2) - b ^ 2 * Str2) / (n - k - 2)
+ ## calculate standard error for beta
+ err = sqrt(sig * (1 / Str2))
+ bs[idx,i*2 - 1] <- b
+ bs[idx,i*2] <- err
+ }
+ if (cuts != 1) {
+ progress(k/cuts*100)
+ }
+ }
+ if (cuts == 1) {
+ cat(' OK\n')
+ } else {
+ cat('\n')
+ }
+ R <- cor(Y)
+ rownames(bs) <- snp
+ res <- list(gwa = bs, cor.pheno = R, cvg = cvg, snp = snp, nid = nrow(Y), nsnp = nsnp)
+ class(res) <- 'multi.loaded'
+ return(res)
+}
Property changes on: pkg/MultiABEL/R/MultiLoad.R
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Modified: pkg/MultiABEL/R/MultiSummary.R
===================================================================
--- pkg/MultiABEL/R/MultiSummary.R 2015-07-27 15:04:38 UTC (rev 2009)
+++ pkg/MultiABEL/R/MultiSummary.R 2015-07-31 14:08:45 UTC (rev 2010)
@@ -1,6 +1,6 @@
#' Multivariate genome-wide association scan using summary statistics
#'
-#' The function performs multivariate GWA analysis using meta-GWAS summary statistics
+#' This function performs multivariate GWA analysis using meta-GWAS summary statistics
#'
#' @param x A data object of class \code{multi.summary} loaded by the function \code{load.summary}.
#' @param type A string gives the type of analysis. Default is \code{"outbred"}, referring to
@@ -10,12 +10,12 @@
#' are available, for which the argument \code{vars} has to be given.
#' @param vars A numeric vector gives the variance of the genotypes at each SNP, coded as 0, 1 and 2.
#' Only used when \code{type = "precise"}.
-#'
+#'
#' @return The function returns a data frame containing the multi-trait GWAS results, where the row names are
#' the variants names. The column names are: variant name (\code{Marker}), allele frequency (\code{Freq}),
#' the smallest sample size of the traits (\code{N}), effect on the phenotype score (\code{Beta.S}, see reference),
#' standard error (\code{SE}), p-value (\code{P}), and the rest the coefficients to construct the phenotype score
-#' (see reference)
+#' (see reference).
#'
#' @author Xia Shen
#'
Modified: pkg/MultiABEL/R/Multivariate.R
===================================================================
--- pkg/MultiABEL/R/Multivariate.R 2015-07-27 15:04:38 UTC (rev 2009)
+++ pkg/MultiABEL/R/Multivariate.R 2015-07-31 14:08:45 UTC (rev 2010)
@@ -4,37 +4,29 @@
#' and performs multivariate test for each genetic variant using multivariate
#' analysis of variance (MANOVA).
#'
-#' @param gwaa.data An (optional) object of \code{\link{gwaa.data-class}}.
-#' @param phenofile An (optional) plain text file contains phenotypic outcomes and covariates.
-#' @param genofile An (optional) object of \code{\link{databel-class}} containing genotype data.
-#' @param outfile A string giving the path and file name of the output file. By default, a file
-#' named \code{'Multivariate_GWAS_results.txt'} will be written into the current working directory.
-#' @param trait.cols A vector (length > 1) giving the column indices of the phenotypes to be analyzed.
-#' @param covariate.cols An (optional) vector giving the column indices of the covariates to be included.
-#' @param cuts An integer telling how many pieces the genotype data matrix will be splitted for analyze.
-#' The value can be set depending on the memory size. The smaller the value is, potentially the faster
-#' the analysis will be.
-#' @param ... other parameters passed to \code{\link{summary.manova}}, e.g. different kinds of test statistics.
+#' @param x An object created by \code{\link{MultiLoad}}.
+#' @param ... not used.
#'
#' @note Either \code{gwaa.data} (for GenABEL data format) or the combination of
#' \code{phenofile} and \code{genofile} (for DatABEL data format) has to be provided.
#' If all are provided, only \code{phenofile} and \code{genofile} will be used. When using
#' DatABEL format input, individual IDs in \code{phenofile} and \code{genofile} have to match!
#'
-#' @return The function returns a matrix of class "MultiRes" containing the multivariate GWA results,
-#' where the row names are the variants names, and the column names correspond to the manova test
-#' statistic value, approximated F statistic value, F statistic numerator degrees of freedom,
-#' F statistic denominator degrees of freedom, and the P-value. The results are also written into \code{outfile}.
+#' @return The function returns a data frame containing the multi-trait GWAS results, where the row names are
+#' the variants names. The column names are: variant name (\code{Marker}), allele frequency (\code{Freq}),
+#' the smallest sample size of the traits (\code{N}), effect on the phenotype score (\code{Beta.S}, see reference),
+#' standard error (\code{SE}), p-value (\code{P}), and the rest the coefficients to construct the phenotype score
+#' (see reference).
#'
#' @author Xia Shen
#'
#' @references
-#' Xia Shen, ..., Gordan Lauc, Jim Wilson, Yurii Aulchenko (2014).
-#' Multi-omic-variate analysis identified the association between 14q32.33 and
-#' compound N-Glycosylation of human Immunoglobulin G \emph{Submitted}.
+#' Xia Shen, ..., Gordan Lauc, Jim Wilson, Yurii Aulchenko (2015).
+#' Multi-omic-variate analysis identified novel loci associated with
+#' compound N-Glycosylation of human Immunoglobulin G. \emph{Submitted}.
#'
#' @seealso
-#' \code{\link{aov}}, \code{\link{summary.manova}}
+#' \code{\link{MultiLoad}}
#'
#' @examples
#' \dontrun{
@@ -42,111 +34,40 @@
#' data(ge03d2ex.clean)
#'
#' ## running multivariate GWAS for 3 traits: height, weight, bmi
-#' res <- Multivariate(gwaa.data = ge03d2ex.clean, trait.cols = c(5, 6, 8),
+#' loaded <- Multivariate(gwaa.data = ge03d2ex.clean, trait.cols = c(5, 6, 8),
#' covariate.cols = c(2, 3))
#'
-#' ## converting the same dataset into DatABEL format files
-#' write.table(phdata(ge03d2ex.clean), 'pheno.txt', col.names = TRUE, row.names = TRUE,
-#' quote = FALSE, sep = '\t')
-#' geno <- as.double(ge03d2ex.clean)
-#' matrix2databel(geno, 'geno')
-#'
#' ## running the multivariate GWAS again
-#' res <- Multivariate(phenofile = 'pheno.txt', genofile = 'geno', trait.cols = c(5, 6, 8),
-#' covariate.cols = c(2, 3))
+#' res <- Multivariate(loaded)
#' }
#' @aliases Multivariate, multivariate
#' @keywords multivariate
#'
-`Multivariate` <- function(gwaa.data = NULL, phenofile = NULL, genofile = NULL, outfile = 'Multivariate_GWAS_results.txt',
- trait.cols, covariate.cols = NULL, cuts = 20, ...) {
- if (!require(GenABEL) | !require(DatABEL)) {
- stop('GenABEL and DatABEL packages required!')
+`Multivariate` <- function(x, ...) {
+ if (class(x) != 'multi.loaded') {
+ stop('wrong data type!')
}
- cat('loading data ...')
- if (length(trait.cols) == 1) {
- stop('select multiple traits to analyze!')
- }
- if (!is.null(phenofile) & !is.null(genofile)) {
- pheno <- read.table(phenofile, header = TRUE)
- geno <- databel(genofile)
- if (nrow(pheno) != nrow(geno)) {
- stop('sizes of phenofile and genofile do not match!')
- }
- GenABEL <- FALSE
- cat(' OK\n')
- } else if (!is.null(gwaa.data)) {
- if (!is.null(phenofile)) {
- pheno <- read.table(phenofile, header = TRUE)
- } else {
- pheno <- phdata(gwaa.data)
- }
- GenABEL <- TRUE
- cat(' OK\n')
- } else {
- stop('insufficient data input!')
- }
+ npheno <- ncol(x$cor.pheno)
cat('initializing analysis ...')
- if (!GenABEL) {
- res <- matrix(NA, ncol(geno), 5)
- } else {
- res <- matrix(NA, ncol(gwaa.data at gtdata), 5)
- }
- colnames(res) <- c('MANOVA.stat', 'F.stat', 'df.num', 'df.den', 'P.F')
- if (!GenABEL) {
- rownames(res) <- colnames(geno)
- } else {
- rownames(res) <- gwaa.data at gtdata@snpnames
- }
- Y <- as.matrix(pheno[,trait.cols])
- X <- matrix(1, nrow(Y), 1)
- if (!is.null(covariate.cols)) {
- X <- cbind(X, as.matrix(pheno[,covariate.cols]))
- }
- if (!GenABEL) {
- nsnp <- ncol(geno)
- } else {
- nsnp <- ncol(gwaa.data at gtdata)
- }
- if (cuts != 1) {
- cutpoints <- round(seq(1, nsnp, length = cuts + 1))
- starts <- cutpoints[1:cuts]
- ends <- c(cutpoints[2:cuts] - 1, cutpoints[cuts + 1])
- } else {
- starts <- 1
- ends <- nsnp
- }
+ res <- matrix(NA, x$nsnp, 5)
+ coef <- matrix(NA, x$nsnp, npheno)
+ res <- data.frame(Marker = rownames(x$gwa), Beta.S = NA, SE = NA, P = NA)
+ rownames(res) <- rownames(coef) <- x$snp
cat(' OK\n')
- if (cuts != 1) {
- cat('multivariate GWA scan ...\n')
- } else {
- cat('multivariate GWA scan ...')
- }
- for (k in 1:cuts) {
- idx <- starts[k]:ends[k]
- if (!GenABEL) {
- g <- databel2matrix(geno[,idx])
- } else {
- g <- as.double(gwaa.data at gtdata[,idx])
- }
- for (j in 1:ncol(g)) {
- stats <- try(summary(manova(Y ~ 0 + X + g[,j]), ...)$stats, silent = TRUE)
- if (!inherits(stats, 'try-error')) {
- res[idx[j],] <- stats[nrow(stats) - 1,-1]
- }
- }
- if (cuts != 1) {
- progress(k/cuts*100)
- }
- }
- if (cuts == 1) {
- cat(' OK\n')
- } else {
- cat('\n')
- }
- cat('writing results ...')
- write.table(res, outfile, row.names = TRUE, col.names = TRUE, quote = FALSE, sep = '\t')
- cat(' OK\n')
- class(res) <- 'MultiRes'
- return(res)
+ cat('multivariate GWA scan ...')
+ k <- x$nsnp
+ m <- nrow(x$cor.pheno)
+ n <- rep(as.integer(x$nid), k)
+ betamat <- x$gwa[,seq(1, 2*m, 2)]
+ scan <- .Fortran('MultiSummaryLoopPrecise', k = as.integer(k), m = as.integer(m), nn = n,
+ betamat = betamat, R = x$cor.pheno, invR = solve(x$cor.pheno),
+ E0 = diag(m), H = diag(m), coef = betamat,
+ pil = numeric(k), fstat = numeric(k), vg = x$cvg, PACKAGE = "MultiABEL")
+ cat(' OK\n')
+ res$Beta.S <- scan$pil
+ res$P <- pf(scan$fstat, m, n - m - 1, lower.tail = FALSE)
+ res$SE <- res$Beta.S/sqrt(qchisq(res$P, 1, lower.tail = FALSE))
+ coef <- scan$coef
+ colnames(coef) <- paste('coef.', colnames(x$cor.pheno), sep = '')
+ return(data.frame(res, coef))
}
Modified: pkg/MultiABEL/R/load.summary.R
===================================================================
--- pkg/MultiABEL/R/load.summary.R 2015-07-27 15:04:38 UTC (rev 2009)
+++ pkg/MultiABEL/R/load.summary.R 2015-07-31 14:08:45 UTC (rev 2010)
@@ -76,8 +76,13 @@
for (i in m:1) {
dd <- read.table(fn[i], header = TRUE)
idx <- which(duplicated(dd$snp))
- data[[i]] <- dd[-idx,]
- rownames(data[[i]]) <- dd$snp[-idx]
+ if (length(idx) > 0) {
+ data[[i]] <- dd[-idx,]
+ rownames(data[[i]]) <- dd$snp[-idx]
+ } else {
+ data[[i]] <- dd
+ rownames(data[[i]]) <- dd$snp
+ }
progress((m - i + 1)/m*100)
}
cat('\n')
Modified: pkg/MultiABEL/R/misc.R
===================================================================
--- pkg/MultiABEL/R/misc.R 2015-07-27 15:04:38 UTC (rev 2009)
+++ pkg/MultiABEL/R/misc.R 2015-07-31 14:08:45 UTC (rev 2010)
@@ -16,3 +16,6 @@
packageStartupMessage('Use citation("MultiABEL") to know how to cite this work.\n')
}
+colVars <- function(x) colSums((t(t(x) - colMeans(x)))^2)/(nrow(x) - 1)
+
+zscore <- function (x) qnorm((rank(x, na.last = "keep") - 0.5)/sum(!is.na(x)))
\ No newline at end of file
Added: pkg/MultiABEL/man/MultiLoad.Rd
===================================================================
--- pkg/MultiABEL/man/MultiLoad.Rd (rev 0)
+++ pkg/MultiABEL/man/MultiLoad.Rd 2015-07-31 14:08:45 UTC (rev 2010)
@@ -0,0 +1,80 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/MultiLoad.R
+\name{MultiLoad}
+\alias{MultiLoad}
+\alias{MultiLoad,}
+\alias{multiload}
+\title{Load individual-level data for multivariate GWA analysis}
+\usage{
+MultiLoad(gwaa.data = NULL, phenofile = NULL, genofile = NULL, trait.cols,
+ covariate.cols = NULL, cuts = 20, impute = TRUE, ...)
+}
+\arguments{
+\item{gwaa.data}{An (optional) object of \code{\link{gwaa.data-class}}.}
+
+\item{phenofile}{An (optional) plain text file contains phenotypic outcomes and covariates.}
+
+\item{genofile}{An (optional) object of \code{\link{databel-class}} containing genotype data.}
+
+\item{trait.cols}{A vector (length > 1) giving the column indices of the phenotypes to be analyzed.}
+
+\item{covariate.cols}{An (optional) vector giving the column indices of the covariates to be included.}
+
+\item{cuts}{An integer telling how many pieces the genotype data matrix will be splitted for analyze.
+The value can be set depending on the memory size. The smaller the value is, potentially the faster
+the analysis will be.}
+
+\item{impute}{An (optional) logical argument telling whether missing genotypes should be imputed.}
+
+\item{...}{not used.}
+}
+\value{
+The function returns a list of cleaned statistics for subsequent, with class \code{multi.loaded}.
+}
+\description{
+The function imports GenABEL (gwaa.data class) or DatABEL (.fv*) data formats
+to perform multivariate test for each genetic variant.
+}
+\note{
+Either \code{gwaa.data} (for GenABEL data format) or the combination of
+\code{phenofile} and \code{genofile} (for DatABEL data format) has to be provided.
+If all are provided, only \code{phenofile} and \code{genofile} will be used. When using
+DatABEL format input, individual IDs in \code{phenofile} and \code{genofile} have to match!
+}
+\examples{
+\dontrun{
+## loading example gwaa.data in GenABEL
+require(GenABEL)
+data(ge03d2ex.clean)
+
+## running multivariate GWAS for 3 traits: height, weight, bmi
+loaded <- MultiLoad(gwaa.data = ge03d2ex.clean, trait.cols = c(5, 6, 8),
+ covariate.cols = c(2, 3))
+
+## converting the same dataset into DatABEL format files
+require(DatABEL)
+write.table(phdata(ge03d2ex.clean), 'pheno.txt', col.names = TRUE, row.names = TRUE,
+ quote = FALSE, sep = '\\t')
+geno <- as.double(ge03d2ex.clean)
+matrix2databel(geno, 'geno')
+
+## running the multivariate GWAS again
+loaded <- MultiLoad(phenofile = 'pheno.txt', genofile = 'geno', trait.cols = c(5, 6, 8),
+ covariate.cols = c(2, 3))
+}
+}
+\author{
+Xia Shen
+}
+\references{
+Xia Shen, ..., Jim Wilson, Gordan Lauc, Yurii Aulchenko (2015).
+Multi-omic-variate analysis identified novel loci associated with
+compound N-Glycosylation of human Immunoglobulin G. \emph{Submitted}.
+}
+\seealso{
+\code{\link{Multivariate}}
+}
+\keyword{load}
+\keyword{multiload,}
+\keyword{multivariate,}
+
Property changes on: pkg/MultiABEL/man/MultiLoad.Rd
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Modified: pkg/MultiABEL/man/MultiSummary.Rd
===================================================================
--- pkg/MultiABEL/man/MultiSummary.Rd 2015-07-27 15:04:38 UTC (rev 2009)
+++ pkg/MultiABEL/man/MultiSummary.Rd 2015-07-31 14:08:45 UTC (rev 2010)
@@ -25,10 +25,10 @@
the variants names. The column names are: variant name (\code{Marker}), allele frequency (\code{Freq}),
the smallest sample size of the traits (\code{N}), effect on the phenotype score (\code{Beta.S}, see reference),
standard error (\code{SE}), p-value (\code{P}), and the rest the coefficients to construct the phenotype score
-(see reference)
+(see reference).
}
\description{
-The function performs multivariate GWA analysis using meta-GWAS summary statistics
+This function performs multivariate GWA analysis using meta-GWAS summary statistics
}
\examples{
\dontrun{
Modified: pkg/MultiABEL/man/Multivariate.Rd
===================================================================
--- pkg/MultiABEL/man/Multivariate.Rd 2015-07-27 15:04:38 UTC (rev 2009)
+++ pkg/MultiABEL/man/Multivariate.Rd 2015-07-31 14:08:45 UTC (rev 2010)
@@ -6,35 +6,19 @@
\alias{multivariate}
\title{Multivariate genome-wide association scan}
\usage{
-Multivariate(gwaa.data = NULL, phenofile = NULL, genofile = NULL,
- outfile = "Multivariate_GWAS_results.txt", trait.cols,
- covariate.cols = NULL, cuts = 20, ...)
+Multivariate(x, ...)
}
\arguments{
-\item{gwaa.data}{An (optional) object of \code{\link{gwaa.data-class}}.}
+\item{x}{An object created by \code{\link{MultiLoad}}.}
-\item{phenofile}{An (optional) plain text file contains phenotypic outcomes and covariates.}
-
-\item{genofile}{An (optional) object of \code{\link{databel-class}} containing genotype data.}
-
-\item{outfile}{A string giving the path and file name of the output file. By default, a file
-named \code{'Multivariate_GWAS_results.txt'} will be written into the current working directory.}
-
-\item{trait.cols}{A vector (length > 1) giving the column indices of the phenotypes to be analyzed.}
-
-\item{covariate.cols}{An (optional) vector giving the column indices of the covariates to be included.}
-
-\item{cuts}{An integer telling how many pieces the genotype data matrix will be splitted for analyze.
-The value can be set depending on the memory size. The smaller the value is, potentially the faster
-the analysis will be.}
-
-\item{...}{other parameters passed to \code{\link{summary.manova}}, e.g. different kinds of test statistics.}
+\item{...}{not used.}
}
\value{
-The function returns a matrix of class "MultiRes" containing the multivariate GWA results,
-where the row names are the variants names, and the column names correspond to the manova test
-statistic value, approximated F statistic value, F statistic numerator degrees of freedom,
-F statistic denominator degrees of freedom, and the P-value. The results are also written into \code{outfile}.
+The function returns a data frame containing the multi-trait GWAS results, where the row names are
+the variants names. The column names are: variant name (\code{Marker}), allele frequency (\code{Freq}),
+the smallest sample size of the traits (\code{N}), effect on the phenotype score (\code{Beta.S}, see reference),
+standard error (\code{SE}), p-value (\code{P}), and the rest the coefficients to construct the phenotype score
+(see reference).
}
\description{
The function imports GenABEL (gwaa.data class) or DatABEL (.fv*) data formats
@@ -53,30 +37,23 @@
data(ge03d2ex.clean)
## running multivariate GWAS for 3 traits: height, weight, bmi
-res <- Multivariate(gwaa.data = ge03d2ex.clean, trait.cols = c(5, 6, 8),
+loaded <- Multivariate(gwaa.data = ge03d2ex.clean, trait.cols = c(5, 6, 8),
covariate.cols = c(2, 3))
-## converting the same dataset into DatABEL format files
-write.table(phdata(ge03d2ex.clean), 'pheno.txt', col.names = TRUE, row.names = TRUE,
- quote = FALSE, sep = '\\t')
-geno <- as.double(ge03d2ex.clean)
-matrix2databel(geno, 'geno')
-
## running the multivariate GWAS again
-res <- Multivariate(phenofile = 'pheno.txt', genofile = 'geno', trait.cols = c(5, 6, 8),
- covariate.cols = c(2, 3))
+res <- Multivariate(loaded)
}
}
\author{
Xia Shen
}
\references{
-Xia Shen, ..., Gordan Lauc, Jim Wilson, Yurii Aulchenko (2014).
-Multi-omic-variate analysis identified the association between 14q32.33 and
-compound N-Glycosylation of human Immunoglobulin G \emph{Submitted}.
+Xia Shen, ..., Gordan Lauc, Jim Wilson, Yurii Aulchenko (2015).
+Multi-omic-variate analysis identified novel loci associated with
+compound N-Glycosylation of human Immunoglobulin G. \emph{Submitted}.
}
\seealso{
-\code{\link{aov}}, \code{\link{summary.manova}}
+\code{\link{MultiLoad}}
}
\keyword{multivariate}
Modified: pkg/MultiABEL/src/MultiSummaryLoopPrecise.f90
===================================================================
--- pkg/MultiABEL/src/MultiSummaryLoopPrecise.f90 2015-07-27 15:04:38 UTC (rev 2009)
+++ pkg/MultiABEL/src/MultiSummaryLoopPrecise.f90 2015-07-31 14:08:45 UTC (rev 2010)
@@ -8,23 +8,26 @@
implicit none
integer :: i, j, k, m
+ integer, dimension(k) :: nn
double precision, dimension(m, m) :: R, invR, E0, H, HinvE0
double precision, dimension(1, m) :: beta
double precision, dimension(m, 1) :: tmp
- double precision, dimension(k) :: nn, pil, fstat
+ double precision, dimension(k) :: pil, fstat, vg
double precision, dimension(k, m) :: betamat, coef
double precision, dimension(m) :: lambda
- double precision :: vg
do j = 1, k
+ !if (j < 2) then
+ ! write(*,*) nn(j)
+ !end if
E0 = nn(j)*R
do i = 1, m
beta(1,i) = betamat(j,i)
end do
- tmp = matmul(invR, transpose(beta))*vg
+ tmp = matmul(invR, transpose(beta))*vg(j)
do i = 1, m
coef(j,i) = tmp(i,1)
end do
- H = matmul(transpose(beta), beta)*nn(j)*vg
+ H = matmul(transpose(beta), beta)*nn(j)*vg(j)
HinvE0 = matmul(H, invR)/nn(j)
do i = 1, m
lambda(i) = HinvE0(i,i)
Modified: pkg/MultiABEL/src/symbols.rds
===================================================================
(Binary files differ)
More information about the Genabel-commits
mailing list