[Genabel-commits] r2052 - in pkg/MultiABEL: . R man src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sat Apr 16 22:39:43 CEST 2016
Author: yurii
Date: 2016-04-16 22:39:42 +0200 (Sat, 16 Apr 2016)
New Revision: 2052
Modified:
pkg/MultiABEL/DESCRIPTION
pkg/MultiABEL/R/load.summary.R
pkg/MultiABEL/man/MultiABEL.Rd
pkg/MultiABEL/man/MultiLoad.Rd
pkg/MultiABEL/man/MultiMeta.Rd
pkg/MultiABEL/man/MultiRep.Rd
pkg/MultiABEL/man/MultiSummary.Rd
pkg/MultiABEL/man/Multivariate.Rd
pkg/MultiABEL/man/load.summary.Rd
pkg/MultiABEL/src/symbols.rds
Log:
added options columnNames and fixedN to load.summary
Modified: pkg/MultiABEL/DESCRIPTION
===================================================================
--- pkg/MultiABEL/DESCRIPTION 2016-03-23 01:12:36 UTC (rev 2051)
+++ pkg/MultiABEL/DESCRIPTION 2016-04-16 20:39:42 UTC (rev 2052)
@@ -5,7 +5,9 @@
Date: 2016-02-25
Author: Xia Shen
Maintainer: Xia Shen <xia.shen at ki.se>
-Description: Multivariate genome-wide association analyses. The analysis can be performed on individual-level data or multiple single-trait genome-wide summary statistics.
+Description: Multivariate genome-wide association analyses. The analysis can be
+ performed on individual-level data or multiple single-trait genome-wide summary
+ statistics.
Depends:
R (>= 2.10),
svMisc
@@ -15,3 +17,4 @@
License: GPL (>= 2)
LazyLoad: yes
Packaged: 2016-02-25 15:52:58 CET; xia
+RoxygenNote: 5.0.1
Modified: pkg/MultiABEL/R/load.summary.R
===================================================================
--- pkg/MultiABEL/R/load.summary.R 2016-03-23 01:12:36 UTC (rev 2051)
+++ pkg/MultiABEL/R/load.summary.R 2016-04-16 20:39:42 UTC (rev 2052)
@@ -27,11 +27,15 @@
#' are available, for which the argument \code{vars} has to be given.
#' @param vars A numeric vector gives the variance of the genotypes at each SNP, e.g. coded as 0, 1 and 2.
#' Only used when \code{type = "precise"}.
+#' @param columnNames A vector with names of columns containing necessary information in the input file;
+#' default values are c('snp','a1','freq','beta','se','n'). The values are case-insensitive.
+#' @param fixedN Sample size to assume across all analyses. When provided, this number will be used
+#' instead of the sample sizes specified in the input files.
#'
#' @return The function returns a list of class \code{multi.summary}, containing two elements: \code{gwa}
#' (the cleaned data to be processed in multi-trait GWAS) and \code{cor.pheno} (user input or estimated).
#'
-#' @author Xia Shen
+#' @author Xia Shen, Yurii Aulchenko
#'
#' @references
#' Xia Shen, Zheng Ning, Yakov Tsepilov, Masoud Shirali,
@@ -67,7 +71,8 @@
#' @aliases load.summary
#' @keywords multivariate, meta-analysis
#'
-`load.summary` <- function(files, cor.pheno = NULL, indep.snps = NULL, est.var = FALSE, type = 'outbred', vars = NULL) {
+`load.summary` <- function(files, cor.pheno = NULL, indep.snps = NULL, est.var = FALSE, type = 'outbred', vars = NULL,
+ columnNames = c ('snp','a1','freq','beta','se','n'), fixedN = NULL ) {
if (!all(is.character(files))) {
stop('files should be given as strings!')
}
@@ -83,6 +88,30 @@
stop('wrong dimensions of cor.pheno!')
}
}
+ columnNames <- tolower( columnNames)
+ if (!is.null( fixedN )) if (fixedN <= 0) {
+ stop('fixedN should be a positive number')
+ }
+ if (is.null(fixedN)) { colNamLen = 6 } else { colNamLen = 5 }
+ if (!is.character(columnNames)) {
+ stop('columnNames should be character')
+ }
+ if (length( columnNames ) != colNamLen) {
+ cat('columnNames should be a vector with',colNamLen,'elements')
+ stop('... exiting')
+ }
+ if ( length(unique(columnNames)) != colNamLen ) {
+ stop('elements of columnNames must be unique')
+ }
+ # column Names Translation
+ cNT = list(
+ 'snp' = columnNames[1],
+ 'a1' = columnNames[2],
+ 'freq'= columnNames[3],
+ 'beta'= columnNames[4],
+ 'se' = columnNames[5],
+ 'n' = columnNames[6]
+ )
cat('loading data ...\n')
data <- c()
fn <- files # rev(files)
@@ -90,17 +119,27 @@
for (i in m:1) {
dd <- read.table(fn[i], header = TRUE, stringsAsFactors = FALSE)
colnames(dd) <- tolower(colnames(dd))
- idx <- which(duplicated(dd$snp))
+ currentColNames <- colnames(dd)
+ if ( any( !( columnNames %in% currentColNames ) ) ) {
+ cat('file column names do not match columnNames in ',fn[i],'... ')
+ stop('exiting')
+ }
+ idx <- which(duplicated(dd[, cNT[['snp']] ]))
if (length(idx) > 0) {
data[[i]] <- dd[-idx,]
- rownames(data[[i]]) <- dd$snp[-idx]
+ rownames(data[[i]]) <- dd[ -idx , cNT[['snp']] ]
} else {
data[[i]] <- dd
- rownames(data[[i]]) <- dd$snp
+ rownames(data[[i]]) <- dd[, cNT[['snp']] ]
}
if (est.var) {
- D <- dd$n*2*dd$freq*(1 - dd$freq)
- vy <- D*dd$se**2 + D*dd$beta**2/(dd$n - 1)
+ if (!is.null(fixedN)) {
+ D <- dd[, cNT[['n']] ]*2*dd[, cNT[['freq']] ]*(1 - dd[, cNT[['freq']] ])
+ vy <- D*dd[ , cNT[['se']] ]**2 + D*dd[, cNT[['beta']] ]**2/(dd[, cNT[['n']] ] - 1)
+ } else {
+ D <- fixedN*2*dd[, cNT[['freq']] ]*(1 - dd[, cNT[['freq']] ])
+ vy <- D*dd[ , cNT[['se']] ]**2 + D*dd[, cNT[['beta']] ]**2/( fixedN - 1)
+ }
dvy <- density(na.omit(vy))
vys[i] <- dvy$x[which.max(dvy$y)] #median(vy, na.rm = TRUE)
}
@@ -109,9 +148,9 @@
cat('\n')
if (est.var) cat('phenotypic variances are:', vys, '\n')
cat('checking markers ...\n')
- snps <- data[[1]]$snp
+ snps <- data[[1]][, cNT[['snp']] ]
for (i in 2:m) {
- snps <- data[[i]]$snp[data[[i]]$snp %in% snps]
+ snps <- data[[i]][ data[[i]][, cNT[['snp']] ] %in% snps, cNT[['snp']] ]
progress(i/m*100)
}
snps <- unique(snps)
@@ -124,31 +163,37 @@
cat('\n')
cat('correcting parameters ...\n')
for (i in 2:m) {
- if (any(data[[i]]$a1 != data[[1]]$a1)) {
- adj <- 2*as.numeric(data[[i]]$a1 == data[[1]]$a1) - 1
- data[[i]]$beta <- data[[i]]$beta*adj
- data[[i]]$freq <- (adj == 1)*data[[i]]$freq + (adj == -1)*(1 - data[[i]]$freq)
+ if (any( data[[i]][, cNT[['a1']] ] != data[[1]][, cNT[['a1']] ] )) {
+ adj <- 2*as.numeric( data[[i]][, cNT[['a1']] ] == data[[1]][, cNT[['a1']] ] ) - 1
+ data[[i]][, cNT$beta ] <- data[[i]][, cNT$beta ]*adj
+ data[[i]][, cNT$freq ] <- (adj == 1)*data[[i]][, cNT$freq] + (adj == -1)*(1 - data[[i]][,cNT$freq])
}
progress(i/m*100)
}
cat('\n')
cat('adjusting sample size ... ')
n0 <- matrix(NA, nrow(data[[1]]), m)
- for (i in 1:m) {
- n0[,i] <- data[[i]]$n
- }
+ if (is.null(fixedN)) {
+ for (i in 1:m) {
+ n0[,i] <- data[[i]][,cNT$n]
+ }
+ } else {
+ for (i in 1:m) {
+ n0[,i] <- fixedN
+ }
+ }
n <- apply(n0, 1, "min")
cat('done.\n')
cat('finalizing summary statistics ...\n')
gwa0 <- matrix(NA, nrow(data[[1]]), 2*m + 2)
for (i in 1:m) {
- gwa0[,i*2 - 1] <- data[[i]][,'beta']
- gwa0[,i*2] <- data[[i]][,'se']
+ gwa0[,i*2 - 1] <- data[[i]][,cNT$beta]
+ gwa0[,i*2] <- data[[i]][,cNT$se]
progress(i/m*100)
}
- gwa0[,2*length(data) + 1] <- data[[1]][,'freq']
+ gwa0[,2*length(data) + 1] <- data[[1]][,cNT$freq]
gwa0[,2*length(data) + 2] <- n
- rownames(gwa0) <- data[[1]]$snp
+ rownames(gwa0) <- data[[1]][,cNT$snp]
gwa0 <- na.omit(gwa0)
cat('\n')
if (is.null(cor.pheno)) {
Modified: pkg/MultiABEL/man/MultiABEL.Rd
===================================================================
--- pkg/MultiABEL/man/MultiABEL.Rd 2016-03-23 01:12:36 UTC (rev 2051)
+++ pkg/MultiABEL/man/MultiABEL.Rd 2016-04-16 20:39:42 UTC (rev 2052)
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MultiABEL.R
\docType{package}
\name{MultiABEL}
@@ -10,16 +10,16 @@
MultiABEL: Multivariate Genome-Wide Association Analyses
}
\details{
-Performing multivariate genome-wide association (MVGWA) analyses.
+Performing multivariate genome-wide association (MVGWA) analyses.
The modules are compatible with existing *ABEL data formats. The GWA
analyses can be done on individual level data or on
single-trait GWA summary statistics only.
For converting data from other formats, see
-\code{\link{convert.snp.illumina}} (Illumina/Affymetrix-like format). This is
-our preferred converting function, very extensively tested. Other conversion
-functions include:
+\code{\link{convert.snp.illumina}} (Illumina/Affymetrix-like format). This is
+our preferred converting function, very extensively tested. Other conversion
+functions include:
\code{\link{convert.snp.text}} (conversion from human-readable GenABEL format),
\code{\link{convert.snp.ped}} (Linkage, Merlin, Mach, and similar files),
\code{\link{convert.snp.mach}} (Mach-format),
@@ -27,16 +27,16 @@
\code{\link{convert.snp.affymetrix}} (BRML-style files).
For converting of GenABEL's data to other formats, see
-\code{\link{export.merlin}} (MERLIN and MACH formats),
+\code{\link{export.merlin}} (MERLIN and MACH formats),
\code{\link{export.impute}} (IMPUTE, SNPTEST and CHIAMO formats),
\code{\link{export.plink}} (PLINK format, also exports phenotypic data).
To load the data, see \code{\link{load.gwaa.data}}.
-For conversion to DatABEL format (used by ProbABEL and some other
-GenABEL suite packages), see
-\code{\link{impute2databel}},
-\code{\link{impute2mach}},
+For conversion to DatABEL format (used by ProbABEL and some other
+GenABEL suite packages), see
+\code{\link{impute2databel}},
+\code{\link{impute2mach}},
\code{\link{mach2databel}}.
For data management and manipulations see
Modified: pkg/MultiABEL/man/MultiLoad.Rd
===================================================================
--- pkg/MultiABEL/man/MultiLoad.Rd 2016-03-23 01:12:36 UTC (rev 2051)
+++ pkg/MultiABEL/man/MultiLoad.Rd 2016-04-16 20:39:42 UTC (rev 2052)
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MultiLoad.R
\name{MultiLoad}
\alias{MultiLoad}
@@ -7,7 +7,8 @@
\title{Load individual-level data for multivariate GWA analysis}
\usage{
MultiLoad(gwaa.data = NULL, phenofile = NULL, genofile = NULL, trait.cols,
- covariate.cols = NULL, cuts = 20, impute = TRUE, ...)
+ covariate.cols = NULL, cuts = 20, impute = TRUE, gaussianize = TRUE,
+ ...)
}
\arguments{
\item{gwaa.data}{An (optional) object of \code{\link{gwaa.data-class}}.}
@@ -36,7 +37,7 @@
to perform multivariate test for each genetic variant.
}
\note{
-Either \code{gwaa.data} (for GenABEL data format) or the combination of
+Either \code{gwaa.data} (for GenABEL data format) or the combination of
\code{phenofile} and \code{genofile} (for DatABEL data format) has to be provided.
If all are provided, only \code{phenofile} and \code{genofile} will be used. When using
DatABEL format input, individual IDs in \code{phenofile} and \code{genofile} have to match!
@@ -48,18 +49,18 @@
data(ge03d2ex.clean)
## running multivariate GWAS for 3 traits: height, weight, bmi
-loaded <- MultiLoad(gwaa.data = ge03d2ex.clean, trait.cols = c(5, 6, 8),
+loaded <- MultiLoad(gwaa.data = ge03d2ex.clean, trait.cols = c(5, 6, 8),
covariate.cols = c(2, 3))
## converting the same dataset into DatABEL format files
require(DatABEL)
-write.table(phdata(ge03d2ex.clean), 'pheno.txt', col.names = TRUE, row.names = TRUE,
+write.table(phdata(ge03d2ex.clean), 'pheno.txt', col.names = TRUE, row.names = TRUE,
quote = FALSE, sep = '\\t')
geno <- as.double(ge03d2ex.clean)
matrix2databel(geno, 'geno')
## running the multivariate GWAS again
-loaded <- MultiLoad(phenofile = 'pheno.txt', genofile = 'geno', trait.cols = c(5, 6, 8),
+loaded <- MultiLoad(phenofile = 'pheno.txt', genofile = 'geno', trait.cols = c(5, 6, 8),
covariate.cols = c(2, 3))
}
}
@@ -68,7 +69,7 @@
}
\references{
Xia Shen, ..., Jim Wilson, Gordan Lauc, Yurii Aulchenko (2015).
-Multi-omic-variate analysis identified novel loci associated with
+Multi-omic-variate analysis identified novel loci associated with
compound N-Glycosylation of human Immunoglobulin G. \emph{Submitted}.
}
\seealso{
Modified: pkg/MultiABEL/man/MultiMeta.Rd
===================================================================
--- pkg/MultiABEL/man/MultiMeta.Rd 2016-03-23 01:12:36 UTC (rev 2051)
+++ pkg/MultiABEL/man/MultiMeta.Rd 2016-04-16 20:39:42 UTC (rev 2052)
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MultiMeta.R
\name{MultiMeta}
\alias{MultiMeta}
@@ -17,7 +17,7 @@
\value{
The function returns a matrix containing the meta-analysis results, where the row names are
the variants names, and the column names are the names of the studies provided in \code{reslist} or
-generated by the program if no names are given, with an extra column \code{"p.meta"} containing the
+generated by the program if no names are given, with an extra column \code{"p.meta"} containing the
meta-analysis P-values. The results are also written into \code{outfile}.
}
\description{
@@ -30,9 +30,9 @@
data(ge03d2ex)
## in each dataset, running multivariate GWAS for 3 traits: height, weight, bmi
-res1 <- Multivariate(gwaa.data = ge03d2, trait.cols = c(5, 6, 8),
+res1 <- Multivariate(gwaa.data = ge03d2, trait.cols = c(5, 6, 8),
covariate.cols = c(2, 3))
-res2 <- Multivariate(gwaa.data = ge03d2ex.clean, trait.cols = c(5, 6, 8),
+res2 <- Multivariate(gwaa.data = ge03d2ex.clean, trait.cols = c(5, 6, 8),
covariate.cols = c(2, 3))
## running meta-analysis by combining the P-values
@@ -44,7 +44,7 @@
}
\references{
Xia Shen, ..., Gordan Lauc, Jim Wilson, Yurii Aulchenko (2014).
-Multi-omic-variate analysis identified the association between 14q32.33 and
+Multi-omic-variate analysis identified the association between 14q32.33 and
compound N-Glycosylation of human Immunoglobulin G \emph{Submitted}.
}
\seealso{
Modified: pkg/MultiABEL/man/MultiRep.Rd
===================================================================
--- pkg/MultiABEL/man/MultiRep.Rd 2016-03-23 01:12:36 UTC (rev 2051)
+++ pkg/MultiABEL/man/MultiRep.Rd 2016-04-16 20:39:42 UTC (rev 2052)
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MultiRep.R
\name{MultiRep}
\alias{MultiRep}
@@ -14,30 +14,30 @@
\item{training.pheno}{An (optional) matrix or data frame contains the phenotype data for the discovery
sample, preferably adjusted for fixed effects and population structure before multivariate GWA analysis.}
-\item{training.phenofile}{An (optional) plain text file contains phenotypes for the discovery sample.
+\item{training.phenofile}{An (optional) plain text file contains phenotypes for the discovery sample.
If this is provided, it will serve as \code{training.pheno}.}
\item{test.pheno}{An (optional) matrix or data frame contains the phenotype data for the replication
sample, preferably adjusted for fixed effects and population structure.}
-\item{test.phenofile}{An (optional) plain text file contains phenotypes of the replication sample.
+\item{test.phenofile}{An (optional) plain text file contains phenotypes of the replication sample.
If this is provided, it will serve as \code{test.pheno}.}
\item{pheno.names}{A vector (length > 1) giving the column names of the phenotypes to be analyzed.}
-\item{training.geno}{A matrix or data.frame that contains the discovery sample genotype dosages
+\item{training.geno}{A matrix or data.frame that contains the discovery sample genotype dosages
of the variants to replicate.}
-\item{test.geno}{A matrix or data.frame that contains the replication sample genotype dosages
-of the variants to replicate. This object should have the same column names and order
+\item{test.geno}{A matrix or data.frame that contains the replication sample genotype dosages
+of the variants to replicate. This object should have the same column names and order
as \code{training.geno}.}
}
\value{
-The function returns a list of 3 matrices. \code{$replication} contains the estimate of
-variant effect on the corresponding compound phenotype (\code{beta_c}), standard error (\code{s.e.}),
+The function returns a list of 3 matrices. \code{$replication} contains the estimate of
+variant effect on the corresponding compound phenotype (\code{beta_c}), standard error (\code{s.e.}),
replication P-value (\code{P}), and proportion of phenotypic variance explained (\code{R-squared}).
-\code{$training.coef} contains the estimated coefficients in the discovery sample of each phenotype
-for each variant to construct the compound phenotype. \code{$test.coef} contains similar coefficients
+\code{$training.coef} contains the estimated coefficients in the discovery sample of each phenotype
+for each variant to construct the compound phenotype. \code{$test.coef} contains similar coefficients
as in \code{$training.coef} but estimated in the replication sample, but these are just for the record,
NOT used in the replication procedure.
}
@@ -46,7 +46,7 @@
}
\note{
Either \code{.pheno} or \code{.phenofile} has to be provided.
-If both are provided, only \code{phenofile} will be used. Individual IDs
+If both are provided, only \code{phenofile} will be used. Individual IDs
in \code{.pheno} or \code{.phenofile} and \code{.geno} have to match!
}
\examples{
@@ -55,7 +55,7 @@
data(ge03d2)
## running multivariate GWAS for 3 traits: height, weight, bmi
-res <- Multivariate(gwaa.data = ge03d2, trait.cols = c(5, 6, 8),
+res <- Multivariate(gwaa.data = ge03d2, trait.cols = c(5, 6, 8),
covariate.cols = c(2, 3))
## extracting 5 significant variants
@@ -70,7 +70,7 @@
test.geno <- as.double(gtdata(ge03d2c)[,snps])
## try replication
-rep <- MultiRep(training.pheno = phdata(ge03d2), test.pheno = phdata(ge03d2c),
+rep <- MultiRep(training.pheno = phdata(ge03d2), test.pheno = phdata(ge03d2c),
pheno.names = c('height', 'weight', 'bmi'),
training.geno = training.geno, test.geno = test.geno)
}
@@ -80,7 +80,7 @@
}
\references{
Xia Shen, ..., Gordan Lauc, Jim Wilson, Yurii Aulchenko (2014).
-Multi-omic-variate analysis identified the association between 14q32.33 and
+Multi-omic-variate analysis identified the association between 14q32.33 and
compound N-Glycosylation of human Immunoglobulin G \emph{Submitted}.
}
\seealso{
Modified: pkg/MultiABEL/man/MultiSummary.Rd
===================================================================
--- pkg/MultiABEL/man/MultiSummary.Rd 2016-03-23 01:12:36 UTC (rev 2051)
+++ pkg/MultiABEL/man/MultiSummary.Rd 2016-04-16 20:39:42 UTC (rev 2052)
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MultiSummary.R
\name{MultiSummary}
\alias{MultiSummary}
@@ -6,18 +6,22 @@
\alias{multi.summary}
\title{Multivariate genome-wide association scan using summary statistics}
\usage{
-MultiSummary(x, type = "outbred", vars = NULL)
+MultiSummary(x, index = NULL, type = "outbred", vars = NULL)
}
\arguments{
\item{x}{A data object of class \code{multi.summary} loaded by the function \code{load.summary}.}
-\item{type}{A string gives the type of analysis. Default is \code{"outbred"}, referring to
-general outbred populations, following Hardy-Weinberg equilibrium. \code{"inbred"} refers to
+\item{index}{A numeric vector that gives the indices of the traits to be analyzed jointly.}
+
+\item{type}{A string gives the type of analysis. Default is \code{"outbred"}, referring to
+general outbred populations, following Hardy-Weinberg equilibrium. \code{"inbred"} refers to
inbred populations, where no heterozygotes exist, namely, allele frequency = genotype frequency.
-\code{"precise"} refers to precise test statistics, especially when the individual-level data
-are available, for which the argument \code{vars} has to be given.}
+\code{"precise"} refers to precise test statistics, especially when the individual-level data
+are available, for which the argument \code{vars} has to be given. \code{"direct"} refers to
+test statistics directly constructed from the T-statistics in univariate GWAS, this provides a
+scale-invariant test most similar to the direct MANOVA, but may be less powerful in some scenarios.}
-\item{vars}{A numeric vector gives the variance of the genotypes at each SNP, coded as 0, 1 and 2.
+\item{vars}{A numeric vector gives the variance of the genotypes at each SNP, e.g. coded as 0, 1 and 2.
Only used when \code{type = "precise"}.}
}
\value{
@@ -45,8 +49,8 @@
indep.snps <- as.character(read.table('indep.snps')$V1)
## load summary statistics of the six traits
-stats.male <- load.summary(files = c('bmi.txt', 'height.txt',
- 'weight.txt', 'hip.txt', 'wc.txt',
+stats.male <- load.summary(files = c('bmi.txt', 'height.txt',
+ 'weight.txt', 'hip.txt', 'wc.txt',
'whr.txt'), indep.snps = indep.snps)
## perform multi-trait meta-GWAS
@@ -58,10 +62,10 @@
Xia Shen
}
\references{
-Xia Shen, Xiao Wang, Zheng Ning, Yakov Tsepilov, Masoud Shirali,
-Generation Scotland, Blair H. Smith, Lynne J. Hocking, Sandosh Padmanabhan, Caroline Hayward,
+Xia Shen, Zheng Ning, Yakov Tsepilov, Masoud Shirali,
+Generation Scotland, Blair H. Smith, Lynne J. Hocking, Sandosh Padmanabhan, Caroline Hayward,
David J. Porteous, Yudi Pawitan, Chris S. Haley, Yurii S. Aulchenko (2015).
-Simple multi-trait analysis identifies novel loci
+Simple multi-trait analysis identifies novel loci
associated with growth and obesity measures. \emph{Submitted}.
}
\seealso{
Modified: pkg/MultiABEL/man/Multivariate.Rd
===================================================================
--- pkg/MultiABEL/man/Multivariate.Rd 2016-03-23 01:12:36 UTC (rev 2051)
+++ pkg/MultiABEL/man/Multivariate.Rd 2016-04-16 20:39:42 UTC (rev 2052)
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Multivariate.R
\name{Multivariate}
\alias{Multivariate}
@@ -6,11 +6,13 @@
\alias{multivariate}
\title{Multivariate genome-wide association scan}
\usage{
-Multivariate(x, ...)
+Multivariate(x, trait.idx = NULL, ...)
}
\arguments{
\item{x}{An object created by \code{\link{MultiLoad}}.}
+\item{trait.idx}{A vector giving the indices of traits to be analyzed.}
+
\item{...}{not used.}
}
\value{
@@ -26,7 +28,7 @@
analysis of variance (MANOVA).
}
\note{
-Either \code{gwaa.data} (for GenABEL data format) or the combination of
+Either \code{gwaa.data} (for GenABEL data format) or the combination of
\code{phenofile} and \code{genofile} (for DatABEL data format) has to be provided.
If all are provided, only \code{phenofile} and \code{genofile} will be used. When using
DatABEL format input, individual IDs in \code{phenofile} and \code{genofile} have to match!
@@ -37,7 +39,7 @@
data(ge03d2ex.clean)
## running multivariate GWAS for 3 traits: height, weight, bmi
-loaded <- Multivariate(gwaa.data = ge03d2ex.clean, trait.cols = c(5, 6, 8),
+loaded <- MultiLoad(gwaa.data = ge03d2ex.clean, trait.cols = c(5, 6, 8),
covariate.cols = c(2, 3))
## running the multivariate GWAS again
@@ -49,7 +51,7 @@
}
\references{
Xia Shen, ..., Gordan Lauc, Jim Wilson, Yurii Aulchenko (2015).
-Multi-omic-variate analysis identified novel loci associated with
+Multi-omic-variate analysis identified novel loci associated with
compound N-Glycosylation of human Immunoglobulin G. \emph{Submitted}.
}
\seealso{
Modified: pkg/MultiABEL/man/load.summary.Rd
===================================================================
--- pkg/MultiABEL/man/load.summary.Rd 2016-03-23 01:12:36 UTC (rev 2051)
+++ pkg/MultiABEL/man/load.summary.Rd 2016-04-16 20:39:42 UTC (rev 2052)
@@ -1,35 +1,56 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/load.summary.R
\name{load.summary}
\alias{load.summary}
\title{Loading multiple summary statistics from genome-wide association studies}
\usage{
-load.summary(files, cor.pheno = NULL, indep.snps = NULL)
+load.summary(files, cor.pheno = NULL, indep.snps = NULL, est.var = FALSE,
+ type = "outbred", vars = NULL, columnNames = c("snp", "a1", "freq",
+ "beta", "se", "n"), fixedN = NULL)
}
\arguments{
\item{files}{A vector of file names as strings. Each file name should contain summary statistics of
one trait to be included in the multi-trait analysis. The columns of the summary statistics have to
-contain \code{'snp'} (marker ID), \code{'a1'} (the first allele), \code{'freq'}
-(frequency of the first allele), \code{'beta'} (effect size), \code{'se'} (standard error), and
+contain (uppercase or lowercase does not matter) \code{'snp'} (marker ID), \code{'a1'} (the first allele), \code{'freq'}
+(frequency of the first allele), \code{'beta'} (effect size), \code{'se'} (standard error), and
\code{'n'} (sample size).}
-\item{cor.pheno}{A #traits x #traits matrix of correlation matrix of the phenotypes, to be used to
+\item{cor.pheno}{A #traits x #traits matrix of correlation matrix of the phenotypes, to be used to
construct the multi-trait test statistic. If \code{NULL},
-this matrix will be estimated from genome-wide summary statistics. If you have partially overlapping
+this matrix will be estimated from genome-wide summary statistics. If you have partially overlapping
samples for different traits, shrinkage correlation matrix is recommended (see reference), so in that
case, unless you know what you are doing, leave this argument as default, i.e. \code{NULL}.}
-\item{indep.snps}{A vector of strings containing the names of a set of independent SNPs. This is
-recommended to be generated by LD-pruning the genotype data in a certain cohort. Typically the
+\item{indep.snps}{A vector of strings containing the names of a set of independent SNPs. This is
+recommended to be generated by LD-pruning the genotype data in a certain cohort. Typically the
number of SNPs should be more than 10,000 in order to obtain a good estimate of \code{cor.pheno}. If
\code{cor.pheno = NULL}, this argument cannot be \code{NULL}.}
+
+\item{est.var}{A logical value. If \code{FALSE}, each phenotypic variance is assumed to be known as 1.
+If \code{TRUE}, each phenotypic variance will be estimated to adjust the summary statistics, so that
+the corresponding phenotypic variance is 1.}
+
+\item{type}{A string gives the type of analysis. Default is \code{"outbred"}, referring to
+general outbred populations, following Hardy-Weinberg equilibrium. \code{"inbred"} refers to
+inbred populations, where no heterozygotes exist, namely, allele frequency = genotype frequency.
+\code{"precise"} refers to precise genotypic variance, especially when the individual-level data
+are available, for which the argument \code{vars} has to be given.}
+
+\item{vars}{A numeric vector gives the variance of the genotypes at each SNP, e.g. coded as 0, 1 and 2.
+Only used when \code{type = "precise"}.}
+
+\item{columnNames}{A vector with names of columns containing necessary information in the input file;
+default values are c('snp','a1','freq','beta','se','n'). The values are case-insensitive.}
+
+\item{fixedN}{Sample size to assume across all analyses. When provided, this number will be used
+instead of the sample sizes specified in the input files.}
}
\value{
The function returns a list of class \code{multi.summary}, containing two elements: \code{gwa}
(the cleaned data to be processed in multi-trait GWAS) and \code{cor.pheno} (user input or estimated).
}
\description{
-The function loads multiple meta-GWAS summary statistics, for subsequent multi-trait GWAS.
+The function loads multiple meta-GWAS summary statistics, for subsequent multi-trait GWAS.
Currently, the package only analyzes summary statistics from inverse-Gaussianized continuous traits.
}
\examples{
@@ -55,13 +76,13 @@
}
}
\author{
-Xia Shen
+Xia Shen, Yurii Aulchenko
}
\references{
-Xia Shen, Xiao Wang, Zheng Ning, Yakov Tsepilov, Masoud Shirali,
-Generation Scotland, Blair H. Smith, Lynne J. Hocking, Sandosh Padmanabhan, Caroline Hayward,
+Xia Shen, Zheng Ning, Yakov Tsepilov, Masoud Shirali,
+Generation Scotland, Blair H. Smith, Lynne J. Hocking, Sandosh Padmanabhan, Caroline Hayward,
David J. Porteous, Yudi Pawitan, Chris S. Haley, Yurii S. Aulchenko (2015).
-Simple multi-trait analysis identifies novel loci
+Simple multi-trait analysis identifies novel loci
associated with growth and obesity measures. \emph{Submitted}.
}
\seealso{
Modified: pkg/MultiABEL/src/symbols.rds
===================================================================
(Binary files differ)
More information about the Genabel-commits
mailing list