[Genabel-commits] r2069 - in pkg/MultiABEL: . R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Jun 15 00:35:33 CEST 2017


Author: shenxia
Date: 2017-06-15 00:35:32 +0200 (Thu, 15 Jun 2017)
New Revision: 2069

Modified:
   pkg/MultiABEL/ChangeLog
   pkg/MultiABEL/DESCRIPTION
   pkg/MultiABEL/NAMESPACE
   pkg/MultiABEL/R/MultiABEL.R
   pkg/MultiABEL/R/MultiLoad.R
   pkg/MultiABEL/R/load.summary.R
   pkg/MultiABEL/R/misc.R
   pkg/MultiABEL/man/MultiABEL.Rd
   pkg/MultiABEL/man/MultiLoad.Rd
   pkg/MultiABEL/man/MultiMeta.Rd
   pkg/MultiABEL/man/MultiRep.Rd
   pkg/MultiABEL/man/MultiSummary.Rd
   pkg/MultiABEL/man/Multivariate.Rd
   pkg/MultiABEL/man/load.summary.Rd
Log:
Checked for CRAN

Modified: pkg/MultiABEL/ChangeLog
===================================================================
--- pkg/MultiABEL/ChangeLog	2017-06-14 22:23:46 UTC (rev 2068)
+++ pkg/MultiABEL/ChangeLog	2017-06-14 22:35:32 UTC (rev 2069)
@@ -1,3 +1,8 @@
+2017-06-14 23:50 xia
+    
+    * Updated to version 1.1-6
+    * R CMD check --as-cran for CRAN submission
+
 2016-12-24 18:09 xia
 
     * MultiSummary() NaN bug fixed when index 

Modified: pkg/MultiABEL/DESCRIPTION
===================================================================
--- pkg/MultiABEL/DESCRIPTION	2017-06-14 22:23:46 UTC (rev 2068)
+++ pkg/MultiABEL/DESCRIPTION	2017-06-14 22:35:32 UTC (rev 2069)
@@ -1,21 +1,20 @@
 Package: MultiABEL
 Type: Package
-Title: Multi-Trait Genome-Wide Association Analyses
+Title: Multi-Trait Genome-Wide Association Analysis
 Version: 1.1-6
-Date: 2017-01-25
+Date: 2017-06-14
 Author: Xia Shen
-Maintainer: Xia Shen <xia.shen at ki.se>
+Maintainer: Xia Shen <xia.shen at ed.ac.uk>
 Description: Multivariate genome-wide association analyses. The analysis can be
     performed on individual-level data or multiple single-trait genome-wide summary
     statistics.
 Depends:
     R (>= 2.10),
-    svMisc,
-    data.table
+    svMisc
 Suggests:
     GenABEL,
     DatABEL
 License: GPL (>= 2)
 LazyLoad: yes
-Packaged: 2016-05-07 15:46 CET; xia
-RoxygenNote: 5.0.1
\ No newline at end of file
+Packaged: 2017-06-14 23:26 CET; xia
+RoxygenNote: 6.0.1
\ No newline at end of file

Modified: pkg/MultiABEL/NAMESPACE
===================================================================
--- pkg/MultiABEL/NAMESPACE	2017-06-14 22:23:46 UTC (rev 2068)
+++ pkg/MultiABEL/NAMESPACE	2017-06-14 22:35:32 UTC (rev 2069)
@@ -1,6 +1,7 @@
 useDynLib(MultiABEL)
+importFrom("stats", "cor", "density", "lm", "na.omit", "pchisq", "pf",
+             "qchisq", "qnorm", "quantile")
+importFrom("utils", "packageDescription", "read.table", "write.table")
 import("svMisc")
-import("stats")
-import("utils")
 export("Multivariate", "MultiRep", "MultiMeta", "MultiLoad","load.summary", "MultiSummary")
 exportClasses("multi.summary", "multi.loaded")
\ No newline at end of file

Modified: pkg/MultiABEL/R/MultiABEL.R
===================================================================
--- pkg/MultiABEL/R/MultiABEL.R	2017-06-14 22:23:46 UTC (rev 2068)
+++ pkg/MultiABEL/R/MultiABEL.R	2017-06-14 22:35:32 UTC (rev 2069)
@@ -7,35 +7,35 @@
 #' 
 #' For converting data from other formats, see
 #' 
-#' \code{\link{convert.snp.illumina}} (Illumina/Affymetrix-like format). This is 
+#' \code{{convert.snp.illumina}} (Illumina/Affymetrix-like format). This is 
 #' our preferred converting function, very extensively tested. Other conversion 
 #' functions include: 
-#' \code{\link{convert.snp.text}} (conversion from human-readable GenABEL format),
-#' \code{\link{convert.snp.ped}} (Linkage, Merlin, Mach, and similar files),
-#' \code{\link{convert.snp.mach}} (Mach-format),
-#' \code{\link{convert.snp.tped}} (from PLINK TPED format),
-#' \code{\link{convert.snp.affymetrix}} (BRML-style files).
+#' \code{{convert.snp.text}} (conversion from human-readable GenABEL format),
+#' \code{{convert.snp.ped}} (Linkage, Merlin, Mach, and similar files),
+#' \code{{convert.snp.mach}} (Mach-format),
+#' \code{{convert.snp.tped}} (from PLINK TPED format),
+#' \code{{convert.snp.affymetrix}} (BRML-style files).
 #' 
 #' For converting of GenABEL's data to other formats, see
-#' \code{\link{export.merlin}} (MERLIN and MACH formats), 
-#' \code{\link{export.impute}} (IMPUTE, SNPTEST and CHIAMO formats),
-#' \code{\link{export.plink}} (PLINK format, also exports phenotypic data). 
+#' \code{{export.merlin}} (MERLIN and MACH formats), 
+#' \code{{export.impute}} (IMPUTE, SNPTEST and CHIAMO formats),
+#' \code{{export.plink}} (PLINK format, also exports phenotypic data). 
 #' 
-#' To load the data, see \code{\link{load.gwaa.data}}.
+#' To load the data, see \code{{load.gwaa.data}}.
 #' 
 #' For conversion to DatABEL format (used by ProbABEL and some other 
 #' GenABEL suite packages), see 
-#' \code{\link{impute2databel}}, 
-#' \code{\link{impute2mach}}, 
-#' \code{\link{mach2databel}}. 
+#' \code{{impute2databel}}, 
+#' \code{{impute2mach}}, 
+#' \code{{mach2databel}}. 
 #' 
 #' For data managment and manipulations see
-#' \code{\link{merge.gwaa.data}},
-#' \code{\link{merge.snp.data}},
-#' \code{\link{gwaa.data-class}},
-#' \code{\link{snp.data-class}},
-#' \code{\link{snp.names}},
-#' \code{\link{snp.subset}}.
+#' \code{{merge.gwaa.data}},
+#' \code{{merge.snp.data}},
+#' \code{{gwaa.data-class}},
+#' \code{{snp.data-class}},
+#' \code{{snp.names}},
+#' \code{{snp.subset}}.
 #' 
 #' @author Xia Shen
 #' 

Modified: pkg/MultiABEL/R/MultiLoad.R
===================================================================
--- pkg/MultiABEL/R/MultiLoad.R	2017-06-14 22:23:46 UTC (rev 2068)
+++ pkg/MultiABEL/R/MultiLoad.R	2017-06-14 22:35:32 UTC (rev 2069)
@@ -3,16 +3,17 @@
 #' The function imports GenABEL (gwaa.data class) or DatABEL (.fv*) data formats
 #' to perform multivariate test for each genetic variant.
 #' 
-#' @param gwaa.data An (optional) object of \code{\link{gwaa.data-class}}.
+#' @param gwaa.data An (optional) object of \code{{gwaa.data-class}}.
 #' @param phenofile An (optional) plain text file contains phenotypic outcomes and covariates.
-#' @param genofile An (optional) object of \code{\link{databel-class}} containing genotype data.
+#' @param genofile An (optional) object of \code{{databel-class}} containing genotype data.
 #' @param trait.cols A vector (length > 1) giving the column indices of the phenotypes to be analyzed. 
 #' @param covariate.cols An (optional) vector giving the column indices of the covariates to be included.
 #' @param cuts An integer telling how many pieces the genotype data matrix will be splitted for analyze.
 #' The value can be set depending on the memory size. The smaller the value is, potentially the faster
 #' the analysis will be.
 #' @param impute An (optional) logical argument telling whether missing genotypes should be imputed.
-#' @param gaussianize logical argument gaussianize telling whether traits should be gaussianized
+#' @param gaussianize An (optional) logical argument telling whether the phenotypes should be gaussianized
+#' via inverse-Gaussian transformation.
 #' @param ... not used.
 #' 
 #' @note Either \code{gwaa.data} (for GenABEL data format) or the combination of 
@@ -60,9 +61,9 @@
                            trait.cols, covariate.cols = NULL, cuts = 20, 
 						   impute = TRUE, gaussianize = TRUE, ...) {
 	set.seed(911)
-	if (!require(GenABEL) | !require(DatABEL)) {
-		stop('GenABEL and DatABEL packages required!')
-	}
+	#if (!require(GenABEL) | !require(DatABEL)) {
+	#	stop('GenABEL and DatABEL packages required!')
+	#}
     cat('loading data ...')
     if (length(trait.cols) == 1) {
 		stop('select multiple traits to analyze!')

Modified: pkg/MultiABEL/R/load.summary.R
===================================================================
--- pkg/MultiABEL/R/load.summary.R	2017-06-14 22:23:46 UTC (rev 2068)
+++ pkg/MultiABEL/R/load.summary.R	2017-06-14 22:35:32 UTC (rev 2069)
@@ -1,289 +1,220 @@
-#' Loading multiple summary statistics from genome-wide association studies 
-#' 
-#' The function loads multiple meta-GWAS summary statistics, for subsequent multi-trait GWAS. 
-#' Currently, the package only analyzes summary statistics from inverse-Gaussianized continuous traits.
-#' 
-#' @param files A vector of file names as strings. Each file name should contain summary statistics of
-#' one trait to be included in the multi-trait analysis. The columns of the summary statistics have to
-#' contain (uppercase or lowercase does not matter) \code{'snp'} (marker ID), \code{'a1'} (the first allele),
-#' \code{'a2'} (the second allele), \code{'freq'},
-#' (frequency of the first allele), \code{'beta'} (effect size), \code{'se'} (standard error), and 
-#' \code{'n'} (sample size).
-#' @param cor.pheno A #traits x #traits matrix of correlation matrix of the phenotypes, to be used to 
-#' construct the multi-trait test statistic. If \code{NULL},
-#' this matrix will be estimated from genome-wide summary statistics. If you have partially overlapping 
-#' samples for different traits, shrinkage correlation matrix is recommended (see reference), so in that
-#' case, unless you know what you are doing, leave this argument as default, i.e. \code{NULL}.
-#' @param indep.snps A vector of strings containing the names of a set of independent SNPs. This is 
-#' recommended to be generated by LD-pruning the genotype data in a certain cohort. Typically the 
-#' number of SNPs should be more than 10,000 in order to obtain a good estimate of \code{cor.pheno}. If
-#' \code{cor.pheno = NULL}, this argument cannot be \code{NULL}.
-#' @param est.var A logical value. If \code{FALSE}, each phenotypic variance is assumed to be known as 1. 
-#' If \code{TRUE}, each phenotypic variance will be estimated to adjust the summary statistics, so that
-#' the corresponding phenoypic variance is 1.
-#' @param columnNames A vector with names of columns containing necessary information in the input file;
-#' default values are c('snp','a1','a2','freq','beta','se','n'). The values are case-insensitive. Note: check
-#' your allele definitions for different traits are based on the same strand!
-#' @param fixedN sample size to assume across all analyses, when provided, this number will be used
-#' (instead of the ones specified in the input files)
-#' 
-#' @return The function returns a list of class \code{multi.summary}, containing two elements: \code{gwa}
-#' (the cleaned data to be processed in multi-trait GWAS), \code{cor.pheno} (user input or estimated), and 
-#' \code{var.pheno} (default or estimated).
-#' 
-#' @author Xia Shen, Yurii S. Aulchenko
-#' 
-#' @references 
-#' Xia Shen, Zheng Ning, Yakov Tsepilov, Masoud Shirali, 
-#' Generation Scotland, Blair H. Smith, Lynne J. Hocking, Sandosh Padmanabhan, Caroline Hayward, 
-#' David J. Porteous, Yudi Pawitan, Chris S. Haley, Yurii S. Aulchenko (2015).
-#' Simple multi-trait analysis identifies novel loci 
-#' associated with growth and obesity measures. \emph{Submitted}.
-#' 
-#' @seealso 
-#' \code{MultiSummary}
-#' 
-#' @examples 
-#' \dontrun{
-#' ## download the six example files from:
-#' ## https://www.dropbox.com/sh/hhta45cewvvea2s/AADfj4OXlbroToZAwIii2Buha?dl=0
-#' ## the summary statistics from Randall et al. (2013) PLoS Genet
-#' ## for males only
-#' ## bmi: body mass index
-#' ## hip: hip circumference
-#' ## wc: waist circumference
-#' ## whr: waist-hip ratio
-#' 
-#' ## load the prepared set of independent SNPs
-#' indep.snps <- as.character(read.table('indep.snps')$V1)
-#' 
-#' ## load summary statistics of the six traits
-#' stats.male <- load.summary(files = list.files(pattern = '*.txt'), indep.snps = indep.snps)
-#' 
-#' ## perform multi-trait meta-GWAS
-#' result <- MultiSummary(stats.male)
-#' head(result)
-#' }
-#' @aliases load.summary
-#' @keywords multivariate, meta-analysis
-#' 
-`load.summary` <- function(files, cor.pheno = NULL, indep.snps = NULL, 
-	est.var = FALSE, 
-	columnNames = c('snp', 'a1', 'a2', 'freq', 'beta', 'se', 'n'),
-	fixedN = NULL) {
-	
-	### I. Sanity checks ###
-	
-	# require(data.table)
-	
-	if (!all(is.character(files))) {
-		stop('files should be given as strings!')
-	}
-	
-	if (sum(file.exists(files)) < 2) {
-		stop('number of traits has to be more than 2!')
-	}
-	
-	if (is.null(cor.pheno) & is.null(indep.snps)) {
-		stop('indep.snps required for estimating cor.pheno!')
-	}
-	
-	m <- length(files)
-	if (!is.null(cor.pheno)) {
-		if (nrow(cor.pheno) != m | ncol(cor.pheno) != m) {
-			stop('wrong dimensions of cor.pheno!')
-		}
-	}
-	
-	columnNames <- tolower(columnNames)
-	
-	if (!is.null(fixedN)) if (fixedN <= 0) {
-		stop('fixedN should be a positive number')
-	}
-	
-	if (is.null(fixedN)) { colNamLen = 7 } else { colNamLen = 6 }
-	
-	if (!is.character(columnNames)) {
-		stop('columnNames should be character')
-	}
-	
-	if (length(columnNames) != colNamLen) {
-		cat('columnNames should be a vector with',colNamLen,'elements')
-		stop('... exiting')
-	}
-	
-	if (length(unique(columnNames)) != colNamLen) {
-		stop('elements of columnNames must be unique')
-	}
-
-	cNT = list(
-		'snp' = columnNames[1],
-		'a1'  = columnNames[2],
-		'a2' = columnNames[3],
-		'freq'= columnNames[4],
-		'beta'= columnNames[5],
-		'se'  = columnNames[6],
-		'n'   = columnNames[7]
-	)
-	
-	### II. Data load ###
-	
-	cat("loading data ...\n")
-	
-	data <- c()
-	fn <- files # rev(files)
-	vys <- rep(1, m)
-	for (i in m:1) {
-		
-		# Reserved for possible problems with data.table package
-		# dd <- read.table(fn[i], header = TRUE, stringsAsFactors = FALSE)
-		
-		dd <- data.table::fread(fn[i], header = TRUE, stringsAsFactors = FALSE,verbose=FALSE,showProgress=FALSE,data.table=FALSE)
-		
-		colnames(dd) <- tolower(colnames(dd))
-		currentColNames <- colnames(dd)
-		if (any(!(columnNames %in% currentColNames))) {
-			stop('file column names do not match columnNames in ', fn[i], '... exiting!')
-		}
-		idx <- which(duplicated(dd[, cNT$snp]))
-		if (length(idx) > 0) {
-			data[[i]] <- dd[-idx,]
-			rownames(data[[i]]) <- dd[ -idx , cNT$snp]
-		} else {
-			data[[i]] <- dd
-			rownames(data[[i]]) <- dd[, cNT$snp]
-		}
-		if (est.var) {
-			if (is.null(fixedN)) {
-				D <-  dd[,cNT$n]*2*dd[, cNT$freq]*(1 - dd[, cNT$freq])
-				vy <- D*dd[ , cNT$se]**2 + D*dd[, cNT$beta]**2/(dd[, cNT$n] - 1)
-			} else {
-				D <-  fixedN*2*dd[, cNT$freq]*(1 - dd[, cNT$freq])
-				vy <- D*dd[ , cNT$se]**2 + D*dd[, cNT$beta]**2/(fixedN - 1)
-			}
-			dvy <- density(na.omit(vy))
-			vys[i] <- dvy$x[which.max(dvy$y)] #median(vy, na.rm = TRUE)
-		}
-		progress((m - i + 1)/m*100)
-	}
-	cat('\n')
-	if (est.var) cat('phenotypic variances are:', vys, '\n')
-	
-	### III. SNP cheking and cleaning ###
-	
-    cat("checking markers ...\n")
-    
-	snps <- data[[1]][,cNT$snp]
-	for (i in 1:m) {
-		data[[i]]=na.omit(data[[i]][,unlist(cNT)])
-		snps=intersect(snps,data[[i]][, cNT$snp])
-        progress(i/m * 100)
-    }
-		
-    snps <- unique(snps)
-    cat("\n")
-    
-	cat("cleaning data ...\n")
-    for (i in 1:m) {
-        data[[i]] <- data[[i]][snps, ]
-        progress(i/m * 100)
-    }
-    cat("\n")
-    cat("correcting parameters ...\n")
-	
-	for (i in 1:m) {
-		data[[i]][, cNT[["a1"]]] = toupper(data[[i]][, cNT[["a1"]]])
-		data[[i]][, cNT[["a2"]]] = toupper(data[[i]][, cNT[["a2"]]])
-	}
-	
-	d1a12=paste(data[[1]][,c(cNT$a1)],data[[1]][,c(cNT$a2)],sep="")
-	d1a21=paste(data[[1]][,c(cNT$a2)],data[[1]][,c(cNT$a1)],sep="")
-	
-	
-	snps=rownames(data[[1]])
-	
-	i=2
-	for (i in 2:m) {
-	
-		dia12=paste(data[[i]][,c(cNT$a1)],data[[i]][,c(cNT$a2)],sep="")
-		
-		ind=which(((dia12==d1a12) + (dia12==d1a21))==1)
-		snps=intersect(rownames(data[[i]])[ind],snps)
-		
-	    if (any(data[[i]][, cNT[["a1"]]] != data[[1]][, cNT[["a1"]]])) {
-            adj <- 2 * as.numeric(data[[i]][, cNT[["a1"]]] == data[[1]][, cNT[["a1"]]]) - 1
-			data[[i]][, cNT$beta] <- data[[i]][, cNT$beta] *adj
-			data[[i]][, cNT$freq] <- (adj == 1) * data[[i]][,cNT$freq] + (adj == -1) * (1 - data[[i]][, cNT$freq])
-        }
-        progress(i/m * 100)
-    }
-	
-	for (i in 1:m) {
-        data[[i]] <- data[[i]][snps, ]
-        #progress(i/m * 100)
-    }
-	
-    cat("\n")
-	
-	### IV. Sample size and finalisation ###
-	
-    cat("adjusting sample size ... ")
-    n0 <- matrix(NA, nrow(data[[1]]), m)
-    if (is.null(fixedN)) {
-        for (i in 1:m) {
-            n0[, i] <- data[[i]][, cNT$n]
-        }
-    }else {
-        for (i in 1:m) {
-            n0[, i] <- fixedN
-        }
-    }
-    n <- apply(n0, 1, "min")
-    cat("done.\n")
-	
-    cat('finalizing summary statistics ...\n')
-	gwa0 <- matrix(NA, nrow(data[[1]]), 2*m + 2)
-	for (i in 1:m) {
-		gwa0[,i*2 - 1] <- data[[i]][,cNT$beta]
-		gwa0[,i*2] <- data[[i]][,cNT$se]
-		progress(i/m*100)
-	}
-	gwa0[,2*length(data) + 1] <- data[[1]][,cNT$freq]
-	gwa0[,2*length(data) + 2] <- n
-	rownames(gwa0) <- data[[1]][,cNT$snp]
-	gwa0 <- na.omit(gwa0)
-	cat('\n')
-	
-	if (is.null(cor.pheno)) {
-		n.ratio <- diag(m)
-		for (i in 1:(m - 1)) {
-			for (j in (i + 1):m) {
-				ratio <- mean(sqrt(n0[,j]/n0[,i]), na.rm = TRUE)
-				n.ratio[i,j] <- n.ratio[j,i] <- ifelse(ratio > 1, 1/ratio, ratio)
-			}
-		}
-		if (any(n.ratio < 1)) {
-			cat('samples partially overlap!\n')
-			cat('estimating shrinkage phenotypic correlations ... ')
-		} else {
-			cat('estimating phenotypic correlations ... ')
-		}
-		idx <- which(rownames(gwa0) %in% indep.snps)
-		gwa1 <- gwa0[idx,]
-		z <- gwa1[,seq(1, 2*m, 2)]/gwa1[,seq(2, 2*m, 2)]
-		cor.pheno <- cor(z, use = 'pairwise.complete.obs')
-		cat('done.\n')
-	}
-	
-	dimnames(cor.pheno) <- list(files, files)
-	gwanames <- c(paste(rep(files, each = 2), rep(c('.beta', '.se'), m), sep = ''), 'f', 'n')
-	colnames(gwa0) <- gwanames
-	
-	A12=data[[1]][rownames(gwa0), c(cNT[["a1"]],cNT[["a2"]])]
-	colnames(A12)=c("A1","A2")
-	
-    dd <- list(gwa = gwa0, cor.pheno = cor.pheno, var.pheno = vys, alleles=A12)
-    class(dd) <- "multi.summary"
-    return(dd)
-
-}
+#' Loading multiple summary statistics from genome-wide association studies 
+#' 
+#' The function loads multiple meta-GWAS summary statistics, for subsequent multi-trait GWAS. 
+#' Currently, the package only analyzes summary statistics from inverse-Gaussianized continuous traits.
+#' 
+#' @param files A vector of file names as strings. Each file name should contain summary statistics of
+#' one trait to be included in the multi-trait analysis. The columns of the summary statistics have to
+#' contain (uppercase or lowercase does not matter) \code{'snp'} (marker ID), \code{'a1'} (the first allele), \code{'freq'} 
+#' (frequency of the first allele), \code{'beta'} (effect size), \code{'se'} (standard error), and 
+#' \code{'n'} (sample size).
+#' @param cor.pheno A #traits x #traits matrix of correlation matrix of the phenotypes, to be used to 
+#' construct the multi-trait test statistic. If \code{NULL},
+#' this matrix will be estimated from genome-wide summary statistics. If you have partially overlapping 
+#' samples for different traits, shrinkage correlation matrix is recommended (see reference), so in that
+#' case, unless you know what you are doing, leave this argument as default, i.e. \code{NULL}.
+#' @param indep.snps A vector of strings containing the names of a set of independent SNPs. This is 
+#' recommended to be generated by LD-pruning the genotype data in a certain cohort. Typically the 
+#' number of SNPs should be more than 10,000 in order to obtain a good estimate of \code{cor.pheno}. If
+#' \code{cor.pheno = NULL}, this argument cannot be \code{NULL}.
+#' @param est.var A logical value. If \code{FALSE}, each phenotypic variance is assumed to be known as 1. 
+#' If \code{TRUE}, each phenotypic variance will be estimated to adjust the summary statistics, so that
+#' the corresponding phenoypic variance is 1.
+#' @param columnNames A vector with names of columns containing necessary information in the input file;
+#' default values are c('snp','a1','freq','beta','se','n'). The values are case-insensitive. Note: check
+#' your allele definitions for different traits are based on the same strand!
+#' @param fixedN sample size to assume across all analyses, when provided, this number will be used
+#' (instead of the ones specified in the input files)
+#' 
+#' @return The function returns a list of class \code{multi.summary}, containing two elements: \code{gwa}
+#' (the cleaned data to be processed in multi-trait GWAS), \code{cor.pheno} (user input or estimated), and 
+#' \code{var.pheno} (default or estimated).
+#' 
+#' @author Xia Shen, Yurii S. Aulchenko
+#' 
+#' @references 
+#' Xia Shen, Zheng Ning, Yakov Tsepilov, Masoud Shirali, 
+#' Generation Scotland, Blair H. Smith, Lynne J. Hocking, Sandosh Padmanabhan, Caroline Hayward, 
+#' David J. Porteous, Yudi Pawitan, Chris S. Haley, Yurii S. Aulchenko (2015).
+#' Simple multi-trait analysis identifies novel loci 
+#' associated with growth and obesity measures. \emph{Submitted}.
+#' 
+#' @seealso 
+#' \code{MultiSummary}
+#' 
+#' @examples 
+#' \dontrun{
+#' ## download the six example files from:
+#' ## https://www.dropbox.com/sh/hhta45cewvvea2s/AADfj4OXlbroToZAwIii2Buha?dl=0
+#' ## the summary statistics from Randall et al. (2013) PLoS Genet
+#' ## for males only
+#' ## bmi: body mass index
+#' ## hip: hip circumference
+#' ## wc: waist circumference
+#' ## whr: waist-hip ratio
+#' 
+#' ## load the prepared set of independent SNPs
+#' indep.snps <- as.character(read.table('indep.snps')$V1)
+#' 
+#' ## load summary statistics of the six traits
+#' stats.male <- load.summary(files = list.files(pattern = '*.txt'), indep.snps = indep.snps)
+#' 
+#' ## perform multi-trait meta-GWAS
+#' result <- MultiSummary(stats.male)
+#' head(result)
+#' }
+#' @aliases load.summary
+#' @keywords multivariate, meta-analysis
+#' 
+`load.summary` <- function(files, cor.pheno = NULL, indep.snps = NULL, est.var = FALSE, 
+		                   columnNames = c('snp', 'a1', 'freq', 'beta', 'se', 'n'), fixedN = NULL) {
+	if (!all(is.character(files))) {
+		stop('files should be given as strings!')
+	}
+	if (sum(file.exists(files)) < 2) {
+		stop('number of traits has to be more than 2!')
+	}
+	if (is.null(cor.pheno) & is.null(indep.snps)) {
+		stop('indep.snps required for estimating cor.pheno!')
+	}
+	m <- length(files)
+	if (!is.null(cor.pheno)) {
+		if (nrow(cor.pheno) != m | ncol(cor.pheno) != m) {
+			stop('wrong dimensions of cor.pheno!')
+		}
+	}
+	columnNames <- tolower(columnNames)
+	if (!is.null(fixedN)) if (fixedN <= 0) {
+			stop('fixedN should be a positive number')
+		}
+	if (is.null(fixedN)) { colNamLen = 6 } else { colNamLen = 5 }
+	if (!is.character(columnNames)) {
+		stop('columnNames should be character')
+	}
+	if (length(columnNames) != colNamLen) {
+		cat('columnNames should be a vector with', colNamLen, 'elements')
+		stop('... exiting')
+	}
+	if (length(unique(columnNames)) != colNamLen) {
+		stop('elements of columnNames must be unique')
+	}
+	# column Names Translation
+	cNT = list(
+		'snp' = columnNames[1],
+		'a1'  = columnNames[2],
+		'freq'= columnNames[3],
+		'beta'= columnNames[4],
+		'se'  = columnNames[5],
+		'n'   = columnNames[6]
+	)
+	cat('loading data ...\n')
+	data <- c()
+	fn <- files # rev(files)
+	vys <- rep(1, m)
+	for (i in m:1) {
+		dd <- read.table(fn[i], header = TRUE, stringsAsFactors = FALSE)
+		colnames(dd) <- tolower(colnames(dd))
+		currentColNames <- colnames(dd)
+		if (any(!(columnNames %in% currentColNames))) {
+			stop('file column names do not match columnNames in ', fn[i], '... exiting!')
+		}
+		dd[,cNT$a1] <- toupper(dd[,cNT$a1])
+		idx <- which(duplicated(dd[,cNT$snp]))
+		if (length(idx) > 0) {
+			data[[i]] <- dd[-idx,]
+			rownames(data[[i]]) <- dd[-idx,cNT$snp]
+		} else {
+			data[[i]] <- dd
+			rownames(data[[i]]) <- dd[,cNT$snp]
+		}
+		if (est.var) {
+			if (is.null(fixedN)) {
+				D <-  dd[,cNT$n]*2*dd[, cNT$freq]*(1 - dd[, cNT$freq])
+				vy <- D*dd[ , cNT$se]**2 + D*dd[, cNT$beta]**2/(dd[, cNT$n] - 1)
+			} else {
+				D <-  fixedN*2*dd[, cNT$freq]*(1 - dd[, cNT$freq])
+				vy <- D*dd[ , cNT$se]**2 + D*dd[, cNT$beta]**2/(fixedN - 1)
+			}
+			dvy <- density(na.omit(vy))
+			vys[i] <- dvy$x[which.max(dvy$y)] #median(vy, na.rm = TRUE)
+		}
+		progress((m - i + 1)/m*100)
+	}
+	cat('\n')
+	if (est.var) cat('phenotypic variances are:', vys, '\n')
+	cat('checking markers ...\n')
+	snps <- data[[1]][, cNT$snp]
+	for (i in 2:m) {
+		snps <- data[[i]][ data[[i]][, cNT$snp] %in% snps, cNT$snp]
+		progress(i/m*100)
+	}
+	snps <- unique(snps)
+	cat('\n')
+	cat('cleaning data ...\n')
+	for (i in 1:m) {
+		data[[i]] <- data[[i]][snps,]
+		progress(i/m*100)
+	}
+	cat('\n')
+	cat('correcting parameters ...\n')
+	for (i in 2:m) {
+		if (any(data[[i]][, cNT[['a1']]] != data[[1]][, cNT[['a1']]])) {
+			adj <- 2*as.numeric(data[[i]][, cNT[['a1']]] == data[[1]][, cNT[['a1']]]) - 1
+			data[[i]][, cNT$beta] <- data[[i]][, cNT$beta]*adj
+			data[[i]][, cNT$freq] <- (adj == 1)*data[[i]][, cNT$freq] + (adj == -1)*(1 - data[[i]][,cNT$freq])
+		}
+		progress(i/m*100)
+	}
+	cat('\n')
+	cat('adjusting sample size ... ')
+	n0 <- matrix(NA, nrow(data[[1]]), m)
+	if (is.null(fixedN)) {
+		for (i in 1:m) {
+			n0[,i] <- data[[i]][,cNT$n]
+		}
+	} else {
+		for (i in 1:m) {
+			n0[,i] <- fixedN
+		}            
+	}
+	n <- apply(n0, 1, "min")
+	cat('done.\n')
+	cat('finalizing summary statistics ...\n')
+	gwa0 <- matrix(NA, nrow(data[[1]]), 2*m + 2)
+	for (i in 1:m) {
+		gwa0[,i*2 - 1] <- data[[i]][,cNT$beta]
+		gwa0[,i*2] <- data[[i]][,cNT$se]
+		progress(i/m*100)
+	}
+	gwa0[,2*length(data) + 1] <- data[[1]][,cNT$freq]
+	gwa0[,2*length(data) + 2] <- n
+	rownames(gwa0) <- data[[1]][,cNT$snp]
+	gwa0 <- na.omit(gwa0)
+	cat('\n')
+	if (is.null(cor.pheno)) {
+		n.ratio <- diag(m)
+		for (i in 1:(m - 1)) {
+			for (j in (i + 1):m) {
+				ratio <- mean(sqrt(n0[,j]/n0[,i]), na.rm = TRUE)
+				n.ratio[i,j] <- n.ratio[j,i] <- ifelse(ratio > 1, 1/ratio, ratio)
+			}
+		}
+		if (any(n.ratio < 1)) {
+			cat('samples partially overlap!\n')
+			cat('estimating shrinkage phenotypic correlations ... ')
+		} else {
+			cat('estimating phenotypic correlations ... ')
+		}
+		idx <- which(rownames(gwa0) %in% indep.snps)
+		gwa1 <- gwa0[idx,]
+		z <- gwa1[,seq(1, 2*m, 2)]/gwa1[,seq(2, 2*m, 2)]
+		cor.pheno <- cor(z, use = 'pairwise.complete.obs')
+		cat('done.\n')
+	}
+	dimnames(cor.pheno) <- list(files, files)
+	gwanames <- c(paste(rep(files, each = 2), rep(c('.beta', '.se'), m), sep = ''), 'f', 'n')
+	colnames(gwa0) <- gwanames
+	dd <- list(gwa = gwa0, cor.pheno = cor.pheno, var.pheno = vys)
+	class(dd) <- 'multi.summary'
+	return(dd)
+}

Modified: pkg/MultiABEL/R/misc.R
===================================================================
--- pkg/MultiABEL/R/misc.R	2017-06-14 22:23:46 UTC (rev 2068)
+++ pkg/MultiABEL/R/misc.R	2017-06-14 22:35:32 UTC (rev 2069)
@@ -2,7 +2,7 @@
 .onAttach <- 
 		function(lib, pkg, ...)
 {
-	pkgDescription <- utils::packageDescription(pkg)
+	pkgDescription <- packageDescription(pkg)
 	pkgVersion <- pkgDescription$Version
 	pkgDate <- pkgDescription$Date
 	pkgName <- pkgDescription$Package

Modified: pkg/MultiABEL/man/MultiABEL.Rd
===================================================================
--- pkg/MultiABEL/man/MultiABEL.Rd	2017-06-14 22:23:46 UTC (rev 2068)
+++ pkg/MultiABEL/man/MultiABEL.Rd	2017-06-14 22:35:32 UTC (rev 2069)
@@ -3,8 +3,9 @@
 \docType{package}
 \name{MultiABEL}
 \alias{MultiABEL}
+\alias{multiabel}
 \alias{MultiABEL-package}
-\alias{multiabel}
+\alias{MultiABEL-package}
 \title{Multivariate GWAS in R}
 \description{
 MultiABEL: Multivariate Genome-Wide Association Analyses
@@ -17,39 +18,36 @@
 
 For converting data from other formats, see
 
-\code{\link{convert.snp.illumina}} (Illumina/Affymetrix-like format). This is 
+\code{{convert.snp.illumina}} (Illumina/Affymetrix-like format). This is 
 our preferred converting function, very extensively tested. Other conversion 
 functions include: 
-\code{\link{convert.snp.text}} (conversion from human-readable GenABEL format),
-\code{\link{convert.snp.ped}} (Linkage, Merlin, Mach, and similar files),
-\code{\link{convert.snp.mach}} (Mach-format),
-\code{\link{convert.snp.tped}} (from PLINK TPED format),
-\code{\link{convert.snp.affymetrix}} (BRML-style files).
+\code{{convert.snp.text}} (conversion from human-readable GenABEL format),
+\code{{convert.snp.ped}} (Linkage, Merlin, Mach, and similar files),
+\code{{convert.snp.mach}} (Mach-format),
+\code{{convert.snp.tped}} (from PLINK TPED format),
+\code{{convert.snp.affymetrix}} (BRML-style files).
 
 For converting of GenABEL's data to other formats, see
-\code{\link{export.merlin}} (MERLIN and MACH formats), 
-\code{\link{export.impute}} (IMPUTE, SNPTEST and CHIAMO formats),
-\code{\link{export.plink}} (PLINK format, also exports phenotypic data).
+\code{{export.merlin}} (MERLIN and MACH formats), 
+\code{{export.impute}} (IMPUTE, SNPTEST and CHIAMO formats),
+\code{{export.plink}} (PLINK format, also exports phenotypic data).
 
-To load the data, see \code{\link{load.gwaa.data}}.
+To load the data, see \code{{load.gwaa.data}}.
 
 For conversion to DatABEL format (used by ProbABEL and some other 
 GenABEL suite packages), see 
-\code{\link{impute2databel}}, 
-\code{\link{impute2mach}}, 
-\code{\link{mach2databel}}.
+\code{{impute2databel}}, 
+\code{{impute2mach}}, 
+\code{{mach2databel}}.
 
 For data managment and manipulations see
-\code{\link{merge.gwaa.data}},
-\code{\link{merge.snp.data}},
-\code{\link{gwaa.data-class}},
-\code{\link{snp.data-class}},
-\code{\link{snp.names}},
-\code{\link{snp.subset}}.
+\code{{merge.gwaa.data}},
+\code{{merge.snp.data}},
+\code{{gwaa.data-class}},
+\code{{snp.data-class}},
+\code{{snp.names}},
+\code{{snp.subset}}.
 }
-\author{
-Xia Shen
-}
 \references{
 If you use the MultiABEL package in your analysis, please cite the
 papers in \code{citation("MultiABEL")}.
@@ -57,5 +55,7 @@
 \seealso{
 \code{GenABEL}, \code{DatABEL}
 }
+\author{
+Xia Shen
+}
 \keyword{package}
-

Modified: pkg/MultiABEL/man/MultiLoad.Rd
===================================================================
--- pkg/MultiABEL/man/MultiLoad.Rd	2017-06-14 22:23:46 UTC (rev 2068)
+++ pkg/MultiABEL/man/MultiLoad.Rd	2017-06-14 22:35:32 UTC (rev 2069)
@@ -11,11 +11,11 @@
   ...)
 }
 \arguments{
-\item{gwaa.data}{An (optional) object of \code{\link{gwaa.data-class}}.}
+\item{gwaa.data}{An (optional) object of \code{{gwaa.data-class}}.}
 
 \item{phenofile}{An (optional) plain text file contains phenotypic outcomes and covariates.}
 
-\item{genofile}{An (optional) object of \code{\link{databel-class}} containing genotype data.}
+\item{genofile}{An (optional) object of \code{{databel-class}} containing genotype data.}
 
 \item{trait.cols}{A vector (length > 1) giving the column indices of the phenotypes to be analyzed.}
 
@@ -27,7 +27,8 @@
 
 \item{impute}{An (optional) logical argument telling whether missing genotypes should be imputed.}
 
-\item{gaussianize}{logical argument gaussianize telling whether traits should be gaussianized}
+\item{gaussianize}{An (optional) logical argument telling whether the phenotypes should be gaussianized
+via inverse-Gaussian transformation.}
 
 \item{...}{not used.}
 }
@@ -66,9 +67,6 @@
                     covariate.cols = c(2, 3))
 }
 }
-\author{
-Xia Shen
-}
 \references{
 Xia Shen, ..., Jim Wilson, Gordan Lauc, Yurii Aulchenko (2015).
 Multi-omic-variate analysis identified novel loci associated with 
@@ -77,7 +75,9 @@
 \seealso{
 \code{\link{Multivariate}}
 }
+\author{
+Xia Shen
+}
 \keyword{load}
 \keyword{multiload,}
 \keyword{multivariate,}
-

Modified: pkg/MultiABEL/man/MultiMeta.Rd
===================================================================
--- pkg/MultiABEL/man/MultiMeta.Rd	2017-06-14 22:23:46 UTC (rev 2068)
+++ pkg/MultiABEL/man/MultiMeta.Rd	2017-06-14 22:35:32 UTC (rev 2069)
@@ -39,9 +39,6 @@
 meta <- MultiMeta(list(res1, res2))
 }
 }
-\author{
-Xia Shen
-}
 \references{
 Xia Shen, ..., Gordan Lauc, Jim Wilson, Yurii Aulchenko (2014).
 Multi-omic-variate analysis identified the association between 14q32.33 and 
@@ -50,6 +47,8 @@
 \seealso{
 \code{Multivariate}
 }
+\author{
+Xia Shen
+}
 \keyword{meta-analysis}
 \keyword{multivariate,}
-

Modified: pkg/MultiABEL/man/MultiRep.Rd
===================================================================
--- pkg/MultiABEL/man/MultiRep.Rd	2017-06-14 22:23:46 UTC (rev 2068)
+++ pkg/MultiABEL/man/MultiRep.Rd	2017-06-14 22:35:32 UTC (rev 2069)
@@ -75,9 +75,6 @@
                    training.geno = training.geno, test.geno = test.geno)
 }
 }
-\author{
-Xia Shen
-}
 \references{
 Xia Shen, ..., Gordan Lauc, Jim Wilson, Yurii Aulchenko (2014).
 Multi-omic-variate analysis identified the association between 14q32.33 and 
@@ -86,6 +83,8 @@
 \seealso{
 \code{\link{Multivariate}}
 }
+\author{
+Xia Shen
+}
 \keyword{multivariate,}
 \keyword{replication}
-

Modified: pkg/MultiABEL/man/MultiSummary.Rd
===================================================================
--- pkg/MultiABEL/man/MultiSummary.Rd	2017-06-14 22:23:46 UTC (rev 2068)
+++ pkg/MultiABEL/man/MultiSummary.Rd	2017-06-14 22:35:32 UTC (rev 2069)
@@ -58,9 +58,6 @@
 head(result)
 }
 }
-\author{
-Xia Shen
-}
 \references{
 Xia Shen, Zheng Ning, Yakov Tsepilov, Peter K. Joshi,
 James F. Wilson, Yudi Pawitan, Chris S. Haley, Yurii S. Aulchenko (2016).
@@ -69,6 +66,8 @@
 \seealso{
 \code{load.summary}
 }
+\author{
+Xia Shen
+}
 \keyword{meta-analysis}
 \keyword{multivariate,}
-

Modified: pkg/MultiABEL/man/Multivariate.Rd
===================================================================
--- pkg/MultiABEL/man/Multivariate.Rd	2017-06-14 22:23:46 UTC (rev 2068)
+++ pkg/MultiABEL/man/Multivariate.Rd	2017-06-14 22:35:32 UTC (rev 2069)
@@ -46,9 +46,6 @@
 res <- Multivariate(loaded)
 }
 }
-\author{
-Xia Shen
-}
 \references{
 Xia Shen, ..., Jim Wilson, Gordan Lauc, Yurii Aulchenko (2015).
 Multi-omic-variate analysis identified novel loci associated with 
@@ -57,5 +54,7 @@
 \seealso{
 \code{\link{MultiLoad}}
 }
+\author{
+Xia Shen
+}
 \keyword{multivariate}
-

Modified: pkg/MultiABEL/man/load.summary.Rd
===================================================================
--- pkg/MultiABEL/man/load.summary.Rd	2017-06-14 22:23:46 UTC (rev 2068)
+++ pkg/MultiABEL/man/load.summary.Rd	2017-06-14 22:35:32 UTC (rev 2069)
@@ -5,14 +5,12 @@
 \title{Loading multiple summary statistics from genome-wide association studies}
 \usage{
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/genabel -r 2069


More information about the Genabel-commits mailing list