[Genabel-commits] r2078 - pkg/MultiABEL/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sat Feb 9 04:32:23 CET 2019
Author: shenxia
Date: 2019-02-09 04:32:22 +0100 (Sat, 09 Feb 2019)
New Revision: 2078
Added:
pkg/MultiABEL/R/MV.cor.test.R
pkg/MultiABEL/R/MultiSecondary.R
Modified:
pkg/MultiABEL/R/MultiSummary.R
pkg/MultiABEL/R/Multivariate.R
pkg/MultiABEL/R/load.summary.R
Log:
update functions
Added: pkg/MultiABEL/R/MV.cor.test.R
===================================================================
--- pkg/MultiABEL/R/MV.cor.test.R (rev 0)
+++ pkg/MultiABEL/R/MV.cor.test.R 2019-02-09 03:32:22 UTC (rev 2078)
@@ -0,0 +1,170 @@
+#' Correlation replication test based on multivariate analysis results
+#'
+#' This function is developed to implement correlation replication test based on MVA or cMVA results
+#'
+#' @param marker The SNP to be analyzed
+#' @param gwa.1 GWAS summary statistics for sample 1, includes A1, A2 and two columns for each trait: beta and se
+#' @param gwa.2 GWAS summary statistics for sample 2, includes A1, A2 and two columns for each trait: beta and se
+#' @param R.1 Phenotypic correlation matrix for sample 1
+#' @param R.2 Phenotypic correlation matrix for sample 2
+#' @param traits Traits to be analyzed
+#' @param nrep The number of Monte Carlo repetitions
+#' @param probs Percentiles of the endpoints of confidence interval
+#' @param method The method used for computing correlation coefficient
+#' @param plot If the results for making correlation test figure are needed
+#'
+#' @return The function returns two lists of \code{res}, which includes
+#' 1) \code{correlation} Estimated correlation computed from original sample;
+#' 2) \code{ci.left} The value at left endpoint of confidence interval;
+#' 3) \code{ci.right} The value at right endpoint of confidence interval (Note: If there are only two traits,
+#' then the ratio of correlation equals to one is provided instead of ci.left and ci.right),
+#' and \code{df.plot}, will be provided if \code{plot = TRUE}, includes
+#' 1) \code{traits} The name of traits in analysis;
+#' 2) \code{rank.1} The rank of estimated effect sizes in sample 1;
+#' 3) \code{rank.2} The rank of estimated effect sizes in sample 2;
+#' 4) \code{mean.conc} The mean of concordant pairs in MC generated by the trait;
+#' 5) \code{sd.conc} The standard deviation of concordant pairs in MC generated by the trait;
+#' 6) \code{se.beta} The standard error of estimated effect sizes computed using inverse-variance weighting.
+#'
+#' @author Zheng Ning, Xia Shen
+#'
+#' @references
+#' Zheng Ning, Yakov Tsepilov, Sodbo Zh. Sharapov, Alexander K. Grishenko, Masoud Shirali, Peter K. Joshi,
+#' James F. Wilson, Yudi Pawitan, Chris S. Haley, Yurii S. Aulchenko, Xia Shen (2018).
+#' Multivariate discovery, replication, and interpretation of pleiotropic loci using summary association statistics. \emph{Submitted}.
+#'
+#' @seealso
+#' \code{MultiSummary}
+#'
+#' @examples
+#' \dontrun{
+#'
+#' data(example.MV.cor.test)
+#'
+#' ## Six-trait correlation test ##
+#' traits <- c("HEIGHT", "BMI", "HIP", "WC", "WHR", "WEIGHT")
+#' set.seed(510)
+#' MV.cor.test(marker = "rs905938", gwa.1 = example.gwa.1, gwa.2 = example.gwa.2, R.1 = example.R.1,
+#' R.2 = example.R.2, traits = traits, nrep = 10000)
+#'
+#' ## Make correlation correlation test figure ##
+#' require(ggplot2)
+#' require(cowplot)
+#'
+#' set.seed(510)
+#' res.mv.cor <- MV.cor.test(marker = "rs905938", gwa.1 = example.gwa.1, gwa.2 = example.gwa.2, R.1 = example.R.1,
+#' R.2 = example.R.2, traits = traits, nrep = 10000, plot = TRUE)
+#' df.plot <- res.mv.cor$df.plot
+#'
+#' p1 <- ggplot()+
+#' geom_point(data=df.plot, mapping=aes(x=rank.1, y=rank.2, color=traits), size=2) +
+#' geom_point(data=df.plot, mapping=aes(x=rank.1, y=rank.2, color=traits, size = se.beta), alpha = 0.2) +
+#' stat_smooth(data=df.plot, mapping=aes(x=rank.1, y=rank.2), method = "lm", se=FALSE, color="black", size=0.3, fullrange = TRUE) +
+#' coord_cartesian(xlim = c(0.5, 6.5), ylim = c(0.5, 6.5)) + xlim(0,200) +
+#' scale_size_continuous(range = c(3, 10)) +
+#' theme(axis.text=element_text(size=10),
+#' axis.title=element_text(size=14,face="bold"),
+#' strip.text.x = element_text(size = 16))+
+#' theme(axis.title.x=element_blank(),axis.text.x=element_blank(),
+#' axis.ticks.x=element_blank(),axis.title.y=element_blank(), legend.position = c(0.8,0.3),
+#' legend.background=element_rect(colour='NA', fill='transparent'), legend.key=element_blank(),
+#' legend.title=element_text(size=14),
+#' legend.text=element_text(size=12), legend.key.size = unit(1.4, 'lines')) +
+#' guides(colour = guide_legend(override.aes = list(alpha = 1)), size = FALSE) +
+#' scale_colour_discrete(name = "Traits")
+#'
+#' p2 <- ggplot(data=df.plot, aes(x=rank.1,y=mean.conc)) +
+#' coord_cartesian(xlim = c(0.5, 6.5), ylim = c(0, 5.5)) +
+#' geom_bar(stat = "identity", aes(fill=traits), width = 0.4) + theme(legend.position="none") + theme(
+#' strip.background = element_blank(),
+#' strip.text.x = element_blank()
+#' ) + geom_errorbar(aes(ymin = mean.conc - sd.conc,ymax = mean.conc + sd.conc), width = 0.1) +
+#' theme(axis.title.x=element_blank(),axis.text.x=element_blank(),
+#' axis.ticks.x=element_blank(),axis.title.y=element_blank()) +
+#' theme(plot.margin = unit(c(0, 0, 0, 0), "cm"))
+#'
+#' require(cowplot)
+#' plot_grid(p1,p2,ncol=1,align = "v", rel_heights = c(2,1))
+#'
+#' }
+#' @aliases MV.cor.test, mv.cor.test
+#' @keywords multivariate, replication, correlation test
+#'
+
+`MV.cor.test` <- function(marker, gwa.1, gwa.2, R.1, R.2, traits, nrep = 10000, probs = c(0.025,0.975), method = "kendall", plot = FALSE){
+
+ if((method %in% c("kendall", "pearson", "spearman")) == FALSE){
+ stop("The method has to be one of 'kendall', 'pearson' or 'spearman'.")
+ }
+
+ R.1 <- R.1[traits, traits]
+ R.2 <- R.2[traits, traits]
+
+ beta.1 <- as.numeric(gwa.1[marker, paste0(traits, ".beta")])
+ se.1 <- as.numeric(gwa.1[marker, paste0(traits, ".se")])
+ cov.beta.1 <- diag(se.1) %*% R.1 %*% diag(se.1)
+
+
+ beta.2 <- as.numeric(gwa.2[marker, paste0(traits, ".beta")])
+ se.2 <- as.numeric(gwa.2[marker, paste0(traits, ".se")])
+ cov.beta.2 <- diag(se.2) %*% R.2 %*% diag(se.2)
+
+ A1.1 <- gwa.1[marker, "A1"]
+ A1.2 <- gwa.2[marker, "A1"]
+ if(A1.1 != A1.2){
+ beta.2 <- -beta.2
+ }
+
+ cor.sample <- cor(beta.1, beta.2, method = method)
+
+ n <- nrep
+ beta.1.mc <- rmvnorm(n, mean = beta.1, sigma = cov.beta.1)
+ beta.2.mc <- rmvnorm(n, mean = beta.2, sigma = cov.beta.2)
+
+ cor.mc <- numeric(n)
+ for(i in 1:n){
+ cor.mc[i] <- cor(beta.1.mc[i,], beta.2.mc[i,], method = method)
+ }
+ ci <- quantile(cor.mc, probs = probs)
+
+ df.plot <- NULL
+ if(plot == TRUE && length(traits) > 2){
+ agree <- function(df,i){
+ sum(apply(sign(df[,i] - df[,-i]),2, prod) == 1)
+ }
+ rank.1 <- rank(beta.1, ties.method = "random")
+ rank.2 <- rank(beta.2, ties.method = "random")
+ concordance <- matrix(0,n,length(traits))
+ for(i in 1:n){
+ concordance[i,] <- sapply(1:length(traits), FUN = agree, df = rbind(beta.1.mc[i,],beta.2.mc[i,]))
+ }
+ mean.conc <- colMeans(concordance)
+ sd.conc <- apply(concordance, 2, sd)
+
+ se.beta <- sqrt(1 / (1/(se.1^2) + 1/(se.2^2)))
+
+ df.plot <- cbind(rank.1,rank.2,mean.conc, sd.conc, se.beta)
+ df.plot <- data.frame(traits, df.plot)
+ colnames(df.plot) <- c("traits", "rank.1","rank.2", "mean.conc","sd.conc", "se.beta")
+ }
+
+ if(plot == TRUE && length(traits) == 2){
+ stop("The number of traits should be greater than two to make the figure meaningful.")
+ }
+
+ if(length(traits) == 2){
+ ratio.1 <- sum(cor.mc) / n
+ res <- c(cor.sample,ratio.1)
+ names(res) <- c("correlation", "ratio")
+ } else{
+ res <- c(cor.sample,ci)
+ names(res) <- c("correlation", "ci.left", "ci.right")
+ }
+ if(plot == TRUE){
+ return(list(res = res, df.plot = df.plot))
+ } else{
+ return(res)
+ }
+}
+
+
Property changes on: pkg/MultiABEL/R/MV.cor.test.R
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Added: pkg/MultiABEL/R/MultiSecondary.R
===================================================================
--- pkg/MultiABEL/R/MultiSecondary.R (rev 0)
+++ pkg/MultiABEL/R/MultiSecondary.R 2019-02-09 03:32:22 UTC (rev 2078)
@@ -0,0 +1,183 @@
+#' Conditional multivariate association analysis using summary statistics
+#'
+#' This function is developed to implement cMVA based on multivariate results
+#'
+#' @param gwa.region GWAS summary statistics, includes A1, A2 and three columns for each trait: beta, se and N
+#' @param LD.ref Regional LD matrix including SNPs in gwa.region
+#' @param snp.ref The reference alleles of SNPs in the reference LD correlation matrix. The names of the vector
+#' should be SNP names in reference sample
+#' @param R.ref Shrinkage phenotypic correlation matrix, achieved from \code{MultiSummary()}
+#' @param p.threshold P-value threshold in conditional analysis
+#' @param tol Tolerance for multicollinearity
+#' @param traits Traits to be analyzed
+#' @param v_y The variance of the traits
+#' @param T2.return Returning conditional T2 statistic or not
+#'
+#' @return The function returns a list with elements of \code{T2.sele}: The conditional test statistic of the selected variants.
+#' It will be provided if \code{T2.return = TRUE}; \code{p.sele}: The conditional p-value of the selected variants;
+#' \code{b_joint.sele}: The conditional effect size of the selected variants; \code{se_b_joint.sele}: The conditional
+#' standard error of the selected variants.
+#'
+#' @author Zheng Ning, Xia Shen
+#'
+#' @references
+#' Zheng Ning, Yakov Tsepilov, Sodbo Zh. Sharapov, Alexander K. Grishenko, Masoud Shirali, Peter K. Joshi,
+#' James F. Wilson, Yudi Pawitan, Chris S. Haley, Yurii S. Aulchenko, Xia Shen (2018).
+#' Multivariate discovery, replication, and interpretation of pleiotropic loci using summary association statistics. \emph{Submitted}.
+#'
+#' @seealso
+#' \code{MultiSummary}
+#'
+#' @examples
+#' \dontrun{
+#' data(example.MultiSecondary)
+#' ##### 474 snps around rs905938 #####
+#'
+#' ## Six-traits cMVA ##
+#' traits <- c("HEIGHT", "BMI", "HIP", "WC", "WHR", "WEIGHT")
+#' MultiSecondary(gwa.region = example.gwas, LD.ref = example.LD,
+#' snp.ref = example.snp.ref, R.ref = example.R.ref,
+#' p.threshold = 5e-8, tol = 0.9, traits = traits, T2.return = TRUE)
+#'
+#' ## Three-traits cMVA ##
+#' traits <- c("HEIGHT", "BMI", "HIP")
+#' MultiSecondary(gwa.region = example.gwas, LD.ref = example.LD,
+#' snp.ref = example.snp.ref, R.ref = example.R.ref,
+#' p.threshold = 5e-4, tol = 0.9, traits = traits, T2.return = TRUE)
+#' }
+#' @aliases MultiSecondary, multi.secondary
+#' @keywords multivariate, meta-analysis, conditional analysis
+#'
+
+`MultiSecondary` <- function(gwa.region, LD.ref, snp.ref, R.ref, p.threshold = 5e-8, tol = 0.8, traits, v_y = NULL, T2.return = FALSE){
+ if(is.null(v_y) == TRUE){
+ v_y <- rep(1,length(traits))
+ }
+ adj <- function(x,y){
+ (-abs(x-y)+x+y)/2/x/y
+ }
+
+ snp.names <- intersect(rownames(LD.ref), rownames(gwa.region))
+ idx.diff.ref <- which(gwa.region[snp.names,"A2"] != snp.ref[snp.names])
+ diff.adj.vec <- 1 - 2*(gwa.region[snp.names,"A2"] != snp.ref[snp.names])
+ names(diff.adj.vec) <- snp.names
+
+ k <- length(traits)
+ N.vec <- as.matrix(gwa.region[snp.names,paste0(traits, ".N")])
+ for(i in 1:k){
+ assign(paste0(traits[i],".N.adj"), outer(N.vec[,i],N.vec[,i],FUN = "adj"))
+ }
+
+ R.ref <- R.ref[traits, traits]
+
+ snp.sele <- c()
+ snp.left <- snp.names
+ m <- length(snp.left)
+
+ e.value <- svd(R.ref)$d
+ e.vector <- svd(R.ref)$u
+
+ b_uni <- as.matrix(gwa.region[snp.names, paste0(traits,".beta")])
+ if(length(idx.diff.ref) > 0){
+ b_uni[idx.diff.ref] <- -b_uni[idx.diff.ref]
+ }
+
+ se_uni <- as.matrix(gwa.region[snp.names, paste0(traits,".se")])
+
+ t_uni <- b_uni / se_uni
+ vr <- t_uni / sqrt(N.vec)
+ vl <- sqrt(N.vec*se_uni^2)
+ var_X <- 1 / (N.vec*se_uni^2)
+ T2.sele <- p.sele <- b_joint.sele <- se_b_joint.sele <- NULL
+
+ while(m > 0){
+ t_left_mat <- matrix(0,m,k)
+ for(j in 1:m){
+ cor_X_inv <- solve(LD.ref[c(snp.sele, snp.left[j]), c(snp.sele, snp.left[j])])
+ cor_X_next <- cor_X_inv[length(snp.sele) + 1,]
+ se_b_joint_next <- numeric(k)
+ for(i in 1:k){
+ sd_X_next <- sqrt(var_X[c(snp.sele, snp.left[j]),i]) ## variance of X_c(snp.sele, snp.left[j]) for trait i
+ cov_X_inv <- diag(1/sd_X_next, nrow = length(sd_X_next)) %*% cor_X_inv %*% diag(1/sd_X_next, nrow = length(sd_X_next))
+ cov_X_adj <- (diag(sd_X_next, nrow = length(sd_X_next)) %*%
+ LD.ref[c(snp.sele, snp.left[j]), c(snp.sele, snp.left[j])] %*%
+ diag(sd_X_next, nrow = length(sd_X_next))) *
+ get(paste0(traits[i],".N.adj"))[c(snp.sele, snp.left[j]), c(snp.sele, snp.left[j])]
+ se_b_joint_next[i] <- sqrt((v_y[i]*cov_X_inv %*% cov_X_adj %*% cov_X_inv)[length(snp.sele) + 1,length(snp.sele) + 1])
+ }
+ b_joint_next <- vl[snp.left[j],]*crossprod(cor_X_next, vr[c(snp.sele, snp.left[j]),])
+ t_left_mat[j,] <- b_joint_next / se_b_joint_next
+ }
+ # t_summary <- rowSums( (gwa.region[snp.left, paste0(traits,".t")] %*% e.vector %*% diag(sqrt(1 / e.value)))^2)
+ t_summary <- rowSums((t_left_mat %*% e.vector %*% diag(sqrt(1 / e.value)))^2)
+ p_sum_ref <- sapply(t_summary, pchisq, df = nrow(R.ref), lower.tail = F)
+ if(min(p_sum_ref) < p.threshold){
+ snp.sele.this.time <- snp.left[which.min(p_sum_ref)]
+ snp.sele <- c(snp.sele, snp.sele.this.time)
+
+ snp.r2 <- (LD.ref[snp.left,snp.sele.this.time])^2
+ snp.ld <- snp.left[which(snp.r2 > tol)]
+ snp.left <- setdiff(snp.left, c(snp.sele, snp.ld))
+
+ while(length(snp.sele) > 0){
+ cor_X_inv.sele <- solve(LD.ref[snp.sele, snp.sele])
+ b_joint.sele <- se_b_joint.sele <- matrix(0, length(snp.sele), k) # each column is joint se for a trait
+ for(i in 1:k){
+ sd_X.sele <- sqrt(var_X[snp.sele,i])
+ cov_X_inv.sele <- diag(1/sd_X.sele, nrow = length(sd_X.sele)) %*% cor_X_inv.sele %*% diag(1/sd_X.sele, nrow = length(sd_X.sele))
+ cov_X_adj.sele <- (diag(sd_X.sele, nrow = length(sd_X.sele)) %*%
+ LD.ref[snp.sele, snp.sele] %*%
+ diag(sd_X.sele, nrow = length(sd_X.sele))) *
+ get(paste0(traits[i],".N.adj"))[snp.sele, snp.sele]
+ se_b_joint.sele[,i] <- sqrt(diag(v_y[i]*cov_X_inv.sele %*% cov_X_adj.sele %*% cov_X_inv.sele))
+ b_joint.sele[,i] <- vl[snp.sele,i] * crossprod(cor_X_inv.sele, vr[snp.sele,i])
+ }
+ t_joint.sele <- b_joint.sele / se_b_joint.sele
+ hotelling.t.sele <- rowSums((t_joint.sele %*% e.vector %*% diag(sqrt(1 / e.value)))^2)
+ p.sele <- sapply(hotelling.t.sele, pchisq, df = nrow(R.ref), lower.tail = F)
+ if(max(p.sele) > p.threshold){
+ snp.sele <- snp.sele[ - which.max(p.sele)]
+ }
+ if(max(p.sele) < p.threshold){
+ cor_X_inv.sele <- solve(LD.ref[snp.sele, snp.sele])
+
+ snp.r2 <- rowSums((LD.ref[snp.left,snp.sele] %*% cor_X_inv.sele) * LD.ref[snp.left,snp.sele])
+ snp.ld <- snp.left[which(snp.r2 > tol)]
+ snp.left <- setdiff(snp.left, snp.ld)
+ break
+ }
+ }
+ }else{
+ if(length(snp.sele) > 0){
+ names(p.sele) <- snp.sele
+ rownames(b_joint.sele) <- rownames(se_b_joint.sele) <- snp.sele
+ colnames(b_joint.sele) <- colnames(se_b_joint.sele) <- traits
+ b_joint.sele <- diag(diff.adj.vec[snp.sele], nrow = length(snp.sele)) %*% b_joint.sele
+ rownames(b_joint.sele) <- snp.sele
+ }
+ if(T2.return == T){
+ if(length(snp.sele) > 0){
+ T2.sele <- qchisq(p.sele, df = length(traits), lower.tail = F)
+ }
+ return(list(T2.sele = T2.sele, p.sele = p.sele, b_joint.sele = b_joint.sele, se_b_joint.sele = se_b_joint.sele))
+ }
+ }
+ m <- length(snp.left)
+ }
+
+ if(length(snp.sele) > 0){
+ names(p.sele) <- snp.sele
+ rownames(b_joint.sele) <- rownames(se_b_joint.sele) <- snp.sele
+ colnames(b_joint.sele) <- colnames(se_b_joint.sele) <- traits
+ b_joint.sele <- diag(diff.adj.vec[snp.sele], nrow = length(snp.sele)) %*% b_joint.sele
+ rownames(b_joint.sele) <- snp.sele
+ }
+ if(T2.return == T){
+ if(length(snp.sele) > 0){
+ T2.sele <- qchisq(p.sele, df = length(traits), lower.tail = F)
+ }
+ return(list(T2.sele = T2.sele, p.sele = p.sele, b_joint.sele = b_joint.sele, se_b_joint.sele = se_b_joint.sele))
+ }
+
+ return(list(p.sele = p.sele, b_joint.sele = b_joint.sele, se_b_joint.sele = se_b_joint.sele))
+}
Property changes on: pkg/MultiABEL/R/MultiSecondary.R
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Modified: pkg/MultiABEL/R/MultiSummary.R
===================================================================
--- pkg/MultiABEL/R/MultiSummary.R 2019-02-09 03:31:35 UTC (rev 2077)
+++ pkg/MultiABEL/R/MultiSummary.R 2019-02-09 03:32:22 UTC (rev 2078)
@@ -118,14 +118,17 @@
fs <- scan1$pil/m/(n - scan1$pil)*(n - m - 1)
mf <- mean(fs)
n.eff <- mf*2/(mf - 1) + m + 1 # effective N: fit a F-distribution to the observed fstat using moment estimator
- res$n <- round(n.eff, digits = 2)
+ res$n <- n
+ res$effective.n <- round(n.eff, digits = 2)
if (!high.dim) {
- if (n.eff < 5*m) {
- warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
- }
- pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE)
+ # if (n.eff < 5*m) {
+ # warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
+ # }
+ # pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE)
+ # modified to the following line 2019-02-08
+ pv <- pf(fs, m, n - m - 1, lower.tail = FALSE)
} else {
- pv <- pf(fs, m, n.eff - m - 1, lower.tail = FALSE)
+ pv <- pf(fs, m, n - m - 1, lower.tail = FALSE)
# scan1 <- .Fortran('MultiSummaryLoopDirectFstat', k = as.integer(k), m = as.integer(m), nn = as.numeric(n),
# betamat = betamat, invsemat = 1/semat, invse = diag(m), invR = solve(x$cor.pheno), invV = diag(m),
# pil = numeric(k), fstat = numeric(k), PACKAGE = "MultiABEL")
@@ -150,12 +153,15 @@
fs <- scan1$pil/m/(n - scan1$pil)*(n - m - 1)
mf <- mean(fs)
n.eff <- mf*2/(mf - 1) + m + 1 # effective N: fit a F-distribution to the observed fstat using moment estimator
- res$n <- round(n.eff, digits = 2)
+ res$n <- n
+ res$effective.n <- round(n.eff, digits = 2)
if (!high.dim) {
- if (n.eff < 5*m) {
- warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
- }
- pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE)
+ # if (n.eff < 5*m) {
+ # warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
+ # }
+ # pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE)
+ # modified to the following line 2019-02-08
+ pv <- pf(fs, m, n - m - 1, lower.tail = FALSE)
} else {
pv <- pf(fs, m, n.eff - m - 1, lower.tail = FALSE)
# scan1 <- .Fortran('MultiSummaryLoopDirectFstat', k = as.integer(k), m = as.integer(m), nn = as.numeric(n),
@@ -182,12 +188,15 @@
fs <- scan1$pil/m/(n - scan1$pil)*(n - m - 1)
mf <- mean(fs)
n.eff <- mf*2/(mf - 1) + m + 1 # effective N: fit a F-distribution to the observed fstat using moment estimator
- res$n <- round(n.eff, digits = 2)
+ res$n <- n
+ res$effective.n <- round(n.eff, digits = 2)
if (!high.dim) {
- if (n.eff < 5*m) {
- warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
- }
- pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE)
+ # if (n.eff < 5*m) {
+ # warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
+ # }
+ # pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE)
+ # modified to the following line 2019-02-08
+ pv <- pf(fs, m, n - m - 1, lower.tail = FALSE)
} else {
pv <- pf(fs, m, n.eff - m - 1, lower.tail = FALSE)
# scan1 <- .Fortran('MultiSummaryLoopDirectFstat', k = as.integer(k), m = as.integer(m), nn = as.numeric(n),
@@ -214,12 +223,15 @@
fs <- scan1$pil/m/(n - scan1$pil)*(n - m - 1)
mf <- mean(fs)
n.eff <- mf*2/(mf - 1) + m + 1 # effective N: fit a F-distribution to the observed fstat using moment estimator
- res$n <- round(n.eff, digits = 2)
+ res$n <- n
+ res$effective.n <- round(n.eff, digits = 2)
if (!high.dim) {
- if (n.eff < 5*m) {
- warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
- }
- pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE)
+ # if (n.eff < 5*m) {
+ # warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
+ # }
+ # pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE)
+ # modified to the following line 2019-02-08
+ pv <- pf(fs, m, n - m - 1, lower.tail = FALSE)
} else {
pv <- pf(fs, m, n.eff - m - 1, lower.tail = FALSE)
# scan1 <- .Fortran('MultiSummaryLoopDirectFstat', k = as.integer(k), m = as.integer(m), nn = as.numeric(n),
Modified: pkg/MultiABEL/R/Multivariate.R
===================================================================
--- pkg/MultiABEL/R/Multivariate.R 2019-02-09 03:31:35 UTC (rev 2077)
+++ pkg/MultiABEL/R/Multivariate.R 2019-02-09 03:32:22 UTC (rev 2078)
@@ -22,8 +22,8 @@
#' @author Xia Shen
#'
#' @references
-#' Shen X, Klarić L, Sharapov S, Mangino M, Ning Z, Wu D,
-#' Trbojević-Akmačić I, Pučić-Baković M, Rudan I, Polašek O,
+#' Shen X, Klaric L, Sharapov S, Mangino M, Ning Z, Wu D,
+#' Trbojevic-Akmacic I, Pucic-Bakovic M, Rudan I, Polasek O,
#' Hayward C, Spector TD, Wilson JF, Lauc G, Aulchenko YS (2017):
#' Multivariate discovery and replication of five novel loci
#' associated with Immunoglobulin G N-glycosylation.
@@ -48,6 +48,7 @@
#' @aliases Multivariate, multivariate
#' @keywords multivariate
#'
+
`Multivariate` <- function(x, trait.idx = NULL, ...) {
if (class(x) != 'multi.loaded') {
stop('wrong data type!')
Modified: pkg/MultiABEL/R/load.summary.R
===================================================================
--- pkg/MultiABEL/R/load.summary.R 2019-02-09 03:31:35 UTC (rev 2077)
+++ pkg/MultiABEL/R/load.summary.R 2019-02-09 03:32:22 UTC (rev 2078)
@@ -68,7 +68,7 @@
`load.summary` <- function(files, cor.pheno = NULL, indep.snps = NULL,
est.var = FALSE,
columnNames = c('snp', 'a1', 'a2', 'freq', 'beta', 'se', 'n'),
- fixedN = NULL) {
+ fixedN = NULL, data.table = TRUE) {
### I. Sanity checks ###
@@ -134,10 +134,12 @@
for (i in m:1) {
# Reserved for possible problems with data.table package
- # dd <- read.table(fn[i], header = TRUE, stringsAsFactors = FALSE)
+ if (!data.table) {
+ dd <- read.table(fn[i], header = TRUE, stringsAsFactors = FALSE)
+ } else {
+ dd <- data.table::fread(fn[i], header = TRUE, stringsAsFactors = FALSE,verbose = FALSE, showProgress = FALSE, data.table = FALSE)
+ }
- dd <- data.table::fread(fn[i], header = TRUE, stringsAsFactors = FALSE,verbose = FALSE, showProgress = FALSE, data.table = FALSE)
-
colnames(dd) <- tolower(colnames(dd))
currentColNames <- colnames(dd)
if (any(!(columnNames %in% currentColNames))) {
More information about the Genabel-commits
mailing list