[Genabel-commits] r2078 - pkg/MultiABEL/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sat Feb 9 04:32:23 CET 2019


Author: shenxia
Date: 2019-02-09 04:32:22 +0100 (Sat, 09 Feb 2019)
New Revision: 2078

Added:
   pkg/MultiABEL/R/MV.cor.test.R
   pkg/MultiABEL/R/MultiSecondary.R
Modified:
   pkg/MultiABEL/R/MultiSummary.R
   pkg/MultiABEL/R/Multivariate.R
   pkg/MultiABEL/R/load.summary.R
Log:
update functions

Added: pkg/MultiABEL/R/MV.cor.test.R
===================================================================
--- pkg/MultiABEL/R/MV.cor.test.R	                        (rev 0)
+++ pkg/MultiABEL/R/MV.cor.test.R	2019-02-09 03:32:22 UTC (rev 2078)
@@ -0,0 +1,170 @@
+#' Correlation replication test based on multivariate analysis results
+#' 
+#' This function is developed to implement correlation replication test based on MVA or cMVA results
+#' 
+#' @param marker The SNP to be analyzed
+#' @param gwa.1 GWAS summary statistics for sample 1, includes A1, A2 and two columns for each trait: beta and se
+#' @param gwa.2 GWAS summary statistics for sample 2, includes A1, A2 and two columns for each trait: beta and se
+#' @param R.1 Phenotypic correlation matrix for sample 1
+#' @param R.2 Phenotypic correlation matrix for sample 2
+#' @param traits Traits to be analyzed
+#' @param nrep The number of Monte Carlo repetitions 
+#' @param probs Percentiles of the endpoints of confidence interval
+#' @param method The method used for computing correlation coefficient
+#' @param plot If the results for making correlation test figure are needed
+#' 
+#' @return The function returns two lists of \code{res}, which includes 
+#' 1) \code{correlation} Estimated correlation computed from original sample; 
+#' 2) \code{ci.left} The value at left endpoint of confidence interval; 
+#' 3) \code{ci.right} The value at right endpoint of confidence interval (Note: If there are only two traits, 
+#' then the ratio of correlation equals to one is provided instead of ci.left and ci.right),
+#' and \code{df.plot}, will be provided if \code{plot = TRUE}, includes 
+#' 1) \code{traits} The name of traits in analysis;
+#' 2) \code{rank.1} The rank of estimated effect sizes in sample 1;
+#' 3) \code{rank.2} The rank of estimated effect sizes in sample 2;
+#' 4) \code{mean.conc} The mean of concordant pairs in MC generated by the trait;
+#' 5) \code{sd.conc} The standard deviation of concordant pairs in MC generated by the trait;
+#' 6) \code{se.beta} The standard error of estimated effect sizes computed using inverse-variance weighting.
+#' 
+#' @author Zheng Ning, Xia Shen
+#' 
+#' @references 
+#' Zheng Ning, Yakov Tsepilov, Sodbo Zh. Sharapov, Alexander K. Grishenko, Masoud Shirali, Peter K. Joshi,
+#' James F. Wilson, Yudi Pawitan, Chris S. Haley, Yurii S. Aulchenko, Xia Shen (2018).
+#' Multivariate discovery, replication, and interpretation of pleiotropic loci using summary association statistics. \emph{Submitted}.
+#' 
+#' @seealso 
+#' \code{MultiSummary}
+#' 
+#' @examples 
+#' \dontrun{
+#' 
+#' data(example.MV.cor.test)
+#' 
+#' ## Six-trait correlation test ##
+#' traits <- c("HEIGHT", "BMI", "HIP", "WC", "WHR", "WEIGHT")
+#' set.seed(510)
+#' MV.cor.test(marker = "rs905938", gwa.1 = example.gwa.1, gwa.2 = example.gwa.2, R.1 = example.R.1,
+#'             R.2 = example.R.2, traits = traits, nrep = 10000)
+#' 
+#' ## Make correlation correlation test figure ##
+#' require(ggplot2)
+#' require(cowplot)
+#' 
+#' set.seed(510)
+#' res.mv.cor <- MV.cor.test(marker = "rs905938", gwa.1 = example.gwa.1, gwa.2 = example.gwa.2, R.1 = example.R.1,
+#'                           R.2 = example.R.2, traits = traits, nrep = 10000, plot = TRUE)
+#' df.plot <- res.mv.cor$df.plot
+#' 
+#' p1 <- ggplot()+ 
+#'   geom_point(data=df.plot, mapping=aes(x=rank.1, y=rank.2, color=traits), size=2) + 
+#'   geom_point(data=df.plot, mapping=aes(x=rank.1, y=rank.2, color=traits, size = se.beta), alpha = 0.2) + 
+#'   stat_smooth(data=df.plot, mapping=aes(x=rank.1, y=rank.2), method = "lm", se=FALSE, color="black", size=0.3, fullrange = TRUE) + 
+#'   coord_cartesian(xlim = c(0.5, 6.5), ylim = c(0.5, 6.5)) + xlim(0,200) + 
+#'   scale_size_continuous(range = c(3, 10)) +
+#'   theme(axis.text=element_text(size=10),
+#'         axis.title=element_text(size=14,face="bold"), 
+#'         strip.text.x = element_text(size = 16))+ 
+#'   theme(axis.title.x=element_blank(),axis.text.x=element_blank(),
+#'         axis.ticks.x=element_blank(),axis.title.y=element_blank(), legend.position = c(0.8,0.3), 
+#'         legend.background=element_rect(colour='NA', fill='transparent'), legend.key=element_blank(), 
+#'         legend.title=element_text(size=14), 
+#'         legend.text=element_text(size=12), legend.key.size = unit(1.4, 'lines')) + 
+#'   guides(colour = guide_legend(override.aes = list(alpha = 1)), size = FALSE) +
+#'   scale_colour_discrete(name = "Traits")
+#' 
+#' p2 <- ggplot(data=df.plot, aes(x=rank.1,y=mean.conc)) +
+#'   coord_cartesian(xlim = c(0.5, 6.5), ylim = c(0, 5.5)) + 
+#'   geom_bar(stat = "identity", aes(fill=traits), width = 0.4) + theme(legend.position="none") + theme(
+#'     strip.background = element_blank(),
+#'     strip.text.x = element_blank()
+#'   ) + geom_errorbar(aes(ymin = mean.conc - sd.conc,ymax = mean.conc + sd.conc), width = 0.1)  + 
+#'   theme(axis.title.x=element_blank(),axis.text.x=element_blank(),
+#'         axis.ticks.x=element_blank(),axis.title.y=element_blank()) + 
+#'   theme(plot.margin = unit(c(0, 0, 0, 0), "cm"))
+#' 
+#' require(cowplot)
+#' plot_grid(p1,p2,ncol=1,align = "v", rel_heights = c(2,1))
+#' 
+#' }
+#' @aliases MV.cor.test, mv.cor.test
+#' @keywords multivariate, replication, correlation test
+#' 
+
+`MV.cor.test` <- function(marker, gwa.1, gwa.2, R.1, R.2, traits, nrep = 10000, probs = c(0.025,0.975), method = "kendall", plot = FALSE){
+  
+  if((method %in% c("kendall", "pearson", "spearman")) == FALSE){
+    stop("The method has to be one of 'kendall', 'pearson' or 'spearman'.")
+  }
+  
+  R.1 <- R.1[traits, traits]
+  R.2 <- R.2[traits, traits]
+  
+  beta.1 <- as.numeric(gwa.1[marker, paste0(traits, ".beta")])
+  se.1 <- as.numeric(gwa.1[marker, paste0(traits, ".se")])
+  cov.beta.1 <- diag(se.1) %*% R.1 %*% diag(se.1)
+  
+  
+  beta.2 <- as.numeric(gwa.2[marker, paste0(traits, ".beta")])
+  se.2 <- as.numeric(gwa.2[marker, paste0(traits, ".se")])
+  cov.beta.2 <- diag(se.2) %*% R.2 %*% diag(se.2)
+  
+  A1.1 <- gwa.1[marker, "A1"]
+  A1.2 <- gwa.2[marker, "A1"]
+  if(A1.1 != A1.2){
+    beta.2 <- -beta.2
+  }
+  
+  cor.sample <- cor(beta.1, beta.2, method = method)
+  
+  n <- nrep
+  beta.1.mc <- rmvnorm(n, mean = beta.1, sigma = cov.beta.1)
+  beta.2.mc <- rmvnorm(n, mean = beta.2, sigma = cov.beta.2)
+  
+  cor.mc <- numeric(n)
+  for(i in 1:n){
+    cor.mc[i] <- cor(beta.1.mc[i,], beta.2.mc[i,], method = method)
+  }
+  ci <- quantile(cor.mc, probs = probs)
+  
+  df.plot <- NULL
+  if(plot == TRUE && length(traits) > 2){
+    agree <- function(df,i){
+      sum(apply(sign(df[,i] - df[,-i]),2, prod) == 1)
+    }
+    rank.1 <- rank(beta.1, ties.method = "random")
+    rank.2 <- rank(beta.2, ties.method = "random")
+    concordance <- matrix(0,n,length(traits))
+    for(i in 1:n){
+      concordance[i,] <- sapply(1:length(traits), FUN = agree, df = rbind(beta.1.mc[i,],beta.2.mc[i,]))
+    }
+    mean.conc <- colMeans(concordance)
+    sd.conc <- apply(concordance, 2, sd)
+    
+    se.beta <- sqrt(1 / (1/(se.1^2) + 1/(se.2^2)))
+    
+    df.plot <- cbind(rank.1,rank.2,mean.conc, sd.conc, se.beta)
+    df.plot <- data.frame(traits, df.plot)
+    colnames(df.plot) <- c("traits", "rank.1","rank.2", "mean.conc","sd.conc", "se.beta")
+  }
+  
+  if(plot == TRUE && length(traits) == 2){
+    stop("The number of traits should be greater than two to make the figure meaningful.")
+  }
+  
+  if(length(traits) == 2){
+    ratio.1 <- sum(cor.mc) / n
+    res <- c(cor.sample,ratio.1)
+    names(res) <- c("correlation", "ratio")
+  } else{
+    res <- c(cor.sample,ci)
+    names(res) <- c("correlation", "ci.left", "ci.right")
+  }
+  if(plot == TRUE){
+    return(list(res = res, df.plot = df.plot))
+  } else{
+    return(res)
+  }
+}
+
+


Property changes on: pkg/MultiABEL/R/MV.cor.test.R
___________________________________________________________________
Added: svn:mime-type
   + text/plain

Added: pkg/MultiABEL/R/MultiSecondary.R
===================================================================
--- pkg/MultiABEL/R/MultiSecondary.R	                        (rev 0)
+++ pkg/MultiABEL/R/MultiSecondary.R	2019-02-09 03:32:22 UTC (rev 2078)
@@ -0,0 +1,183 @@
+#' Conditional multivariate association analysis using summary statistics
+#' 
+#' This function is developed to implement cMVA based on multivariate results
+#' 
+#' @param gwa.region GWAS summary statistics, includes A1, A2 and three columns for each trait: beta, se and N
+#' @param LD.ref Regional LD matrix including SNPs in gwa.region
+#' @param snp.ref The reference alleles of SNPs in the reference LD correlation matrix. The names of the vector 
+#' should be SNP names in reference sample
+#' @param R.ref Shrinkage phenotypic correlation matrix, achieved from \code{MultiSummary()}
+#' @param p.threshold P-value threshold in conditional analysis
+#' @param tol Tolerance for multicollinearity
+#' @param traits Traits to be analyzed
+#' @param v_y The variance of the traits
+#' @param T2.return Returning conditional T2 statistic or not
+#' 
+#' @return The function returns a list with elements of \code{T2.sele}: The conditional test statistic of the selected variants. 
+#' It will be provided if \code{T2.return = TRUE}; \code{p.sele}: The conditional p-value of the selected variants;
+#' \code{b_joint.sele}: The conditional effect size of the selected variants; \code{se_b_joint.sele}: The conditional 
+#' standard error of the selected variants.
+#' 
+#' @author Zheng Ning, Xia Shen
+#' 
+#' @references 
+#' Zheng Ning, Yakov Tsepilov, Sodbo Zh. Sharapov, Alexander K. Grishenko, Masoud Shirali, Peter K. Joshi,
+#' James F. Wilson, Yudi Pawitan, Chris S. Haley, Yurii S. Aulchenko, Xia Shen (2018).
+#' Multivariate discovery, replication, and interpretation of pleiotropic loci using summary association statistics. \emph{Submitted}.
+#' 
+#' @seealso 
+#' \code{MultiSummary}
+#' 
+#' @examples 
+#' \dontrun{
+#' data(example.MultiSecondary)
+#' ##### 474 snps around rs905938 #####
+#' 
+#' ## Six-traits cMVA ##
+#' traits <- c("HEIGHT", "BMI", "HIP", "WC", "WHR", "WEIGHT")
+#' MultiSecondary(gwa.region = example.gwas, LD.ref = example.LD, 
+#' 				  snp.ref = example.snp.ref, R.ref = example.R.ref,
+#'                p.threshold = 5e-8, tol = 0.9, traits = traits, T2.return = TRUE)
+#' 
+#' ## Three-traits cMVA ##
+#' traits <- c("HEIGHT", "BMI", "HIP")
+#' MultiSecondary(gwa.region = example.gwas, LD.ref = example.LD, 
+#'                snp.ref = example.snp.ref, R.ref = example.R.ref,
+#'                p.threshold = 5e-4, tol = 0.9, traits = traits, T2.return = TRUE)
+#' }
+#' @aliases MultiSecondary, multi.secondary
+#' @keywords multivariate, meta-analysis, conditional analysis
+#' 
+
+`MultiSecondary` <- function(gwa.region, LD.ref, snp.ref, R.ref, p.threshold = 5e-8, tol = 0.8, traits, v_y = NULL, T2.return = FALSE){
+  if(is.null(v_y) == TRUE){
+    v_y <- rep(1,length(traits))
+  }
+  adj <- function(x,y){
+    (-abs(x-y)+x+y)/2/x/y
+  }
+  
+  snp.names <- intersect(rownames(LD.ref), rownames(gwa.region))
+  idx.diff.ref <- which(gwa.region[snp.names,"A2"] != snp.ref[snp.names])
+  diff.adj.vec <- 1 - 2*(gwa.region[snp.names,"A2"] != snp.ref[snp.names])
+  names(diff.adj.vec) <- snp.names
+  
+  k <- length(traits)
+  N.vec <- as.matrix(gwa.region[snp.names,paste0(traits, ".N")])
+  for(i in 1:k){
+    assign(paste0(traits[i],".N.adj"), outer(N.vec[,i],N.vec[,i],FUN = "adj"))
+  }
+  
+  R.ref <- R.ref[traits, traits]
+  
+  snp.sele <- c()
+  snp.left <- snp.names
+  m <- length(snp.left)
+  
+  e.value <- svd(R.ref)$d
+  e.vector <- svd(R.ref)$u
+  
+  b_uni <- as.matrix(gwa.region[snp.names, paste0(traits,".beta")])
+  if(length(idx.diff.ref) > 0){
+    b_uni[idx.diff.ref] <- -b_uni[idx.diff.ref]
+  }
+  
+  se_uni <- as.matrix(gwa.region[snp.names, paste0(traits,".se")])
+  
+  t_uni <- b_uni / se_uni
+  vr <- t_uni / sqrt(N.vec)
+  vl <- sqrt(N.vec*se_uni^2)
+  var_X <- 1 / (N.vec*se_uni^2)
+  T2.sele <- p.sele <- b_joint.sele <- se_b_joint.sele <- NULL
+  
+  while(m > 0){
+    t_left_mat <- matrix(0,m,k)
+    for(j in 1:m){
+      cor_X_inv <- solve(LD.ref[c(snp.sele, snp.left[j]), c(snp.sele, snp.left[j])])
+      cor_X_next <- cor_X_inv[length(snp.sele) + 1,]
+      se_b_joint_next <- numeric(k)
+      for(i in 1:k){
+        sd_X_next <- sqrt(var_X[c(snp.sele, snp.left[j]),i])  ## variance of X_c(snp.sele, snp.left[j]) for trait i
+        cov_X_inv <- diag(1/sd_X_next, nrow = length(sd_X_next)) %*% cor_X_inv %*% diag(1/sd_X_next, nrow = length(sd_X_next))
+        cov_X_adj <- (diag(sd_X_next, nrow = length(sd_X_next)) %*% 
+                        LD.ref[c(snp.sele, snp.left[j]), c(snp.sele, snp.left[j])] %*% 
+                        diag(sd_X_next, nrow = length(sd_X_next))) * 
+          get(paste0(traits[i],".N.adj"))[c(snp.sele, snp.left[j]), c(snp.sele, snp.left[j])]
+        se_b_joint_next[i] <- sqrt((v_y[i]*cov_X_inv %*% cov_X_adj %*% cov_X_inv)[length(snp.sele) + 1,length(snp.sele) + 1])
+      }
+      b_joint_next <- vl[snp.left[j],]*crossprod(cor_X_next, vr[c(snp.sele, snp.left[j]),])
+      t_left_mat[j,] <- b_joint_next / se_b_joint_next    
+    }
+    # t_summary <- rowSums( (gwa.region[snp.left, paste0(traits,".t")] %*% e.vector %*% diag(sqrt(1 / e.value)))^2)
+    t_summary <- rowSums((t_left_mat %*% e.vector %*% diag(sqrt(1 / e.value)))^2)
+    p_sum_ref <- sapply(t_summary, pchisq, df = nrow(R.ref), lower.tail = F)
+    if(min(p_sum_ref) < p.threshold){
+      snp.sele.this.time <- snp.left[which.min(p_sum_ref)]
+      snp.sele <- c(snp.sele, snp.sele.this.time)
+      
+      snp.r2 <- (LD.ref[snp.left,snp.sele.this.time])^2
+      snp.ld <- snp.left[which(snp.r2 > tol)]
+      snp.left <- setdiff(snp.left, c(snp.sele, snp.ld))
+      
+      while(length(snp.sele) > 0){
+        cor_X_inv.sele <- solve(LD.ref[snp.sele, snp.sele])
+        b_joint.sele <- se_b_joint.sele <- matrix(0, length(snp.sele), k) # each column is joint se for a trait
+        for(i in 1:k){
+          sd_X.sele <- sqrt(var_X[snp.sele,i])
+          cov_X_inv.sele <- diag(1/sd_X.sele, nrow = length(sd_X.sele)) %*% cor_X_inv.sele %*% diag(1/sd_X.sele, nrow = length(sd_X.sele))
+          cov_X_adj.sele <- (diag(sd_X.sele, nrow = length(sd_X.sele)) %*%
+                               LD.ref[snp.sele, snp.sele] %*%
+                               diag(sd_X.sele, nrow = length(sd_X.sele))) *
+            get(paste0(traits[i],".N.adj"))[snp.sele, snp.sele]
+          se_b_joint.sele[,i] <- sqrt(diag(v_y[i]*cov_X_inv.sele %*% cov_X_adj.sele %*% cov_X_inv.sele))
+          b_joint.sele[,i] <- vl[snp.sele,i] * crossprod(cor_X_inv.sele, vr[snp.sele,i])
+        }
+        t_joint.sele <- b_joint.sele / se_b_joint.sele
+        hotelling.t.sele <- rowSums((t_joint.sele %*% e.vector %*% diag(sqrt(1 / e.value)))^2)
+        p.sele <- sapply(hotelling.t.sele, pchisq, df = nrow(R.ref), lower.tail = F)
+        if(max(p.sele) > p.threshold){
+          snp.sele <- snp.sele[ - which.max(p.sele)]
+        }
+        if(max(p.sele) < p.threshold){
+          cor_X_inv.sele <- solve(LD.ref[snp.sele, snp.sele])
+          
+          snp.r2 <- rowSums((LD.ref[snp.left,snp.sele] %*% cor_X_inv.sele) * LD.ref[snp.left,snp.sele])
+          snp.ld <- snp.left[which(snp.r2 > tol)]
+          snp.left <- setdiff(snp.left, snp.ld)
+          break
+        }
+      }
+    }else{
+      if(length(snp.sele) > 0){
+        names(p.sele) <- snp.sele
+        rownames(b_joint.sele) <- rownames(se_b_joint.sele) <- snp.sele
+        colnames(b_joint.sele) <- colnames(se_b_joint.sele) <- traits
+        b_joint.sele <- diag(diff.adj.vec[snp.sele], nrow = length(snp.sele)) %*% b_joint.sele
+        rownames(b_joint.sele) <- snp.sele
+      }
+      if(T2.return == T){
+        if(length(snp.sele) > 0){
+          T2.sele <- qchisq(p.sele, df = length(traits), lower.tail = F)
+        }
+        return(list(T2.sele = T2.sele, p.sele = p.sele, b_joint.sele = b_joint.sele, se_b_joint.sele = se_b_joint.sele))
+      }
+    }
+    m <- length(snp.left)
+  }
+  
+  if(length(snp.sele) > 0){
+    names(p.sele) <- snp.sele
+    rownames(b_joint.sele) <- rownames(se_b_joint.sele) <- snp.sele
+    colnames(b_joint.sele) <- colnames(se_b_joint.sele) <- traits
+    b_joint.sele <- diag(diff.adj.vec[snp.sele], nrow = length(snp.sele)) %*% b_joint.sele
+    rownames(b_joint.sele) <- snp.sele
+  }
+  if(T2.return == T){
+    if(length(snp.sele) > 0){
+      T2.sele <- qchisq(p.sele, df = length(traits), lower.tail = F)
+    }
+    return(list(T2.sele = T2.sele, p.sele = p.sele, b_joint.sele = b_joint.sele, se_b_joint.sele = se_b_joint.sele))
+  }
+  
+  return(list(p.sele = p.sele, b_joint.sele = b_joint.sele, se_b_joint.sele = se_b_joint.sele))
+}


Property changes on: pkg/MultiABEL/R/MultiSecondary.R
___________________________________________________________________
Added: svn:mime-type
   + text/plain

Modified: pkg/MultiABEL/R/MultiSummary.R
===================================================================
--- pkg/MultiABEL/R/MultiSummary.R	2019-02-09 03:31:35 UTC (rev 2077)
+++ pkg/MultiABEL/R/MultiSummary.R	2019-02-09 03:32:22 UTC (rev 2078)
@@ -118,14 +118,17 @@
 		fs <- scan1$pil/m/(n - scan1$pil)*(n - m - 1)
 		mf <- mean(fs)
 		n.eff <- mf*2/(mf - 1) + m + 1 # effective N: fit a F-distribution to the observed fstat using moment estimator
-		res$n <- round(n.eff, digits = 2)
+		res$n <- n 
+		res$effective.n <- round(n.eff, digits = 2)
 		if (!high.dim) {
-			if (n.eff < 5*m) {
-				warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
-			}
-			pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE)	
+			# if (n.eff < 5*m) {
+			#	warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
+			# }
+			# pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE) 
+			# modified to the following line 2019-02-08
+			pv <- pf(fs, m, n - m - 1, lower.tail = FALSE)
 		} else {
-			pv <- pf(fs, m, n.eff - m - 1, lower.tail = FALSE)	
+			pv <- pf(fs, m, n - m - 1, lower.tail = FALSE)	
 			# scan1 <- .Fortran('MultiSummaryLoopDirectFstat', k = as.integer(k), m = as.integer(m), nn = as.numeric(n), 
 			#		betamat = betamat, invsemat = 1/semat, invse = diag(m), invR = solve(x$cor.pheno), invV = diag(m), 
 			#		pil = numeric(k), fstat = numeric(k), PACKAGE = "MultiABEL")
@@ -150,12 +153,15 @@
 		fs <- scan1$pil/m/(n - scan1$pil)*(n - m - 1)
 		mf <- mean(fs)
 		n.eff <- mf*2/(mf - 1) + m + 1 # effective N: fit a F-distribution to the observed fstat using moment estimator
-		res$n <- round(n.eff, digits = 2)
+		res$n <- n 
+		res$effective.n <- round(n.eff, digits = 2)
 		if (!high.dim) {
-			if (n.eff < 5*m) {
-				warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
-			}
-			pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE)	
+			# if (n.eff < 5*m) {
+			#	warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
+			# }
+			# pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE) 
+			# modified to the following line 2019-02-08
+			pv <- pf(fs, m, n - m - 1, lower.tail = FALSE)
 		} else {
 			pv <- pf(fs, m, n.eff - m - 1, lower.tail = FALSE)	
 			# scan1 <- .Fortran('MultiSummaryLoopDirectFstat', k = as.integer(k), m = as.integer(m), nn = as.numeric(n), 
@@ -182,12 +188,15 @@
 		fs <- scan1$pil/m/(n - scan1$pil)*(n - m - 1)
 		mf <- mean(fs)
 		n.eff <- mf*2/(mf - 1) + m + 1 # effective N: fit a F-distribution to the observed fstat using moment estimator
-		res$n <- round(n.eff, digits = 2)
+		res$n <- n 
+		res$effective.n <- round(n.eff, digits = 2)
 		if (!high.dim) {
-			if (n.eff < 5*m) {
-				warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
-			}
-			pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE)	
+			# if (n.eff < 5*m) {
+			#	warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
+			# }
+			# pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE) 
+			# modified to the following line 2019-02-08
+			pv <- pf(fs, m, n - m - 1, lower.tail = FALSE)
 		} else {
 			pv <- pf(fs, m, n.eff - m - 1, lower.tail = FALSE)	
 			# scan1 <- .Fortran('MultiSummaryLoopDirectFstat', k = as.integer(k), m = as.integer(m), nn = as.numeric(n), 
@@ -214,12 +223,15 @@
 		fs <- scan1$pil/m/(n - scan1$pil)*(n - m - 1)
 		mf <- mean(fs)
 		n.eff <- mf*2/(mf - 1) + m + 1 # effective N: fit a F-distribution to the observed fstat using moment estimator
-		res$n <- round(n.eff, digits = 2)
+		res$n <- n 
+		res$effective.n <- round(n.eff, digits = 2)
 		if (!high.dim) {
-			if (n.eff < 5*m) {
-				warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
-			}
-			pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE)	
+			# if (n.eff < 5*m) {
+			#	warning('Effective sample size < 5 times number of phenotypes. Consider argument high.dim = TRUE.')
+			# }
+			# pv <- pf(scan1$fstat, m, n - m - 1, lower.tail = FALSE) 
+			# modified to the following line 2019-02-08
+			pv <- pf(fs, m, n - m - 1, lower.tail = FALSE)
 		} else {
 			pv <- pf(fs, m, n.eff - m - 1, lower.tail = FALSE)	
 			# scan1 <- .Fortran('MultiSummaryLoopDirectFstat', k = as.integer(k), m = as.integer(m), nn = as.numeric(n), 

Modified: pkg/MultiABEL/R/Multivariate.R
===================================================================
--- pkg/MultiABEL/R/Multivariate.R	2019-02-09 03:31:35 UTC (rev 2077)
+++ pkg/MultiABEL/R/Multivariate.R	2019-02-09 03:32:22 UTC (rev 2078)
@@ -22,8 +22,8 @@
 #' @author Xia Shen
 #' 
 #' @references 
-#' Shen X, Klarić L, Sharapov S, Mangino M, Ning Z, Wu D, 
-#' Trbojević-Akmačić I, Pučić-Baković M, Rudan I, Polašek O, 
+#' Shen X, Klaric L, Sharapov S, Mangino M, Ning Z, Wu D, 
+#' Trbojevic-Akmacic I, Pucic-Bakovic M, Rudan I, Polasek O, 
 #' Hayward C, Spector TD, Wilson JF, Lauc G, Aulchenko YS (2017): 
 #' Multivariate discovery and replication of five novel loci 
 #' associated with Immunoglobulin G N-glycosylation. 
@@ -48,6 +48,7 @@
 #' @aliases Multivariate, multivariate
 #' @keywords multivariate
 #' 
+
 `Multivariate` <- function(x, trait.idx = NULL, ...) {
 	if (class(x) != 'multi.loaded') {
 		stop('wrong data type!')

Modified: pkg/MultiABEL/R/load.summary.R
===================================================================
--- pkg/MultiABEL/R/load.summary.R	2019-02-09 03:31:35 UTC (rev 2077)
+++ pkg/MultiABEL/R/load.summary.R	2019-02-09 03:32:22 UTC (rev 2078)
@@ -68,7 +68,7 @@
 `load.summary` <- function(files, cor.pheno = NULL, indep.snps = NULL, 
 		est.var = FALSE, 
 		columnNames = c('snp', 'a1', 'a2', 'freq', 'beta', 'se', 'n'),
-		fixedN = NULL) {
+		fixedN = NULL, data.table = TRUE) {
 	
 	### I. Sanity checks ###
 	
@@ -134,10 +134,12 @@
 	for (i in m:1) {
 		
 		# Reserved for possible problems with data.table package
-		# dd <- read.table(fn[i], header = TRUE, stringsAsFactors = FALSE)
+		if (!data.table) {
+			dd <- read.table(fn[i], header = TRUE, stringsAsFactors = FALSE)
+		} else {
+			dd <- data.table::fread(fn[i], header = TRUE, stringsAsFactors = FALSE,verbose = FALSE, showProgress = FALSE, data.table = FALSE)
+		}
 		
-		dd <- data.table::fread(fn[i], header = TRUE, stringsAsFactors = FALSE,verbose = FALSE, showProgress = FALSE, data.table = FALSE)
-		
 		colnames(dd) <- tolower(colnames(dd))
 		currentColNames <- colnames(dd)
 		if (any(!(columnNames %in% currentColNames))) {



More information about the Genabel-commits mailing list