[adegenet-commits] r863 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Apr 20 18:27:46 CEST 2011


Author: jombart
Date: 2011-04-20 18:27:46 +0200 (Wed, 20 Apr 2011)
New Revision: 863

Modified:
   pkg/R/glFunctions.R
Log:
Parallelized C code in glDotProd. 
Result OK, but need to check with it is actually slower this way (!)


Modified: pkg/R/glFunctions.R
===================================================================
--- pkg/R/glFunctions.R	2011-04-20 14:19:57 UTC (rev 862)
+++ pkg/R/glFunctions.R	2011-04-20 16:27:46 UTC (rev 863)
@@ -171,45 +171,97 @@
 ## computes all pairs of dot products
 ## between centred/scaled vectors
 ## of SNPs
-glDotProd <- function(x, center=FALSE, scale=FALSE, alleleAsUnit=FALSE){
+glDotProd <- function(x, center=FALSE, scale=FALSE, alleleAsUnit=FALSE,
+                      multicore=require("multicore"), n.cores=NULL){
     if(!inherits(x, "genlight")) stop("x is not a genlight object")
 
-    ## GET INPUTS TO C PROCEDURE ##
-    if(center){
-        mu <- glMean(x,alleleAsUnit=alleleAsUnit)
-    } else {
-        mu <- rep(0, nLoc(x))
+    ## SOME CHECKS ##
+    if(multicore && !require(multicore)) stop("multicore package requested but not installed")
+    if(multicore && is.null(n.cores)){
+        n.cores <- multicore:::detectCores()
     }
 
-    if(scale){
-        s <- sqrt(glVar(x,alleleAsUnit=alleleAsUnit))
-        if(any(s<1e-10)) {
-            warning("Null variances have been detected; corresponding alleles won't be standardized.")
+
+    ## STORE USEFUL INFO ##
+    N <- nInd(x)
+    ind.names <- indNames(x)
+
+
+    if(!multicore){ # DO NOT USE MULTIPLE CORES
+        ## GET INPUTS TO C PROCEDURE ##
+        if(center){
+            mu <- glMean(x,alleleAsUnit=alleleAsUnit)
+        } else {
+            mu <- rep(0, nLoc(x))
         }
-    } else {
-        s <- rep(1, nLoc(x))
-    }
 
-    vecbyte <- unlist(lapply(x at gen, function(e) e$snp))
-    nbVec <- sapply(x at gen, function(e) length(e$snp))
-    nbNa <- sapply(NA.posi(x), length)
-    naPosi <- unlist(NA.posi(x))
-    lowerTriSize <- (nInd(x)*(nInd(x)-1))/2
-    resSize <- lowerTriSize + nInd(x)
+        if(scale){
+            s <- sqrt(glVar(x,alleleAsUnit=alleleAsUnit))
+            if(any(s<1e-10)) {
+                warning("Null variances have been detected; corresponding alleles won't be standardized.")
+            }
+        } else {
+            s <- rep(1, nLoc(x))
+        }
 
-    ## CALL C FUNCTION ##
-    temp <- .C("GLdotProd", vecbyte, nbVec, length(x at gen[[1]]@snp[[1]]), nbNa, naPosi, nInd(x), nLoc(x), ploidy(x),
-               as.double(mu), as.double(s), as.integer(!alleleAsUnit), double(resSize), PACKAGE="adegenet")[[12]]
+        vecbyte <- unlist(lapply(x at gen, function(e) e$snp))
+        nbVec <- sapply(x at gen, function(e) length(e$snp))
+        nbNa <- sapply(NA.posi(x), length)
+        naPosi <- unlist(NA.posi(x))
+        lowerTriSize <- (nInd(x)*(nInd(x)-1))/2
+        resSize <- lowerTriSize + nInd(x)
 
+        ## CALL C FUNCTION ##
+        temp <- .C("GLdotProd", vecbyte, nbVec, length(x at gen[[1]]@snp[[1]]), nbNa, naPosi, nInd(x), nLoc(x), ploidy(x),
+                   as.double(mu), as.double(s), as.integer(!alleleAsUnit), double(resSize), PACKAGE="adegenet")[[12]]
+    } else { # USE MULTIPLE CORES
+        x <- seploc(x, n.block = n.cores) # one block per core (x is now a list of genlight)
+        temp <- list()
+        i <- 0
+        for(block in x){
+            i <- i+1
+            ## GET INPUTS TO C PROCEDURE ##
+            if(center){
+                mu <- glMean(block,alleleAsUnit=alleleAsUnit)
+            } else {
+                mu <- rep(0, nLoc(block))
+            }
+
+            if(scale){
+                s <- sqrt(glVar(block,alleleAsUnit=alleleAsUnit))
+                if(any(s<1e-10)) {
+                    warning("Null variances have been detected; corresponding alleles won't be standardized.")
+                }
+            } else {
+                s <- rep(1, nLoc(block))
+            }
+
+            vecbyte <- unlist(lapply(block at gen, function(e) e$snp))
+            nbVec <- sapply(block at gen, function(e) length(e$snp))
+            nbNa <- sapply(NA.posi(block), length)
+            naPosi <- unlist(NA.posi(block))
+            lowerTriSize <- (nInd(block)*(nInd(block)-1))/2
+            resSize <- lowerTriSize + nInd(block)
+
+            ## CALL C FUNCTION ##
+            temp[[i]] <- .C("GLdotProd", vecbyte, nbVec, length(block at gen[[1]]@snp[[1]]), nbNa, naPosi, nInd(block), nLoc(block), ploidy(block),
+                       as.double(mu), as.double(s), as.integer(!alleleAsUnit), double(resSize), PACKAGE="adegenet")[[12]]
+        }
+
+
+        ## POOL BLOCK RESULTS TOGETHER ##
+        temp <- Reduce("+", temp)
+    }
+
     res <- temp[1:lowerTriSize]
-    attr(res,"Size") <- nInd(x)
+    attr(res,"Size") <- N
     attr(res,"Diag") <- FALSE
     attr(res,"Upper") <- FALSE
     class(res) <- "dist"
     res <- as.matrix(res)
     diag(res) <- temp[(lowerTriSize+1):length(temp)]
 
-    colnames(res) <- rownames(res) <- indNames(x)
+    colnames(res) <- rownames(res) <- ind.names
     return(res)
 } # end glDotProd
 
@@ -694,3 +746,9 @@
 ## res3 <- apply(as.matrix(x)/ploidy(x),2,sum,na.rm=TRUE)
 ## all.equal(res1,res3)
 ## all.equal(res2,res3)
+
+
+## TEST PARALLELE C COMPUTATIONS IN GLDOTPROD
+## toto <- glDotProd(x,multi=TRUE)
+## titi <- glDotProd(x,multi=FALSE)
+## all.equal(toto,titi)



More information about the adegenet-commits mailing list