[adegenet-commits] r795 - in pkg: R src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Feb 9 12:35:57 CET 2011


Author: jombart
Date: 2011-02-09 12:35:57 +0100 (Wed, 09 Feb 2011)
New Revision: 795

Modified:
   pkg/R/glFunctions.R
   pkg/src/SNPbin.c
Log:
glDotProd works but issues remains with NAs.


Modified: pkg/R/glFunctions.R
===================================================================
--- pkg/R/glFunctions.R	2011-02-08 18:52:34 UTC (rev 794)
+++ pkg/R/glFunctions.R	2011-02-09 11:35:57 UTC (rev 795)
@@ -163,6 +163,9 @@
 
     if(scale){
         s <- sqrt(glVar(x,alleleAsUnit=FALSE))
+        if(any(s<1e-10)) {
+            warning("Null variances have been detected; corresponding alleles won't be standardized.")
+        }
     } else {
         s <- rep(1, nLoc(x))
     }

Modified: pkg/src/SNPbin.c
===================================================================
--- pkg/src/SNPbin.c	2011-02-08 18:52:34 UTC (rev 794)
+++ pkg/src/SNPbin.c	2011-02-09 11:35:57 UTC (rev 795)
@@ -26,9 +26,9 @@
 #include <R_ext/Utils.h>
 #include "adesub.h"
 
+#define NEARZERO 0.0000000001
 
 
-
 /*
    ========================
    === CLASS DEFINITION ===
@@ -310,7 +310,7 @@
 	for(i=0;i<P;i++){
 		if(snpbin_isna(x,i) == 0 && snpbin_isna(y,i) == 0){
 			/* res += ((vecx[i]-mean[i])/sd[i]) * ((vecy[i]-mean[i])/sd[i]); */
-			res = res + ((vecx[i]-mean[i])/sd[i]) * ((vecy[i]-mean[i])/sd[i]);
+			res += ((vecx[i]-mean[i])/sd[i]) * ((vecy[i]-mean[i])/sd[i]);
 			/* printf("\ntemp value of increment: %f", ((vecx[i]-mean[i])/sd[i]) * ((vecy[i]-mean[i])/sd[i])); */
 			/* printf("\ntemp value of result: %f", res); */
 		}
@@ -341,8 +341,8 @@
 		out.x[i] = makesnpbin(&gen[idxByteVec], byteveclength, &nbvecperind[i], nloc, &nbnaperind[i], &naposi[idxNAVec]);
 		idxByteVec += *byteveclength * nbvecperind[i]; /* update index in byte array */
 		idxNAVec +=  nbnaperind[i]; /* update index in byte array */
-		printf("\nimported genotype %i: ", i+1);
-		printsnpbin(&out.x[i]);
+		/* printf("\nimported genotype %i: ", i+1); */
+		/* printsnpbin(&out.x[i]); */
 	}
 	
 	/* printf("step 3"); */
@@ -362,15 +362,20 @@
 void GLdotProd(unsigned char *gen, int *nbvecperind, int *byteveclength, int *nbnaperind, int *naposi, int *nind, int *nloc, double *mean, double *sd, double *res){
 	struct genlightC dat;
 	int i, j, k=0;
-	/* printf("\nstep 1\n"); */
 
+	/* Check variance vector: do not divide by 0 */
+	for(i=0;i< *nloc;i++){
+		if(sd[i] < NEARZERO){
+			sd[i] = 1;
+		}
+	}
+
 	dat = genlightTogenlightC(gen, nbvecperind, byteveclength, nbnaperind, naposi, nind, nloc);
 
-	/* printf("\nstep 2\n"); */
 	/* Lower triangle - without the diagonal */
 	for(i=0; i< (*nind-1); i++){
 		for(j=i+1; j< *nind; j++){
-			printf("\n == pair %i-%i ==\n", i+1,j+1);
+			/* printf("\n == pair %i-%i ==\n", i+1,j+1); */
 			res[k] = snpbin_dotprod(&dat.x[i], &dat.x[j], mean, sd);
 			++k;
 		}
@@ -378,7 +383,7 @@
 
 	/* add the diagonal to the end of the array */
 	for(i=0; i< *nind; i++){
-		printf("\n == pair %i-%i == \n", i+1,i+1);
+		/* printf("\n == pair %i-%i == \n", i+1,i+1); */
 		res[k] = snpbin_dotprod(&dat.x[i], &dat.x[i], mean, sd);
 		++k;
 	}
@@ -425,23 +430,15 @@
 
 
 ## === DOT PRODUCTS === ##
+
+#### NO LONGER NEEDED ####
+
 library(adegenet)
 library(ade4)
 dat <- rbind("a"=c(1,0,0), "b"=c(1,2,1), "c"=c(1,0,1))
 x <- new("genlight",dat)
 
-#### NO LONGER NEEDED ####
-##mu <- glMean(x,alleleAsUnit=FALSE)
-##s <- sqrt(glVar(x,alleleAsUnit=FALSE))
 
-##vecbyte <- unlist(lapply(x at gen, function(e) e$snp))
-##nbVec <- sapply(x at gen, function(e) length(e$snp))
-##nbNa <- sapply(NA.posi(x), length)
-##naposi <- unlist(NA.posi(x))
-##resSize <- (nInd(x)*(nInd(x)-1))/2 + nInd(x)
-
-##temp <- .C("GLdotProd", vecbyte, nbVec, length(x at gen[[1]]@snp), nbNa, naposi, nInd(x), nLoc(x), as.double(rep(0,3)), as.double(rep(1,3)), double(resSize), PACKAGE="adegenet")[[10]]
-
 ## NOT CENTRED, NOT SCALED
 glDotProd(x)
 
@@ -455,28 +452,49 @@
 
 
 ## RANDOM DATA
-dat <- matrix(sample(0:1, 5*8, replace=TRUE), nrow=5)
+dat <- matrix(sample(0:1, 5*1000, replace=TRUE), nrow=5)
 x <- new("genlight",dat)
-glDotProd(x)
+res1 <- glDotProd(x)
 
-as.matrix(x) %*% t(as.matrix(x))
+res2 <- as.matrix(x) %*% t(as.matrix(x))
 
+all(res1==res2)
 
+
 ## CENTRED, NOT SCALED
-glDotProd(x, cent=TRUE)
+res1 <- glDotProd(x, cent=TRUE)
 
-
 temp <- as.matrix(x) / ploidy(x)
 temp <- scalewt(temp, cent=TRUE, scale=FALSE)
 res2 <- temp %*% t(temp)
 res2
 
+all(abs(res1-res2)<1e-10)
 
-## centred, scaled
-.C("GLdotProd", vecbyte, nbVec, length(x at gen[[1]]@snp), nbNa, naposi, nInd(x), nLoc(x), as.double(mu), as.double(s), double(nLoc(x)), PACKAGE="adegenet")
 
+## CENTRED, SCALED
+res1 <- glDotProd(x, cent=TRUE, scale=TRUE)
 
-.C("GLdotProd", raw(3), integer(1), integer(1), integer(1), integer(1), integer(1), integer(1), double(3), double(3), double(3), PACKAGE="adegenet")
- # GLdotProd(unsigned char *gen, int *nbvecperind, int *byteveclength, int *nbnaperind, int *naposi, int *nind, int *nloc, double *mean, double *sd, double *res)
+temp <- as.matrix(x) / ploidy(x)
+temp <- scalewt(temp, cent=TRUE, scale=TRUE)
+res2 <- temp %*% t(temp)
+res2
+
+all(abs(res1-res2)<1e-10)
+
+
+
+
+## TEST WITH NAs
+dat <- lapply(1:50, function(i) sample(c(0,1,NA), 1e4, prob=c(.5, .49, .01), replace=TRUE))
+names(dat) <- paste("indiv", 1:length(dat))
+print(object.size(dat), unit="aut") # size of the original data
+
+x <- new("genlight", dat) # conversion
+x
+
+dat <- matrix(sample(0:1, 5*1000, replace=TRUE), nrow=5)
+x <- new("genlight",dat)
+
 */
 



More information about the adegenet-commits mailing list