[adegenet-commits] r795 - in pkg: R src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Feb 9 12:35:57 CET 2011
Author: jombart
Date: 2011-02-09 12:35:57 +0100 (Wed, 09 Feb 2011)
New Revision: 795
Modified:
pkg/R/glFunctions.R
pkg/src/SNPbin.c
Log:
glDotProd works but issues remains with NAs.
Modified: pkg/R/glFunctions.R
===================================================================
--- pkg/R/glFunctions.R 2011-02-08 18:52:34 UTC (rev 794)
+++ pkg/R/glFunctions.R 2011-02-09 11:35:57 UTC (rev 795)
@@ -163,6 +163,9 @@
if(scale){
s <- sqrt(glVar(x,alleleAsUnit=FALSE))
+ if(any(s<1e-10)) {
+ warning("Null variances have been detected; corresponding alleles won't be standardized.")
+ }
} else {
s <- rep(1, nLoc(x))
}
Modified: pkg/src/SNPbin.c
===================================================================
--- pkg/src/SNPbin.c 2011-02-08 18:52:34 UTC (rev 794)
+++ pkg/src/SNPbin.c 2011-02-09 11:35:57 UTC (rev 795)
@@ -26,9 +26,9 @@
#include <R_ext/Utils.h>
#include "adesub.h"
+#define NEARZERO 0.0000000001
-
/*
========================
=== CLASS DEFINITION ===
@@ -310,7 +310,7 @@
for(i=0;i<P;i++){
if(snpbin_isna(x,i) == 0 && snpbin_isna(y,i) == 0){
/* res += ((vecx[i]-mean[i])/sd[i]) * ((vecy[i]-mean[i])/sd[i]); */
- res = res + ((vecx[i]-mean[i])/sd[i]) * ((vecy[i]-mean[i])/sd[i]);
+ res += ((vecx[i]-mean[i])/sd[i]) * ((vecy[i]-mean[i])/sd[i]);
/* printf("\ntemp value of increment: %f", ((vecx[i]-mean[i])/sd[i]) * ((vecy[i]-mean[i])/sd[i])); */
/* printf("\ntemp value of result: %f", res); */
}
@@ -341,8 +341,8 @@
out.x[i] = makesnpbin(&gen[idxByteVec], byteveclength, &nbvecperind[i], nloc, &nbnaperind[i], &naposi[idxNAVec]);
idxByteVec += *byteveclength * nbvecperind[i]; /* update index in byte array */
idxNAVec += nbnaperind[i]; /* update index in byte array */
- printf("\nimported genotype %i: ", i+1);
- printsnpbin(&out.x[i]);
+ /* printf("\nimported genotype %i: ", i+1); */
+ /* printsnpbin(&out.x[i]); */
}
/* printf("step 3"); */
@@ -362,15 +362,20 @@
void GLdotProd(unsigned char *gen, int *nbvecperind, int *byteveclength, int *nbnaperind, int *naposi, int *nind, int *nloc, double *mean, double *sd, double *res){
struct genlightC dat;
int i, j, k=0;
- /* printf("\nstep 1\n"); */
+ /* Check variance vector: do not divide by 0 */
+ for(i=0;i< *nloc;i++){
+ if(sd[i] < NEARZERO){
+ sd[i] = 1;
+ }
+ }
+
dat = genlightTogenlightC(gen, nbvecperind, byteveclength, nbnaperind, naposi, nind, nloc);
- /* printf("\nstep 2\n"); */
/* Lower triangle - without the diagonal */
for(i=0; i< (*nind-1); i++){
for(j=i+1; j< *nind; j++){
- printf("\n == pair %i-%i ==\n", i+1,j+1);
+ /* printf("\n == pair %i-%i ==\n", i+1,j+1); */
res[k] = snpbin_dotprod(&dat.x[i], &dat.x[j], mean, sd);
++k;
}
@@ -378,7 +383,7 @@
/* add the diagonal to the end of the array */
for(i=0; i< *nind; i++){
- printf("\n == pair %i-%i == \n", i+1,i+1);
+ /* printf("\n == pair %i-%i == \n", i+1,i+1); */
res[k] = snpbin_dotprod(&dat.x[i], &dat.x[i], mean, sd);
++k;
}
@@ -425,23 +430,15 @@
## === DOT PRODUCTS === ##
+
+#### NO LONGER NEEDED ####
+
library(adegenet)
library(ade4)
dat <- rbind("a"=c(1,0,0), "b"=c(1,2,1), "c"=c(1,0,1))
x <- new("genlight",dat)
-#### NO LONGER NEEDED ####
-##mu <- glMean(x,alleleAsUnit=FALSE)
-##s <- sqrt(glVar(x,alleleAsUnit=FALSE))
-##vecbyte <- unlist(lapply(x at gen, function(e) e$snp))
-##nbVec <- sapply(x at gen, function(e) length(e$snp))
-##nbNa <- sapply(NA.posi(x), length)
-##naposi <- unlist(NA.posi(x))
-##resSize <- (nInd(x)*(nInd(x)-1))/2 + nInd(x)
-
-##temp <- .C("GLdotProd", vecbyte, nbVec, length(x at gen[[1]]@snp), nbNa, naposi, nInd(x), nLoc(x), as.double(rep(0,3)), as.double(rep(1,3)), double(resSize), PACKAGE="adegenet")[[10]]
-
## NOT CENTRED, NOT SCALED
glDotProd(x)
@@ -455,28 +452,49 @@
## RANDOM DATA
-dat <- matrix(sample(0:1, 5*8, replace=TRUE), nrow=5)
+dat <- matrix(sample(0:1, 5*1000, replace=TRUE), nrow=5)
x <- new("genlight",dat)
-glDotProd(x)
+res1 <- glDotProd(x)
-as.matrix(x) %*% t(as.matrix(x))
+res2 <- as.matrix(x) %*% t(as.matrix(x))
+all(res1==res2)
+
## CENTRED, NOT SCALED
-glDotProd(x, cent=TRUE)
+res1 <- glDotProd(x, cent=TRUE)
-
temp <- as.matrix(x) / ploidy(x)
temp <- scalewt(temp, cent=TRUE, scale=FALSE)
res2 <- temp %*% t(temp)
res2
+all(abs(res1-res2)<1e-10)
-## centred, scaled
-.C("GLdotProd", vecbyte, nbVec, length(x at gen[[1]]@snp), nbNa, naposi, nInd(x), nLoc(x), as.double(mu), as.double(s), double(nLoc(x)), PACKAGE="adegenet")
+## CENTRED, SCALED
+res1 <- glDotProd(x, cent=TRUE, scale=TRUE)
-.C("GLdotProd", raw(3), integer(1), integer(1), integer(1), integer(1), integer(1), integer(1), double(3), double(3), double(3), PACKAGE="adegenet")
- # GLdotProd(unsigned char *gen, int *nbvecperind, int *byteveclength, int *nbnaperind, int *naposi, int *nind, int *nloc, double *mean, double *sd, double *res)
+temp <- as.matrix(x) / ploidy(x)
+temp <- scalewt(temp, cent=TRUE, scale=TRUE)
+res2 <- temp %*% t(temp)
+res2
+
+all(abs(res1-res2)<1e-10)
+
+
+
+
+## TEST WITH NAs
+dat <- lapply(1:50, function(i) sample(c(0,1,NA), 1e4, prob=c(.5, .49, .01), replace=TRUE))
+names(dat) <- paste("indiv", 1:length(dat))
+print(object.size(dat), unit="aut") # size of the original data
+
+x <- new("genlight", dat) # conversion
+x
+
+dat <- matrix(sample(0:1, 5*1000, replace=TRUE), nrow=5)
+x <- new("genlight",dat)
+
*/
More information about the adegenet-commits
mailing list