[adegenet-commits] r802 - pkg/src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Feb 10 12:35:09 CET 2011


Author: jombart
Date: 2011-02-10 12:35:09 +0100 (Thu, 10 Feb 2011)
New Revision: 802

Modified:
   pkg/src/GLfunctions.c
   pkg/src/snpbin.c
   pkg/src/snpbin.h
Log:
Gruiiiiiii !
Dot products in C works, with frequencies or absolute numbers of alleles.


Modified: pkg/src/GLfunctions.c
===================================================================
--- pkg/src/GLfunctions.c	2011-02-09 19:26:02 UTC (rev 801)
+++ pkg/src/GLfunctions.c	2011-02-10 11:35:09 UTC (rev 802)
@@ -33,20 +33,40 @@
 
 	dat = genlightTogenlightC(gen, nbvecperind, byteveclength, nbnaperind, naposi, nind, nloc, ploidy);
 
-	/* Lower triangle - without the diagonal */
-	for(i=0; i< (*nind-1); i++){
-		for(j=i+1; j< *nind; j++){
-			/* printf("\n == pair %i-%i ==\n", i+1,j+1); */
-			res[k] = snpbin_dotprod(&dat.x[i], &dat.x[j], mean, sd, freq);
+	if(*freq){
+		/* === working on frequencies === */
+		/* Lower triangle - without the diagonal */
+		for(i=0; i< (*nind-1); i++){
+			for(j=i+1; j< *nind; j++){
+				/* printf("\n == pair %i-%i ==\n", i+1,j+1); */
+				res[k] = snpbin_dotprod_freq(&dat.x[i], &dat.x[j], mean, sd);
+				++k;
+			}
+		}
+
+		/* add the diagonal to the end of the array */
+		for(i=0; i< *nind; i++){
+			/* printf("\n == pair %i-%i == \n", i+1,i+1); */
+			res[k] = snpbin_dotprod_freq(&dat.x[i], &dat.x[i], mean, sd);
 			++k;
 		}
-	}
+	} else {
+		/* === working on frequencies === */
+		/* Lower triangle - without the diagonal */
+		for(i=0; i< (*nind-1); i++){
+			for(j=i+1; j< *nind; j++){
+				/* printf("\n == pair %i-%i ==\n", i+1,j+1); */
+				res[k] = snpbin_dotprod_int(&dat.x[i], &dat.x[j], mean, sd);
+				++k;
+			}
+		}
 
-	/* add the diagonal to the end of the array */
-	for(i=0; i< *nind; i++){
-		/* printf("\n == pair %i-%i == \n", i+1,i+1); */
-		res[k] = snpbin_dotprod(&dat.x[i], &dat.x[i], mean, sd, freq);
-		++k;
+		/* add the diagonal to the end of the array */
+		for(i=0; i< *nind; i++){
+			/* printf("\n == pair %i-%i == \n", i+1,i+1); */
+			res[k] = snpbin_dotprod_int(&dat.x[i], &dat.x[i], mean, sd);
+			++k;
+		}
 	}
 }
 
@@ -59,7 +79,7 @@
 
 /*
 
-## === DOT PRODUCTS === ##
+## === DOT PRODUCTS ALLELE COUNTS === ##
 
 library(adegenet)
 library(ade4)
@@ -67,63 +87,91 @@
 x <- new("genlight",dat)
 
 
-## NOT CENTRED, NOT SCALED
-glDotProd(x)
+## RANDOM DATA
+dat <- matrix(sample(0:1, 5*1000, replace=TRUE), nrow=5)
+x <- new("genlight",dat)
+res1 <- glDotProd(x, alle=TRUE)
+res2 <- as.matrix(x) %*% t(as.matrix(x))
+all(res1==res2)
 
-res2 <- as.matrix(x) %*% t(as.matrix(x))
+
+## CENTRED, NOT SCALED
+res1 <- glDotProd(x, cent=TRUE, alle=TRUE)
+temp <- as.matrix(x) / ploidy(x)
+temp <- scalewt(temp, cent=TRUE, scale=FALSE)
+res2 <- temp %*% t(temp)
 res2
+all(abs(res1-res2)<1e-10)
 
-## DATA > 8 SNPs
-dat <- rbind(rep(c(1,0,1), c(8,10,5)))
-x <- new("genlight",dat)
-glDotProd(x)
 
+## CENTRED, SCALED
+res1 <- glDotProd(x, cent=TRUE, scale=TRUE, alle=TRUE)
+temp <- as.matrix(x) / ploidy(x)
+temp <- scalewt(temp, cent=TRUE, scale=TRUE)
+res2 <- temp %*% t(temp)
+res2
+all(abs(res1-res2)<1e-10)
 
+
+## TEST WITH NAs
+library(adegenet)
+library(ade4)
+dat <- list(a=c(1,NA,0,0,2), b=c(1,2,3,4,0), c=c(NA,0,1,NA,2))
+x <- new("genlight", dat) # conversion
+x
+res1 <- glDotProd(x, alle=TRUE)
+t(data.frame(dat))
+res1
+
+
+
+
+
+
+
+## === DOT PRODUCTS ALLELE FREQUENCIES === ##
+
+library(adegenet)
+library(ade4)
+
+
 ## RANDOM DATA
-dat <- matrix(sample(0:1, 5*1000, replace=TRUE), nrow=5)
+dat <- rbind(matrix(sample(0:1, 3*1000, replace=TRUE), nrow=3), 
+             matrix(sample(0:2, 2*1000, replace=TRUE), nrow=2))
 x <- new("genlight",dat)
 res1 <- glDotProd(x)
-
-res2 <- as.matrix(x) %*% t(as.matrix(x))
-
+temp <- as.matrix(x) / ploidy(x)
+res2 <- temp %*% t(temp)
 all(res1==res2)
 
 
 ## CENTRED, NOT SCALED
-res1 <- glDotProd(x, cent=TRUE)
-
-temp <- as.matrix(x) / ploidy(x)
+res1 <- glDotProd(x, cent=TRUE, alle=FALSE)
 temp <- scalewt(temp, cent=TRUE, scale=FALSE)
 res2 <- temp %*% t(temp)
 res2
-
 all(abs(res1-res2)<1e-10)
 
 
 ## CENTRED, SCALED
-res1 <- glDotProd(x, cent=TRUE, scale=TRUE)
-
+res1 <- glDotProd(x, cent=TRUE, scale=TRUE, alle=FALSE)
 temp <- as.matrix(x) / ploidy(x)
 temp <- scalewt(temp, cent=TRUE, scale=TRUE)
 res2 <- temp %*% t(temp)
 res2
-
 all(abs(res1-res2)<1e-10)
 
 
 ## TEST WITH NAs
 library(adegenet)
 library(ade4)
-
 dat <- list(a=c(1,NA,0,0,2), b=c(1,2,3,4,0), c=c(NA,0,1,NA,2))
-
 x <- new("genlight", dat) # conversion
 x
-
-res1 <- glDotProd(x)
-t(data.frame(dat))
+res1 <- glDotProd(x, alle=FALSE)
+temp <- as.matrix(x)/ploidy(x)
+temp
 res1
 
-
 */
 

Modified: pkg/src/snpbin.c
===================================================================
--- pkg/src/snpbin.c	2011-02-09 19:26:02 UTC (rev 801)
+++ pkg/src/snpbin.c	2011-02-10 11:35:09 UTC (rev 802)
@@ -112,7 +112,30 @@
 
 
 
+/* Maps one byte from 0-255 to sequences of 8 (binary) double values */
+void byteToBinDouble(unsigned char in, double *out){
+	short rest, i, temp;
 
+	rest = (int) in;
+
+	/* initialize all values to 0*/
+	for(i=0;i<=7;i++)
+		out[i]=0.0;
+
+	for(i=7;i>=0;i--){
+		temp = pow(2, i);
+		if(rest >= temp) {
+			out[i] = 1.0;
+			rest = rest- temp;
+			if(rest == 0) break;
+		}
+	}
+}
+
+
+
+
+
 /* Maps an array of values from 0-255 to sequences of 8 binary values */
 /* Input are unsigned char (hexadecimal), outputs are integers */
 void bytesToBinInt(unsigned char *vecbytes, int *vecsize, int *vecres){
@@ -177,6 +200,29 @@
 
 
 
+void bytesToDouble(unsigned char *vecbytes, int *veclength, int *nbvec, double *vecres){
+	int i, j, k, idres=0; /* idres: index in vecres*/
+	double *temp;
+	temp = (double *) calloc(8, sizeof(double));
+
+
+	for(k=0;k<*nbvec;k++){ /* for all input vector */
+		idres = 0;
+		for(i=0;i<*veclength;i++){ /* for one input vector */
+			byteToBinDouble(vecbytes[i+ k* *veclength], temp); /* byte -> 8 double (0/1)*/
+			for(j=0;j<=7;j++){ /* fill in the result*/
+				vecres[j+idres] += temp[j];
+			}
+			idres = idres + 8;
+		}
+	}
+	free(temp);
+} /* end bytesToInt */
+
+
+
+
+
 /* 
    === MAP BINARY SNPS TO 1->256 SCALE ===
    - vecsnp: vector of integers (0/1)
@@ -265,10 +311,10 @@
 /* transform a snpbin into a vector of frequencies (double) */
 void snpbin2freq(struct snpbin *x, double *out){
 	double ploid = (double) ploidy(x);
+	bytesToDouble(x->bytevec, x->byteveclength, x->bytevecnb, out);
 	int i;
- 	bytesToInt(x->bytevec, x->byteveclength, x->bytevecnb, (int *) out);
-	out = (double *) out; /* casting back to double */
-	for(i=0; i< nLoc(x); i++){
+ 	
+	for(i=0; i < nLoc(x); i++){
 		out[i] = out[i] / ploid;
 	}
 /*reminders:
@@ -322,27 +368,67 @@
 /* Function to compute one dot products between two individuals */
 /* centring and scaling is always used */
 /* but need to pass vectors of 0 and 1*/
-double snpbin_dotprod(struct snpbin *x, struct snpbin *y, double *mean, double *sd, bool *freq){
+double snpbin_dotprod_int(struct snpbin *x, struct snpbin *y, double *mean, double *sd){
 	/* define variables, allocate memory */
 	int P = nLoc(x), i;
 	short isna;
 	double res = 0.0;
-	double *vecx, *vecy;
-	vecx = (double *) calloc(P, sizeof(double));
-	vecy = (double *) calloc(P, sizeof(double));
+	int *vecx, *vecy;
+	vecx = (int *) calloc(P, sizeof(int));
+	vecy = (int *) calloc(P, sizeof(int));
 
+	/* conversion to integers or frequencies*/
+	snpbin2intvec(x, (int *) vecx);
+	snpbin2intvec(y, (int *) vecy);
 
-	/* conversion to integers or frequencies*/
-	if(*freq){
-		snpbin2freq(x, vecx);
-		snpbin2freq(y, vecy);
-	} else {
-		snpbin2intvec(x, (int *) vecx);
-		snpbin2intvec(y, (int *) vecy);
+
+	/* printf("\nvector x: \n"); */
+	/* for(i=0;i<P;i++){ */
+	/* 	printf("%i", vecx[i]); */
+	/* } */
+
+	/* printf("\nvector y: \n"); */
+	/* for(i=0;i<P;i++){ */
+	/* 	printf("%i", vecy[i]); */
+	/* } */
+
+	/* compute dot product */
+	int count=0;
+	for(i=0;i<P;i++){
+		if(snpbin_isna(x,i) == 0 && snpbin_isna(y,i) == 0){
+			/* res += ((vecx[i]-mean[i])/sd[i]) * ((vecy[i]-mean[i])/sd[i]); */
+			res += ((vecx[i]-mean[i])/sd[i]) * ((vecy[i]-mean[i])/sd[i]);
+			/* printf("\ntemp value of increment: %f", ((vecx[i]-mean[i])/sd[i]) * ((vecy[i]-mean[i])/sd[i])); */
+			/* printf("\ntemp value of result: %f", res); */
+		}
 	}
 
+	/* free memory */
+	free(vecx);
+	free(vecy);
 
+	return res;
+}
 
+
+
+
+
+
+double snpbin_dotprod_freq(struct snpbin *x, struct snpbin *y, double *mean, double *sd){
+	/* define variables, allocate memory */
+	int P = nLoc(x), i;
+	short isna;
+	double res = 0.0;
+	double *vecx, *vecy;
+	vecx = (double *) calloc(P, sizeof(double));
+	vecy = (double *) calloc(P, sizeof(double));
+	
+	/* conversion to integers or frequencies*/
+	snpbin2freq(x, vecx);
+	snpbin2freq(y, vecy);
+	
+
 	/* printf("\nvector x: \n"); */
 	/* for(i=0;i<P;i++){ */
 	/* 	printf("%i", vecx[i]); */
@@ -374,6 +460,8 @@
 
 
 
+
+
 /* Function to convert a 'genlight' object (R side) into an array of 'snpbin' (C side) */
 /* Each component of the genlight is concatenated into a single vector */
 /* and then used to create different 'snpbin' on the C side */

Modified: pkg/src/snpbin.h
===================================================================
--- pkg/src/snpbin.h	2011-02-09 19:26:02 UTC (rev 801)
+++ pkg/src/snpbin.h	2011-02-10 11:35:09 UTC (rev 802)
@@ -46,6 +46,7 @@
 
 
 void byteToBinInt(unsigned char in, int *out);
+void byteToBinDouble(unsigned char in, double *out);
 void bytesToBinInt(unsigned char *vecbytes, int *vecsize, int *vecres);
 struct snpbin makesnpbin(unsigned char *bytevec, int *byteveclength, int *bytevecnb, int *nloc, int *nanb, int *naposi, int *ploidy);
 
@@ -60,6 +61,7 @@
 */
 
 void bytesToInt(unsigned char *vecbytes, int *veclength, int *nbvec, int *vecres);
+void bytesToDouble(unsigned char *vecbytes, int *veclength, int *nbvec, double *vecres);
 void binIntToBytes(int *vecsnp, int *vecsize, unsigned char *vecres, int *ressize);
 
 
@@ -78,7 +80,8 @@
 void snpbin2freq(struct snpbin *x, double *out);
 void printsnpbin(struct snpbin *x);
 short int snpbin_isna(struct snpbin *x, int i);
-double snpbin_dotprod(struct snpbin *x, struct snpbin *y, double *mean, double *sd, bool *freq);
+double snpbin_dotprod_int(struct snpbin *x, struct snpbin *y, double *mean, double *sd);
+double snpbin_dotprod_freq(struct snpbin *x, struct snpbin *y, double *mean, double *sd);
 struct genlightC genlightTogenlightC(unsigned char *gen, int *nbvecperind, int *byteveclength, int *nbnaperind, int *naposi, int *nind, int *nloc, int *ploidy);
 
 



More information about the adegenet-commits mailing list