[adegenet-commits] r802 - pkg/src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Feb 10 12:35:09 CET 2011
Author: jombart
Date: 2011-02-10 12:35:09 +0100 (Thu, 10 Feb 2011)
New Revision: 802
Modified:
pkg/src/GLfunctions.c
pkg/src/snpbin.c
pkg/src/snpbin.h
Log:
Gruiiiiiii !
Dot products in C works, with frequencies or absolute numbers of alleles.
Modified: pkg/src/GLfunctions.c
===================================================================
--- pkg/src/GLfunctions.c 2011-02-09 19:26:02 UTC (rev 801)
+++ pkg/src/GLfunctions.c 2011-02-10 11:35:09 UTC (rev 802)
@@ -33,20 +33,40 @@
dat = genlightTogenlightC(gen, nbvecperind, byteveclength, nbnaperind, naposi, nind, nloc, ploidy);
- /* Lower triangle - without the diagonal */
- for(i=0; i< (*nind-1); i++){
- for(j=i+1; j< *nind; j++){
- /* printf("\n == pair %i-%i ==\n", i+1,j+1); */
- res[k] = snpbin_dotprod(&dat.x[i], &dat.x[j], mean, sd, freq);
+ if(*freq){
+ /* === working on frequencies === */
+ /* Lower triangle - without the diagonal */
+ for(i=0; i< (*nind-1); i++){
+ for(j=i+1; j< *nind; j++){
+ /* printf("\n == pair %i-%i ==\n", i+1,j+1); */
+ res[k] = snpbin_dotprod_freq(&dat.x[i], &dat.x[j], mean, sd);
+ ++k;
+ }
+ }
+
+ /* add the diagonal to the end of the array */
+ for(i=0; i< *nind; i++){
+ /* printf("\n == pair %i-%i == \n", i+1,i+1); */
+ res[k] = snpbin_dotprod_freq(&dat.x[i], &dat.x[i], mean, sd);
++k;
}
- }
+ } else {
+ /* === working on frequencies === */
+ /* Lower triangle - without the diagonal */
+ for(i=0; i< (*nind-1); i++){
+ for(j=i+1; j< *nind; j++){
+ /* printf("\n == pair %i-%i ==\n", i+1,j+1); */
+ res[k] = snpbin_dotprod_int(&dat.x[i], &dat.x[j], mean, sd);
+ ++k;
+ }
+ }
- /* add the diagonal to the end of the array */
- for(i=0; i< *nind; i++){
- /* printf("\n == pair %i-%i == \n", i+1,i+1); */
- res[k] = snpbin_dotprod(&dat.x[i], &dat.x[i], mean, sd, freq);
- ++k;
+ /* add the diagonal to the end of the array */
+ for(i=0; i< *nind; i++){
+ /* printf("\n == pair %i-%i == \n", i+1,i+1); */
+ res[k] = snpbin_dotprod_int(&dat.x[i], &dat.x[i], mean, sd);
+ ++k;
+ }
}
}
@@ -59,7 +79,7 @@
/*
-## === DOT PRODUCTS === ##
+## === DOT PRODUCTS ALLELE COUNTS === ##
library(adegenet)
library(ade4)
@@ -67,63 +87,91 @@
x <- new("genlight",dat)
-## NOT CENTRED, NOT SCALED
-glDotProd(x)
+## RANDOM DATA
+dat <- matrix(sample(0:1, 5*1000, replace=TRUE), nrow=5)
+x <- new("genlight",dat)
+res1 <- glDotProd(x, alle=TRUE)
+res2 <- as.matrix(x) %*% t(as.matrix(x))
+all(res1==res2)
-res2 <- as.matrix(x) %*% t(as.matrix(x))
+
+## CENTRED, NOT SCALED
+res1 <- glDotProd(x, cent=TRUE, alle=TRUE)
+temp <- as.matrix(x) / ploidy(x)
+temp <- scalewt(temp, cent=TRUE, scale=FALSE)
+res2 <- temp %*% t(temp)
res2
+all(abs(res1-res2)<1e-10)
-## DATA > 8 SNPs
-dat <- rbind(rep(c(1,0,1), c(8,10,5)))
-x <- new("genlight",dat)
-glDotProd(x)
+## CENTRED, SCALED
+res1 <- glDotProd(x, cent=TRUE, scale=TRUE, alle=TRUE)
+temp <- as.matrix(x) / ploidy(x)
+temp <- scalewt(temp, cent=TRUE, scale=TRUE)
+res2 <- temp %*% t(temp)
+res2
+all(abs(res1-res2)<1e-10)
+
+## TEST WITH NAs
+library(adegenet)
+library(ade4)
+dat <- list(a=c(1,NA,0,0,2), b=c(1,2,3,4,0), c=c(NA,0,1,NA,2))
+x <- new("genlight", dat) # conversion
+x
+res1 <- glDotProd(x, alle=TRUE)
+t(data.frame(dat))
+res1
+
+
+
+
+
+
+
+## === DOT PRODUCTS ALLELE FREQUENCIES === ##
+
+library(adegenet)
+library(ade4)
+
+
## RANDOM DATA
-dat <- matrix(sample(0:1, 5*1000, replace=TRUE), nrow=5)
+dat <- rbind(matrix(sample(0:1, 3*1000, replace=TRUE), nrow=3),
+ matrix(sample(0:2, 2*1000, replace=TRUE), nrow=2))
x <- new("genlight",dat)
res1 <- glDotProd(x)
-
-res2 <- as.matrix(x) %*% t(as.matrix(x))
-
+temp <- as.matrix(x) / ploidy(x)
+res2 <- temp %*% t(temp)
all(res1==res2)
## CENTRED, NOT SCALED
-res1 <- glDotProd(x, cent=TRUE)
-
-temp <- as.matrix(x) / ploidy(x)
+res1 <- glDotProd(x, cent=TRUE, alle=FALSE)
temp <- scalewt(temp, cent=TRUE, scale=FALSE)
res2 <- temp %*% t(temp)
res2
-
all(abs(res1-res2)<1e-10)
## CENTRED, SCALED
-res1 <- glDotProd(x, cent=TRUE, scale=TRUE)
-
+res1 <- glDotProd(x, cent=TRUE, scale=TRUE, alle=FALSE)
temp <- as.matrix(x) / ploidy(x)
temp <- scalewt(temp, cent=TRUE, scale=TRUE)
res2 <- temp %*% t(temp)
res2
-
all(abs(res1-res2)<1e-10)
## TEST WITH NAs
library(adegenet)
library(ade4)
-
dat <- list(a=c(1,NA,0,0,2), b=c(1,2,3,4,0), c=c(NA,0,1,NA,2))
-
x <- new("genlight", dat) # conversion
x
-
-res1 <- glDotProd(x)
-t(data.frame(dat))
+res1 <- glDotProd(x, alle=FALSE)
+temp <- as.matrix(x)/ploidy(x)
+temp
res1
-
*/
Modified: pkg/src/snpbin.c
===================================================================
--- pkg/src/snpbin.c 2011-02-09 19:26:02 UTC (rev 801)
+++ pkg/src/snpbin.c 2011-02-10 11:35:09 UTC (rev 802)
@@ -112,7 +112,30 @@
+/* Maps one byte from 0-255 to sequences of 8 (binary) double values */
+void byteToBinDouble(unsigned char in, double *out){
+ short rest, i, temp;
+ rest = (int) in;
+
+ /* initialize all values to 0*/
+ for(i=0;i<=7;i++)
+ out[i]=0.0;
+
+ for(i=7;i>=0;i--){
+ temp = pow(2, i);
+ if(rest >= temp) {
+ out[i] = 1.0;
+ rest = rest- temp;
+ if(rest == 0) break;
+ }
+ }
+}
+
+
+
+
+
/* Maps an array of values from 0-255 to sequences of 8 binary values */
/* Input are unsigned char (hexadecimal), outputs are integers */
void bytesToBinInt(unsigned char *vecbytes, int *vecsize, int *vecres){
@@ -177,6 +200,29 @@
+void bytesToDouble(unsigned char *vecbytes, int *veclength, int *nbvec, double *vecres){
+ int i, j, k, idres=0; /* idres: index in vecres*/
+ double *temp;
+ temp = (double *) calloc(8, sizeof(double));
+
+
+ for(k=0;k<*nbvec;k++){ /* for all input vector */
+ idres = 0;
+ for(i=0;i<*veclength;i++){ /* for one input vector */
+ byteToBinDouble(vecbytes[i+ k* *veclength], temp); /* byte -> 8 double (0/1)*/
+ for(j=0;j<=7;j++){ /* fill in the result*/
+ vecres[j+idres] += temp[j];
+ }
+ idres = idres + 8;
+ }
+ }
+ free(temp);
+} /* end bytesToInt */
+
+
+
+
+
/*
=== MAP BINARY SNPS TO 1->256 SCALE ===
- vecsnp: vector of integers (0/1)
@@ -265,10 +311,10 @@
/* transform a snpbin into a vector of frequencies (double) */
void snpbin2freq(struct snpbin *x, double *out){
double ploid = (double) ploidy(x);
+ bytesToDouble(x->bytevec, x->byteveclength, x->bytevecnb, out);
int i;
- bytesToInt(x->bytevec, x->byteveclength, x->bytevecnb, (int *) out);
- out = (double *) out; /* casting back to double */
- for(i=0; i< nLoc(x); i++){
+
+ for(i=0; i < nLoc(x); i++){
out[i] = out[i] / ploid;
}
/*reminders:
@@ -322,27 +368,67 @@
/* Function to compute one dot products between two individuals */
/* centring and scaling is always used */
/* but need to pass vectors of 0 and 1*/
-double snpbin_dotprod(struct snpbin *x, struct snpbin *y, double *mean, double *sd, bool *freq){
+double snpbin_dotprod_int(struct snpbin *x, struct snpbin *y, double *mean, double *sd){
/* define variables, allocate memory */
int P = nLoc(x), i;
short isna;
double res = 0.0;
- double *vecx, *vecy;
- vecx = (double *) calloc(P, sizeof(double));
- vecy = (double *) calloc(P, sizeof(double));
+ int *vecx, *vecy;
+ vecx = (int *) calloc(P, sizeof(int));
+ vecy = (int *) calloc(P, sizeof(int));
+ /* conversion to integers or frequencies*/
+ snpbin2intvec(x, (int *) vecx);
+ snpbin2intvec(y, (int *) vecy);
- /* conversion to integers or frequencies*/
- if(*freq){
- snpbin2freq(x, vecx);
- snpbin2freq(y, vecy);
- } else {
- snpbin2intvec(x, (int *) vecx);
- snpbin2intvec(y, (int *) vecy);
+
+ /* printf("\nvector x: \n"); */
+ /* for(i=0;i<P;i++){ */
+ /* printf("%i", vecx[i]); */
+ /* } */
+
+ /* printf("\nvector y: \n"); */
+ /* for(i=0;i<P;i++){ */
+ /* printf("%i", vecy[i]); */
+ /* } */
+
+ /* compute dot product */
+ int count=0;
+ for(i=0;i<P;i++){
+ if(snpbin_isna(x,i) == 0 && snpbin_isna(y,i) == 0){
+ /* res += ((vecx[i]-mean[i])/sd[i]) * ((vecy[i]-mean[i])/sd[i]); */
+ res += ((vecx[i]-mean[i])/sd[i]) * ((vecy[i]-mean[i])/sd[i]);
+ /* printf("\ntemp value of increment: %f", ((vecx[i]-mean[i])/sd[i]) * ((vecy[i]-mean[i])/sd[i])); */
+ /* printf("\ntemp value of result: %f", res); */
+ }
}
+ /* free memory */
+ free(vecx);
+ free(vecy);
+ return res;
+}
+
+
+
+
+
+double snpbin_dotprod_freq(struct snpbin *x, struct snpbin *y, double *mean, double *sd){
+ /* define variables, allocate memory */
+ int P = nLoc(x), i;
+ short isna;
+ double res = 0.0;
+ double *vecx, *vecy;
+ vecx = (double *) calloc(P, sizeof(double));
+ vecy = (double *) calloc(P, sizeof(double));
+
+ /* conversion to integers or frequencies*/
+ snpbin2freq(x, vecx);
+ snpbin2freq(y, vecy);
+
+
/* printf("\nvector x: \n"); */
/* for(i=0;i<P;i++){ */
/* printf("%i", vecx[i]); */
@@ -374,6 +460,8 @@
+
+
/* Function to convert a 'genlight' object (R side) into an array of 'snpbin' (C side) */
/* Each component of the genlight is concatenated into a single vector */
/* and then used to create different 'snpbin' on the C side */
Modified: pkg/src/snpbin.h
===================================================================
--- pkg/src/snpbin.h 2011-02-09 19:26:02 UTC (rev 801)
+++ pkg/src/snpbin.h 2011-02-10 11:35:09 UTC (rev 802)
@@ -46,6 +46,7 @@
void byteToBinInt(unsigned char in, int *out);
+void byteToBinDouble(unsigned char in, double *out);
void bytesToBinInt(unsigned char *vecbytes, int *vecsize, int *vecres);
struct snpbin makesnpbin(unsigned char *bytevec, int *byteveclength, int *bytevecnb, int *nloc, int *nanb, int *naposi, int *ploidy);
@@ -60,6 +61,7 @@
*/
void bytesToInt(unsigned char *vecbytes, int *veclength, int *nbvec, int *vecres);
+void bytesToDouble(unsigned char *vecbytes, int *veclength, int *nbvec, double *vecres);
void binIntToBytes(int *vecsnp, int *vecsize, unsigned char *vecres, int *ressize);
@@ -78,7 +80,8 @@
void snpbin2freq(struct snpbin *x, double *out);
void printsnpbin(struct snpbin *x);
short int snpbin_isna(struct snpbin *x, int i);
-double snpbin_dotprod(struct snpbin *x, struct snpbin *y, double *mean, double *sd, bool *freq);
+double snpbin_dotprod_int(struct snpbin *x, struct snpbin *y, double *mean, double *sd);
+double snpbin_dotprod_freq(struct snpbin *x, struct snpbin *y, double *mean, double *sd);
struct genlightC genlightTogenlightC(unsigned char *gen, int *nbvecperind, int *byteveclength, int *nbnaperind, int *naposi, int *nind, int *nloc, int *ploidy);
More information about the adegenet-commits
mailing list