[adegenet-commits] r790 - pkg/src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Feb 8 15:51:17 CET 2011


Author: jombart
Date: 2011-02-08 15:51:16 +0100 (Tue, 08 Feb 2011)
New Revision: 790

Modified:
   pkg/src/SNPbin.c
   pkg/src/SNPbin.h
Log:
moving forward into the class mirroring in C.


Modified: pkg/src/SNPbin.c
===================================================================
--- pkg/src/SNPbin.c	2011-02-04 12:35:04 UTC (rev 789)
+++ pkg/src/SNPbin.c	2011-02-08 14:51:16 UTC (rev 790)
@@ -11,6 +11,10 @@
   {0,1}^8 |-> {0,...,255}
   x -> x_1 * 2^0 + ... + x_8 * 2^7 = \sum_i x_i * 2^(i-1)
 
+
+  # Functions named 'SNPbin...' or 'GL...' are to be called directly from R.
+  # The structure 'snpbin' is a C representation of the class 'SNPbin'.
+  # Functions named 'snpbin...' are meant to be called internally.
 */
 
 
@@ -26,13 +30,56 @@
 
 
 /*
-   ==========================
-   === INTERNAL FUNCTIONS ===
-   ==========================
+   ========================
+   === CLASS DEFINITION ===
+   ========================
 */
 
+/* 'bytevec' holds 'bytevecnb' arrays of bytes concatenated into a single array */
+/* of dim 'byteveclength' x 'bytevecnb' */
+/* nloc is the number of SNPs - used for recoding to integers */
+/* naposi indicates the positions of NAs */
+/* nanb is the length of naposi */
 
-/* Maps one value from 0-255 to sequences of 8 binary values */
+struct snpbin{
+	unsigned char *bytevec;
+	int *byteveclength, *bytevecnb, *nloc, *nanb, *naposi; /* all but naposi have length 1 */
+};
+
+
+
+
+struct snpbin makesnpbin(unsigned char *bytevec, int *byteveclength, int *bytevecnb, int *nloc, int *nanb, int *naposi) {
+	struct snpbin out;
+
+	out.bytevec = bytevec;
+	out.byteveclength = byteveclength;
+	out.bytevecnb = bytevecnb;
+	out.nloc = nloc;
+	out.nanb = nanb;
+	out.naposi = naposi;
+	return out;
+};
+
+
+
+struct genlightC{
+	struct snpbin *x;
+	int *nind;
+};
+
+
+
+
+
+/*
+   ===========================
+   === AUXILIARY FUNCTIONS ===
+   ===========================
+*/
+
+
+/* Maps one byte (0-255) to a sequence of 8 binary (0/1) integer values */
 void byteToBinInt(unsigned char in, int *out){
 	short int rest, i, temp;
 
@@ -56,13 +103,33 @@
 
 
 
+/* Maps an array of values from 0-255 to sequences of 8 binary values */
+/* Input are unsigned char (hexadecimal), outputs are integers */
+void bytesToBinInt(unsigned char *vecbytes, int *vecsize, int *vecres){
+	int i, j, idres=0, *temp; /* idres: index in vecres*/
 
+	temp = (int *) calloc(8, sizeof(int));
 
+	for(i=0;i<*vecsize;i++){
+		byteToBinInt(vecbytes[i], temp);
+		for(j=0;j<=7;j++){
+			vecres[j+idres] = temp[j];
+		}
+		idres = idres + 8;
+	}
 
+	free(temp);
+} /* end binIntToBytes*/
 
 
 
 
+
+
+
+
+
+
 /*
    ===============================
    === MAIN EXTERNAL FUNCTIONS ===
@@ -70,6 +137,36 @@
 */
 
 
+
+/* Maps an array of values from 0-255 to integers representing counts of alleles */
+/* This is done by adding arrays of 0-1 for indiv with ploidy > 1*/
+/* Input are unsigned char (hexadecimal), outputs are integers */
+/* veclength is the length of one vector of bytes */
+/* nbvec is the nb of input vectors*/
+/* input 'vecbytes' is actually concatenated, ie of size veclength * nbvec */
+void bytesToInt(unsigned char *vecbytes, int *veclength, int *nbvec, int *vecres){
+	int i, j, k, idres=0, *temp; /* idres: index in vecres*/
+
+	temp = (int *) calloc(8, sizeof(int));
+
+
+	for(k=0;k<*nbvec;k++){ /* for all input vector */
+		idres = 0;
+		for(i=0;i<*veclength;i++){ /* for one input vector */
+			byteToBinInt(vecbytes[i+ k* *veclength], temp); /* byte -> 8 int (0/1)*/
+			for(j=0;j<=7;j++){ /* fill in the result*/
+				vecres[j+idres] += temp[j];
+			}
+			idres = idres + 8;
+		}
+	}
+	free(temp);
+} /* end bytesToInt */
+
+
+
+
+
 /* 
    === MAP BINARY SNPS TO 1->256 SCALE ===
    - vecsnp: vector of integers (0/1)
@@ -96,12 +193,6 @@
 
 
 
-	/* 
-	   =============
-	   MAIN FUNCTION
-	   ============= 
-	*/
-	
 	/* INDICES */
 	/* i: idx of snp */
 	/* j: idx of binBasis (1:8) */
@@ -123,7 +214,7 @@
 	/* free memory */
 	freeintvec(binBasis);
 
-} /* end sptips */
+} /* end binIntToBytes */
 
 
 
@@ -132,95 +223,124 @@
 
 
 
-/* Maps an array of values from 0-255 to sequences of 8 binary values */
-/* Input are unsigned char (hexadecimal), outputs are integers */
-void bytesToBinInt(unsigned char *vecbytes, int *vecsize, int *vecres){
-	int i, j, idres=0, *temp; /* idres: index in vecres*/
-	
-	temp = (int *) calloc(8, sizeof(int));
+/*
+   =====================
+   === CLASS METHODS ===
+   =====================
+*/
 
-	for(i=0;i<*vecsize;i++){
-		byteToBinInt(vecbytes[i], temp);
-		for(j=0;j<=7;j++){
-			vecres[j+idres] = temp[j];
-		}
-		idres = idres + 8;
-	}
+int nLoc(struct snpbin *x){
+	return *(x->nloc);
+}
 
-	free(temp);
-} /* end binIntToBytes*/
 
 
+/* transform a snpbin into a vector of integers */
+void snpbin2intvec(struct snpbin *x, int *out){
+	bytesToInt(x->bytevec, x->byteveclength, x->bytevecnb, out);
+}
 
 
 
 
+short int snpbin_isna(struct snpbin *x, int i){
+	int j = 0;
+	if(*(x->nanb) < 1 || i > nLoc(x)) return 0;
 
+	while(j < *(x->nanb)){
+		if( i == (x->naposi)[j]) return 1;
+		j++;
+	}
 
-/* Maps an array of values from 0-255 to integers representing counts of alleles */
-/* This is done by adding arrays of 0-1 for indiv with ploidy > 1*/
-/* Input are unsigned char (hexadecimal), outputs are integers */
-/* veclength is the length of one vector of bytes */
-/* nbvec is the nb of input vectors*/
-/* input 'vecbytes' is actually concatenated, ie of size veclength * nbvec */
-void bytesToInt(unsigned char *vecbytes, int *veclength, int *nbvec, int *vecres){
-	int i, j, k, idres=0, *temp; /* idres: index in vecres*/
+	return 0;
+}
 
-	temp = (int *) calloc(8, sizeof(int));
 
 
-	for(k=0;k<*nbvec;k++){ /* for all input vector */
-		idres = 0;
-		for(i=0;i<*veclength;i++){ /* for one input vector */
-			byteToBinInt(vecbytes[i+ k* *veclength], temp); /* byte -> 8 int (0/1)*/
-			for(j=0;j<=7;j++){ /* fill in the result*/
-				vecres[j+idres] = vecres[j+idres] + temp[j];
-			}
-			idres = idres + 8;
-		}
+
+
+/* Function to compute the dot product between two individuals */
+/* centring and scaling are always applied; */
+/* to skip them, pass a vector of 0s as 'mean' and a vector of 1s as 'sd' */
+double snpbin_dotprod(struct snpbin *x, struct snpbin *y, double *mean, double*sd){
+	/* define variables, allocate memory */
+	int P = nLoc(x), i, *vecx, *vecy;
+	short int isna;
+	double res = 0.0;
+	vecx = (int *) calloc(P, sizeof(int));
+	vecy = (int *) calloc(P, sizeof(int));
+
+	/* conversion to integers */
+	snpbin2intvec(x, vecx);
+	snpbin2intvec(y, vecy);
+
+	/* compute dot product */
+	for(i=0;i<P;i++){
+		if(snpbin_isna(x,i) == 0 && snpbin_isna(y,i) == 0)
+			res += ((vecx[i]-mean[i])/sd[i]) * ((vecy[i]-mean[i])/sd[i]);
 	}
-	free(temp);
-} /* end bytesToInt */
 
+	/* free memory */
+	free(vecx);
+	free(vecy);
 
+	return res;
+}
 
 
 
 
+/* Function to convert a 'genlight' object (R side) into an array of 'snpbin' (C side) */
+/* Each component of the genlight is concatenated into a single vector */
+/* and then used to create different 'snpbin' on the C side */
+struct genlightC genlightTogenlightC(unsigned char *gen, int *nbvecperind, int *byteveclength, int *nbnaperind, int *naposi, int *nind, int *nloc){
+	/* declare variables and allocate memory */
+	int i, j, idxByteVec=0, idxNAVec=0;
+	struct genlightC out;
+	out.x = (struct snpbin *) calloc(*nind, sizeof(struct snpbin));
 
+	/* create the list of snpbin */
+	for(i=0; i < *nind; i++){
+		out.x[i] = makesnpbin(&gen[idxByteVec], byteveclength, &nbvecperind[i], nloc, &nbnaperind[i], &naposi[idxNAVec]);
+		idxByteVec += *byteveclength * nbvecperind[i]; /* update index in byte array */
+		idxNAVec +=  nbnaperind[i]; /* update index in byte array */
 
-/* Simple test function */
-/* Test: increases for a raw (unsigned char) vector */
-void testRaw(unsigned char *a, int *n){
-	int i;
-	for(i=0; i<*n; i++){
-		a[i] = (unsigned char)(i);
 	}
+
+	*out.nind = *nind;
+	return out;
 }
 
 
 
 
+/* Function to compute all dot products between individuals */
+/* centring and scaling are always used */
+/* but need to pass vectors of 0 and 1*/
+void GLdotProd(unsigned char *gen, int *nbvecperind, int *nbnaperind, int *naposi, int *nind, int *nloc, double *res){
+	
+}
 
 
 
-/* Function to compute all dot products between individuals */
-/* No centring, no scaling */
-/* a: 2-dim array, dim n x p*/
-/* naposi: 2-dim array, dim n x ...*/
-/* nbna: array of nb of NAs for each individual*/
-void dotProd(unsigned char **a, int *n, int *p, int **naposi, int *nbna){
-	/* define variables, allocate memory */
+/*
+   =========================
+   === TESTING FUNCTIONS ===
+   =========================
+*/
 
-
-	/* free memory */
-	
+/* Simple test function */
+/* Test: increases for a raw (unsigned char) vector */
+void testRaw(unsigned char *a, int *n){
+	int i;
+	for(i=0; i<*n; i++){
+		a[i] = (unsigned char)(i);
+	}
 }
 
 
 
 
-
 /* TESTING in R */
 
 /*

Modified: pkg/src/SNPbin.h
===================================================================
--- pkg/src/SNPbin.h	2011-02-04 12:35:04 UTC (rev 789)
+++ pkg/src/SNPbin.h	2011-02-08 14:51:16 UTC (rev 790)
@@ -9,7 +9,9 @@
 /* EXTERNAL */
 void binIntToBytes(int *vecsnp, int *vecsize, unsigned char *vecres, int *ressize);
 void bytesToBinInt(unsigned char *vecbytes, int *vecsize, int *vecres);
+void bytesToInt(unsigned char *vecbytes, int *veclength, int *nbvec, int *vecres);
 
 /* INTERNAL */
 void byteToBinInt(unsigned char in, int out[8]);
 void testRaw(unsigned char *a, int *n);
+double snpbin_dotprod(struct snpbin *x, struct snpbin *y, double *mean, double*sd);



More information about the adegenet-commits mailing list