[adegenet-commits] r799 - pkg/src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Feb 9 14:16:29 CET 2011


Author: jombart
Date: 2011-02-09 14:16:29 +0100 (Wed, 09 Feb 2011)
New Revision: 799

Added:
   pkg/src/GLfunctions.c
   pkg/src/GLfunctions.h
Modified:
   pkg/src/snpbin.c
   pkg/src/snpbin.h
Log:
Reorganised the code.


Added: pkg/src/GLfunctions.c
===================================================================
--- pkg/src/GLfunctions.c	                        (rev 0)
+++ pkg/src/GLfunctions.c	2011-02-09 13:16:29 UTC (rev 799)
@@ -0,0 +1,129 @@
+/*
+  Coded by Thibaut Jombart (tjombart at imperial.ac.uk), December 2010.
+  Distributed with the adephylo package for the R software.
+  Licence: GPL >=2.
+
+  Functions based on snpbin and genlightC classes, which mirror the R classes SNPbin and genlight on the C side.
+*/
+
+
+#include <math.h>
+#include <time.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "snpbin.h"
+
+
+
+/* Function to compute all dot products between individuals */
+/* centring and scaling is always used */
+/* but need to pass vectors of 0 and 1*/
+void GLdotProd(unsigned char *gen, int *nbvecperind, int *byteveclength, int *nbnaperind, int *naposi, int *nind, int *nloc, int *ploidy, double *mean, double *sd, double *res){
+	struct genlightC dat;
+	int i, j, k=0;
+
+	/* Check variance vector: do not divide by 0 */
+	for(i=0;i< *nloc;i++){
+		if(sd[i] < NEARZERO){
+			sd[i] = 1;
+		}
+	}
+
+	dat = genlightTogenlightC(gen, nbvecperind, byteveclength, nbnaperind, naposi, nind, nloc, ploidy);
+
+	/* Lower triangle - without the diagonal */
+	for(i=0; i< (*nind-1); i++){
+		for(j=i+1; j< *nind; j++){
+			/* printf("\n == pair %i-%i ==\n", i+1,j+1); */
+			res[k] = snpbin_dotprod(&dat.x[i], &dat.x[j], mean, sd);
+			++k;
+		}
+	}
+
+	/* add the diagonal to the end of the array */
+	for(i=0; i< *nind; i++){
+		/* printf("\n == pair %i-%i == \n", i+1,i+1); */
+		res[k] = snpbin_dotprod(&dat.x[i], &dat.x[i], mean, sd);
+		++k;
+	}
+}
+
+
+
+
+
+
+/* TESTING in R */
+
+/*
+
+## === DOT PRODUCTS === ##
+
+library(adegenet)
+library(ade4)
+dat <- rbind("a"=c(1,0,0), "b"=c(1,2,1), "c"=c(1,0,1))
+x <- new("genlight",dat)
+
+
+## NOT CENTRED, NOT SCALED
+glDotProd(x)
+
+res2 <- as.matrix(x) %*% t(as.matrix(x))
+res2
+
+## DATA > 8 SNPs
+dat <- rbind(rep(c(1,0,1), c(8,10,5)))
+x <- new("genlight",dat)
+glDotProd(x)
+
+
+## RANDOM DATA
+dat <- matrix(sample(0:1, 5*1000, replace=TRUE), nrow=5)
+x <- new("genlight",dat)
+res1 <- glDotProd(x)
+
+res2 <- as.matrix(x) %*% t(as.matrix(x))
+
+all(res1==res2)
+
+
+## CENTRED, NOT SCALED
+res1 <- glDotProd(x, cent=TRUE)
+
+temp <- as.matrix(x) / ploidy(x)
+temp <- scalewt(temp, cent=TRUE, scale=FALSE)
+res2 <- temp %*% t(temp)
+res2
+
+all(abs(res1-res2)<1e-10)
+
+
+## CENTRED, SCALED
+res1 <- glDotProd(x, cent=TRUE, scale=TRUE)
+
+temp <- as.matrix(x) / ploidy(x)
+temp <- scalewt(temp, cent=TRUE, scale=TRUE)
+res2 <- temp %*% t(temp)
+res2
+
+all(abs(res1-res2)<1e-10)
+
+
+## TEST WITH NAs
+library(adegenet)
+library(ade4)
+
+dat <- list(a=c(1,NA,0,0,2), b=c(1,2,3,4,0), c=c(NA,0,1,NA,2))
+
+x <- new("genlight", dat) # conversion
+x
+
+res1 <- glDotProd(x)
+t(data.frame(dat))
+res1
+
+
+*/
+

Added: pkg/src/GLfunctions.h
===================================================================
--- pkg/src/GLfunctions.h	                        (rev 0)
+++ pkg/src/GLfunctions.h	2011-02-09 13:16:29 UTC (rev 799)
@@ -0,0 +1,5 @@
+#include <math.h>
+#include <time.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>

Modified: pkg/src/snpbin.c
===================================================================
--- pkg/src/snpbin.c	2011-02-09 13:03:57 UTC (rev 798)
+++ pkg/src/snpbin.c	2011-02-09 13:16:29 UTC (rev 799)
@@ -22,6 +22,8 @@
 #include <time.h>
 #include <string.h>
 #include <stdlib.h>
+#include <stdio.h>
+#include "adesub.h"
 #include "snpbin.h"
 
 /* #define NEARZERO 0.0000000001 */
@@ -50,28 +52,6 @@
 
 
 
-/* struct snpbin makesnpbin(unsigned char *bytevec, int *byteveclength, int *bytevecnb, int *nloc, int *nanb, int *naposi, int *ploidy) { */
-/* 	struct snpbin out; */
-/* 	int i; */
-
-/* 	out.bytevec = bytevec; */
-/* 	out.byteveclength = byteveclength; */
-/* 	out.bytevecnb = bytevecnb; */
-/* 	out.nloc = nloc; */
-/* 	out.nanb = nanb; */
-/* 	/\* need to decrease the indices of NAs by 1, e.g. [1-10]->[0-9] *\/ */
-/* 	out.naposi = naposi; */
-/* 	if(*nanb > 0){ */
-/* 		for(i=0;i< *nanb; i++){ */
-/* 			out.naposi[i] = out.naposi[i] - 1; */
-/* 		} */
-/* 	} */
-/* 	out.ploidy = ploidy; */
-/* 	return out; */
-/* }; */
-
-
-
 /* struct genlightC{ */
 /* 	struct snpbin *x; */
 /* 	int *nind; */
@@ -88,6 +68,27 @@
 */
 
 
+struct snpbin makesnpbin(unsigned char *bytevec, int *byteveclength, int *bytevecnb, int *nloc, int *nanb, int *naposi, int *ploidy) {
+	struct snpbin out;
+	int i;
+
+	out.bytevec = bytevec;
+	out.byteveclength = byteveclength;
+	out.bytevecnb = bytevecnb;
+	out.nloc = nloc;
+	out.nanb = nanb;
+	/* need to decrease the indices of NAs by 1, e.g. [1-10]->[0-9] */
+	out.naposi = naposi;
+	if(*nanb > 0){
+		for(i=0;i< *nanb; i++){
+			out.naposi[i] = out.naposi[i] - 1;
+		}
+	}
+	out.ploidy = ploidy;
+	return out;
+};
+
+
 /* Maps one byte from 0-255 to sequences of 8 (binary) integers values */
 void byteToBinInt(unsigned char in, int *out){
 	short int rest, i, temp;

Modified: pkg/src/snpbin.h
===================================================================
--- pkg/src/snpbin.h	2011-02-09 13:03:57 UTC (rev 798)
+++ pkg/src/snpbin.h	2011-02-09 13:16:29 UTC (rev 799)
@@ -2,21 +2,28 @@
 #include <time.h>
 #include <string.h>
 #include <stdlib.h>
+#include <stdio.h>
 
 
-#define NEARZERO 0.0000000001 */
+#define NEARZERO 0.0000000001
 #define TRUE 1
 #define FALSE 0
 
 typedef short bool;
 
 
+
 /*
    =========================
    === CLASS DEFINITIONS ===
    =========================
 */
 
+/* 'bytevecnb' arrays of bytes concatenated into a single array */
+/* of dim 'byteveclength' x 'bytevecnb' */
+/* nloc is the number of SNPs - used for recoding to integers */
+/* naposi indicates the positions of NAs */
+/* nanb is the length of naposi */
 struct snpbin{
 	unsigned char *bytevec;
 	int *byteveclength, *bytevecnb, *nloc, *nanb, *naposi, *ploidy; /* all but naposi have length 1 */
@@ -24,29 +31,6 @@
 
 
 
-
-struct snpbin makesnpbin(unsigned char *bytevec, int *byteveclength, int *bytevecnb, int *nloc, int *nanb, int *naposi, int *ploidy) {
-	struct snpbin out;
-	int i;
-
-	out.bytevec = bytevec;
-	out.byteveclength = byteveclength;
-	out.bytevecnb = bytevecnb;
-	out.nloc = nloc;
-	out.nanb = nanb;
-	/* need to decrease the indices of NAs by 1, e.g. [1-10]->[0-9] */
-	out.naposi = naposi;
-	if(*nanb > 0){
-		for(i=0;i< *nanb; i++){
-			out.naposi[i] = out.naposi[i] - 1;
-		}
-	}
-	out.ploidy = ploidy;
-	return out;
-};
-
-
-
 struct genlightC{
 	struct snpbin *x;
 	int *nind;
@@ -61,19 +45,14 @@
 */
 
 
-/* Maps one byte from 0-255 to sequences of 8 (binary) integers values */
 void byteToBinInt(unsigned char in, int *out);
-
-
-/* Maps an array of values from 0-255 to sequences of 8 binary values */
-/* Input are unsigned char (hexadecimal), outputs are integers */
 void bytesToBinInt(unsigned char *vecbytes, int *vecsize, int *vecres);
+struct snpbin makesnpbin(unsigned char *bytevec, int *byteveclength, int *bytevecnb, int *nloc, int *nanb, int *naposi, int *ploidy);
 
 
 
 
 
-
 /*
    ===============================
    === MAIN EXTERNAL FUNCTIONS ===



More information about the adegenet-commits mailing list