[adegenet-commits] r799 - pkg/src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Feb 9 14:16:29 CET 2011
Author: jombart
Date: 2011-02-09 14:16:29 +0100 (Wed, 09 Feb 2011)
New Revision: 799
Added:
pkg/src/GLfunctions.c
pkg/src/GLfunctions.h
Modified:
pkg/src/snpbin.c
pkg/src/snpbin.h
Log:
Reorganised the code.
Added: pkg/src/GLfunctions.c
===================================================================
--- pkg/src/GLfunctions.c (rev 0)
+++ pkg/src/GLfunctions.c 2011-02-09 13:16:29 UTC (rev 799)
@@ -0,0 +1,129 @@
+/*
+ Coded by Thibaut Jombart (tjombart at imperial.ac.uk), December 2010.
+ Distributed with the adephylo package for the R software.
+ Licence: GPL >=2.
+
+ Functions based on snpbin and genlightC classes, which mirror the R classes SNPbin and genlight on the C side.
+*/
+
+
+#include <math.h>
+#include <time.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "snpbin.h"
+
+
+
+/* Function to compute all dot products between individuals */
+/* centring and scaling is always used */
+/* but need to pass vectors of 0 and 1*/
+void GLdotProd(unsigned char *gen, int *nbvecperind, int *byteveclength, int *nbnaperind, int *naposi, int *nind, int *nloc, int *ploidy, double *mean, double *sd, double *res){
+ struct genlightC dat;
+ int i, j, k=0;
+
+ /* Check variance vector: do not divide by 0 */
+ for(i=0;i< *nloc;i++){
+ if(sd[i] < NEARZERO){
+ sd[i] = 1;
+ }
+ }
+
+ dat = genlightTogenlightC(gen, nbvecperind, byteveclength, nbnaperind, naposi, nind, nloc, ploidy);
+
+ /* Lower triangle - without the diagonal */
+ for(i=0; i< (*nind-1); i++){
+ for(j=i+1; j< *nind; j++){
+ /* printf("\n == pair %i-%i ==\n", i+1,j+1); */
+ res[k] = snpbin_dotprod(&dat.x[i], &dat.x[j], mean, sd);
+ ++k;
+ }
+ }
+
+ /* add the diagonal to the end of the array */
+ for(i=0; i< *nind; i++){
+ /* printf("\n == pair %i-%i == \n", i+1,i+1); */
+ res[k] = snpbin_dotprod(&dat.x[i], &dat.x[i], mean, sd);
+ ++k;
+ }
+}
+
+
+
+
+
+
+/* TESTING in R */
+
+/*
+
+## === DOT PRODUCTS === ##
+
+library(adegenet)
+library(ade4)
+dat <- rbind("a"=c(1,0,0), "b"=c(1,2,1), "c"=c(1,0,1))
+x <- new("genlight",dat)
+
+
+## NOT CENTRED, NOT SCALED
+glDotProd(x)
+
+res2 <- as.matrix(x) %*% t(as.matrix(x))
+res2
+
+## DATA > 8 SNPs
+dat <- rbind(rep(c(1,0,1), c(8,10,5)))
+x <- new("genlight",dat)
+glDotProd(x)
+
+
+## RANDOM DATA
+dat <- matrix(sample(0:1, 5*1000, replace=TRUE), nrow=5)
+x <- new("genlight",dat)
+res1 <- glDotProd(x)
+
+res2 <- as.matrix(x) %*% t(as.matrix(x))
+
+all(res1==res2)
+
+
+## CENTRED, NOT SCALED
+res1 <- glDotProd(x, cent=TRUE)
+
+temp <- as.matrix(x) / ploidy(x)
+temp <- scalewt(temp, cent=TRUE, scale=FALSE)
+res2 <- temp %*% t(temp)
+res2
+
+all(abs(res1-res2)<1e-10)
+
+
+## CENTRED, SCALED
+res1 <- glDotProd(x, cent=TRUE, scale=TRUE)
+
+temp <- as.matrix(x) / ploidy(x)
+temp <- scalewt(temp, cent=TRUE, scale=TRUE)
+res2 <- temp %*% t(temp)
+res2
+
+all(abs(res1-res2)<1e-10)
+
+
+## TEST WITH NAs
+library(adegenet)
+library(ade4)
+
+dat <- list(a=c(1,NA,0,0,2), b=c(1,2,3,4,0), c=c(NA,0,1,NA,2))
+
+x <- new("genlight", dat) # conversion
+x
+
+res1 <- glDotProd(x)
+t(data.frame(dat))
+res1
+
+
+*/
+
Added: pkg/src/GLfunctions.h
===================================================================
--- pkg/src/GLfunctions.h (rev 0)
+++ pkg/src/GLfunctions.h 2011-02-09 13:16:29 UTC (rev 799)
@@ -0,0 +1,5 @@
+#include <math.h>
+#include <time.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
Modified: pkg/src/snpbin.c
===================================================================
--- pkg/src/snpbin.c 2011-02-09 13:03:57 UTC (rev 798)
+++ pkg/src/snpbin.c 2011-02-09 13:16:29 UTC (rev 799)
@@ -22,6 +22,8 @@
#include <time.h>
#include <string.h>
#include <stdlib.h>
+#include <stdio.h>
+#include "adesub.h"
#include "snpbin.h"
/* #define NEARZERO 0.0000000001 */
@@ -50,28 +52,6 @@
-/* struct snpbin makesnpbin(unsigned char *bytevec, int *byteveclength, int *bytevecnb, int *nloc, int *nanb, int *naposi, int *ploidy) { */
-/* struct snpbin out; */
-/* int i; */
-
-/* out.bytevec = bytevec; */
-/* out.byteveclength = byteveclength; */
-/* out.bytevecnb = bytevecnb; */
-/* out.nloc = nloc; */
-/* out.nanb = nanb; */
-/* /\* need to decrease the indices of NAs by 1, e.g. [1-10]->[0-9] *\/ */
-/* out.naposi = naposi; */
-/* if(*nanb > 0){ */
-/* for(i=0;i< *nanb; i++){ */
-/* out.naposi[i] = out.naposi[i] - 1; */
-/* } */
-/* } */
-/* out.ploidy = ploidy; */
-/* return out; */
-/* }; */
-
-
-
/* struct genlightC{ */
/* struct snpbin *x; */
/* int *nind; */
@@ -88,6 +68,27 @@
*/
+struct snpbin makesnpbin(unsigned char *bytevec, int *byteveclength, int *bytevecnb, int *nloc, int *nanb, int *naposi, int *ploidy) {
+ struct snpbin out;
+ int i;
+
+ out.bytevec = bytevec;
+ out.byteveclength = byteveclength;
+ out.bytevecnb = bytevecnb;
+ out.nloc = nloc;
+ out.nanb = nanb;
+ /* need to decrease the indices of NAs by 1, e.g. [1-10]->[0-9] */
+ out.naposi = naposi;
+ if(*nanb > 0){
+ for(i=0;i< *nanb; i++){
+ out.naposi[i] = out.naposi[i] - 1;
+ }
+ }
+ out.ploidy = ploidy;
+ return out;
+};
+
+
/* Maps one byte from 0-255 to sequences of 8 (binary) integers values */
void byteToBinInt(unsigned char in, int *out){
short int rest, i, temp;
Modified: pkg/src/snpbin.h
===================================================================
--- pkg/src/snpbin.h 2011-02-09 13:03:57 UTC (rev 798)
+++ pkg/src/snpbin.h 2011-02-09 13:16:29 UTC (rev 799)
@@ -2,21 +2,28 @@
#include <time.h>
#include <string.h>
#include <stdlib.h>
+#include <stdio.h>
-#define NEARZERO 0.0000000001 */
+#define NEARZERO 0.0000000001
#define TRUE 1
#define FALSE 0
typedef short bool;
+
/*
=========================
=== CLASS DEFINITIONS ===
=========================
*/
+/* 'bytevecnb' arrays of bytes concatenated into a single array */
+/* of dim 'byteveclength' x 'bytevecnb' */
+/* nloc is the number of SNPs - used for recoding to integers */
+/* naposi indicates the positions of NAs */
+/* nanb is the length of naposi */
struct snpbin{
unsigned char *bytevec;
int *byteveclength, *bytevecnb, *nloc, *nanb, *naposi, *ploidy; /* all but naposi have length 1 */
@@ -24,29 +31,6 @@
-
-struct snpbin makesnpbin(unsigned char *bytevec, int *byteveclength, int *bytevecnb, int *nloc, int *nanb, int *naposi, int *ploidy) {
- struct snpbin out;
- int i;
-
- out.bytevec = bytevec;
- out.byteveclength = byteveclength;
- out.bytevecnb = bytevecnb;
- out.nloc = nloc;
- out.nanb = nanb;
- /* need to decrease the indices of NAs by 1, e.g. [1-10]->[0-9] */
- out.naposi = naposi;
- if(*nanb > 0){
- for(i=0;i< *nanb; i++){
- out.naposi[i] = out.naposi[i] - 1;
- }
- }
- out.ploidy = ploidy;
- return out;
-};
-
-
-
struct genlightC{
struct snpbin *x;
int *nind;
@@ -61,19 +45,14 @@
*/
-/* Maps one byte from 0-255 to sequences of 8 (binary) integers values */
void byteToBinInt(unsigned char in, int *out);
-
-
-/* Maps an array of values from 0-255 to sequences of 8 binary values */
-/* Input are unsigned char (hexadecimal), outputs are integers */
void bytesToBinInt(unsigned char *vecbytes, int *vecsize, int *vecres);
+struct snpbin makesnpbin(unsigned char *bytevec, int *byteveclength, int *bytevecnb, int *nloc, int *nanb, int *naposi, int *ploidy);
-
/*
===============================
=== MAIN EXTERNAL FUNCTIONS ===
More information about the adegenet-commits
mailing list