[adegenet-commits] r797 - pkg/src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Feb 9 13:58:31 CET 2011
Author: jombart
Date: 2011-02-09 13:58:31 +0100 (Wed, 09 Feb 2011)
New Revision: 797
Modified:
pkg/src/SNPbin.c
pkg/src/SNPbin.h
Log:
reorganising the code.
Modified: pkg/src/SNPbin.c
===================================================================
--- pkg/src/SNPbin.c 2011-02-09 12:18:50 UTC (rev 796)
+++ pkg/src/SNPbin.c 2011-02-09 12:58:31 UTC (rev 797)
@@ -22,58 +22,60 @@
#include <time.h>
#include <string.h>
#include <stdlib.h>
-#include <R.h>
-#include <R_ext/Utils.h>
-#include "adesub.h"
+#include "snpbin.h"
-#define NEARZERO 0.0000000001
+/* #define NEARZERO 0.0000000001 */
+/* #define TRUE 1 */
+/* #define FALSE 0 */
+/* typedef short bool; */
-/*
- ========================
- === CLASS DEFINITION ===
- ========================
-*/
+/* /\* */
+/* ========================= */
+/* === CLASS DEFINITIONS === */
+/* ========================= */
+/* *\/ */
-/* 'bytevecnb' arrays of bytes concatenated into a single array */
-/* of dim 'byteveclength' x 'bytevecnb' */
-/* nloc is the number of SNPs - used for recoding to integers */
-/* naposi indicates the positions of NAs */
-/* nanb is the length of naposi */
+/* /\* 'bytevecnb' arrays of bytes concatenated into a single array *\/ */
+/* /\* of dim 'byteveclength' x 'bytevecnb' *\/ */
+/* /\* nloc is the number of SNPs - used for recoding to integers *\/ */
+/* /\* naposi indicates the positions of NAs *\/ */
+/* /\* nanb is the length of naposi *\/ */
-struct snpbin{
- unsigned char *bytevec;
- int *byteveclength, *bytevecnb, *nloc, *nanb, *naposi; /* all but naposi have length 1 */
-};
+/* struct snpbin{ */
+/* unsigned char *bytevec; */
+/* int *byteveclength, *bytevecnb, *nloc, *nanb, *naposi, *ploidy; /\* all but naposi have length 1 *\/ */
+/* }; */
-struct snpbin makesnpbin(unsigned char *bytevec, int *byteveclength, int *bytevecnb, int *nloc, int *nanb, int *naposi) {
- struct snpbin out;
- int i;
+/* struct snpbin makesnpbin(unsigned char *bytevec, int *byteveclength, int *bytevecnb, int *nloc, int *nanb, int *naposi, int *ploidy) { */
+/* struct snpbin out; */
+/* int i; */
- out.bytevec = bytevec;
- out.byteveclength = byteveclength;
- out.bytevecnb = bytevecnb;
- out.nloc = nloc;
- out.nanb = nanb;
- /* need to decrease the indices of NAs by 1, e.g. [1-10]->[0-9] */
- out.naposi = naposi;
- if(*nanb > 0){
- for(i=0;i< *nanb; i++){
- out.naposi[i] = out.naposi[i] - 1;
- }
- }
- return out;
-};
+/* out.bytevec = bytevec; */
+/* out.byteveclength = byteveclength; */
+/* out.bytevecnb = bytevecnb; */
+/* out.nloc = nloc; */
+/* out.nanb = nanb; */
+/* /\* need to decrease the indices of NAs by 1, e.g. [1-10]->[0-9] *\/ */
+/* out.naposi = naposi; */
+/* if(*nanb > 0){ */
+/* for(i=0;i< *nanb; i++){ */
+/* out.naposi[i] = out.naposi[i] - 1; */
+/* } */
+/* } */
+/* out.ploidy = ploidy; */
+/* return out; */
+/* }; */
-struct genlightC{
- struct snpbin *x;
- int *nind;
-};
+/* struct genlightC{ */
+/* struct snpbin *x; */
+/* int *nind; */
+/* }; */
@@ -241,16 +243,40 @@
}
+int ploidy(struct snpbin *x){
+ return *(x->ploidy);
+}
+
+
+
+
/* transform a snpbin into a vector of integers */
void snpbin2intvec(struct snpbin *x, int *out){
bytesToInt(x->bytevec, x->byteveclength, x->bytevecnb, out);
-/*reminders:
--void bytesToInt(unsigned char *vecbytes, int *veclength, int *nbvec, int *vecres){
+/*reminders:
+- void bytesToInt(unsigned char *vecbytes, int *veclength, int *nbvec, int *vecres){
- snpbin: unsigned char *bytevec; int *byteveclength, *bytevecnb, *nloc, *nanb, *naposi; */
}
+
+/* transform a snpbin into a vector of frequencies (double) */
+void snpbin2freq(struct snpbin *x, double *out){
+ double ploid = (double) ploidy(x);
+ int i;
+ bytesToInt(x->bytevec, x->byteveclength, x->bytevecnb, (int *) out);
+ out = (double *) out; /* casting back to double */
+ for(i=0; i< nLoc(x); i++){
+ out[i] = out[i] / ploid;
+ }
+/*reminders:
+- void bytesToInt(unsigned char *vecbytes, int *veclength, int *nbvec, int *vecres){
+- snpbin: unsigned char *bytevec; int *byteveclength, *bytevecnb, *nloc, *nanb, *naposi; */
+}
+
+
+
/* print a snpbin object - used for debugging */
void printsnpbin(struct snpbin *x){
int i, *temp;
@@ -341,7 +367,7 @@
/* Function to convert a 'genlight' object (R side) into an array of 'snpbin' (C side) */
/* Each component of the genlight is concatenated into a single vector */
/* and then used to create different 'snpbin' on the C side */
-struct genlightC genlightTogenlightC(unsigned char *gen, int *nbvecperind, int *byteveclength, int *nbnaperind, int *naposi, int *nind, int *nloc){
+struct genlightC genlightTogenlightC(unsigned char *gen, int *nbvecperind, int *byteveclength, int *nbnaperind, int *naposi, int *nind, int *nloc, int *ploidy){
/* declare variables and allocate memory */
int i, j, idxByteVec=0, idxNAVec=0;
struct genlightC out;
@@ -350,7 +376,7 @@
/* create the list of snpbin */
/* printf("\n nind: %d\n", *nind); */
for(i=0; i < *nind; i++){
- out.x[i] = makesnpbin(&gen[idxByteVec], byteveclength, &nbvecperind[i], nloc, &nbnaperind[i], &naposi[idxNAVec]);
+ out.x[i] = makesnpbin(&gen[idxByteVec], byteveclength, &nbvecperind[i], nloc, &nbnaperind[i], &naposi[idxNAVec], &ploidy[i]);
idxByteVec += *byteveclength * nbvecperind[i]; /* update index in byte array */
idxNAVec += nbnaperind[i]; /* update index in byte array */
/* printf("\nimported genotype %i: ", i+1); */
@@ -368,43 +394,9 @@
-/* Function to compute all dot products between individuals */
-/* centring and scaling is always used */
-/* but need to pass vectors of 0 and 1*/
-void GLdotProd(unsigned char *gen, int *nbvecperind, int *byteveclength, int *nbnaperind, int *naposi, int *nind, int *nloc, double *mean, double *sd, double *res){
- struct genlightC dat;
- int i, j, k=0;
- /* Check variance vector: do not divide by 0 */
- for(i=0;i< *nloc;i++){
- if(sd[i] < NEARZERO){
- sd[i] = 1;
- }
- }
- dat = genlightTogenlightC(gen, nbvecperind, byteveclength, nbnaperind, naposi, nind, nloc);
- /* Lower triangle - without the diagonal */
- for(i=0; i< (*nind-1); i++){
- for(j=i+1; j< *nind; j++){
- /* printf("\n == pair %i-%i ==\n", i+1,j+1); */
- res[k] = snpbin_dotprod(&dat.x[i], &dat.x[j], mean, sd);
- ++k;
- }
- }
-
- /* add the diagonal to the end of the array */
- for(i=0; i< *nind; i++){
- /* printf("\n == pair %i-%i == \n", i+1,i+1); */
- res[k] = snpbin_dotprod(&dat.x[i], &dat.x[i], mean, sd);
- ++k;
- }
-}
-
-
-
-
-
/*
=========================
=== TESTING FUNCTIONS ===
@@ -441,75 +433,5 @@
.C("bytesToInt",as.raw(c(12,11)), 1L, 2L, integer(8), PACKAGE="adegenet")
-## === DOT PRODUCTS === ##
-
-#### NO LONGER NEEDED ####
-
-library(adegenet)
-library(ade4)
-dat <- rbind("a"=c(1,0,0), "b"=c(1,2,1), "c"=c(1,0,1))
-x <- new("genlight",dat)
-
-
-## NOT CENTRED, NOT SCALED
-glDotProd(x)
-
-res2 <- as.matrix(x) %*% t(as.matrix(x))
-res2
-
-## DATA > 8 SNPs
-dat <- rbind(rep(c(1,0,1), c(8,10,5)))
-x <- new("genlight",dat)
-glDotProd(x)
-
-
-## RANDOM DATA
-dat <- matrix(sample(0:1, 5*1000, replace=TRUE), nrow=5)
-x <- new("genlight",dat)
-res1 <- glDotProd(x)
-
-res2 <- as.matrix(x) %*% t(as.matrix(x))
-
-all(res1==res2)
-
-
-## CENTRED, NOT SCALED
-res1 <- glDotProd(x, cent=TRUE)
-
-temp <- as.matrix(x) / ploidy(x)
-temp <- scalewt(temp, cent=TRUE, scale=FALSE)
-res2 <- temp %*% t(temp)
-res2
-
-all(abs(res1-res2)<1e-10)
-
-
-## CENTRED, SCALED
-res1 <- glDotProd(x, cent=TRUE, scale=TRUE)
-
-temp <- as.matrix(x) / ploidy(x)
-temp <- scalewt(temp, cent=TRUE, scale=TRUE)
-res2 <- temp %*% t(temp)
-res2
-
-all(abs(res1-res2)<1e-10)
-
-
-
-
-## TEST WITH NAs
-library(adegenet)
-library(ade4)
-
-dat <- list(a=c(1,NA,0,0,2), b=c(1,2,3,4,0), c=c(NA,0,1,NA,2))
-
-x <- new("genlight", dat) # conversion
-x
-
-res1 <- glDotProd(x)
-t(data.frame(dat))
-res1
-
-
*/
Modified: pkg/src/SNPbin.h
===================================================================
--- pkg/src/SNPbin.h 2011-02-09 12:18:50 UTC (rev 796)
+++ pkg/src/SNPbin.h 2011-02-09 12:58:31 UTC (rev 797)
@@ -2,16 +2,118 @@
#include <time.h>
#include <string.h>
#include <stdlib.h>
-#include <R.h>
-#include <R_ext/Utils.h>
-#include "adesub.h"
-/* EXTERNAL */
-void binIntToBytes(int *vecsnp, int *vecsize, unsigned char *vecres, int *ressize);
+
+#define NEARZERO 0.0000000001 */
+#define TRUE 1
+#define FALSE 0
+
+typedef short bool;
+
+
+/*
+ =========================
+ === CLASS DEFINITIONS ===
+ =========================
+*/
+
+struct snpbin{
+ unsigned char *bytevec;
+ int *byteveclength, *bytevecnb, *nloc, *nanb, *naposi, *ploidy; /* all but naposi have length 1 */
+};
+
+
+
+
+struct snpbin makesnpbin(unsigned char *bytevec, int *byteveclength, int *bytevecnb, int *nloc, int *nanb, int *naposi, int *ploidy) {
+ struct snpbin out;
+ int i;
+
+ out.bytevec = bytevec;
+ out.byteveclength = byteveclength;
+ out.bytevecnb = bytevecnb;
+ out.nloc = nloc;
+ out.nanb = nanb;
+ /* need to decrease the indices of NAs by 1, e.g. [1-10]->[0-9] */
+ out.naposi = naposi;
+ if(*nanb > 0){
+ for(i=0;i< *nanb; i++){
+ out.naposi[i] = out.naposi[i] - 1;
+ }
+ }
+ out.ploidy = ploidy;
+ return out;
+};
+
+
+
+struct genlightC{
+ struct snpbin *x;
+ int *nind;
+};
+
+
+
+/*
+ ===========================
+ === AUXILIARY FUNCTIONS ===
+ ===========================
+*/
+
+
+/* Maps one byte from 0-255 to sequences of 8 (binary) integers values */
+void byteToBinInt(unsigned char in, int *out);
+
+
+/* Maps an array of values from 0-255 to sequences of 8 binary values */
+/* Input are unsigned char (hexadecimal), outputs are integers */
void bytesToBinInt(unsigned char *vecbytes, int *vecsize, int *vecres);
+
+
+
+
+
+
+/*
+ ===============================
+ === MAIN EXTERNAL FUNCTIONS ===
+ ===============================
+*/
+
void bytesToInt(unsigned char *vecbytes, int *veclength, int *nbvec, int *vecres);
+void binIntToBytes(int *vecsnp, int *vecsize, unsigned char *vecres, int *ressize);
-/* INTERNAL */
-void byteToBinInt(unsigned char in, int out[8]);
+
+
+
+
+/*
+ =====================
+ === CLASS METHODS ===
+ =====================
+*/
+
+int nLoc(struct snpbin *x);
+int ploidy(struct snpbin *x);
+void snpbin2intvec(struct snpbin *x, int *out);
+void snpbin2freq(struct snpbin *x, double *out);
+void printsnpbin(struct snpbin *x);
+short int snpbin_isna(struct snpbin *x, int i);
+double snpbin_dotprod(struct snpbin *x, struct snpbin *y, double *mean, double *sd);
+struct genlightC genlightTogenlightC(unsigned char *gen, int *nbvecperind, int *byteveclength, int *nbnaperind, int *naposi, int *nind, int *nloc, int *ploidy);
+
+
+
+
+
+
+
+
+/*
+ =========================
+ === TESTING FUNCTIONS ===
+ =========================
+*/
+
+
void testRaw(unsigned char *a, int *n);
-double snpbin_dotprod(struct snpbin *x, struct snpbin *y, double *mean, double*sd);
More information about the adegenet-commits
mailing list