[adegenet-commits] r745 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Dec 24 18:06:03 CET 2010


Author: jombart
Date: 2010-12-24 18:06:03 +0100 (Fri, 24 Dec 2010)
New Revision: 745

Modified:
   pkg/R/SNPbin.R
Log:
Adapted the class for any level of ploidy.


Modified: pkg/R/SNPbin.R
===================================================================
--- pkg/R/SNPbin.R	2010-12-22 17:44:21 UTC (rev 744)
+++ pkg/R/SNPbin.R	2010-12-24 17:06:03 UTC (rev 745)
@@ -9,11 +9,12 @@
 ###############
 ## SNPbin class
 ###############
-setClass("SNPbin", representation(snp = "raw",
+setClass("SNPbin", representation(snp = "list",
                                   n.loc = "integer",
                                   NA.posi = "integer",
-                                  label = "charOrNULL"),
-         prototype(snp = raw(0), n.loc = integer(0), label = NULL))
+                                  label = "charOrNULL",
+                                  ploidy = "integer"),
+         prototype(snp = list(0), n.loc = integer(0), label = NULL, ploidy = 1L))
 
 
 
@@ -24,8 +25,9 @@
 setClass("genlight", representation(tab = "list",
                                     n.loc = "integer",
                                     ind.names = "charOrNULL",
-                                    loc.names = "charOrNULL"),
-         prototype(tab = list(0), n.loc = integer(0), ind.names = NULL, loc.names = NULL))
+                                    loc.names = "charOrNULL",
+                                    ploidy = "integer"),
+         prototype(tab = list(0), n.loc = integer(0), ind.names = NULL, loc.names = NULL, ploidy = 1L))
 
 
 
@@ -51,21 +53,53 @@
 
     ## handle snp data ##
     if(!is.null(input$snp)){
+        ## a vector of raw is provided
         if(is.raw(input$snp)){
-            x at snp <-input$snp
+            x at snp <-list(input$snp)
         }
 
+        ## a list of raw vectors is provided
+        if(is.list(input$snp)){
+            if(all(sapply(input$snp, class)=="raw")){
+                x at snp <- input$snp
+            }
+        }
+
+        ## a numeric/integer vector is provided
         ## conversion from a vector of 0/1 (integers)
         if(is.numeric(input$snp) | is.integer(input$snp)){
-            temp <- na.omit(unique(input$snp))
-            if(!all(temp %in% c(0,1))){
-                stop("Values of SNPs are not all 0, 1, or NA")
+            input$snp <- as.integer(input$snp)
+            ## determine ploidy
+            if(is.null(input$ploidy)){
+                input$ploidy <- max(input$snp, na.rm=TRUE)
             }
+            input$ploidy <- as.integer(input$ploidy)
+            if(input$ploidy<1) stop("Ploidy is less than 1")
 
-            temp <- .bin2raw(input$snp)
-            x at snp <- temp$snp
-            x at n.loc <- temp$n.loc
-            x at NA.posi <- temp$NA.posi
+            ## check values in the vector
+            if(any(input$snp<0 | input$snp>input$ploidy, na.rm=TRUE)){
+                stop("Values of SNPs < 0 or > ploidy")
+            }
+
+
+            ## handle ploidy (may have to split info into binary vectors)
+            x at snp <- list()
+            i <- max(input$snp, na.rm=TRUE) # determine max nb of alleles
+            if(i > 1){ # haplotype can be 0/1/2/...
+                j <- 0 # index for the length of the list @snp
+                while(i > 0){
+                    j <- j+1 # update length of the result
+                    temp <- as.integer(input$snp==i)
+                    x at snp[[j]] <- .bin2raw(temp)$snp # make a vector of 1
+                    input$snp <- input$snp-temp # deflate data (subtract the recoded alleles)
+                    i <- max(input$snp, na.rm=TRUE) # update the max nb of alleles
+                }
+            } else { # haplotype is only 0/1/NA
+                x at snp[[1]] <- .bin2raw(input$snp)$snp
+            }
+            x at n.loc <- length(input$snp)
+            x at NA.posi <- which(is.na(input$snp))
+            x at ploidy <- input$ploidy
             return(x)
         }
     }
@@ -84,6 +118,12 @@
         x at NA.posi <- as.integer(input$NA.posi)
     }
 
+
+    ## handle ploidy ##
+    if(!is.null(input$ploidy)){
+        x at ploidy <- as.integer(input$ploidy)
+    }
+
     return(x)
 }) # end SNPbin constructor
 
@@ -111,12 +151,13 @@
 ###############
 setMethod ("show", "SNPbin", function(object){
     cat(" === S4 class SNPbin ===")
-    if(!is.null(x at label)) {
-        cat("\n", x at label)
+    if(!is.null(object at label)) {
+        cat("\n", object at label)
     }
     cat("\n", nLoc(object), "SNPs coded as bits")
+    cat("\n Ploidy:", object at ploidy)
     temp <- round(length(object at NA.posi)/nLoc(object) *100,2)
-    cat("\n", length(object at NA.posi), " (", temp," %) missing data\n", sep="")
+    cat("\n ", length(object at NA.posi), " (", temp," %) missing data\n", sep="")
 }) # end show method
 
 
@@ -130,6 +171,11 @@
 })
 
 
+setMethod("nLoc","genlight", function(x,...){
+    return(x at n.loc)
+})
+
+
 setMethod("$","SNPbin",function(x,name) {
     return(slot(x,name))
 })
@@ -160,7 +206,7 @@
 setMethod("[", signature(x="SNPbin", i="ANY"), function(x, i) {
     if (missing(i)) i <- TRUE
     temp <- .SNPbin2int(x) # data as integers with NAs
-    x <- new("SNPbin", snp=temp[i], label=x at label)
+    x <- new("SNPbin", snp=temp[i], label=x at label, ploidy=x at ploidy)
     return(x)
 }) # end [] for SNPbin
 
@@ -182,9 +228,7 @@
 ## each byte takes a value on [0,255]
 
 ## function to code multiple SNPs on a byte
-## 8 combinations of SNPs can be coded on a single byte
-## we use bytes values from [1,128]
-## 200 is a missing value
+## 8 combinations of SNPs can be coded onto a single byte (0->255)
 .bin2raw <- function(vecSnp){
     ## was required in pure-R version
     ## SNPCOMB <- as.matrix(expand.grid(rep(list(c(0,1)), 8)))
@@ -221,15 +265,28 @@
 
 
 
+###########
+## .raw2bin
+###########
+## convert vector of raw to 0/1 integers
+.raw2bin <- function(x){
+    SNPCOMB <- as.matrix(expand.grid(rep(list(c(0,1)), 8)))
+    colnames(SNPCOMB) <- NULL
+    res <- unlist(lapply(as.integer(x), function(i) SNPCOMB[i+1,]))
+} # end .raw2bin
+
+
+
+
+
 #############
 ## .SNPbin2int
 #############
 ## convert SNPbin to integers (0/1)
 .SNPbin2int <- function(x){
-    SNPCOMB <- as.matrix(expand.grid(rep(list(c(0,1)), 8)))
-    colnames(SNPCOMB) <- NULL
-    res <- unlist(lapply(as.integer(x at snp), function(i) SNPCOMB[i+1,]))
-    res <- res[1:x at n.loc]
+    res <- lapply(x at snp, .raw2bin)
+    res <- lapply(res, function(e) e[1:x at n.loc])
+    res <- as.integer(Reduce("+", res))
     if(length(x at NA.posi)>0){
         res[x at NA.posi] <- NA
     }
@@ -268,9 +325,34 @@
 ################################
 ## testing :
 ##
-## dat <- sample(c(0,1,NA), 1e6, prob=c(.5, .495, .005), replace=TRUE)
+## HAPLOID DATA
+##
+## library(adegenet)
+
+## dat <- sample(c(0L,1L,NA), 1e6, prob=c(.5, .495, .005), replace=TRUE)
 ## x <- new("SNPbin", dat)
 
+## identical(as(x, "integer"),dat) # SHOULD NORMALLY BE TRUE
+## all(as(x, "integer") == dat, na.rm=TRUE) # MUST BE TRUE
+
+## object.size(dat)/object.size(x) # EFFICIENCY OF CONVERSION
+
+
+
+ ## DIPLOID DATA
+## dat <- sample(c(0:2,NA), 1e6, prob=c(.4, .4, .195 ,.005), replace=TRUE)
+## x <- new("SNPbin", dat)
+
 ## identical(as(x, "integer"),dat) # MUST BE TRUE
 
 ## object.size(dat)/object.size(x) # EFFICIENCY OF CONVERSION
+
+
+
+ ## POLYLOID DATA
+## dat <- sample(c(0:5,NA), 1e6, prob=c(rep(.995/6,6), 0.005), replace=TRUE)
+##  x <- new("SNPbin", dat)
+
+## identical(as(x, "integer"),dat) # MUST BE TRUE
+
+## object.size(dat)/object.size(x) # EFFICIENCY OF CONVERSION



More information about the adegenet-commits mailing list