[adegenet-commits] r745 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Dec 24 18:06:03 CET 2010
Author: jombart
Date: 2010-12-24 18:06:03 +0100 (Fri, 24 Dec 2010)
New Revision: 745
Modified:
pkg/R/SNPbin.R
Log:
Adapted the class for any level of ploidy.
Modified: pkg/R/SNPbin.R
===================================================================
--- pkg/R/SNPbin.R 2010-12-22 17:44:21 UTC (rev 744)
+++ pkg/R/SNPbin.R 2010-12-24 17:06:03 UTC (rev 745)
@@ -9,11 +9,12 @@
###############
## SNPbin class
###############
-setClass("SNPbin", representation(snp = "raw",
+setClass("SNPbin", representation(snp = "list",
n.loc = "integer",
NA.posi = "integer",
- label = "charOrNULL"),
- prototype(snp = raw(0), n.loc = integer(0), label = NULL))
+ label = "charOrNULL",
+ ploidy = "integer"),
+ prototype(snp = list(0), n.loc = integer(0), label = NULL, ploidy = 1L))
@@ -24,8 +25,9 @@
setClass("genlight", representation(tab = "list",
n.loc = "integer",
ind.names = "charOrNULL",
- loc.names = "charOrNULL"),
- prototype(tab = list(0), n.loc = integer(0), ind.names = NULL, loc.names = NULL))
+ loc.names = "charOrNULL",
+ ploidy = "integer"),
+ prototype(tab = list(0), n.loc = integer(0), ind.names = NULL, loc.names = NULL, ploidy = 1L))
@@ -51,21 +53,53 @@
## handle snp data ##
if(!is.null(input$snp)){
+ ## a vector of raw is provided
if(is.raw(input$snp)){
- x at snp <-input$snp
+ x at snp <-list(input$snp)
}
+ ## a list of raw vectors is provided
+ if(is.list(input$snp)){
+ if(all(sapply(input$snp, class)=="raw")){
+ x at snp <- input$snp
+ }
+ }
+
+ ## a numeric/integer vector is provided
## conversion from a vector of 0/1 (integers)
if(is.numeric(input$snp) | is.integer(input$snp)){
- temp <- na.omit(unique(input$snp))
- if(!all(temp %in% c(0,1))){
- stop("Values of SNPs are not all 0, 1, or NA")
+ input$snp <- as.integer(input$snp)
+ ## determine ploidy
+ if(is.null(input$ploidy)){
+ input$ploidy <- max(input$snp, na.rm=TRUE)
}
+ input$ploidy <- as.integer(input$ploidy)
+ if(input$ploidy<1) stop("Ploidy is less than 1")
- temp <- .bin2raw(input$snp)
- x at snp <- temp$snp
- x at n.loc <- temp$n.loc
- x at NA.posi <- temp$NA.posi
+ ## check values in the vector
+ if(any(input$snp<0 | input$snp>input$ploidy, na.rm=TRUE)){
+ stop("Values of SNPs < 0 or > ploidy")
+ }
+
+
+ ## handle ploidy (may have to split info into binary vectors)
+ x at snp <- list()
+ i <- max(input$snp, na.rm=TRUE) # determine max nb of alleles
+ if(i > 1){ # haplotype can be 0/1/2/...
+ j <- 0 # index for the length of the list @snp
+ while(i > 0){
+ j <- j+1 # update length of the result
+ temp <- as.integer(input$snp==i)
+ x at snp[[j]] <- .bin2raw(temp)$snp # make a vector of 1
+ input$snp <- input$snp-temp # deflate data (subtract the recoded alleles)
+ i <- max(input$snp, na.rm=TRUE) # update the max nb of alleles
+ }
+ } else { # haplotype is only 0/1/NA
+ x at snp[[1]] <- .bin2raw(input$snp)$snp
+ }
+ x at n.loc <- length(input$snp)
+ x at NA.posi <- which(is.na(input$snp))
+ x at ploidy <- input$ploidy
return(x)
}
}
@@ -84,6 +118,12 @@
x at NA.posi <- as.integer(input$NA.posi)
}
+
+ ## handle ploidy ##
+ if(!is.null(input$ploidy)){
+ x at ploidy <- as.integer(input$ploidy)
+ }
+
return(x)
}) # end SNPbin constructor
@@ -111,12 +151,13 @@
###############
setMethod ("show", "SNPbin", function(object){
cat(" === S4 class SNPbin ===")
- if(!is.null(x at label)) {
- cat("\n", x at label)
+ if(!is.null(object at label)) {
+ cat("\n", object at label)
}
cat("\n", nLoc(object), "SNPs coded as bits")
+ cat("\n Ploidy:", object at ploidy)
temp <- round(length(object at NA.posi)/nLoc(object) *100,2)
- cat("\n", length(object at NA.posi), " (", temp," %) missing data\n", sep="")
+ cat("\n ", length(object at NA.posi), " (", temp," %) missing data\n", sep="")
}) # end show method
@@ -130,6 +171,11 @@
})
+setMethod("nLoc","genlight", function(x,...){
+ return(x at n.loc)
+})
+
+
setMethod("$","SNPbin",function(x,name) {
return(slot(x,name))
})
@@ -160,7 +206,7 @@
setMethod("[", signature(x="SNPbin", i="ANY"), function(x, i) {
if (missing(i)) i <- TRUE
temp <- .SNPbin2int(x) # data as integers with NAs
- x <- new("SNPbin", snp=temp[i], label=x at label)
+ x <- new("SNPbin", snp=temp[i], label=x at label, ploidy=x at ploidy)
return(x)
}) # end [] for SNPbin
@@ -182,9 +228,7 @@
## each byte takes a value on [0,255]
## function to code multiple SNPs on a byte
-## 8 combinations of SNPs can be coded on a single byte
-## we use bytes values from [1,128]
-## 200 is a missing value
+## 8 combinations of SNPs can be coded onto a single byte (0->255)
.bin2raw <- function(vecSnp){
## was required in pure-R version
## SNPCOMB <- as.matrix(expand.grid(rep(list(c(0,1)), 8)))
@@ -221,15 +265,28 @@
+###########
+## .raw2bin
+###########
+## convert vector of raw to 0/1 integers
+.raw2bin <- function(x){
+ SNPCOMB <- as.matrix(expand.grid(rep(list(c(0,1)), 8)))
+ colnames(SNPCOMB) <- NULL
+ res <- unlist(lapply(as.integer(x), function(i) SNPCOMB[i+1,]))
+} # end .raw2bin
+
+
+
+
+
#############
## .SNPbin2int
#############
## convert SNPbin to integers (0/1)
.SNPbin2int <- function(x){
- SNPCOMB <- as.matrix(expand.grid(rep(list(c(0,1)), 8)))
- colnames(SNPCOMB) <- NULL
- res <- unlist(lapply(as.integer(x at snp), function(i) SNPCOMB[i+1,]))
- res <- res[1:x at n.loc]
+ res <- lapply(x at snp, .raw2bin)
+ res <- lapply(res, function(e) e[1:x at n.loc])
+ res <- as.integer(Reduce("+", res))
if(length(x at NA.posi)>0){
res[x at NA.posi] <- NA
}
@@ -268,9 +325,34 @@
################################
## testing :
##
-## dat <- sample(c(0,1,NA), 1e6, prob=c(.5, .495, .005), replace=TRUE)
+## HAPLOID DATA
+##
+## library(adegenet)
+
+## dat <- sample(c(0L,1L,NA), 1e6, prob=c(.5, .495, .005), replace=TRUE)
## x <- new("SNPbin", dat)
+## identical(as(x, "integer"),dat) # SHOULD NORMALLY BE TRUE
+## all(as(x, "integer") == dat, na.rm=TRUE) # MUST BE TRUE
+
+## object.size(dat)/object.size(x) # EFFICIENCY OF CONVERSION
+
+
+
+ ## DIPLOID DATA
+## dat <- sample(c(0:2,NA), 1e6, prob=c(.4, .4, .195 ,.005), replace=TRUE)
+## x <- new("SNPbin", dat)
+
## identical(as(x, "integer"),dat) # MUST BE TRUE
## object.size(dat)/object.size(x) # EFFICIENCY OF CONVERSION
+
+
+
+ ## POLYLOID DATA
+## dat <- sample(c(0:5,NA), 1e6, prob=c(rep(.995/6,6), 0.005), replace=TRUE)
+## x <- new("SNPbin", dat)
+
+## identical(as(x, "integer"),dat) # MUST BE TRUE
+
+## object.size(dat)/object.size(x) # EFFICIENCY OF CONVERSION
More information about the adegenet-commits
mailing list