[adegenet-commits] r770 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Jan 20 21:09:31 CET 2011


Author: jombart
Date: 2011-01-20 21:09:30 +0100 (Thu, 20 Jan 2011)
New Revision: 770

Modified:
   pkg/R/SNPbin.R
   pkg/R/import.R
Log:
Finished a first version of read.snp, which works pretty fine.


Modified: pkg/R/SNPbin.R
===================================================================
--- pkg/R/SNPbin.R	2011-01-19 09:44:52 UTC (rev 769)
+++ pkg/R/SNPbin.R	2011-01-20 20:09:30 UTC (rev 770)
@@ -287,7 +287,7 @@
                 warning("Inconsistent length for loc.all - ignoring this argument.")
             } else {
                 ## check string consistency (format is e.g. "a/t")
-                if(any(grep("^[[:alpha:]]{1}/[[:alpha:]]{1}$", input$loc.all) != 1:length(x at gen))){
+                if(any(grep("^[[:alpha:]]{1}/[[:alpha:]]{1}$", input$loc.all) != 1:nLoc(x at gen[[1]]))){
                     input$loc.all <- gsub("[[:space:]]","", input$loc.all)
                     warning("Miss-formed strings in loc.all (must be e.g. 'c/g') - ignoring this argument.")
                 } else {

Modified: pkg/R/import.R
===================================================================
--- pkg/R/import.R	2011-01-19 09:44:52 UTC (rev 769)
+++ pkg/R/import.R	2011-01-20 20:09:30 UTC (rev 770)
@@ -710,3 +710,105 @@
     return(invisible())
 }
 
+
+
+
+
+
+
+#######################
+# Function read.snp
+#######################
+read.snp <- function(file, quiet=FALSE, ...){
+    ##ext <- .readExt(file)
+    ##ext <- toupper(ext)
+    if(!quiet) cat("\n Converting data from a binary SNP data file to a genlight object... \n\n")
+
+    call <- match.call()
+
+
+    ## HANDLE THE COMMENTS ##
+    if(!quiet) cat("\n Reading comments... \n")
+
+    count <- 0L
+    i <- 0
+    while(count < 2L){
+        txt <- scan(file,what="character",sep="\n",quiet=TRUE, skip=i, nmax=1, blank.lines.skip=FALSE)
+        if(length(grep(">>>>", txt)>0)){
+            count <- count + 1L
+        }
+        i <- i+1
+    }
+
+    lines.to.skip <- i
+
+
+    ## READ GENERAL DATA (>>) ##
+    if(!quiet) cat("\n Reading general information... \n")
+
+    misc.info <- list()
+
+    txt <- scan(file,what="character",sep="\n",quiet=TRUE, skip=lines.to.skip, nmax=1)
+
+    while(length(grep(">>", txt))>0){
+        itemName <- gsub(">>","", txt)
+        itemName <- gsub("(^[[:space:]]+)|([[:space:]]+$)", "", itemName)
+        misc.info[itemName] <- scan(file,what="character",sep="\n",quiet=TRUE, skip=lines.to.skip + 1, nmax=1)
+        lines.to.skip <-lines.to.skip + 2
+        txt <- scan(file,what="character",sep="\n",quiet=TRUE, skip=lines.to.skip, nmax=1)
+    }
+
+    ## transform each character string into a vector
+    misc.info <- sapply(misc.info, function(e) unlist(strsplit(e,"[[:space:]]+")))
+
+    ## READ GENOTYPE DATA ##
+    ## one genotype is read/converted at a time to spare RAM
+    if(!quiet) cat("\n Reading genotypes... \n")
+
+    res <- list() # this will be a list of SNPbin objects
+
+    txt <- scan(file,what="character",sep="\n",quiet=TRUE, skip=lines.to.skip, nmax=1)
+
+    while(length(grep(">", txt))>0){
+        indName <- gsub(">","", txt)
+        indName <- gsub("(^[[:space:]]+)|([[:space:]]+$)", "", indName)
+        temp <- strsplit(scan(file,what="character",sep="\n",quiet=TRUE, skip=lines.to.skip + 1, nmax=1), "")[[1]]
+        temp <- suppressWarnings(as.integer(temp))
+        res[[length(res)+1]] <- new("SNPbin", temp)
+        names(res)[length(res)] <- indName
+        lines.to.skip <-lines.to.skip + 2
+        txt <- scan(file,what="character",sep="\n",quiet=TRUE, skip=lines.to.skip, nmax=1)
+    }
+
+
+    ## CHECK CONSISTENCY ##
+    n.loc <- unique(sapply(res, nLoc))
+    n.ind <- length(res)
+
+    if(length(n.loc)>1) {
+        print(n.loc)
+        stop("Differing numbers of loci detected between individuals.")
+    }
+
+    if(!is.null(misc.info$position) && length(misc.info$position)!=n.loc) stop("vector of positions of the SNPs does not match the number of SNPs")
+    if(!is.null(misc.info$allele) && length(misc.info$allele)!=n.loc) stop("vector of alleles of the SNPs does not match the number of SNPs")
+    if(!is.null(misc.info$chromosome) && length(misc.info$chromosome)!=n.loc) stop("vector of chromosomes of the SNPs does not match the number of SNPs")
+    if(!is.null(misc.info$population) && length(misc.info$population)!=n.ind) stop("vector of population of the individuals does not match the number of individuals")
+    if(!is.null(misc.info$ploidy) && length(misc.info$ploidy)>1 && length(misc.info$ploidy)!=n.ind) stop("vector of ploidy of the individuals has more than one value but does not match the number of individuals")
+
+
+    ## BUILD OUTPUT ##
+    ind.names <- names(res)
+    if(!is.null(misc.info$chromosome)){
+        other <- list(chromosome = misc.info$chromosome)
+    } else {
+        other <- list()
+    }
+
+    res <- new("genlight", gen=res, ind.names=ind.names, loc.names=misc.info$position, loc.all=misc.info$allele, ploidy=misc.info$ploidy, other=other)
+
+    if(!quiet) cat("\n...done.\n\n")
+
+    return(res)
+
+} # end read.snp



More information about the adegenet-commits mailing list