[Seqinr-commits] r1501 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Nov 3 11:37:37 CET 2008
Author: lobry
Date: 2008-11-03 11:37:37 +0100 (Mon, 03 Nov 2008)
New Revision: 1501
Added:
pkg/R/read.abif.R
Log:
new function read.abif
Added: pkg/R/read.abif.R
===================================================================
--- pkg/R/read.abif.R (rev 0)
+++ pkg/R/read.abif.R 2008-11-03 10:37:37 UTC (rev 1501)
@@ -0,0 +1,149 @@
+read.abif <- function(filename, max.bytes.in.file = file.info(filename)$size,
+ pied.de.pilote = 1.2, verbose = FALSE){
+ #
+ # Define some shortcuts:
+ #
+ SInt32 <- function(f, ...) readBin(f, what = "integer", signed = TRUE, endian = "big", size = 4, ...)
+ SInt16 <- function(f, ...) readBin(f, what = "integer", signed = TRUE, endian = "big", size = 2, ...)
+ SInt8 <- function(f, ...) readBin(f, what = "integer", signed = TRUE, endian = "big", size = 1, ...)
+ UInt32 <- function(f, ...) readBin(f, what = "integer", signed = FALSE, endian = "big", size = 4, ...)
+ UInt16 <- function(f, ...) readBin(f, what = "integer", signed = FALSE, endian = "big", size = 2, ...)
+ UInt8 <- function(f, ...) readBin(f, what = "integer", signed = FALSE, endian = "big", size = 1, ...)
+ f32 <- function(f, ...) readBin(f, what = "numeric", size = 4, ...)
+ f64 <- function(f, ...) readBin(f, what = "numeric", size = 8, ...)
+ #
+ # Load raw data in memory:
+ #
+ fc <- file(filename, open = "rb")
+ rawdata <- readBin(fc, what = "raw", n = pied.de.pilote*max.bytes.in.file)
+ if(verbose) print(paste("number of bytes in file", filename, "is", length(rawdata)))
+ close(fc)
+ #
+ # Make a list to store results:
+ #
+ res <- list(Header = NULL, Directory = NA, Data = NA)
+ #
+ # Header section is 128 bytes long, located at a fixed position at the
+ # beginning of the file. We essentially need the number of item and dataoffset
+ #
+ res$Header$abif <- rawToChar(rawdata[1:4])
+ if(res$Header$abif != "ABIF") stop("file not in ABIF format")
+ if(verbose) print("OK: File is in ABIF format")
+
+ res$Header$version <- SInt16(rawdata[5:6])
+ if(verbose) print(paste("File in ABIF version", res$Header$version/100))
+
+ res$Header$DirEntry.name <- rawdata[7:10]
+ if(verbose) print(paste("DirEntry name: ", rawToChar(res$Header$DirEntry.name)))
+
+ res$Header$DirEntry.number <- SInt32(rawdata[11:14])
+ if(verbose) print(paste("DirEntry number: ", res$Header$DirEntry.number))
+
+ res$Header$DirEntry.elementtype <- SInt16(rawdata[15:16])
+ if(verbose) print(paste("DirEntry elementtype: ", res$Header$DirEntry.elementtype))
+
+ res$Header$DirEntry.elementsize <- SInt16(rawdata[17:18])
+ if(verbose) print(paste("DirEntry elementsize: ", res$Header$DirEntry.elementsize))
+
+ # This one is important:
+ res$Header$numelements <- SInt32(rawdata[19:22])
+ if(verbose) print(paste("DirEntry numelements: ", res$Header$numelements))
+
+ # This one is important too:
+ res$Header$dataoffset <- SInt32(rawdata[27:30])
+ if(verbose) print(paste("DirEntry dataoffset: ", res$Header$dataoffset))
+ dataoffset <- res$Header$dataoffset + 1 # start position is 1 in R vectors
+
+ res$Header$datahandle <- SInt32(rawdata[31:34])
+ if(verbose) print(paste("DirEntry datahandle: ", res$Header$datahandle))
+
+ res$Header$unused <- SInt16(rawdata[35:128], n = 47)
+ # Should be ingnored and set to zero
+ res$Header$unused[1:length(res$Header$unused)] <- 0
+ if(verbose) print(paste("DirEntry unused: ", length(res$Header$unused), "2-byte integers"))
+
+ #
+ # The directory is located at the offset specified in the header,
+ # and consist of an array of directory entries.
+ # We scan the directory to put values in a data.frame:
+ #
+ dirdf <- data.frame(list(name = character(0)))
+ dirdf$name <- as.character(dirdf$name) # force to characters
+
+ for(i in seq_len(res$Header$numelements)){
+ deb <- (i-1)*res$Header$DirEntry.elementsize + dataoffset
+ direntry <- rawdata[deb:(deb + res$Header$DirEntry.elementsize)]
+ dirdf[i, "name"] <- rawToChar(direntry[1:4])
+ dirdf[i, "tagnumber"] <- SInt32(direntry[5:8])
+ dirdf[i, "elementtype"] <- SInt16(direntry[9:10])
+ dirdf[i, "elementsize"] <- SInt16(direntry[11:12])
+ dirdf[i, "numelements"] <- SInt32(direntry[13:16])
+ dirdf[i, "datasize"] <- SInt32(direntry[17:20])
+ dirdf[i, "dataoffset"] <- SInt32(direntry[21:24])
+ }
+ if(verbose){
+ print("Element found:")
+ print(dirdf$name)
+ }
+ #
+ # Save Directory and make a list to store data:
+ #
+ res$Directory <- dirdf
+ res$Data <- vector("list", nrow(dirdf))
+ names(res$Data) <- paste(dirdf$name, dirdf$tagnumber, sep = ".")
+ #
+ # Data extraction:
+ #
+ for(i in seq_len(res$Header$numelements)){
+ deb <- (i-1)*res$Header$DirEntry.elementsize + dataoffset
+ # Short data are stored in dataoffset directly:
+ if(dirdf[i, "datasize"] > 4){
+ debinraw <- dirdf[i, "dataoffset"] + 1
+ } else {
+ debinraw <- deb + 20
+ }
+ elementtype <- dirdf[i, "elementtype"]
+ numelements <- dirdf[i, "numelements"]
+ elementsize <- dirdf[i, "elementsize"]
+ data <- rawdata[debinraw:(debinraw + numelements*elementsize)]
+ # unsigned 8 bits integer:
+ if(elementtype == 1) res$Data[[i]] <- UInt8(data, n = numelements)
+ # char or signed 8 bits integer
+ if(elementtype == 2) res$Data[[i]] <- suppressWarnings(rawToChar(data))
+ # unsigned 16 bits integer:
+ if(elementtype == 3) res$Data[[i]] <- UInt16(data, n = numelements)
+ # short:
+ if(elementtype == 4) res$Data[[i]] <- SInt16(data, n = numelements)
+ # long:
+ if(elementtype == 5) res$Data[[i]] <- SInt32(data, n = numelements)
+ # float:
+ if(elementtype == 7) res$Data[[i]] <- f32(data, n = numelements)
+ # double:
+ if(elementtype == 8) res$Data[[i]] <- f64(data, n = numelements)
+ # date:
+ if(elementtype == 10)
+ res$Data[[i]] <- list(year = SInt16(data, n = 1),
+ month = UInt8(data[-(1:2)], n = 1), day = UInt8(data[-(1:3)], n = 1))
+ # time:
+ if(elementtype == 11)
+ res$Data[[i]] <- list(hour = UInt8(data, n = 1),
+ minute = UInt8(data[-1], n = 1), second = UInt8(data[-(1:2)], n = 1),
+ hsecond = UInt8(data[-(1:3)], n = 1))
+ # pString:
+ if(elementtype == 18){
+ n <- SInt8(rawdata[debinraw])
+ pString <- rawToChar(rawdata[(debinraw+1):(debinraw+n)])
+ res$Data[[i]] <- pString
+ }
+ # cString:
+ if(elementtype == 19) res$Data[[i]] <- rawToChar(data[1:(length(data) - 1) ])
+ # user:
+ if(elementtype >= 1024) res$Data[[i]] <- data
+ # legacy:
+ if(elementtype %in% c(12, 13))
+ warning("unimplemented legacy type found in file")
+ if(elementtype %in% c(6, 9, 14, 15, 16, 17, 20, 128, 256, 384))
+ warning("unsupported legacy type found in file")
+ }
+ return(res)
+}
More information about the Seqinr-commits
mailing list