[Seqinr-commits] r1565 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Mar 11 11:01:14 CET 2009


Author: lobry
Date: 2009-03-11 11:01:12 +0100 (Wed, 11 Mar 2009)
New Revision: 1565

Added:
   pkg/R/readBins.R
   pkg/R/readPanels.R
Log:
new utility functions

Added: pkg/R/readBins.R
===================================================================
--- pkg/R/readBins.R	                        (rev 0)
+++ pkg/R/readBins.R	2009-03-11 10:01:12 UTC (rev 1565)
@@ -0,0 +1,59 @@
+# Read a tab-separated "bins" file into a nested list.
+#
+# The file is scanned for lines starting with "Panel Name\t"; inside each
+# panel, every line starting with "Marker Name\t" opens a table of bins that
+# runs until the next marker header or the end of the panel.
+#
+# Arguments:
+#   file     - handed to readLines() to get the lines to parse.
+#   colnames - column names given to every table of bins.
+#
+# Value: a list whose first element, `infos`, is a data frame read from the
+#   first three lines of the file, followed by one element per panel, named
+#   by the second field of the panel header. Each panel is a list with one
+#   data frame per marker, named by the second field of the marker header; a
+#   marker without any bin line yields a zero-row data frame.
+readBins <- function(file,
+  colnames = c("allele.name", "size.bp", "minus.bp", "plus.bp")){
+  # Parse tab-separated lines; on.exit() closes the connection even when
+  # read.table() fails.
+  parseLines <- function(lines, ...){
+    mycon <- textConnection(lines)
+    on.exit(close(mycon))
+    read.table(mycon, sep = "\t", ...)
+  }
+  # Like from:to, but empty instead of counting down when to < from.
+  span <- function(from, to) from - 1 + seq_len(max(to - from + 1, 0))
+
+  src <- readLines(file)
+  iPanel <- which(substr(src, start = 1, stop = 11) == "Panel Name\t")
+  infos <- parseLines(src[1:3], fill = TRUE, header = FALSE)
+  result <- list(infos = infos)
+
+  starts <- iPanel + 1
+  stops  <- c(iPanel[-1] - 1, length(src))
+  for(i in seq_along(iPanel)){
+    locsrc <- src[span(starts[i], stops[i])]
+    iMark <- which(substr(locsrc, start = 1, stop = 12) == "Marker Name\t")
+    locres <- vector(mode = "list", length = length(iMark))
+    locstarts <- iMark + 1
+    locstops <- c(iMark[-1] - 1, length(locsrc))
+    for(j in seq_along(iMark)){
+      binlines <- locsrc[span(locstarts[j], locstops[j])]
+      if(length(binlines) > 0){
+        bins <- parseLines(binlines)
+      } else {
+        # Marker header directly followed by another header or by the end
+        # of the panel: there is nothing for read.table() to parse.
+        bins <- as.data.frame(
+          matrix(logical(0), nrow = 0, ncol = length(colnames)))
+      }
+      colnames(bins) <- colnames
+      locres[[j]] <- bins
+      names(locres)[j] <- unlist(strsplit(locsrc[iMark[j]], split = "\t"))[2]
+    }
+    result[[i+1]] <- locres
+    names(result)[i+1] <- unlist(strsplit(src[iPanel[i]], split = "\t"))[2]
+  }
+  return(result)
+}

Added: pkg/R/readPanels.R
===================================================================
--- pkg/R/readPanels.R	                        (rev 0)
+++ pkg/R/readPanels.R	2009-03-11 10:01:12 UTC (rev 1565)
@@ -0,0 +1,59 @@
+# Read a tab-separated "panels" file into a list of data frames.
+#
+# Every line starting with "Panel\t" opens a panel; the lines up to the next
+# such header, or up to the end of the file, are parsed as its table.
+#
+# Arguments:
+#   file     - handed to readLines() to get the lines to parse.
+#   colnames - names given to the nine columns of every panel table.
+#
+# Value: a list whose first element, `infos`, holds the raw lines found
+#   before the first panel header (all the lines when there is no panel),
+#   followed by one data frame per panel, named by the second non-empty
+#   field of the panel header. A panel without any line yields a zero-row
+#   data frame.
+#
+# Stops with "wrong column number" when a panel table does not have exactly
+# nine columns once its empty (all NA) columns are removed.
+readPanels <- function(file,
+  colnames = c("marker", "dye.col", "min.bp", "max.bp", "exp.pcg", "repeat.bp",
+    "stutter.pc", "uknw", "allele names")){
+  src <- readLines(file)
+  iPanel <- which(substr(src, start = 1, stop = 6) == "Panel\t")
+
+  # seq_len() and not 1:n, which would count down to 0 and pick the header
+  # itself when the first line of the file is already a panel header.
+  ninfos <- if(length(iPanel) > 0) iPanel[1] - 1 else length(src)
+  result <- list(infos = src[seq_len(ninfos)])
+
+  starts <- iPanel + 1
+  stops  <- c(iPanel[-1] - 1, length(src))
+
+  for(i in seq_along(iPanel)){
+    # Empty when the header is directly followed by another header or by
+    # the end of the file.
+    locsrc <- src[starts[i] - 1 + seq_len(stops[i] - starts[i] + 1)]
+    if(length(locsrc) > 0){
+      mycon <- textConnection(locsrc)
+      # finally: the connection is closed even when read.table() fails
+      panel <- tryCatch(read.table(mycon, sep = "\t", quote = ""),
+        finally = close(mycon))
+      # remove empty columns; drop = FALSE keeps a data frame even when a
+      # single column is left, so that ncol() below stays meaningful
+      tokeep <- !vapply(panel, function(x) all(is.na(x)), logical(1))
+      panel <- panel[, tokeep, drop = FALSE]
+      # There must be 9 columns
+      if(ncol(panel) != 9) stop("wrong column number")
+    } else {
+      panel <- as.data.frame(matrix(logical(0), nrow = 0, ncol = 9))
+    }
+    colnames(panel) <- colnames
+    result[[i+1]] <- panel
+
+    headeritems <- unlist(strsplit(src[iPanel[i]], split = "\t"))
+    # remove empty elements
+    headeritems <- headeritems[nchar(headeritems) > 0]
+    names(result)[i+1] <- headeritems[2]
+  }
+  return(result)
+}



More information about the Seqinr-commits mailing list