[CHNOSZ-commits] r305 - in pkg/CHNOSZ: . R inst

Tue Feb 27 02:20:38 CET 2018

Author: jedick
Date: 2018-02-27 02:20:37 +0100 (Tue, 27 Feb 2018)
New Revision: 305

Modified:
   pkg/CHNOSZ/DESCRIPTION
   pkg/CHNOSZ/R/util.fasta.R
   pkg/CHNOSZ/inst/NEWS
Log:
read.fasta(): handle archive::archive_read input connections


Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================

--- pkg/CHNOSZ/DESCRIPTION	2018-02-17 17:47:07 UTC (rev 304)
+++ pkg/CHNOSZ/DESCRIPTION	2018-02-27 01:20:37 UTC (rev 305)
@@ -1,6 +1,6 @@
-Date: 2018-02-18
+Date: 2018-02-27
 Package: CHNOSZ
-Version: 1.1.3-12
+Version: 1.1.3-13
 Title: Thermodynamic Calculations for Geobiochemistry
 Authors@R: c(
     person("Jeffrey", "Dick", , "j3ffdick at gmail.com", role = c("aut", "cre"),

Modified: pkg/CHNOSZ/R/util.fasta.R
===================================================================
--- pkg/CHNOSZ/R/util.fasta.R	2018-02-17 17:47:07 UTC (rev 304)
+++ pkg/CHNOSZ/R/util.fasta.R	2018-02-27 01:20:37 UTC (rev 305)
@@ -14,11 +14,23 @@
   #   fas: fasta entry
   # value of 'id' is used for 'protein' in output table,
   #   otherwise ID is parsed from FASTA header (can take a while)
+  
+  # check if the file is in an archive (https://github.com/jimhester/archive)
+  if("archive_read" %in% class(file)) {
+    is.archive <- TRUE
+    filebase <- gsub("]", "", basename(summary(file)$description))
+  } else {
+    is.archive <- FALSE
+    filebase <- basename(file)
+  }
   if(is.null(lines)) {
-    message("read.fasta: reading ", basename(file), " ... ", appendLF=FALSE)
-    #lines <- readLines(file)
+    message("read.fasta: reading ", filebase, " ... ", appendLF=FALSE)
     is.nix <- Sys.info()[[1]]=="Linux"
-    if(is.nix) {
+    if(is.archive) {
+      # we can't use scan here?
+      lines <- readLines(file)
+    } else if(is.nix) {
+      # retrieve contents using system command (seems slightly faster even than scan())
       # figure out whether to use 'cat', 'zcat' or 'xzcat'
       suffix <- substr(file,nchar(file)-2,nchar(file))
       if(suffix==".gz") mycat <- "zcat"
@@ -49,7 +61,7 @@
   sequences <- lapply(iseq, seqfun)
   # organism name is from file name
   # (basename minus extension)
-  bnf <- strsplit(basename(file),split=".",fixed=TRUE)[[1]][1]
+  bnf <- strsplit(filebase,split=".",fixed=TRUE)[[1]][1]
   organism <- bnf
   # protein/gene name is from header line for entry
   # (strip the ">" and go to the first space)
@@ -60,7 +72,7 @@
     # stop if the first character is not ">"
     # or the first two charaters are "> "
     if(substr(f1,1,1)!=">" | length(grep("^> ",f1)>0))
-      stop(paste("file",basename(file),"line",j,"doesn't begin with FASTA header '>'."))
+      stop(paste("file",filebase,"line",j,"doesn't begin with FASTA header '>'."))
     # discard the leading '>'
     f2 <- substr(f1, 2, nchar(f1))
     # keep everything before the first space

Modified: pkg/CHNOSZ/inst/NEWS
===================================================================
--- pkg/CHNOSZ/inst/NEWS	2018-02-17 17:47:07 UTC (rev 304)
+++ pkg/CHNOSZ/inst/NEWS	2018-02-27 01:20:37 UTC (rev 305)
@@ -1,4 +1,4 @@
-CHANGES IN CHNOSZ 1.1.3-12 (2018-02-18)
+CHANGES IN CHNOSZ 1.1.3-13 (2018-02-27)
 ---------------------------------------
 
 - Lines in 1-D diagram()s can optionally be drawn as splines using the
@@ -31,6 +31,9 @@
 - Add C implementation of counting occurrences of all letters in a
   string (src/count_letters.c) to speed up operation of count.aa().
 
+- read.fasta(): add support for file connections created using
+  archive::archive_read (https://github.com/jimhester/archive).
+
 CHANGES IN CHNOSZ 1.1.3 (2017-11-13)
 ------------------------------------