[CHNOSZ-commits] r753 - in pkg/CHNOSZ: . R inst man vignettes

Fri Nov 18 01:40:29 CET 2022

Author: jedick
Date: 2022-11-18 01:40:29 +0100 (Fri, 18 Nov 2022)
New Revision: 753

Modified:
   pkg/CHNOSZ/DESCRIPTION
   pkg/CHNOSZ/R/add.protein.R
   pkg/CHNOSZ/R/subcrt.R
   pkg/CHNOSZ/inst/NEWS.Rd
   pkg/CHNOSZ/man/add.protein.Rd
   pkg/CHNOSZ/vignettes/anintro.Rmd
Log:
Put 'sequence' argument first in seq2aa()


Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================

--- pkg/CHNOSZ/DESCRIPTION	2022-10-18 09:49:47 UTC (rev 752)
+++ pkg/CHNOSZ/DESCRIPTION	2022-11-18 00:40:29 UTC (rev 753)
@@ -1,6 +1,6 @@
-Date: 2022-10-16
+Date: 2022-11-18
 Package: CHNOSZ
-Version: 1.9.9-44
+Version: 1.9.9-45
 Title: Thermodynamic Calculations and Diagrams for Geochemistry
 Authors@R: c(
     person("Jeffrey", "Dick", , "j3ffdick at gmail.com", role = c("aut", "cre"),

Modified: pkg/CHNOSZ/R/add.protein.R
===================================================================
--- pkg/CHNOSZ/R/add.protein.R	2022-10-18 09:49:47 UTC (rev 752)
+++ pkg/CHNOSZ/R/add.protein.R	2022-11-18 00:40:29 UTC (rev 753)
@@ -1,17 +1,16 @@
 # CHNOSZ/add.protein.R
-# calculate properties of proteins 20061109 jmd
-# reorganize protein functions 20120513
+# Calculate properties of proteins 20061109 jmd
+# Reorganize protein functions 20120513
 
-# Calculate amino acid counts from a sequence
-seq2aa <- function(protein, sequence) {
+# Count numbers of amino acids in a sequence
+seq2aa <- function(sequence, protein = NA) {
  # Remove newlines and whitespace
   sequence <- gsub("\\s", "", gsub("[\r\n]", "", sequence))
   # Make a data frame from counting the amino acids in the sequence
   caa <- count.aa(sequence)
   colnames(caa) <- aminoacids(3)
-  ip <- pinfo(protein)
   # Now make the data frame
-  po <- strsplit(protein, "_")[[1]]
+  po <- strsplit(as.character(protein), "_")[[1]]
   aa <- data.frame(protein = po[1], organism = po[2], ref = NA, abbrv = NA, stringsAsFactors = FALSE)
   # chains = 1 for any sequence, chains = 0 for no sequence
   chains <- sum(nchar(sequence) > 0)

Modified: pkg/CHNOSZ/R/subcrt.R
===================================================================
--- pkg/CHNOSZ/R/subcrt.R	2022-10-18 09:49:47 UTC (rev 752)
+++ pkg/CHNOSZ/R/subcrt.R	2022-11-18 00:40:29 UTC (rev 753)
@@ -399,6 +399,8 @@
           Tmax <- min(T[T > Ttr])
           if(warn.above) message(paste("subcrt: temperature(s) of", Tmax, "K and above exceed limit for", myname, mystate, status.Ttr))
         }
+        # Use variable-pressure standard Gibbs energy for gases if varP is TRUE (not the default)
+        if(mystate == "gas" & thermo$opt$varP) p.cgl[[ncgl[i]]]$G <- p.cgl[[ncgl[i]]]$G - convert(log10(P), "G", T = T)
       }
     }
     outprops <- c(outprops,p.cgl)
@@ -411,12 +413,6 @@
     outprops <- c(outprops, rep(p.H2O, sum(isH2O == TRUE)))
   }
 
-  # Use variable-pressure standard Gibbs energy for gases
-  isgas <- reaction$state %in% "gas" 
-  if(any(isgas) & "G" %in% eosprop & thermo$opt$varP) {
-    for(i in which(isgas)) outprops[[i]]$G <- outprops[[i]]$G - convert(log10(P), "G", T = T)
-  }
-
   # logK
   if('logK' %in% calcprop) {
     for(i in 1:length(outprops)) {

Modified: pkg/CHNOSZ/inst/NEWS.Rd
===================================================================
--- pkg/CHNOSZ/inst/NEWS.Rd	2022-10-18 09:49:47 UTC (rev 752)
+++ pkg/CHNOSZ/inst/NEWS.Rd	2022-11-18 00:40:29 UTC (rev 753)
@@ -12,7 +12,7 @@
 % links to vignettes 20220723
 \newcommand{\viglink}{\ifelse{html}{\out{<a href="../CHNOSZ/doc/#1.html"><strong>#1.Rmd</strong></a>}}{\bold{#1.Rmd}}}
 
-\section{Changes in CHNOSZ version 1.9.9-43 (2022-10-15)}{
+\section{Changes in CHNOSZ version 1.9.9-45 (2022-11-18)}{
 
   \subsection{MAJOR USER-VISIBLE CHANGES}{
     \itemize{
@@ -195,6 +195,9 @@
       \item \code{axis.label()} produces labels with units delimited by
       parentheses instead of a comma.
 
+      \item \code{seq2aa} now has the \strong{sequence} argument first and a
+      default of NA for \strong{protein} (the protein name).
+
     }
   }
 

Modified: pkg/CHNOSZ/man/add.protein.Rd
===================================================================
--- pkg/CHNOSZ/man/add.protein.Rd	2022-10-18 09:49:47 UTC (rev 752)
+++ pkg/CHNOSZ/man/add.protein.Rd	2022-11-18 00:40:29 UTC (rev 753)
@@ -10,7 +10,7 @@
 
 \usage{
   add.protein(aa, as.residue = FALSE)
-  seq2aa(protein, sequence)
+  seq2aa(sequence, protein = NA)
   aasum(aa, abundance = 1, average = FALSE, protein = NULL, organism = NULL)
 }
 
@@ -17,8 +17,8 @@
 \arguments{
   \item{aa}{data frame, amino acid composition in the format of \code{thermo()$protein}}
   \item{as.residue}{logical, normalize by protein length?}
-  \item{protein}{character, name of protein; numeric, indices of proteins (rownumbers of \code{\link{thermo}$protein})}
   \item{sequence}{character, protein sequence}
+  \item{protein}{character, name of protein; numeric, indices of proteins (rownumbers of \code{\link{thermo}()$protein})}
   \item{abundance}{numeric, abundances of proteins}
   \item{average}{logical, return the weighted average of amino acid counts?}
   \item{organism}{character, name of organism}
@@ -31,8 +31,9 @@
 The purpose of the functions described here is to identify proteins and work with their amino acid compositions.
 From the amino acid compositions, the thermodynamic properties of the proteins can be estimated by group additivity.
 
-\code{seq2aa} returns a data frame of amino acid composition, in the format of \code{thermo()$protein}, corresponding to the provided \code{sequence}.
-Here, the \code{protein} argument indicates the name of the protein with an underscore (e.g. \samp{LYSC_CHICK}).
+\code{seq2aa} returns a data frame of amino acid composition for the provided \code{sequence}, in the format of \code{thermo()$protein}.
+In this function, the value of the \code{protein} argument is put into the \code{protein} column of the result.
+If there is an underscore (e.g. \samp{LYSC_CHICK}), it is used to split the text, and the two parts are put into the \code{protein} and \code{organism} columns.
 
 Given amino acid compositions returned by \code{seq2aa}, \code{add.protein} adds them to \code{thermo()$protein} for use by other functions in CHNOSZ.
 The amino acid compositions of proteins in \code{aa} with the same name as one in \code{thermo()$protein} are replaced.
@@ -54,7 +55,7 @@
 \dontshow{reset()}
 # Get the amino acid composition of a protein sequence
 # (Human Gastric juice peptide 1)
-aa <- seq2aa("GAJU_HUMAN", "LAAGKVEDSD")
+aa <- seq2aa("LAAGKVEDSD", "GAJU_HUMAN")
 # Add the protein to CHNOSZ
 ip <- add.protein(aa)
 # Calculate the protein length and chemical formula
@@ -62,10 +63,10 @@
 as.chemical.formula(protein.formula(ip)) # "C41H69N11O18"
 
 # Calculate a formula without using add.protein
-aa <- seq2aa("pentapeptide_test", "ANLSG")
+aa <- seq2aa("ANLSG", "pentapeptide_test")
 as.chemical.formula(protein.formula(aa))
 
-# Add the amino acid compositions of several poliovirus protein subunits
+# Sum the amino acid compositions of several poliovirus protein subunits
 file <- system.file("extdata/protein/POLG.csv", package = "CHNOSZ")
 aa <- read.csv(file, as.is = TRUE)
 aasum(aa, protein = "POLG_sum")

Modified: pkg/CHNOSZ/vignettes/anintro.Rmd
===================================================================
--- pkg/CHNOSZ/vignettes/anintro.Rmd	2022-10-18 09:49:47 UTC (rev 752)
+++ pkg/CHNOSZ/vignettes/anintro.Rmd	2022-11-18 00:40:29 UTC (rev 753)
@@ -1444,13 +1444,13 @@
 See also <span style="color:blue">`?count.aa`</span>, which can process both protein and nucleic acid sequences.
 ```
 ```{r seq2aa}
-aa_PRIO <- seq2aa("PRIO_HUMAN", "
+aa_PRIO <- seq2aa("
 MANLGCWMLVLFVATWSDLGLCKKRPKPGGWNTGGSRYPGQGSPGGNRYPPQGGGGWGQP
 HGGGWGQPHGGGWGQPHGGGWGQPHGGGWGQGGGTHSQWNKPSKPKTNMKHMAGAAAAGA
 VVGGLGGYMLGSAMSRPIIHFGSDYEDRYYRENMHRYPNQVYYRPMDEYSNQNNFVHDCV
 NITIKQHTVTTTTKGENFTETDVKMMERVVEQMCITQYERESQAYYQRGSSMVLFSSPPV
 ILLISFLIFLIVG
-")
+", "PRIO_HUMAN")
 ```
 <span style="color:green">`uniprot.aa()`</span> returns the amino acid composition of a single amino acid sequence downloaded from UniProt.
 To get sequences for many proteins, use R's `lapply()`, `do.call()`, and `rbind()`:
@@ -1465,7 +1465,7 @@
 ```
 
 ```{r uniprot_aa_offline, echo=FALSE}
-aa_ALAT1 <- seq2aa("ALAT1_HUMAN", "
+aa_ALAT1 <- seq2aa("
 MASSTGDRSQAVRHGLRAKVLTLDGMNPRVRRVEYAVRGPIVQRALELEQELRQGVKKPF
 TEVIRANIGDAQAMGQRPITFLRQVLALCVNPDLLSSPNFPDDAKKRAERILQACGGHSL
 GAYSVSSGIQLIREDVARYIERRDGGIPADPNNVFLSTGASDAIVTVLKLLVAGEGHTRT
@@ -1475,8 +1475,8 @@
 VVSPPAPTDPSFAQFQAEKQAVLAELAAKAKLTEQVFNEAPGISCNPVQGAMYSFPRVQL
 PPRAVERAQELGLAPDMFFCLRLLEETGICVVPGSGFGQREGTYHFRMTILPPLEKLRLL
 LEKLSRFHAKFTLEYS
-")
-aa_CO1A1 <- seq2aa("CO1A1_HUMAN", "
+", "ALAT1_HUMAN")
+aa_CO1A1 <- seq2aa("
 MFSFVDLRLLLLLAATALLTHGQEEGQVEGQDEDIPPITCVQNGLRYHDRDVWKPEPCRI
 CVCDNGKVLCDDVICDETKNCPGAEVPEGECCPVCPDGSESPTDQETTGVEGPKGDTGPR
 GPRGPAGPPGRDGIPGQPGLPGPPGPPGPPGPPGLGGNFAPQLSYGYDEKSTGGISVPGP
@@ -1502,7 +1502,7 @@
 KRHVWFGESMTDGFQFEYGGQGSDPADVAIQLTFLRLMSTEASQNITYHCKNSVAYMDQQ
 TGNLKKALLLQGSNEIEIRAEGNSRFTYSVTVDGCTSHTGAWGKTVIEYKTTKTSRLPII
 DVAPLDVGAPDQEFGFDVGPVCFL
-")
+", "CO1A1_HUMAN")
 aa_UniProt <- rbind(aa_ALAT1, aa_CO1A1)
 aa_UniProt$abbrv <- c("ALAT1", "CO1A1")
 ```