[CHNOSZ-commits] r753 - in pkg/CHNOSZ: . R inst man vignettes
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Nov 18 01:40:29 CET 2022
Author: jedick
Date: 2022-11-18 01:40:29 +0100 (Fri, 18 Nov 2022)
New Revision: 753
Modified:
pkg/CHNOSZ/DESCRIPTION
pkg/CHNOSZ/R/add.protein.R
pkg/CHNOSZ/R/subcrt.R
pkg/CHNOSZ/inst/NEWS.Rd
pkg/CHNOSZ/man/add.protein.Rd
pkg/CHNOSZ/vignettes/anintro.Rmd
Log:
Put 'sequence' argument first in seq2aa()
Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================
--- pkg/CHNOSZ/DESCRIPTION 2022-10-18 09:49:47 UTC (rev 752)
+++ pkg/CHNOSZ/DESCRIPTION 2022-11-18 00:40:29 UTC (rev 753)
@@ -1,6 +1,6 @@
-Date: 2022-10-16
+Date: 2022-11-18
Package: CHNOSZ
-Version: 1.9.9-44
+Version: 1.9.9-45
Title: Thermodynamic Calculations and Diagrams for Geochemistry
Authors@R: c(
person("Jeffrey", "Dick", , "j3ffdick at gmail.com", role = c("aut", "cre"),
Modified: pkg/CHNOSZ/R/add.protein.R
===================================================================
--- pkg/CHNOSZ/R/add.protein.R 2022-10-18 09:49:47 UTC (rev 752)
+++ pkg/CHNOSZ/R/add.protein.R 2022-11-18 00:40:29 UTC (rev 753)
@@ -1,17 +1,16 @@
# CHNOSZ/add.protein.R
-# calculate properties of proteins 20061109 jmd
-# reorganize protein functions 20120513
+# Calculate properties of proteins 20061109 jmd
+# Reorganize protein functions 20120513
-# Calculate amino acid counts from a sequence
-seq2aa <- function(protein, sequence) {
+# Count numbers of amino acids in a sequence
+seq2aa <- function(sequence, protein = NA) {
# Remove newlines and whitespace
sequence <- gsub("\\s", "", gsub("[\r\n]", "", sequence))
# Make a data frame from counting the amino acids in the sequence
caa <- count.aa(sequence)
colnames(caa) <- aminoacids(3)
- ip <- pinfo(protein)
# Now make the data frame
- po <- strsplit(protein, "_")[[1]]
+ po <- strsplit(as.character(protein), "_")[[1]]
aa <- data.frame(protein = po[1], organism = po[2], ref = NA, abbrv = NA, stringsAsFactors = FALSE)
# chains = 1 for any sequence, chains = 0 for no sequence
chains <- sum(nchar(sequence) > 0)
Modified: pkg/CHNOSZ/R/subcrt.R
===================================================================
--- pkg/CHNOSZ/R/subcrt.R 2022-10-18 09:49:47 UTC (rev 752)
+++ pkg/CHNOSZ/R/subcrt.R 2022-11-18 00:40:29 UTC (rev 753)
@@ -399,6 +399,8 @@
Tmax <- min(T[T > Ttr])
if(warn.above) message(paste("subcrt: temperature(s) of", Tmax, "K and above exceed limit for", myname, mystate, status.Ttr))
}
+ # Use variable-pressure standard Gibbs energy for gases if varP is TRUE (not the default)
+ if(mystate == "gas" & thermo$opt$varP) p.cgl[[ncgl[i]]]$G <- p.cgl[[ncgl[i]]]$G - convert(log10(P), "G", T = T)
}
}
outprops <- c(outprops,p.cgl)
@@ -411,12 +413,6 @@
outprops <- c(outprops, rep(p.H2O, sum(isH2O == TRUE)))
}
- # Use variable-pressure standard Gibbs energy for gases
- isgas <- reaction$state %in% "gas"
- if(any(isgas) & "G" %in% eosprop & thermo$opt$varP) {
- for(i in which(isgas)) outprops[[i]]$G <- outprops[[i]]$G - convert(log10(P), "G", T = T)
- }
-
# logK
if('logK' %in% calcprop) {
for(i in 1:length(outprops)) {
Modified: pkg/CHNOSZ/inst/NEWS.Rd
===================================================================
--- pkg/CHNOSZ/inst/NEWS.Rd 2022-10-18 09:49:47 UTC (rev 752)
+++ pkg/CHNOSZ/inst/NEWS.Rd 2022-11-18 00:40:29 UTC (rev 753)
@@ -12,7 +12,7 @@
% links to vignettes 20220723
\newcommand{\viglink}{\ifelse{html}{\out{<a href="../CHNOSZ/doc/#1.html"><strong>#1.Rmd</strong></a>}}{\bold{#1.Rmd}}}
-\section{Changes in CHNOSZ version 1.9.9-43 (2022-10-15)}{
+\section{Changes in CHNOSZ version 1.9.9-45 (2022-11-18)}{
\subsection{MAJOR USER-VISIBLE CHANGES}{
\itemize{
@@ -195,6 +195,9 @@
\item \code{axis.label()} produces labels with units delimited by
parentheses instead of a comma.
+ \item \code{seq2aa} now has the \strong{sequence} argument first and a
+ default of NA for \strong{protein} (the protein name).
+
}
}
Modified: pkg/CHNOSZ/man/add.protein.Rd
===================================================================
--- pkg/CHNOSZ/man/add.protein.Rd 2022-10-18 09:49:47 UTC (rev 752)
+++ pkg/CHNOSZ/man/add.protein.Rd 2022-11-18 00:40:29 UTC (rev 753)
@@ -10,7 +10,7 @@
\usage{
add.protein(aa, as.residue = FALSE)
- seq2aa(protein, sequence)
+ seq2aa(sequence, protein = NA)
aasum(aa, abundance = 1, average = FALSE, protein = NULL, organism = NULL)
}
@@ -17,8 +17,8 @@
\arguments{
\item{aa}{data frame, amino acid composition in the format of \code{thermo()$protein}}
\item{as.residue}{logical, normalize by protein length?}
- \item{protein}{character, name of protein; numeric, indices of proteins (rownumbers of \code{\link{thermo}$protein})}
\item{sequence}{character, protein sequence}
+ \item{protein}{character, name of protein; numeric, indices of proteins (rownumbers of \code{\link{thermo}()$protein})}
\item{abundance}{numeric, abundances of proteins}
\item{average}{logical, return the weighted average of amino acid counts?}
\item{organism}{character, name of organism}
@@ -31,8 +31,9 @@
The purpose of the functions described here is to identify proteins and work with their amino acid compositions.
From the amino acid compositions, the thermodynamic properties of the proteins can be estimated by group additivity.
-\code{seq2aa} returns a data frame of amino acid composition, in the format of \code{thermo()$protein}, corresponding to the provided \code{sequence}.
-Here, the \code{protein} argument indicates the name of the protein with an underscore (e.g. \samp{LYSC_CHICK}).
+\code{seq2aa} returns a data frame of amino acid composition for the provided \code{sequence}, in the format of \code{thermo()$protein}.
+In this function, the value of the \code{protein} argument is put into the \code{protein} column of the result.
+If there is an underscore (e.g. \samp{LYSC_CHICK}), it is used to split the text, and the two parts are put into the \code{protein} and \code{organism} columns.
Given amino acid compositions returned by \code{seq2aa}, \code{add.protein} adds them to \code{thermo()$protein} for use by other functions in CHNOSZ.
The amino acid compositions of proteins in \code{aa} with the same name as one in \code{thermo()$protein} are replaced.
@@ -54,7 +55,7 @@
\dontshow{reset()}
# Get the amino acid composition of a protein sequence
# (Human Gastric juice peptide 1)
-aa <- seq2aa("GAJU_HUMAN", "LAAGKVEDSD")
+aa <- seq2aa("LAAGKVEDSD", "GAJU_HUMAN")
# Add the protein to CHNOSZ
ip <- add.protein(aa)
# Calculate the protein length and chemical formula
@@ -62,10 +63,10 @@
as.chemical.formula(protein.formula(ip)) # "C41H69N11O18"
# Calculate a formula without using add.protein
-aa <- seq2aa("pentapeptide_test", "ANLSG")
+aa <- seq2aa("ANLSG", "pentapeptide_test")
as.chemical.formula(protein.formula(aa))
-# Add the amino acid compositions of several poliovirus protein subunits
+# Sum the amino acid compositions of several poliovirus protein subunits
file <- system.file("extdata/protein/POLG.csv", package = "CHNOSZ")
aa <- read.csv(file, as.is = TRUE)
aasum(aa, protein = "POLG_sum")
Modified: pkg/CHNOSZ/vignettes/anintro.Rmd
===================================================================
--- pkg/CHNOSZ/vignettes/anintro.Rmd 2022-10-18 09:49:47 UTC (rev 752)
+++ pkg/CHNOSZ/vignettes/anintro.Rmd 2022-11-18 00:40:29 UTC (rev 753)
@@ -1444,13 +1444,13 @@
See also <span style="color:blue">`?count.aa`</span>, which can process both protein and nucleic acid sequences.
```
```{r seq2aa}
-aa_PRIO <- seq2aa("PRIO_HUMAN", "
+aa_PRIO <- seq2aa("
MANLGCWMLVLFVATWSDLGLCKKRPKPGGWNTGGSRYPGQGSPGGNRYPPQGGGGWGQP
HGGGWGQPHGGGWGQPHGGGWGQPHGGGWGQGGGTHSQWNKPSKPKTNMKHMAGAAAAGA
VVGGLGGYMLGSAMSRPIIHFGSDYEDRYYRENMHRYPNQVYYRPMDEYSNQNNFVHDCV
NITIKQHTVTTTTKGENFTETDVKMMERVVEQMCITQYERESQAYYQRGSSMVLFSSPPV
ILLISFLIFLIVG
-")
+", "PRIO_HUMAN")
```
<span style="color:green">`uniprot.aa()`</span> returns the amino acid composition of a single amino acid sequence downloaded from UniProt.
To get sequences for many proteins, use R's `lapply()`, `do.call()`, and `rbind()`:
@@ -1465,7 +1465,7 @@
```
```{r uniprot_aa_offline, echo=FALSE}
-aa_ALAT1 <- seq2aa("ALAT1_HUMAN", "
+aa_ALAT1 <- seq2aa("
MASSTGDRSQAVRHGLRAKVLTLDGMNPRVRRVEYAVRGPIVQRALELEQELRQGVKKPF
TEVIRANIGDAQAMGQRPITFLRQVLALCVNPDLLSSPNFPDDAKKRAERILQACGGHSL
GAYSVSSGIQLIREDVARYIERRDGGIPADPNNVFLSTGASDAIVTVLKLLVAGEGHTRT
@@ -1475,8 +1475,8 @@
VVSPPAPTDPSFAQFQAEKQAVLAELAAKAKLTEQVFNEAPGISCNPVQGAMYSFPRVQL
PPRAVERAQELGLAPDMFFCLRLLEETGICVVPGSGFGQREGTYHFRMTILPPLEKLRLL
LEKLSRFHAKFTLEYS
-")
-aa_CO1A1 <- seq2aa("CO1A1_HUMAN", "
+", "ALAT1_HUMAN")
+aa_CO1A1 <- seq2aa("
MFSFVDLRLLLLLAATALLTHGQEEGQVEGQDEDIPPITCVQNGLRYHDRDVWKPEPCRI
CVCDNGKVLCDDVICDETKNCPGAEVPEGECCPVCPDGSESPTDQETTGVEGPKGDTGPR
GPRGPAGPPGRDGIPGQPGLPGPPGPPGPPGPPGLGGNFAPQLSYGYDEKSTGGISVPGP
@@ -1502,7 +1502,7 @@
KRHVWFGESMTDGFQFEYGGQGSDPADVAIQLTFLRLMSTEASQNITYHCKNSVAYMDQQ
TGNLKKALLLQGSNEIEIRAEGNSRFTYSVTVDGCTSHTGAWGKTVIEYKTTKTSRLPII
DVAPLDVGAPDQEFGFDVGPVCFL
-")
+", "CO1A1_HUMAN")
aa_UniProt <- rbind(aa_ALAT1, aa_CO1A1)
aa_UniProt$abbrv <- c("ALAT1", "CO1A1")
```
More information about the CHNOSZ-commits
mailing list