[CHNOSZ-commits] r748 - in pkg/CHNOSZ: . R inst man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Sep 29 11:08:20 CEST 2022


Author: jedick
Date: 2022-09-29 11:08:20 +0200 (Thu, 29 Sep 2022)
New Revision: 748

Modified:
   pkg/CHNOSZ/DESCRIPTION
   pkg/CHNOSZ/NAMESPACE
   pkg/CHNOSZ/R/add.protein.R
   pkg/CHNOSZ/inst/NEWS.Rd
   pkg/CHNOSZ/man/add.protein.Rd
Log:
Replace aasum() and add example


Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================
--- pkg/CHNOSZ/DESCRIPTION	2022-09-29 08:05:52 UTC (rev 747)
+++ pkg/CHNOSZ/DESCRIPTION	2022-09-29 09:08:20 UTC (rev 748)
@@ -1,6 +1,6 @@
 Date: 2022-09-29
 Package: CHNOSZ
-Version: 1.9.9-39
+Version: 1.9.9-40
 Title: Thermodynamic Calculations and Diagrams for Geochemistry
 Authors@R: c(
     person("Jeffrey", "Dick", , "j3ffdick at gmail.com", role = c("aut", "cre"),

Modified: pkg/CHNOSZ/NAMESPACE
===================================================================
--- pkg/CHNOSZ/NAMESPACE	2022-09-29 08:05:52 UTC (rev 747)
+++ pkg/CHNOSZ/NAMESPACE	2022-09-29 09:08:20 UTC (rev 748)
@@ -25,7 +25,7 @@
   "getrank", "parent", "sciname", "allparents", "getnodes", "getnames",
   "protein.OBIGT", "which.pmax",
   "equil.boltzmann", "equil.reaction", "find.tp",
-  "ionize.aa", "MP90.cp",
+  "ionize.aa", "MP90.cp", "aasum",
   "qqr", "RMSD", "CVRMSD", "spearman", "DGmix", "DDGmix", "DGtr",
   "ratlab",
 # demos

Modified: pkg/CHNOSZ/R/add.protein.R
===================================================================
--- pkg/CHNOSZ/R/add.protein.R	2022-09-29 08:05:52 UTC (rev 747)
+++ pkg/CHNOSZ/R/add.protein.R	2022-09-29 09:08:20 UTC (rev 748)
@@ -2,9 +2,7 @@
 # calculate properties of proteins 20061109 jmd
 # reorganize protein functions 20120513
 
-# add.protein - add amino acid counts to thermo()$protein (returns iprotein)
-# seq2aa - calculate amino acid counts from a sequence
-
+# Calculate amino acid counts from a sequence
 seq2aa <- function(protein, sequence) {
   # remove newlines and whitespace
   sequence <- gsub("\\s", "", gsub("[\r\n]", "", sequence))
@@ -21,6 +19,7 @@
   return(aa)
 }
 
+# Add amino acid counts to thermo()$protein (returns iprotein)
 add.protein <- function(aa, as.residue = FALSE) {
   # Add a properly constructed data frame of 
   # amino acid counts to thermo()$protein
@@ -53,3 +52,35 @@
   if(any(ip.present)) message("add.protein: replaced ", sum(ip.present), " existing protein(s) in thermo()$protein")
   return(ip)
 }
+
+# Combine the rows of aa into a single row of amino acid counts: abundance-weighted sum, or average if average = TRUE
+aasum <- function(aa, abundance = 1, average = FALSE, protein = NULL, organism = NULL) {
+  # Returns a one-row data frame holding the column sums of aa, taken after
+  #   columns 6:25 are multiplied by abundance (recycled to one value per row of aa)
+  abundance <- rep(abundance, length.out=nrow(aa))
+  # Drop rows where aa$chains or the abundance is NA (no other columns are checked)
+  ina.aa <- is.na(aa$chains)
+  ina.ab <- is.na(abundance)
+  ina <- ina.aa | ina.ab
+  if(any(ina)) {
+    aa <- aa[!ina, ]
+    abundance <- abundance[!ina]
+    message("aasum: dropped ", sum(ina), " proteins with NA composition and/or abundance")
+  }
+  # Weight each row by its abundance; only columns 6:25 are scaled, so column 5 is later summed unweighted
+  aa[, 6:25] <- aa[, 6:25] * abundance
+  # Sum columns 5:25; the remaining columns of the result are copied from the first row of aa
+  out <- aa[1, ]
+  out[, 5:25] <- colSums(aa[, 5:25])
+  # Average only when requested (average = TRUE)
+  if(average) {
+    # Chains (column 5) are divided by the number of rows kept; residue counts (columns 6:25) by the total abundance
+    out[, 5] <- out[, 5]/nrow(aa)
+    out[, 6:25] <- out[, 6:25]/sum(abundance)
+  }
+  # Override the protein and organism names inherited from the first row, if given
+  if(!is.null(protein)) out$protein <- protein
+  if(!is.null(organism)) out$organism <- organism
+  return(out)
+}
+

Modified: pkg/CHNOSZ/inst/NEWS.Rd
===================================================================
--- pkg/CHNOSZ/inst/NEWS.Rd	2022-09-29 08:05:52 UTC (rev 747)
+++ pkg/CHNOSZ/inst/NEWS.Rd	2022-09-29 09:08:20 UTC (rev 748)
@@ -12,7 +12,7 @@
 % links to vignettes 20220723
 \newcommand{\viglink}{\ifelse{html}{\out{<a href="../CHNOSZ/doc/#1.html"><strong>#1.Rmd</strong></a>}}{\bold{#1.Rmd}}}
 
-\section{Changes in CHNOSZ version 1.9.9-39 (2022-09-29)}{
+\section{Changes in CHNOSZ version 1.9.9-40 (2022-09-29)}{
 
   \subsection{MAJOR USER-VISIBLE CHANGES}{
     \itemize{
@@ -132,8 +132,6 @@
       \item \code{EOSregress()} and the associated demo and vignette have been
       removed.
 
-      \item Remove \code{aasum()}.
-
     }
   }
 

Modified: pkg/CHNOSZ/man/add.protein.Rd
===================================================================
--- pkg/CHNOSZ/man/add.protein.Rd	2022-09-29 08:05:52 UTC (rev 747)
+++ pkg/CHNOSZ/man/add.protein.Rd	2022-09-29 09:08:20 UTC (rev 748)
@@ -2,6 +2,7 @@
 \name{add.protein}
 \alias{add.protein}
 \alias{seq2aa}
+\alias{aasum}
 \title{Amino Acid Compositions of Proteins}
 \description{
   Functions to get amino acid compositions and add them to protein list for use by other functions.
@@ -10,6 +11,7 @@
 \usage{
   add.protein(aa, as.residue = FALSE)
   seq2aa(protein, sequence)
+  aasum(aa, abundance = 1, average = FALSE, protein = NULL, organism = NULL)
 }
 
 \arguments{
@@ -17,6 +19,9 @@
   \item{as.residue}{logical, normalize by protein length?}
   \item{protein}{character, name of protein; numeric, indices of proteins (rownumbers of \code{\link{thermo}$protein})}
   \item{sequence}{character, protein sequence}
+  \item{abundance}{numeric, abundances of proteins}
+  \item{average}{logical, return the weighted average of amino acid counts?}
+  \item{organism}{character, name of organism}
 }
 
 \details{
@@ -29,12 +34,22 @@
 \code{seq2aa} returns a data frame of amino acid composition, in the format of \code{thermo()$protein}, corresponding to the provided \code{sequence}.
 Here, the \code{protein} argument indicates the name of the protein with an underscore (e.g. \samp{LYSC_CHICK}).
 
-Given amino acid compositions returned by the \code{*aa} functions described above, \code{add.protein} adds them to \code{thermo()$protein} for use by other functions in CHNOSZ.
+Given amino acid compositions returned by \code{seq2aa}, \code{add.protein} adds them to \code{thermo()$protein} for use by other functions in CHNOSZ.
+The amino acid compositions of proteins in \code{aa} with the same name as one in \code{thermo()$protein} are replaced.
 Set \code{as.residue} to TRUE to normalize by protein length; each input amino acid composition is divided by the corresponding number of residues, with the result that the sum of amino acid frequencies for each protein is 1.
-The amino acid compositions of proteins in \code{aa} with the same name as one in \code{thermo()$protein} are replaced.
-The value returned by this function is the rownumbers of \code{thermo()$protein} that are added and/or replaced.
+
+\code{aasum} returns a data frame representing the sum of amino acid compositions in the rows of the input \code{aa} data frame.
+The amino acid compositions are multiplied by the indicated \code{abundance}; that argument is recycled to match the number of rows of \code{aa}.
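+If \code{average} is TRUE, the summed number of polypeptide chains is divided by the number of input compositions (after any rows with NA composition or abundance are dropped) and the summed amino acid counts are divided by the sum of the abundances.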
+The name used in the output is taken from the first row of \code{aa} or from \code{protein} and \code{organism} if they are specified.
 }
 
+\value{
+For \code{seq2aa}, a data frame of amino acid composition and identifying information for proteins.
+For \code{add.protein}, the rownumbers of \code{thermo()$protein} that are added and/or replaced.
+For \code{aasum}, a one-row data frame of amino acid composition and identifying information.
+}
+
 \examples{
 \dontshow{reset()}
 # Get the amino acid composition of a protein sequence
@@ -49,10 +64,15 @@
 # Calculate a formula without using add.protein
 aa <- seq2aa("pentapeptide_test", "ANLSG")
 as.chemical.formula(protein.formula(aa))
+
+# Add the amino acid compositions of several poliovirus protein subunits
+file <- system.file("extdata/protein/POLG.csv", package = "CHNOSZ")
+aa <- read.csv(file, as.is = TRUE)
+aasum(aa, protein = "POLG_sum")
 }
 
 \seealso{
-\code{\link{read.fasta}} and \code{\link{uniprot.aa}} for other ways of getting amino acid compositions.
+\code{\link{read.fasta}} and \code{\link{uniprot.aa}} for other ways of getting amino acid compositions that can be used with \code{add.protein}.
 
 \code{\link{pinfo}} for protein-level functions (length, chemical formulas, reaction coefficients of basis species).
 }



More information about the CHNOSZ-commits mailing list