[CHNOSZ-commits] r748 - in pkg/CHNOSZ: . R inst man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Sep 29 11:08:20 CEST 2022
Author: jedick
Date: 2022-09-29 11:08:20 +0200 (Thu, 29 Sep 2022)
New Revision: 748
Modified:
pkg/CHNOSZ/DESCRIPTION
pkg/CHNOSZ/NAMESPACE
pkg/CHNOSZ/R/add.protein.R
pkg/CHNOSZ/inst/NEWS.Rd
pkg/CHNOSZ/man/add.protein.Rd
Log:
Replace aasum() and add example
Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================
--- pkg/CHNOSZ/DESCRIPTION 2022-09-29 08:05:52 UTC (rev 747)
+++ pkg/CHNOSZ/DESCRIPTION 2022-09-29 09:08:20 UTC (rev 748)
@@ -1,6 +1,6 @@
Date: 2022-09-29
Package: CHNOSZ
-Version: 1.9.9-39
+Version: 1.9.9-40
Title: Thermodynamic Calculations and Diagrams for Geochemistry
Authors at R: c(
person("Jeffrey", "Dick", , "j3ffdick at gmail.com", role = c("aut", "cre"),
Modified: pkg/CHNOSZ/NAMESPACE
===================================================================
--- pkg/CHNOSZ/NAMESPACE 2022-09-29 08:05:52 UTC (rev 747)
+++ pkg/CHNOSZ/NAMESPACE 2022-09-29 09:08:20 UTC (rev 748)
@@ -25,7 +25,7 @@
"getrank", "parent", "sciname", "allparents", "getnodes", "getnames",
"protein.OBIGT", "which.pmax",
"equil.boltzmann", "equil.reaction", "find.tp",
- "ionize.aa", "MP90.cp",
+ "ionize.aa", "MP90.cp", "aasum",
"qqr", "RMSD", "CVRMSD", "spearman", "DGmix", "DDGmix", "DGtr",
"ratlab",
# demos
Modified: pkg/CHNOSZ/R/add.protein.R
===================================================================
--- pkg/CHNOSZ/R/add.protein.R 2022-09-29 08:05:52 UTC (rev 747)
+++ pkg/CHNOSZ/R/add.protein.R 2022-09-29 09:08:20 UTC (rev 748)
@@ -2,9 +2,7 @@
# calculate properties of proteins 20061109 jmd
# reorganize protein functions 20120513
-# add.protein - add amino acid counts to thermo()$protein (returns iprotein)
-# seq2aa - calculate amino acid counts from a sequence
-
+# Calculate amino acid counts from a sequence
seq2aa <- function(protein, sequence) {
# remove newlines and whitespace
sequence <- gsub("\\s", "", gsub("[\r\n]", "", sequence))
@@ -21,6 +19,7 @@
return(aa)
}
+# Add amino acid counts to thermo()$protein (returns iprotein)
add.protein <- function(aa, as.residue = FALSE) {
# Add a properly constructed data frame of
# amino acid counts to thermo()$protein
@@ -53,3 +52,35 @@
if(any(ip.present)) message("add.protein: replaced ", sum(ip.present), " existing protein(s) in thermo()$protein")
return(ip)
}
+
+# Combine the rows of an amino acid count data frame into one row (sum, average, or abundance-weighted sum)
+aasum <- function(aa, abundance = 1, average = FALSE, protein = NULL, organism = NULL) {
+ # Returns the first row of aa with columns 5:25 replaced by column sums over all rows,
+ # computed after columns 6:25 have been multiplied by abundance (recycled to nrow(aa) values)
+ abundance <- rep(abundance, length.out=nrow(aa))
+ # Drop rows where aa$chains is NA or the matching abundance is NA, and report how many were dropped
+ ina.aa <- is.na(aa$chains)
+ ina.ab <- is.na(abundance)
+ ina <- ina.aa | ina.ab
+ if(any(ina)) {
+ aa <- aa[!ina, ]
+ abundance <- abundance[!ina]
+ message("aasum: dropped ", sum(ina), " proteins with NA composition and/or abundance")
+ }
+ # Multiply columns 6:25 by abundance (column 5 is left unweighted)
+ aa[, 6:25] <- aa[, 6:25] * abundance
+ # Sum: start from the first row (keeping its identifying columns), then overwrite columns 5:25 with column sums
+ out <- aa[1, ]
+ out[, 5:25] <- colSums(aa[, 5:25])
+ # Average if told to do so; note the two column ranges use different divisors
+ if(average) {
+ # Polypeptide chains (column 5) by number of rows in aa, residues (columns 6:25) by total abundance
+ out[, 5] <- out[, 5]/nrow(aa)
+ out[, 6:25] <- out[, 6:25]/sum(abundance)
+ }
+ # Override the protein and organism names inherited from the first row, if given
+ if(!is.null(protein)) out$protein <- protein
+ if(!is.null(organism)) out$organism <- organism
+ return(out)
+}
+
Modified: pkg/CHNOSZ/inst/NEWS.Rd
===================================================================
--- pkg/CHNOSZ/inst/NEWS.Rd 2022-09-29 08:05:52 UTC (rev 747)
+++ pkg/CHNOSZ/inst/NEWS.Rd 2022-09-29 09:08:20 UTC (rev 748)
@@ -12,7 +12,7 @@
% links to vignettes 20220723
\newcommand{\viglink}{\ifelse{html}{\out{<a href="../CHNOSZ/doc/#1.html"><strong>#1.Rmd</strong></a>}}{\bold{#1.Rmd}}}
-\section{Changes in CHNOSZ version 1.9.9-39 (2022-09-29)}{
+\section{Changes in CHNOSZ version 1.9.9-40 (2022-09-29)}{
\subsection{MAJOR USER-VISIBLE CHANGES}{
\itemize{
@@ -132,8 +132,6 @@
\item \code{EOSregress()} and the associated demo and vignette have been
removed.
- \item Remove \code{aasum()}.
-
}
}
Modified: pkg/CHNOSZ/man/add.protein.Rd
===================================================================
--- pkg/CHNOSZ/man/add.protein.Rd 2022-09-29 08:05:52 UTC (rev 747)
+++ pkg/CHNOSZ/man/add.protein.Rd 2022-09-29 09:08:20 UTC (rev 748)
@@ -2,6 +2,7 @@
\name{add.protein}
\alias{add.protein}
\alias{seq2aa}
+\alias{aasum}
\title{Amino Acid Compositions of Proteins}
\description{
Functions to get amino acid compositions and add them to protein list for use by other functions.
@@ -10,6 +11,7 @@
\usage{
add.protein(aa, as.residue = FALSE)
seq2aa(protein, sequence)
+ aasum(aa, abundance = 1, average = FALSE, protein = NULL, organism = NULL)
}
\arguments{
@@ -17,6 +19,9 @@
\item{as.residue}{logical, normalize by protein length?}
\item{protein}{character, name of protein; numeric, indices of proteins (rownumbers of \code{\link{thermo}$protein})}
\item{sequence}{character, protein sequence}
+ \item{abundance}{numeric, abundances of proteins}
+ \item{average}{logical, return the weighted average of amino acid counts?}
+ \item{organism}{character, name of organism}
}
\details{
@@ -29,12 +34,22 @@
\code{seq2aa} returns a data frame of amino acid composition, in the format of \code{thermo()$protein}, corresponding to the provided \code{sequence}.
Here, the \code{protein} argument indicates the name of the protein with an underscore (e.g. \samp{LYSC_CHICK}).
-Given amino acid compositions returned by the \code{*aa} functions described above, \code{add.protein} adds them to \code{thermo()$protein} for use by other functions in CHNOSZ.
+Given amino acid compositions returned by \code{seq2aa}, \code{add.protein} adds them to \code{thermo()$protein} for use by other functions in CHNOSZ.
+The amino acid compositions of proteins in \code{aa} with the same name as one in \code{thermo()$protein} are replaced.
Set \code{as.residue} to TRUE to normalize by protein length; each input amino acid composition is divided by the corresponding number of residues, with the result that the sum of amino acid frequencies for each protein is 1.
-The amino acid compositions of proteins in \code{aa} with the same name as one in \code{thermo()$protein} are replaced.
-The value returned by this function is the rownumbers of \code{thermo()$protein} that are added and/or replaced.
+
+\code{aasum} returns a data frame representing the sum of amino acid compositions in the rows of the input \code{aa} data frame.
+The amino acid compositions are multiplied by the indicated \code{abundance}; that argument is recycled to match the number of rows of \code{aa}.
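+If \code{average} is TRUE, the summed amino acid counts are divided by the total \code{abundance} (giving an abundance-weighted average), and the summed number of chains is divided by the number of input compositions.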
+The name used in the output is taken from the first row of \code{aa} or from \code{protein} and \code{organism} if they are specified.
}
+\value{
+For \code{seq2aa}, a data frame of amino acid composition and identifying information for proteins.
+For \code{add.protein}, the rownumbers of \code{thermo()$protein} that are added and/or replaced.
+For \code{aasum}, a one-row data frame of amino acid composition and identifying information.
+}
+
\examples{
\dontshow{reset()}
# Get the amino acid composition of a protein sequence
@@ -49,10 +64,15 @@
# Calculate a formula without using add.protein
aa <- seq2aa("pentapeptide_test", "ANLSG")
as.chemical.formula(protein.formula(aa))
+
+# Add the amino acid compositions of several poliovirus protein subunits
+file <- system.file("extdata/protein/POLG.csv", package = "CHNOSZ")
+aa <- read.csv(file, as.is = TRUE)
+aasum(aa, protein = "POLG_sum")
}
\seealso{
-\code{\link{read.fasta}} and \code{\link{uniprot.aa}} for other ways of getting amino acid compositions.
+\code{\link{read.fasta}} and \code{\link{uniprot.aa}} for other ways of getting amino acid compositions that can be used with \code{add.protein}.
\code{\link{pinfo}} for protein-level functions (length, chemical formulas, reaction coefficients of basis species).
}
More information about the CHNOSZ-commits
mailing list