[CHNOSZ-commits] r138 - in pkg/CHNOSZ: . R inst inst/extdata/cpetc man vignettes

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Feb 9 17:49:55 CET 2017


Author: jedick
Date: 2017-02-09 17:49:54 +0100 (Thu, 09 Feb 2017)
New Revision: 138

Modified:
   pkg/CHNOSZ/DESCRIPTION
   pkg/CHNOSZ/R/util.expression.R
   pkg/CHNOSZ/inst/NEWS
   pkg/CHNOSZ/inst/extdata/cpetc/PM90.csv
   pkg/CHNOSZ/man/extdata.Rd
   pkg/CHNOSZ/man/ionize.aa.Rd
   pkg/CHNOSZ/vignettes/anintro.Rmd
   pkg/CHNOSZ/vignettes/vig.bib
Log:
anintro.Rmd: add proteins: group additivity and ionization


Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================
--- pkg/CHNOSZ/DESCRIPTION	2017-02-09 12:58:00 UTC (rev 137)
+++ pkg/CHNOSZ/DESCRIPTION	2017-02-09 16:49:54 UTC (rev 138)
@@ -1,6 +1,6 @@
 Date: 2017-02-09
 Package: CHNOSZ
-Version: 1.0.8-27
+Version: 1.0.8-28
 Title: Chemical Thermodynamics and Activity Diagrams
 Author: Jeffrey Dick
 Maintainer: Jeffrey Dick <j3ffdick at gmail.com>

Modified: pkg/CHNOSZ/R/util.expression.R
===================================================================
--- pkg/CHNOSZ/R/util.expression.R	2017-02-09 12:58:00 UTC (rev 137)
+++ pkg/CHNOSZ/R/util.expression.R	2017-02-09 16:49:54 UTC (rev 138)
@@ -87,11 +87,9 @@
     else if(thischar %in% letters) thisexpr <- substitute(""[italic(a)], list(a=thischar))
     else thisexpr <- substitute(a, list(a=thischar))
     # D for greek Delta
-    # A for bold A (affinity)
     # p for subscript italic P (in Cp)
     # 0 for degree sign (but not immediately following a number e.g. 2.303)
     if(thischar=='D') thisexpr <- substitute(Delta)
-    if(thischar=='A') thisexpr <- substitute(bold(A))
     if(thischar=='p') thisexpr <- substitute(a[italic(P)], list(a=""))
     if(thischar=='0' & !can.be.numeric(prevchar)) thisexpr <- substitute(degree)
     # put it together

Modified: pkg/CHNOSZ/inst/NEWS
===================================================================
--- pkg/CHNOSZ/inst/NEWS	2017-02-09 12:58:00 UTC (rev 137)
+++ pkg/CHNOSZ/inst/NEWS	2017-02-09 16:49:54 UTC (rev 138)
@@ -1,4 +1,4 @@
-CHANGES IN CHNOSZ 1.0.8-27 (2017-02-09)
+CHANGES IN CHNOSZ 1.0.8-28 (2017-02-09)
 ---------------------------------------
 
 - Add "AA" as a keyword for preset species in basis() (cysteine,
@@ -58,6 +58,9 @@
 - With new default arguments, thermo.axis() only plots the axis tick
   marks and lines (useful for redrawing the axis on filled diagrams).
 
+- Remove the bold formatting for "A" in expr.property(); now "A" is
+  italicized like most other uppercase letters.
+
 CHANGES IN CHNOSZ 1.0.8 (2016-05-28)
 ------------------------------------
 

Modified: pkg/CHNOSZ/inst/extdata/cpetc/PM90.csv
===================================================================
--- pkg/CHNOSZ/inst/extdata/cpetc/PM90.csv	2017-02-09 12:58:00 UTC (rev 137)
+++ pkg/CHNOSZ/inst/extdata/cpetc/PM90.csv	2017-02-09 16:49:54 UTC (rev 138)
@@ -1,25 +1,7 @@
-protein,T,Cp
-CYC_BOVIN,5,21400
-CYC_BOVIN,25,23800
-CYC_BOVIN,50,25200
-CYC_BOVIN,75,26000
-CYC_BOVIN,100,26100
-CYC_BOVIN,125,26300
-LYSC_CHICK,5,25200
-LYSC_CHICK,25,27500
-LYSC_CHICK,50,29500
-LYSC_CHICK,75,31100
-LYSC_CHICK,100,31400
-LYSC_CHICK,125,31500
-MYG_PHYCA,5,31900
-MYG_PHYCA,25,34900
-MYG_PHYCA,50,36900
-MYG_PHYCA,75,39200
-MYG_PHYCA,100,39300
-MYG_PHYCA,125,39500
-RNAS1_BOVIN,5,23100
-RNAS1_BOVIN,25,26000
-RNAS1_BOVIN,50,27700
-RNAS1_BOVIN,75,29200
-RNAS1_BOVIN,100,29800
-RNAS1_BOVIN,125,30100
+T,CYC_BOVIN,LYSC_CHICK,MYG_PHYCA,RNAS1_BOVIN
+5,21400,25200,31900,23100
+25,23800,27500,34900,26000
+50,25200,29500,36900,27700
+75,26000,31100,39200,29200
+100,26100,31400,39300,29800
+125,26300,31500,39500,30100

Modified: pkg/CHNOSZ/man/extdata.Rd
===================================================================
--- pkg/CHNOSZ/man/extdata.Rd	2017-02-09 12:58:00 UTC (rev 137)
+++ pkg/CHNOSZ/man/extdata.Rd	2017-02-09 16:49:54 UTC (rev 138)
@@ -28,7 +28,7 @@
 
   Files in \code{cpetc} contain heat capacity and other thermodynamic data and parameters:
   \itemize{
-    \item \code{PM90.csv} Heat capacities of four unfolded aqueous proteins taken from Privalov and Makhatadze, 1990. Names of proteins are in the first column, temperature in \eqn{^{\circ}}{°}C in the second, and heat capacities in J mol\eqn{^{-1}}{^-1} K\eqn{^{-1}}{^-1} in the third. See \code{\link{ionize.aa}} for an example that uses this file.
+    \item \code{PM90.csv} Heat capacities of four unfolded aqueous proteins taken from Privalov and Makhatadze, 1990. Temperature in \eqn{^{\circ}}{°}C is in the first column, and heat capacities of the proteins in J mol\eqn{^{-1}}{^-1} K\eqn{^{-1}}{^-1} in the remaining columns. See \code{\link{ionize.aa}} for an example that uses this file.
     \item \code{RH95.csv} Heat capacity data for iron taken from Robie and Hemingway, 1995. Temperature in Kelvin is in the first column, heat capacity in J K\eqn{^{-1}}{^-1} mol\eqn{^{-1}}{^-1} in the second. See \code{\link{subcrt}} for an example that uses this file.
     \item \code{RT71.csv} pH titration measurements for unfolded lysozyme (\samp{LYSC_CHICK}) taken from Roxby and Tanford, 1971. pH is in the first column, net charge in the second. See \code{\link{ionize.aa}} for an example that uses this file.
     \item \code{SOJSH.csv} Experimental equilibrium constants for the reaction NaCl(aq) = Na+ + Cl- as a function of temperature and pressure taken from Fig. 1 of Shock et al., 1992. Data were extracted from the figure using g3data (\url{http://www.frantz.fi/software/g3data.php}). See \code{\link{water}} for an example that uses this file.

Modified: pkg/CHNOSZ/man/ionize.aa.Rd
===================================================================
--- pkg/CHNOSZ/man/ionize.aa.Rd	2017-02-09 12:58:00 UTC (rev 137)
+++ pkg/CHNOSZ/man/ionize.aa.Rd	2017-02-09 16:49:54 UTC (rev 138)
@@ -85,8 +85,7 @@
 points(T, convert(MP90.cp("LYSC_CHICK", T), "cal"))
 # Privalov and Makhatadze's experimental values
 e <- read.csv(system.file("extdata/cpetc/PM90.csv", package="CHNOSZ"))
-e <- e[e$protein=="LYSC_CHICK",]
-points(e$T, convert(e$Cp, "cal"), pch=16)
+points(e$T, convert(e$LYSC_CHICK, "cal"), pch=16)
 legend("bottomright", pch=c(16, 1, NA, NA), lty=c(NA, NA, 1, 2),
   legend=c("PM90 experiment", "MP90 groups", 
   "DLH06 groups no ion", "DLH06 groups ionized"))

Modified: pkg/CHNOSZ/vignettes/anintro.Rmd
===================================================================
--- pkg/CHNOSZ/vignettes/anintro.Rmd	2017-02-09 12:58:00 UTC (rev 137)
+++ pkg/CHNOSZ/vignettes/anintro.Rmd	2017-02-09 16:49:54 UTC (rev 138)
@@ -845,7 +845,7 @@
 Actually, the total concentration of carbon depends on the mixing ratio, ranging from about 10<sup>-2.2</sup> (seawater) to 10<sup>-2.6</sup> (vent fluid).
 The ability to vary the activity of the balanced basis species is not yet implemented in CHNOSZ, so a single value is used here.
 ```
-```{r groups_affinity, message=FALSE}
+```{r groups_affinity, message=FALSE, cache=TRUE}
 a <- affinity(T=T, O2=O2, pH=pH)
 e <- equilibrate(a, loga.balance=-2.5)
 ```
@@ -871,7 +871,7 @@
 
 That makes a diagram that is similar to Figure 6b of Shock and Schulte (1998).
 ```{marginfigure}
-Some differences with the original diagrams may be caused by artifacts introduced by digitization, combined with the sensitivity of the calculations to log*f*<sub>O<sub>2</sub></sub>.
+Some differences from the original diagrams may be caused by artifacts introduced by digitization, combined with the sensitivity of the calculations to log*f*<sub>O<sub>2</sub></sub>.
 ```
 It is also possible to plot the distribution of species within individual groups, such as alcohols and ketones.
 We do this by setting the names and line types for the *other* species to values that prevent them from being plotted:
@@ -922,14 +922,102 @@
 
 # Proteins
 
-## Sources of amino acid data
+Proteins in CHNOSZ are handled differently from other species.
+Amino acid group additivity is used to obtain the thermodynamic properties of proteins.
+Therefore, CHNOSZ has a data file with amino acid compositions of selected proteins, as well as functions for importing and downloading amino acid sequence data.
+When proteins in CHNOSZ are identified by name, they include an underscore, such as in `LYSC_CHICK` (chicken lysozyme C).
 
-## Group additivity and Ionization
+The length and chemical formula of one or more proteins are returned by `protein.length()` and `protein.formula()`.
+We can calculate the formula of the protein, and the per-residue formula, and show that both have the same average oxidation state of carbon:
+```{r formula_LYSC_CHICK}
+pl <- protein.length("LYSC_CHICK")
+pf <- protein.formula("LYSC_CHICK")
+list(length=pl, protein=pf, residue=pf/pl,
+     ZC_protein=ZC(pf), ZC_residue=ZC(pf/pl))
+```
 
-## Normalizing for different lengths
+## Group additivity and ionization
 
+The group additivity calculations for proteins are based on equations and data from @AH00, @DLH06, and @LD12.
+There are two major options for the calculations: whether to calculate properties for crystalline or aqueous groups, and, for the latter, whether to model the ionization of the sidechain and terminal groups as a function of pH (as well as *T* and *P*).
+By default, additivity of aqueous groups is used:
+```{r subcrt_LYSC_CHICK, message=FALSE}
+subcrt("LYSC_CHICK")$out[[1]][1:6, ]
+```
+
+<p>
+Let's compare experimental values of heat capacity of four proteins, from @PM90, with those calculated using group additivity.
+After dividing Privalov and Makhatadze's experimental values by the lengths of the proteins to get per-residue values, we convert those to calories, then plot them.
+The loop calculates the properties of each protein using group additivity, for aqueous and crystalline groups, then plots the per-residue values.
+```{r protein_Cp, fig.margin=TRUE, fig.width=4, fig.height=4, small.mar=TRUE, dpi=72, out.width="100%", echo=FALSE, message=FALSE, fig.cap='The heat capacity calculated by group additivity closely approximates experimental values for aqueous proteins. For a related figure showing the effects of ionization in the calculations, see <span style="color:blue">?ionize.aa</span>.', cache=TRUE, pngquant=pngquant}
+PM90 <- read.csv(system.file("extdata/cpetc/PM90.csv", package="CHNOSZ"))
+plength <- protein.length(colnames(PM90)[2:5])
+Cp_expt <- t(t(PM90[, 2:5]) / plength)
+matplot(PM90[, 1], convert(Cp_expt, "cal"), type="p", pch=19,
+        xlab=axis.label("T"), ylab=axis.label("Cp0"), ylim=c(28, 65))
+for(i in 1:4) {
+  pname <- colnames(Cp_expt)[i]
+  aq <- subcrt(pname, "aq", T=seq(0, 150))$out[[1]]
+  cr <- subcrt(pname, "cr", T=seq(0, 150))$out[[1]]
+  lines(aq$T, aq$Cp/plength[i], col=i)
+  lines(cr$T, cr$Cp/plength[i], col=i, lty=2)
+}
+legend("right", legend=colnames(Cp_expt),
+       col=1:4, pch=19, lty=1, bty="n", cex=0.9)
+legend("bottomright", legend=c("experimental", "calculated (aq)",
+       "calculated (cr)"), lty=c(NA, 1, 2), pch=c(19, NA, NA), bty="n")
+```
+```{r protein_Cp, eval=FALSE}
+```
+
+<p>
+Note that `subcrt()` has no provision for protein ionization.
+Instead, ionization is handled via `affinity()`, which calls `ionize.aa()` if a charged species is in the basis.
+```{marginfigure}
+Whether to calculate properties using aqueous or crystalline groups is determined by the value of `thermo\$opt\$state`; if it is changed from its default of `aq` to `cr`, no ionization is possible.
+```
+The following plot shows the calculated affinity of reaction between nonionized proteins and their ionized forms as a function of pH.
+The affinity is always positive, representing the strong energetic drive for ionization of proteins in aqueous solution.
+The degree of ionization of amino and carboxyl groups increases at low and high pH, respectively, accounting for the U-shaped lines.
+```{r protein_ionization, fig.margin=TRUE, fig.width=4, fig.height=4, small.mar=TRUE, dpi=72, out.width="100%", echo=FALSE, results="hide", message=FALSE, fig.cap='Affinity of ionization of proteins. See <span style="color:blue">demo(ionize)</span> for ionization properties calculated as a function of temperature and pH.', cache=TRUE, pngquant=pngquant}
+ip <- iprotein(c("CYC_BOVIN", "LYSC_CHICK", "MYG_PHYCA", "RNAS1_BOVIN"))
+basis("CHNOS+")
+a_ion <- affinity(pH=c(0, 14), iprotein=ip)
+basis("CHNOS")
+a_nonion <- affinity(iprotein=ip)
+plot(c(0, 14), c(50, 300), xlab="pH", ylab=axis.label("A"), type="n")
+for(i in 1:4) {
+  A_ion <- as.numeric(a_ion$values[[i]])
+  A_nonion <- as.numeric(a_nonion$values[[i]])
+  lines(a_ion$vals[[1]], A_ion - A_nonion, col=i)
+}
+legend("topright", legend=a_ion$species$name,
+       col=1:4, lty=1, bty="n", cex=0.9)
+```
+We calculate the affinities for the same four proteins, using both charged and uncharged sets of basis species to activate and suppress the ionization calculations.
+The ionized calculation returns a series of values (as a function of pH), but there is only one value of affinity returned by the nonionized calculation, so we need to use `as.numeric()` to avoid subtracting non-conformable arrays:
+```{r protein_ionization, eval=FALSE}
+```
+
+Above, we used the `iprotein` argument of `affinity()` to specify the proteins in the calculation, using their indices as returned by `iprotein()`.
+```{marginfigure}
+The `iprotein` index refers to the row number of `thermo\$protein`; this is distinct from the `ispecies` index, which refers to the row number of `thermo\$species`.
+```
+That approach utilizes some optimizations that can be realized due to group additivity, and is useful for calculations involving many proteins.
+An alternative, but slower, approach is to identify the proteins to `species()`; this should produce results that are equivalent to using the `iprotein` argument:
+```{r species_protein, message=FALSE}
+species(c("CYC_BOVIN", "LYSC_CHICK", "MYG_PHYCA", "RNAS1_BOVIN"))
+a_nonion2 <- affinity()
+unlist(a_nonion2$values)
+unlist(a_nonion$values)
+```
+
 ## Compositional analysis (ZC)
 
+## Normalizing for different lengths
+
+## Adding amino acid data
+
 # Data options
 
 ## Source of data: browse.refs()
@@ -954,6 +1042,6 @@
 
 # Functions outside of the main workflow
 
-transfer, wjd, eqdata, RH2obigt, EOSregress
+transfer, wjd, eqdata, RH2obigt, EOSregress, anim.
 
 Gibbs energy minimization with amino acids: [Cob13]?

Modified: pkg/CHNOSZ/vignettes/vig.bib
===================================================================
--- pkg/CHNOSZ/vignettes/vig.bib	2017-02-09 12:58:00 UTC (rev 137)
+++ pkg/CHNOSZ/vignettes/vig.bib	2017-02-09 16:49:54 UTC (rev 138)
@@ -11,6 +11,17 @@
   doi       = {10.1149/1.1344532},
 }
 
+@Article{AH00,
+  author    = {Amend, Jan P. and Helgeson, Harold C.},
+  journal   = {Biophysical Chemistry},
+  title     = {{C}alculation of the standard molal thermodynamic properties of aqueous biomolecules at elevated temperatures and pressures. {II}. {U}nfolded proteins},
+  year      = {2000},
+  volume    = {84},
+  number    = {2},
+  pages     = {105--136},
+  doi       = {10.1016/S0301-4622(00)00116-2},
+}
+
 @Book{BPJ85,
   author    = {Bard, A. J. and Parsons, R. and Jordan, J.},
   publisher = {M. Dekker},
@@ -188,14 +199,16 @@
   z9        = {41},
 }
 
-@Book{Pou49,
-  author    = {Pourbaix, Marcel J. N.},
-  publisher = {Edward Arnold \& Co.},
-  title     = {Thermodynamics of Dilute Aqueous Solutions},
-  year      = {1949},
-  address   = {London},
-  pages     = {136},
-  url       = {http://www.worldcat.org/oclc/1356445},
+@Article{PM90,
+  author     = {Privalov, P. L. and Makhatadze, G. I.},
+  journal    = {Journal of Molecular Biology},
+  title      = {Heat capacity of proteins. {II}. {P}artial molar heat capacity of the unfolded polypeptide chain of proteins: {P}rotein unfolding effects},
+  year       = {1990},
+  volume     = {213},
+  number     = {2},
+  pages      = {385--391},
+  doi        = {10.1016/S0022-2836(05)80198-6},
+  size       = {7 p.},
 }
 
 @Article{SB01,



More information about the CHNOSZ-commits mailing list