[CHNOSZ-commits] r39 - in pkg/CHNOSZ: . R inst inst/tests man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sat Jan 26 06:26:37 CET 2013


Author: jedick
Date: 2013-01-26 06:26:34 +0100 (Sat, 26 Jan 2013)
New Revision: 39

Modified:
   pkg/CHNOSZ/DESCRIPTION
   pkg/CHNOSZ/R/equilibrate.R
   pkg/CHNOSZ/R/hkf.R
   pkg/CHNOSZ/R/makeup.R
   pkg/CHNOSZ/R/util.data.R
   pkg/CHNOSZ/inst/NEWS
   pkg/CHNOSZ/inst/tests/test-thermo.R
   pkg/CHNOSZ/man/affinity.Rd
   pkg/CHNOSZ/man/diagram.Rd
   pkg/CHNOSZ/man/equilibrate.Rd
   pkg/CHNOSZ/man/iprotein.Rd
   pkg/CHNOSZ/man/protein.info.Rd
   pkg/CHNOSZ/man/util.data.Rd
Log:
make calling mod.obigt() easier with just a formula and G of a species


Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================
--- pkg/CHNOSZ/DESCRIPTION	2013-01-10 01:21:55 UTC (rev 38)
+++ pkg/CHNOSZ/DESCRIPTION	2013-01-26 05:26:34 UTC (rev 39)
@@ -1,6 +1,6 @@
-Date: 2013-01-10
+Date: 2013-01-26
 Package: CHNOSZ
-Version: 0.9-9.1
+Version: 0.9-9.2
 Title: Chemical Thermodynamics and Activity Diagrams
 Author: Jeffrey M. Dick
 Maintainer: Jeffrey M. Dick <jmdick at asu.edu>

Modified: pkg/CHNOSZ/R/equilibrate.R
===================================================================
--- pkg/CHNOSZ/R/equilibrate.R	2013-01-10 01:21:55 UTC (rev 38)
+++ pkg/CHNOSZ/R/equilibrate.R	2013-01-26 05:26:34 UTC (rev 39)
@@ -3,7 +3,7 @@
 # of species in (metastable) equilibrium
 
 equilibrate <- function(aout, balance=NULL, loga.balance=NULL, 
-  normalize=FALSE, ispecies=1:length(aout$values)) {
+  ispecies=1:length(aout$values), normalize=FALSE, stay.normal=FALSE) {
   ### set up calculation of equilibrium activities of species from the affinities 
   ### of their formation reactions from basis species at known activities
   ### split from diagram() 20120925 jmd
@@ -39,10 +39,8 @@
     msgout(paste("equilibrate: logarithm of total", balance$description, "is", loga.balance, "\n"))
   }
   ## normalize -- normalize the molar formula by the balance coefficients
-#  # this is the default for systems of proteins as of 20091119
   m.balance <- n.balance
   isprotein <- grepl("_", as.character(aout$species$name))
-#  if(missing(normalize) & all(isprotein)) normalize <- TRUE
   if(normalize) {
     if(any(n.balance < 0)) stop("one or more negative balancing coefficients prohibit using normalized molar formulas")
     n.balance <- rep(1, nspecies)
@@ -59,7 +57,7 @@
   if(all(n.balance==1)) loga.equil <- equil.boltzmann(Astar, n.balance, loga.balance)
   else loga.equil <- equil.reaction(Astar, n.balance, loga.balance)
   ## if we normalized the formulas, get back to activities to species
-  if(normalize) {
+  if(normalize & !stay.normal) {
     loga.equil <- lapply(1:nspecies, function(i) {
       loga.equil[[i]] - log10(m.balance[i])
     })

Modified: pkg/CHNOSZ/R/hkf.R
===================================================================
--- pkg/CHNOSZ/R/hkf.R	2013-01-10 01:21:55 UTC (rev 38)
+++ pkg/CHNOSZ/R/hkf.R	2013-01-26 05:26:34 UTC (rev 39)
@@ -105,6 +105,8 @@
           p.a <- EOS$a1*(P-Pr) + EOS$a2*log((Psi+P)/(Psi+Pr)) + 
             (EOS$a3*(P-Pr) + EOS$a4*log((Psi+P)/(Psi+Pr)))/(T-Theta)
           p <- p.c + p.a
+          # at Tr,Pr, if the origination contribution is not NA, ensure the solvation contribution is 0, not NA
+          if(!is.na(GHS$G)) p[T==Tr & P==Pr] <- 0
         # nonsolvation cp v kt e equations
         } else if(prop=='cp') {
           p <- EOS$c1 + EOS$c2 * ( T - Theta ) ^ (-2)        
@@ -122,8 +124,11 @@
       }
       if( icontrib=="s") {
         # solvation ghs equations
-        if(prop=="g") 
+        if(prop=="g") {
           p <- -omega.PT*(ZBorn+1) + omega*(ZBorn.PrTr+1) + omega*H2O.PrTr$Y*(T-Tr)
+          # at Tr,Pr, if the origination contribution is not NA, ensure the solvation contribution is 0, not NA
+          if(!is.na(GHS$G)) p[T==Tr & P==Pr] <- 0
+        }
         if(prop=="h") 
           p <- -omega.PT*(ZBorn+1) + omega.PT*T*H2O.PT$Y + T*(ZBorn+1)*dwdT +
                  omega*(ZBorn.PrTr+1) - omega*Tr*H2O.PrTr$Y
@@ -141,15 +146,18 @@
       }
       if( icontrib=='o') {
         # origination ghs equations
-        if(prop=='g') p <- GHS$G - GHS$S * (T-Tr)
+        if(prop=='g') {
+          p <- GHS$G - GHS$S * (T-Tr)
+          # don't inherit NA from GHS$S at Tr
+          p[T==Tr] <- GHS$G
+        }
         else if(prop=='h') p <- GHS$H
         else if(prop=='s') p <- GHS$S
         # origination eos equations: senseless
         else if(prop %in% tolower(props)) p <- 0 * T
       }
-      p <- rep(p,length.out=length(hkf.p))
-      ip <- 1:length(p)
-      hkf.p[ip] <- hkf.p[ip] + p[ip]
+      # accumulate the contribution
+      hkf.p <- hkf.p + p
     }
     wnew <- data.frame(hkf.p)
     if(i>1) w <- cbind(w,wnew) else w <- wnew

Modified: pkg/CHNOSZ/R/makeup.R
===================================================================
--- pkg/CHNOSZ/R/makeup.R	2013-01-10 01:21:55 UTC (rev 38)
+++ pkg/CHNOSZ/R/makeup.R	2013-01-26 05:26:34 UTC (rev 39)
@@ -145,12 +145,7 @@
   # stop if it doesn't look like a chemical formula 
   validateRegex <- paste("^(", elementRegex, ")+$", sep="")
   if(length(grep(validateRegex, formula)) == 0)
-    stop(paste("'",formula,"' is not a simple chemical formula; ",
-    "the formula must start with an elemental symbol, and ",
-    "all elemental symbols must start with an uppercase letter ",
-    "and be followed by another elemental symbol, a number ",
-    "(possibly fractional, possibly signed), or nothing. ",
-    sep="", collapse=""))
+    stop(paste("'",formula,"' is not a simple chemical formula", sep="", collapse="\n"))
   # where to put the output
   element <- character()
   count <- numeric()

Modified: pkg/CHNOSZ/R/util.data.R
===================================================================
--- pkg/CHNOSZ/R/util.data.R	2013-01-10 01:21:55 UTC (rev 38)
+++ pkg/CHNOSZ/R/util.data.R	2013-01-26 05:26:34 UTC (rev 39)
@@ -56,8 +56,17 @@
     newrows[] <- NA
     # put in a default state
     newrows$state <- thermo$opt$state
+    # the formula defaults to the name
+    newrows$formula <- args$name[inew]
     # fill in the columns
     newrows[, icol] <- args[inew, ]
+    # now check the formulas
+    e <- tryCatch(makeup(newrows$formula), error=function(e) e)
+    if(inherits(e, "error")) {
+      warning("please supply a valid chemical formula as the species name or in the 'formula' argument")
+      # transmit the error from makeup
+      stop(e)
+    }
     # assign to thermo$obigt
     thermo$obigt <<- rbind(thermo$obigt, newrows)
     rownames(thermo$obigt) <<- NULL

Modified: pkg/CHNOSZ/inst/NEWS
===================================================================
--- pkg/CHNOSZ/inst/NEWS	2013-01-10 01:21:55 UTC (rev 38)
+++ pkg/CHNOSZ/inst/NEWS	2013-01-26 05:26:34 UTC (rev 39)
@@ -1,8 +1,17 @@
-CHANGES IN CHNOSZ 0.9-9.1 (2013-01-10)
+CHANGES IN CHNOSZ 0.9-9.2 (2013-01-26)
 --------------------------------------
 
 - Fix calculation of free energy derivative in wjd().
 
+- Add 'stay.normal' argument to equilibrate().
+
+- If obigt$G is available, hkf() returns this value, not NA, at Tr, Pr.
+
+- mod.obigt() defaults to taking chemical formula from the species name,
+  and checks for validity of formula.
+
+- Add example for LYSC_CHICK to protein.info.Rd.
+
 CHANGES IN CHNOSZ 0.9-9 (2013-01-01)
 ------------------------------------
 

Modified: pkg/CHNOSZ/inst/tests/test-thermo.R
===================================================================
--- pkg/CHNOSZ/inst/tests/test-thermo.R	2013-01-10 01:21:55 UTC (rev 38)
+++ pkg/CHNOSZ/inst/tests/test-thermo.R	2013-01-26 05:26:34 UTC (rev 39)
@@ -24,13 +24,18 @@
 
 test_that("minimal usage of mod.obigt() creates usable data entries", {
   # we need at least a name and some property
-  expect_error(mod.obigt(list(name="test")))
+  expect_error(mod.obigt("test"), "species name and a property")
+  # a valid formula is needed
+  expect_warning(mod.obigt("test", date=today()), "please supply a valid chemical formula")
   # the default state is aq
-  expect_message(itest <- mod.obigt(list(name="test", date=today())), "added test\\(aq\\)")
+  expect_message(itest <- mod.obigt("test", formula="Z0", date=today()), "added test\\(aq\\)")
   # we should get NA values of G for a species with NA properties 
   expect_true(all(is.na(subcrt(itest)$out[[1]]$G)))
+  # a single value of G comes through to subcrt
+  mod.obigt("test", G=100)
+  expect_equal(subcrt("test", T=25, P=1)$out[[1]]$G, 100)
   # values for Cp and c1 integrate to the same values of G
-  G.Cp <- subcrt(mod.obigt(list(name="test", formula="C0", G=0, S=0, Cp=100)))$out[[1]]$G
-  G.c1 <- subcrt(mod.obigt(list(name="test", formula="C0", G=0, S=0, c1=100)))$out[[1]]$G
+  G.Cp <- subcrt(mod.obigt(list(name="test", S=0, Cp=100)))$out[[1]]$G
+  G.c1 <- subcrt(mod.obigt(list(name="test", S=0, c1=100)))$out[[1]]$G
   expect_equal(G.Cp, G.c1)
 })

Modified: pkg/CHNOSZ/man/affinity.Rd
===================================================================
--- pkg/CHNOSZ/man/affinity.Rd	2013-01-10 01:21:55 UTC (rev 38)
+++ pkg/CHNOSZ/man/affinity.Rd	2013-01-26 05:26:34 UTC (rev 39)
@@ -38,7 +38,7 @@
 The \code{iprotein} and \code{loga.protein} arguments can be used to compute the chemical affinities of formation reactions of proteins that are not in the current \code{\link{species}} definition.
 This approach can be utilized in order to calculate the properties of many proteins in a fraction of the time it would take to calculate them individually.
 The appropriate \code{\link{basis}} species still must be defined prior to calling \code{affinity}.
-\code{iprotein} contains indices of desired proteins in \code{\link{thermo}$protein}; \code{affinity} adds to the species list the amino acid residues and protein backbone group then calculate the properties of the reactions for the residues (including ionization effects), and adds them together to get those of the indicated proteins.
+\code{iprotein} contains indices of desired proteins in \code{\link{thermo}$protein}; \code{affinity} adds to the species list the amino acid residues and and terminal H2O group (indicated by \dQuote{RESIDUE} in \code{thermo$protein}) then calculates the properties of the reactions for the residues and terminal group, including ionization effects, and adds them together to get those of the indicated proteins.
 
   In CHNOSZ version 0.9, \code{energy} gained a new argument \samp{transect} which is set to TRUE by \code{energy.args} when the length(s) of the variables is(are) greater than three. In this mode of operation, instead of performing the calculations on an \eqn{n}{n}-dimensional grid, the affinities are calculated on an \eqn{n}{n}-dimensional transect through chemical potential (possibly including T and/or P) space. 
 

Modified: pkg/CHNOSZ/man/diagram.Rd
===================================================================
--- pkg/CHNOSZ/man/diagram.Rd	2013-01-10 01:21:55 UTC (rev 38)
+++ pkg/CHNOSZ/man/diagram.Rd	2013-01-26 05:26:34 UTC (rev 39)
@@ -83,7 +83,7 @@
 By default, \code{\link{heat.colors}} are used to fill the predominance fields in diagrams on the screen plot device.
 The style of the boundary lines on 2-D diagrams can be altered by supplying one or more non-zero integers in \code{dotted}, which indicates the fraction of line segments to omit; a value of \samp{1} or NULL for \code{dotted} has the effect of not drawing the boundary lines.
 
-For all diagrams, the \code{names} of the species and theirs colors in \code{col.names} can be changed, as can \code{cex}, \code{cex.names}, and \code{cex.axis} to adjust the overall character expansion factors (see \code{\link{par}}) and those of the species names and axis labels.
+For all diagrams, the \code{names} of the species and their colors in \code{col.names} can be changed, as can \code{cex}, \code{cex.names}, and \code{cex.axis} to adjust the overall character expansion factors (see \code{\link{par}}) and those of the species names and axis labels.
 The x- and y-axis labels are automatically generated unless they are supplied in \code{xlab} and \code{ylab}. 
 A new plot is started unless \code{add} is TRUE.
 If \code{plot.it} is FALSE, no plot will be generated but all the intermediate computations will be performed and the results returned.

Modified: pkg/CHNOSZ/man/equilibrate.Rd
===================================================================
--- pkg/CHNOSZ/man/equilibrate.Rd	2013-01-10 01:21:55 UTC (rev 38)
+++ pkg/CHNOSZ/man/equilibrate.Rd	2013-01-26 05:26:34 UTC (rev 39)
@@ -9,8 +9,8 @@
 }
 
 \usage{
-  equilibrate(aout, balance=NULL, loga.balance=NULL, normalize=FALSE,
-    ispecies=1:length(aout$values))
+  equilibrate(aout, balance=NULL, loga.balance=NULL,
+    ispecies=1:length(aout$values), normalize=FALSE, stay.normal=FALSE)
   balance(aout, balance=NULL)
   equil.boltzmann(Astar, n.balance, loga.balance)
   equil.reaction(Astar, n.balance, loga.balance)
@@ -19,8 +19,9 @@
 \arguments{
   \item{aout}{list, output from \code{\link{affinity}}}
   \item{balance}{character or numeric, how to balance the transformations}
+  \item{ispecies}{numeric, which species to include}
   \item{normalize}{logical, normalize the molar formulas of species by the balancing coefficients?}
-  \item{ispecies}{numeric, which species to include}
+  \item{stay.normal}{logical, report results for the normalized formulas?}
   \item{Astar}{numeric, affinities of formation reactions excluding species contribution}
   \item{n.balance}{numeric, number of moles of conserved component in the formation reactions of the species of interest}
   \item{loga.balance}{numeric, logarithm of total activity of balanced quantity}
@@ -53,6 +54,7 @@
 This operation is intended for systems of polymers, such as proteins, whose conventional formulas are much larger than the basis speices.
 The normalization also applies to the balancing coefficients, which as a result consist of \samp{1}s.
 \code{normalize} has the same effect as did \code{diagram(..., residue=TRUE)} in versions of CHNOSZ before 0.9-9.
+After normalization and equilibration, the equilibrium activities are then un-normalized (for the original formulas of the species), unless \code{stay.normal} is TRUE.
 
 \code{ispecies} can be supplied to identify a subset of the species to include in the calculation.
 

Modified: pkg/CHNOSZ/man/iprotein.Rd
===================================================================
--- pkg/CHNOSZ/man/iprotein.Rd	2013-01-10 01:21:55 UTC (rev 38)
+++ pkg/CHNOSZ/man/iprotein.Rd	2013-01-26 05:26:34 UTC (rev 39)
@@ -85,7 +85,10 @@
 aa <- seq2aa("GAJU_HUMAN", "LAAGKVEDSD")
 ip <- add.protein(aa)
 stopifnot(protein.length(ip)==10)
+# the chemical formula of this peptide
 stopifnot(as.chemical.formula(protein.formula(ip))=="C41H69N11O18")
+# we can also calculate a formula without using add.protein
+as.chemical.formula(protein.formula(seq2aa("pentapeptide_test", "ANLSG")))
 
 \dontrun{
 # downloading information about a protein

Modified: pkg/CHNOSZ/man/protein.info.Rd
===================================================================
--- pkg/CHNOSZ/man/protein.info.Rd	2013-01-10 01:21:55 UTC (rev 38)
+++ pkg/CHNOSZ/man/protein.info.Rd	2013-01-26 05:26:34 UTC (rev 39)
@@ -55,23 +55,49 @@
 }
 
 \seealso{
-  \code{\link{ionize.aa}} for an example that compares \code{MP90.cp} with heat capacities calculated in CHNOSZ at different temperatures and pHs. The primary function for interacting with the database of amino acid compositions of proteins is documented separately (\code{\link{iprotein}}) and examples of relative stability calculations can be found on the \code{\link{protein}} help page.
+  \code{\link{ionize.aa}} for an example that compares \code{MP90.cp} with heat capacities calculated in CHNOSZ at different temperatures and pHs. The functions for interacting with the database of amino acid compositions of proteins are documented at \code{\link{iprotein}}, and examples of relative stability calculations can be found on the \code{\link{protein}} help page.
 }
 
 \examples{\dontshow{data(thermo)}
-## protein length and formula
-# first ten proteins in thermo$protein
-protein.length(1:10)
-# these are all the same
+## example for chicken lysozyme C
+# index in thermo$protein
+ip <- iprotein("LYSC_CHICK")
+# amino acid composition
+ip2aa(ip)
+# length and chemical formula
+protein.length(ip)
+protein.formula(ip)
+# formula, Gibbs energy, average oxidation state of carbon
+protein.info(ip)
+# as above, now with charge and Gibbs energy of ionized protein at pH 7
+basis("CHNOS+")
+protein.info(ip)
+# group additivity for thermodynamic properties and HKF equation-of-state
+# parameters of non-ionized protein
+aa2eos(ip2aa(ip))
+# calculation of standard thermodynamic properties
+# (subcrt uses the species name, not ip)
+subcrt("LYSC_CHICK")
+# affinity calculation, protein identified by ip
+affinity(iprotein=ip)
+# affinity calculation, protein loaded as a species
+species("LYSC_CHICK")
+affinity()
+# NB: subcrt() only shows the properties of the non-ionized
+# protein, but affinity() uses the properties of the ionized
+# protein if the basis species have H+
+
+## these are all the same
 protein.formula("P53_PIG")
 protein.formula(iprotein("P53_PIG"))
 protein.formula(ip2aa(iprotein("P53_PIG")))
-# the formula of a pentapeptide, written in a line
-as.chemical.formula(protein.formula(seq2aa("a_test", "ANLSG")))
 
 ## steps in calculation of chemical activities of two proteins
 ## in metastable equilibrium, after Dick and Shock, 2011
 protein <- iprotein(c("CSG_METVO", "CSG_METJA"))
+# clear out amino acid residues loaded by the example above
+# ( in affinity(iprotein=ip) )
+data(thermo)
 # load supplemental database to use "old" [Met] sidechain group
 add.obigt()
 # set up the basis species to those used in DS11

Modified: pkg/CHNOSZ/man/util.data.Rd
===================================================================
--- pkg/CHNOSZ/man/util.data.Rd	2013-01-10 01:21:55 UTC (rev 38)
+++ pkg/CHNOSZ/man/util.data.Rd	2013-01-26 05:26:34 UTC (rev 39)
@@ -52,7 +52,9 @@
 \code{mod.obigt} changes one or more of the properties of species or adds species to the thermodynamic database.
 These changes are lost if you reload the database by calling \code{\link{data}(thermo)} or if you quit the \R session without saving it.
 The name of the species to add or change must be supplied as the first argument of \code{...} or as a named argument (named \samp{name}).
-When adding new species, a \samp{formula} should be included along with the values of any of the thermodynamic properties.
+When adding new species, a chemical formula should be included along with the values of any of the thermodynamic properties.
+The formula is taken from the \samp{formula} argument, or if that is missing, is taken to be the same as the \samp{name} of the species.
+An error results if the formula is not valid (i.e. can not be parsed by\code{\link{makeup}}).
 Additional arguments refer to the name of the property(s) to be updated and are matched to any part of compound column names in \code{\link{thermo}$obigt}, such as \samp{z} or \samp{T} in \samp{z.T}.
 Unless \samp{state} is specified as one of the properties, its value is taken from \code{thermo$opt$state}.
 When adding species, properties that are not specified become NA (except for \samp{state}).
@@ -99,8 +101,10 @@
 # produces a message about that
 info(ialanine)
 ## add a species
-inew <- mod.obigt("newspecies", formula="CHNOSZ", G=0, H=0, date=today())
-info(inew)
+iCl2O <- mod.obigt("Cl2O", G=20970)
+info(iCl2O)
+# add a species with a name that is distinct from the formula
+mod.obigt("buckminsterfullerene", formula="C60", state="cr")
 \dontrun{
 ## marked dontrun because they open pages in a browser
 # show the contents of thermo$refs



More information about the CHNOSZ-commits mailing list