[CHNOSZ-commits] r7 - in pkg: . R inst inst/tests man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sun Sep 2 17:11:09 CEST 2012


Author: jedick
Date: 2012-09-02 17:11:09 +0200 (Sun, 02 Sep 2012)
New Revision: 7

Modified:
   pkg/DESCRIPTION
   pkg/R/iprotein.R
   pkg/R/subcrt.R
   pkg/R/util.data.R
   pkg/inst/NEWS
   pkg/inst/tests/test-subcrt.R
   pkg/inst/tests/test-util.R
   pkg/inst/tests/test-util.data.R
   pkg/man/eos.Rd
   pkg/man/iprotein.Rd
   pkg/man/util.data.Rd
   pkg/man/util.formula.Rd
Log:
add.obigt() keeps existing species indices; add.protein() can 'print.existing' proteins; minor doc updates


Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION	2012-08-29 14:18:01 UTC (rev 6)
+++ pkg/DESCRIPTION	2012-09-02 15:11:09 UTC (rev 7)
@@ -1,7 +1,7 @@
 Package: CHNOSZ
+Version: 0.9-7.98
+Date: 2012-09-02
 Title: Chemical Thermodynamics and Activity Diagrams
-Version: 0.9-7.98
-Date: 2012-08-29
 Author: Jeffrey M. Dick
 Maintainer: Jeffrey M. Dick <jmdick at asu.edu>
 Depends: R (>= 2.10.0), utils

Modified: pkg/R/iprotein.R
===================================================================
--- pkg/R/iprotein.R	2012-08-29 14:18:01 UTC (rev 6)
+++ pkg/R/iprotein.R	2012-09-02 15:11:09 UTC (rev 7)
@@ -205,7 +205,7 @@
   return(aa)
 }
 
-add.protein <- function(aa) {
+add.protein <- function(aa, print.existing=FALSE) {
   # add a properly constructed data frame of 
   # amino acid counts to thermo$protein
   if(!identical(colnames(aa), colnames(thermo$protein))) 
@@ -221,5 +221,9 @@
   ip <- iprotein(po)
   # make some noise
   msgout("add.protein: added ", nrow(aa)-sum(ipdup), " of ", nrow(aa), " proteins\n")
+  if(!all(is.na(ipdup)) & print.existing) {
+    potext <- paste(aa$protein[ipdup], aa$organism[ipdup], sep="_", collapse=" ")
+    msgout("add.protein: skipped existing ", potext, "\n")
+  }
   return(ip)
 }

Modified: pkg/R/subcrt.R
===================================================================
--- pkg/R/subcrt.R	2012-08-29 14:18:01 UTC (rev 6)
+++ pkg/R/subcrt.R	2012-09-02 15:11:09 UTC (rev 7)
@@ -235,9 +235,11 @@
           if(identical(species,b.species) & identical(state,b.state))
             msgout("subcrt: balanced reaction, but it is a non-reaction; restarting...\n")
           else msgout('subcrt: adding missing composition from basis definition and restarting...\n')
-          return(subcrt(species=c(species,tb$ispecies[match(colnames(bc),rownames(tb))]),
-            coeff=c(coeff,as.numeric(bc[1,])),state=c(state,b.state),property=property,
-            T=outvert(T,"K"),P=P,grid=grid,convert=convert,logact=logact,exceed.Ttr=FALSE))
+          newspecies <- c(species, tb$ispecies[match(colnames(bc), rownames(tb))])
+          newcoeff <- c(coeff, as.numeric(bc[1, ]))
+          newstate <- c(state, b.state)
+          return(subcrt(species=newspecies, coeff=newcoeff, state=newstate,
+            property=property, T=outvert(T, "K"), P=P, grid=grid, convert=convert, logact=logact, exceed.Ttr=FALSE))
         } else if(identical(action.unbalanced,'warn')) 
             warning(paste('reaction was unbalanced, missing', as.chemical.formula(miss)),call.=FALSE)
       } else {

Modified: pkg/R/util.data.R
===================================================================
--- pkg/R/util.data.R	2012-08-29 14:18:01 UTC (rev 6)
+++ pkg/R/util.data.R	2012-09-02 15:11:09 UTC (rev 7)
@@ -100,12 +100,10 @@
   # check if the file is compatible with thermo$obigt
   tr <- try(rbind(to1,to2),silent=TRUE)
   if(identical(class(tr),'try-error')) stop(paste(file,"is not compatible with thermo$obigt data table."))
-  # identify duplicated
-  idup1 <- which(id1 %in% id2)
-  idup2 <- which(id2 %in% id1)
-  ndup <- length(idup2)
-  nnew <- nrow(to2) - ndup
-  iadd <- 1:nrow(to2)
+  # match the new species to existing ones
+  does.exist <- id2 %in% id1
+  ispecies.exist <- na.omit(match(id2, id1))
+  nexist <- sum(does.exist)
   # convert from J if necessary
   if(tolower(E.units)=="j") {
     # loop over each row
@@ -120,22 +118,31 @@
       to2[i,icol] <- convert(to2[i,icol],"cal")
     }
   }
+  # keep track of the species we've added
+  inew <- numeric()
   if(force) {
-    # drop entries from original
-    if(length(idup1) > 0) to1 <- to1[-idup1,]
+    # replace existing entries
+    if(nexist > 0) {
+      to1[ispecies.exist, ] <- to2[does.exist, ]
+      to2 <- to2[!does.exist, ]
+      inew <- c(inew, ispecies.exist)
+    }
   } else {
-    if(length(idup2) > 0) iadd <- iadd[-idup2]
-    ndup <- 0
+    # ignore any new entries that already exist
+    to2 <- to2[!does.exist, ]
+    nexist <- 0
   }
-  inew <- numeric()
-  if(length(iadd) > 0) {
-    inew <- nrow(to1) + 1:length(iadd)
-    to1 <- rbind(to1,to2[iadd,])
+  # add new entries
+  if(nrow(to2) > 0) {
+    to1 <- rbind(to1, to2)
+    inew <- c(inew, (length(id1)+1):nrow(to1))
   }
+  # commit the change
   thermo$obigt <<- to1
   rownames(thermo$obigt) <<- 1:nrow(thermo$obigt)
-  msgout("add.obigt: added ", length(iadd), " of ", nrow(to2), " species", 
-    " (", ndup, " replacements, ", nnew, " new, units = ", E.units, ") from ", file, "\n")
+  msgout("add.obigt: file has ", length(does.exist), " rows; made ", 
+    nexist, " replacements, ", nrow(to2), " additions, units = ", E.units, "\n")
+  msgout("add.obigt: file was ", file, "\n")
   msgout("add.obigt: use data(thermo) to restore default database\n")
   return(invisible(inew))
 }

Modified: pkg/inst/NEWS
===================================================================
--- pkg/inst/NEWS	2012-08-29 14:18:01 UTC (rev 6)
+++ pkg/inst/NEWS	2012-09-02 15:11:09 UTC (rev 7)
@@ -1,4 +1,4 @@
-CHANGES IN CHNOSZ 0.9-7.98 (2012-06-26)
+CHANGES IN CHNOSZ 0.9-7.98 (2012-09-02)
 ---------------------------------------
 
 SIGNIFICANT USER-VISIBLE CHANGES:
@@ -145,6 +145,9 @@
   called, allowing the parameters to be restored after running examples
   that change them.
 
+- Add protein.equil() for step-by-step calculation of chemical
+  activities of proteins in metastable equilibrium.
+
 DATA UPDATES:
 
 - Add 148 liquid and 148 crystalline acyclic isoprenoids, polycyclic 

Modified: pkg/inst/tests/test-subcrt.R
===================================================================
--- pkg/inst/tests/test-subcrt.R	2012-08-29 14:18:01 UTC (rev 6)
+++ pkg/inst/tests/test-subcrt.R	2012-09-02 15:11:09 UTC (rev 7)
@@ -3,10 +3,19 @@
 # delete the basis definition in case there is one
 basis(delete=TRUE)
 
-test_that("unbalanced reactions give an error", {
+test_that("unbalanced reactions give a warning", {
   expect_warning(subcrt(c("glucose", "ethanol"), c(-1, 3)), "reaction was unbalanced, missing H-6O3")
 })
 
+test_that("unbalanced reactions are balanced given sufficient basis species", {
+  basis("CHNOS")
+  # since it doesn't alter the species indices of the basis species, this can come second ...
+  add.obigt()
+  s <- subcrt(c("malic acid", "citric acid"), c(-1, 1))
+  expect_equal(s$reaction$coeff, c(-1, 1, -2, -1, 1.5))
+  expect_equal(s$reaction$name, c("malic acid", "citric acid", "CO2", "water", "oxygen"))
+})
+
 test_that("phase transitions of minerals give expected messages and results", {
   iacanthite <- info("acanthite", "cr2")
   expect_message(subcrt(iacanthite), "subcrt: some points below transition temperature for acanthite cr2 \\(using NA for G\\)")

Modified: pkg/inst/tests/test-util.R
===================================================================
--- pkg/inst/tests/test-util.R	2012-08-29 14:18:01 UTC (rev 6)
+++ pkg/inst/tests/test-util.R	2012-09-02 15:11:09 UTC (rev 7)
@@ -22,6 +22,16 @@
   # done through obigt2eos
   coe <- obigt2eos(thermo$obigt[ic,], "aq", fixGHS=TRUE)
   expect_equal(coe$S, GHS[[3]])
+  ## mass and entropy of elements in chemical formulas
+  # the "-1" is a single negative charge, the electron
+  testform <- c("CH4", "H2O", "-1")
+  testmass <- mass(testform)
+  testent <- entropy(testform)
+  expect_equal(testmass, c(16.04276, 18.01528, 0))
+  expect_equal(testent, c(63.83843212237, 55.74952198853, 15.61663479924))
+  # another way to calculate the entropy of the elements in H2O
+  testGHS <- GHS("H2O", G=0, H=0)
+  expect_equal(as.numeric(testGHS[1, 3]), testent[2])
 })
   
 

Modified: pkg/inst/tests/test-util.data.R
===================================================================
--- pkg/inst/tests/test-util.data.R	2012-08-29 14:18:01 UTC (rev 6)
+++ pkg/inst/tests/test-util.data.R	2012-09-02 15:11:09 UTC (rev 7)
@@ -34,6 +34,20 @@
   expect_true(max(abs(obigt.calc$a3.c - obigt.ref$a3.c)) < 1e-14)
 })
 
+test_that("add.obigt() replaces existing entries without changing species index", {
+  # store the original species index of citric acid
+  icitric <- info("citric acid", "aq")
+  # add supplemental database - includes citric acid
+  file <- system.file("extdata/thermo/OBIGT-2.csv", package="CHNOSZ")
+  isp <- add.obigt(file, force=TRUE)
+  # species index of citric acid should not have changed
+  expect_equal(info("citric acid", "aq"), icitric)
+  # check that names of species modified are same as in file
+  newdat <- read.csv(file, stringsAsFactors=FALSE)
+  # the order isn't guaranteed ... just make sure they're all there
+  expect_true(all(newdat$name %in% thermo$obigt$name[isp]))
+})
+
 # reference
 
 # Richard, L. and Helgeson, H. C. (1998) Calculation of the thermodynamic properties at elevated 

Modified: pkg/man/eos.Rd
===================================================================
--- pkg/man/eos.Rd	2012-08-29 14:18:01 UTC (rev 6)
+++ pkg/man/eos.Rd	2012-09-02 15:11:09 UTC (rev 7)
@@ -88,6 +88,8 @@
 
   Maier, C. G. and Kelley, K. K. (1932) An equation for the representation of high-temperature heat content data. \emph{J. Am. Chem. Soc.} \bold{54}, 3243--3246. \url{http://dx.doi.org/10.1021/ja01347a029}
 
+  Robie, R. A. and Hemingway, B. S. (1995) \emph{Thermodynamic Properties of Minerals and Related Substances at 298.15 K and 1 Bar (\eqn{10^5} Pascals) Pressure and at Higher Temperatures}. U. S. Geol. Surv., Bull. 2131, 461 p. \url{http://www.worldcat.org/oclc/32590140}
+
   Shock, E. L. and Helgeson, H. C. (1988) Calculation of the thermodynamic and transport properties of aqueous species at high pressures and temperatures: Correlation algorithms for ionic species and equation of state predictions to 5 kb and 1000\eqn{^{\circ}}{degrees }C. \emph{Geochim. Cosmochim. Acta} \bold{52}, 2009--2036. \url{http://dx.doi.org/10.1016/0016-7037(88)90181-0}
   
   Shock, E. L., Oelkers, E. H., Johnson, J. W., Sverjensky, D. A. and Helgeson, H. C. (1992) Calculation of the thermodynamic properties of aqueous species at high pressures and temperatures: Effective electrostatic radii, dissociation constants and standard partial molal properties to 1000 \eqn{^{\circ}}{degrees }C and 5 kbar. \emph{J. Chem. Soc. Faraday Trans.} \bold{88}, 803--826. \url{http://dx.doi.org/10.1039/FT9928800803}

Modified: pkg/man/iprotein.Rd
===================================================================
--- pkg/man/iprotein.Rd	2012-08-29 14:18:01 UTC (rev 6)
+++ pkg/man/iprotein.Rd	2012-09-02 15:11:09 UTC (rev 7)
@@ -20,7 +20,7 @@
   dl.aa(protein)
   aasum(aa, abundance = 1, average = FALSE, protein = NULL, organism = NULL)
   read.aa(file = "protein.csv")
-  add.protein(aa)
+  add.protein(aa, print.existing=FALSE)
 }
 
 \arguments{
@@ -33,6 +33,7 @@
   \item{abundance}{numeric, abundances of proteins}
   \item{average}{logical, return the weighted average of amino acid counts?}
   \item{file}{character, path to file with amino acid compositions}
+  \item{print.existing}{logical, print a message identifying existing proteins that were not added?}
 }
 
 \details{

Modified: pkg/man/util.data.Rd
===================================================================
--- pkg/man/util.data.Rd	2012-08-29 14:18:01 UTC (rev 6)
+++ pkg/man/util.data.Rd	2012-09-02 15:11:09 UTC (rev 7)
@@ -64,7 +64,7 @@
 
   \code{obigt2eos} takes a data frame in the format of \code{thermo$obigt} of one or more rows, removes scaling factors from equations-of-state parameters, and applies new column names depending on the \code{state}. This function is used by both \code{\link{info}} and \code{\link{subcrt}} when retrieving entries from the thermodynamic database. If \code{fixGHS} is TRUE a missing one of G, H or S for any species is calculated from the other two and the chemical formula of the species (used by \code{subcrt} when retrieving database entries).
 
-  \code{RH2obigt} implements a group additivity algorithm for standard molal thermodynamic properties and equations of state parameters of crystalline and liquid organic molecules from Richard and Helgeson, 1998. The names of the \code{compound}s and their physical \code{state} are searched for in the indicated \code{file}, that also contains chemical formulas and group stoichiometries; the names of the groups are stored in the column names of this file, and must be present in \code{\link{thermo}$obigt}. The default \code{file} includes data taken from Table 15 of Richard and Helgeson, 1998 for high molecular weight compounds in \samp{cr}ystalline and \samp{liq}uid states. An error is produced if any of the \code{compound}-\code{state} combinations is not found in the \code{file}, if any of the group names for a given \code{compound}-\code{state} combination is not found in \code{thermo$obigt}, or if the chemical formula calculated from group additivity (with the aid of \code{\link{i2A}} and \code{\link{as.chemical.formula}}) is not identical to that listed in the \code{file}.
+  \code{RH2obigt} implements a group additivity algorithm for standard molal thermodynamic properties and equations of state parameters of crystalline and liquid organic molecules from Richard and Helgeson, 1998. The names of the \code{compound}s and their physical \code{state} are searched for in the indicated \code{file}, that also contains chemical formulas and group stoichiometries; the names of the groups are stored in the column names of this file, and must be present in \code{\link{thermo}$obigt}. The default \code{file} (\code{\link{extdata}/thermo/RH98_Table15.csv}) includes data taken from Table 15 of Richard and Helgeson, 1998 for high molecular weight compounds in \samp{cr}ystalline and \samp{liq}uid states. An error is produced if any of the \code{compound}-\code{state} combinations is not found in the \code{file}, if any of the group names for a given \code{compound}-\code{state} combination is not found in \code{thermo$obigt}, or if the chemical formula calculated from group additivity (with the aid of \code{\link{i2A}} and \code{\link{as.chemical.formula}}) is not identical to that listed in the \code{file}.
 
 }
 

Modified: pkg/man/util.formula.Rd
===================================================================
--- pkg/man/util.formula.Rd	2012-08-29 14:18:01 UTC (rev 6)
+++ pkg/man/util.formula.Rd	2012-09-02 15:11:09 UTC (rev 7)
@@ -64,21 +64,21 @@
 
 \examples{
 \dontshow{data(thermo)}
-## mass and entropy of elements in chemical formulas
-# the "-1" is a single negative charge, the electron
-testform <- c("CH4", "H2O", "-1")
-testmass <- mass(testform)
-testent <- entropy(testform)
-stopifnot(all.equal(testmass, c(16.04276, 18.01528, 0)))
-stopifnot(all.equal(testent, c(63.83843212237, 
-  55.74952198853, 15.61663479924)))
-# another way to calculate the entropy of the elements in H2O
-(testGHS <- GHS("H2O", G=0, H=0))
-stopifnot(all.equal(as.numeric(testGHS[1, 3]), testent[2]))
+## mass and entropy from chemical formulas
+mass("H2O")
+entropy("H2O")
+mass("-1")  # electron
+entropy("-1")
 
+## three ways to get the formula of alanine
+iA <- info("alanine")
+info(iA)$formula
+as.chemical.formula(makeup(iA))
+get.formula(iA)
+
 ## converting among Gibbs energy, enthalpy, entropy
 # calculate the value of G from H and S
-GHS("H2O", H=water("H"), S=water("S"))
+GHS("H2O", H=water("H"), S=water("S"))[1, ]
 # that not quite equal to the value from water("G");
 # probably using different entropies of the elements
 



More information about the CHNOSZ-commits mailing list