[CHNOSZ-commits] r145 - in pkg/CHNOSZ: . R inst man vignettes

Sun Feb 12 10:43:41 CET 2017

Author: jedick
Date: 2017-02-12 10:43:41 +0100 (Sun, 12 Feb 2017)
New Revision: 145

Modified:
   pkg/CHNOSZ/DESCRIPTION
   pkg/CHNOSZ/R/revisit.R
   pkg/CHNOSZ/inst/CHECKLIST
   pkg/CHNOSZ/inst/NEWS
   pkg/CHNOSZ/man/objective.Rd
   pkg/CHNOSZ/man/revisit.Rd
   pkg/CHNOSZ/vignettes/anintro.Rmd
   pkg/CHNOSZ/vignettes/eos-regress.Rmd
   pkg/CHNOSZ/vignettes/vig.bib
Log:
anintro.Rmd: add optimization of chemical activities


Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================

--- pkg/CHNOSZ/DESCRIPTION	2017-02-12 02:14:49 UTC (rev 144)
+++ pkg/CHNOSZ/DESCRIPTION	2017-02-12 09:43:41 UTC (rev 145)
@@ -1,6 +1,6 @@
 Date: 2017-02-12
 Package: CHNOSZ
-Version: 1.0.8-34
+Version: 1.0.8-35
 Title: Chemical Thermodynamics and Activity Diagrams
 Author: Jeffrey Dick
 Maintainer: Jeffrey Dick <j3ffdick at gmail.com>

Modified: pkg/CHNOSZ/R/revisit.R
===================================================================
--- pkg/CHNOSZ/R/revisit.R	2017-02-12 02:14:49 UTC (rev 144)
+++ pkg/CHNOSZ/R/revisit.R	2017-02-12 09:43:41 UTC (rev 145)
@@ -51,7 +51,7 @@
   col = par("fg"), yline = 2, ylim = NULL, cex = par("cex"),
   lwd = par("lwd"), mar = NULL, side = 1:4, xlim = NULL, labcex = 0.6,
   pch = 1, main = NULL, plot.it = NULL, add = FALSE, plot.optval = TRUE,
-  style.2D = "contour") {
+  style.2D = "contour", bg = par("bg")) {
   # given a list of logarithms of activities of species
   # (as vectors or matrices or higher dimensional arrays) 
   # calculate a diversity index or thermodynamic property 
@@ -158,6 +158,11 @@
     xrange <- range(xs)
   } else xs <- NA
 
+  # format the objective name if it's DGtr or DGinf
+  if(objective=="DGtr") objexpr <- expr.property("DGtr/2.303RT")
+  if(objective=="DGinf") objexpr <- expr.property("DGinf/2.303RT")
+  else objexpr <- objective
+
   # make plots and assemble return values
   if(nd==0) {
     # a 0-D plot
@@ -168,21 +173,24 @@
         qqline(loga1)
       } else if(any(grepl("a2", objargs))) {
         # plot the points for a referenced objective
-        ylab <- "loga1"
-        xlab <- "loga2"
+        xlab <- substitute(log~italic(a)[expt])
+        ylab <- substitute(log~italic(a)[calc])
         if(is.null(xlim)) xlim <- extendrange(loga2)
         if(is.null(ylim)) ylim <- extendrange(loga1)
-        plot(loga2, loga1, xlab=xlab, ylab=ylab, pch=pch, col=col, xlim=xlim, ylim=ylim, cex=cex)
+        # just initialize the plot here; add points below so that appear above lines
+        plot(loga2, loga1, xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim, type="n")
         # add a 1:1 line
         lines(range(loga2), range(loga2), col="grey")
-        # add a lowess line
+        # add a loess line
         if(!is.null(lwd)) {
           ls <- loess.smooth(loga2, loga1, family="gaussian")
           lines(ls$x, ls$y, col="red", lwd=lwd)
         }
+        # add points
+        points(loga2, loga1, pch=pch, col=col, cex=cex, bg=bg)
       } else plot.it <- FALSE
       # add a title
-      if(missing(main)) main <- paste(objective, "=", round(H,3)) 
+      if(missing(main)) main <- substitute(obj==H, list(obj=objexpr, H=round(H,3)))
       if(plot.it) title(main=main)
     }
   } else if(nd==1) {
@@ -195,12 +203,8 @@
     if(plot.it) {
       if(is.null(ylim)) ylim <- extendrange(H, f=0.075)
       if(is.null(xlim)) xlim <- xrange
-      # format the objective name if it's DGtr or DGinf
-      if(objective=="DGtr") ylab <- expr.property("DGtr/2.303RT")
-      if(objective=="DGinf") ylab <- expr.property("DGinf/2.303RT")
-      else ylab <- objective
       if(!add) thermo.plot.new(xlim=xlim, ylim=ylim, xlab=axis.label(xname),
-        ylab=ylab, yline=yline, cex=cex, lwd=lwd, mar=mar, side=side)
+        ylab=objexpr, yline=yline, cex=cex, lwd=lwd, mar=mar, side=side)
       # plot the values
       lines(xs, c(H), col=col)
       # indicate the optimal value

Modified: pkg/CHNOSZ/inst/CHECKLIST
===================================================================
--- pkg/CHNOSZ/inst/CHECKLIST	2017-02-12 02:14:49 UTC (rev 144)
+++ pkg/CHNOSZ/inst/CHECKLIST	2017-02-12 09:43:41 UTC (rev 145)
@@ -2,11 +2,14 @@
 release checklist for CHNOSZ
 ****************************
 
-- run examples() to make sure that all examples can be run
+- uncomment CSL line in Rmd files (commented for building on R-Forge)
+  csl: elementa.csl
+
+- compile vignettes with release version number and date.
+
+- run examples() and demos() to make sure they can be run
   (that includes \donttest ones that aren't run by R CMD check)
 
-- run demos() to run all demos
-
 - check output of demo("sources") to make sure all data sources are cited
 
 - check output of R CMD Rd2pdf: fix lines truncated by page margins
@@ -24,8 +27,6 @@
 
 - update list of documentation topics in examples() with any new ones
 
-- run test_package("CHNOSZ") to run all tests
-
 - recreate extdata/thermo/obigt_check.csv after all data updates:
   co <- check.obigt()
   write.csv(co, "obigt_check.csv", row.names=FALSE, na="")

Modified: pkg/CHNOSZ/inst/NEWS
===================================================================
--- pkg/CHNOSZ/inst/NEWS	2017-02-12 02:14:49 UTC (rev 144)
+++ pkg/CHNOSZ/inst/NEWS	2017-02-12 09:43:41 UTC (rev 145)
@@ -1,4 +1,4 @@
-CHANGES IN CHNOSZ 1.0.8-34 (2017-02-12)
+CHANGES IN CHNOSZ 1.0.8-35 (2017-02-12)
 ---------------------------------------
 
 DOCUMENTATION:

Modified: pkg/CHNOSZ/man/objective.Rd
===================================================================
--- pkg/CHNOSZ/man/objective.Rd	2017-02-12 02:14:49 UTC (rev 144)
+++ pkg/CHNOSZ/man/objective.Rd	2017-02-12 09:43:41 UTC (rev 145)
@@ -71,7 +71,7 @@
 
 \code{DGtr} calculates the change in Gibbs energy/2.303RT of a system in which species with initial logarithms of activitiy (\code{loga1}) are transformed to the same species with different final logarithms of activity (\code{loga2}) at constant temperature, pressure and chemical potentials of basis species.
 It is calculated as the sum over species of (G2-G1) where G1/RT = -a1*Astar + a1*loga1 - a1 + a constant (where a1 is 10^loga1), likewise for G2, and where \code{Astar} is the starved affinity, that is the affinity of the reaction to form one mole of the species at unit activity from the basis species in their defined activities.
-The equation used arises from integrating dG = -A/dxi = -A/dn where xi is the reaction progress variable, dn/dxi = 1 is the reaction coefficient on the species, and A = Astar - 2.303RTloga is the chemical affinity. 
+The equation used arises from integrating dG = -A/dxi = -A/dn where xi is the reaction progress variable, dn/dxi = 1 is the reaction coefficient on the species, and A = Astar - 2.303RTloga is the chemical affinity (Dick and Shock, 2013).
 
 Each objective function has an attribute (see \code{\link{attributes}} and \code{\link{structure}}) named \samp{optimum} that takes the value of \samp{minimal} (\code{SD}, \code{CV}, \code{RMSD}, \code{CVRMSD}, \code{DGmix}, \code{DDGmix}, \code{DGtr}) or \samp{maximal} (\code{logact}, \code{shannon}, \code{qqr}, \code{spearman}, \code{pearson}).
 This attribute is used in \code{\link{optimal.index}} and \code{\link{extremes}} to identify the conditions having optimal values of the objective functions.
@@ -139,9 +139,10 @@
 
   Anderson, G. M. (2005) \emph{Thermodynamics of Natural Systems}, 2nd ed., Cambridge University Press, 648 p. \url{http://www.worldcat.org/oclc/474880901}
 
+  Dick, J. M. and Shock, E. L. (2013) A metastable equilibrium model for the relative abundance of microbial phyla in a hot spring. \emph{PLoS ONE} \bold{8}, e72395. \url{http://dx.doi.org/10.1371/journal.pone.0072395}
+
   Ludovisi, A. and Taticchi, M. I. (2006) Investigating beta diversity by Kullback-Leibler information measures. \emph{Ecological Modelling} \bold{192}, 299--313. \url{http://dx.doi.org/10.1016/j.ecolmodel.2005.05.022}
 
-
 }
 
 \concept{Secondary thermodynamic modeling}

Modified: pkg/CHNOSZ/man/revisit.Rd
===================================================================
--- pkg/CHNOSZ/man/revisit.Rd	2017-02-12 02:14:49 UTC (rev 144)
+++ pkg/CHNOSZ/man/revisit.Rd	2017-02-12 09:43:41 UTC (rev 145)
@@ -14,7 +14,7 @@
     ispecies = NULL, col = par("fg"), yline = 2, ylim = NULL,
     cex = par("cex"), lwd = par("lwd"), mar = NULL, side = 1:4,
     xlim = NULL, labcex = 0.6, pch = 1, main = NULL, plot.it = NULL,
-    add = FALSE, plot.optval = TRUE, style.2D = "contour")
+    add = FALSE, plot.optval = TRUE, style.2D = "contour", bg = par("bg"))
   optimal.index(z, objective)
   extremes(z, objective)
 }
@@ -41,6 +41,7 @@
   \item{plot.optval}{logical, show the location of the optimal value(s)?}
   \item{style.2D}{character, type of 2-D plot}
   \item{z}{numeric, matrix of values (\samp{H} element of output from \code{revisit})}
+  \item{bg}{character, background for \code{\link{points}}}
 }
 
 \details{

Modified: pkg/CHNOSZ/vignettes/anintro.Rmd
===================================================================
--- pkg/CHNOSZ/vignettes/anintro.Rmd	2017-02-12 02:14:49 UTC (rev 144)
+++ pkg/CHNOSZ/vignettes/anintro.Rmd	2017-02-12 09:43:41 UTC (rev 145)
@@ -44,12 +44,17 @@
 knit_hooks$set(smallish.mar = function(before, options, envir) {
     if (before) par(mar = c(4.2, 4.2, 0.9, 0.9))  # smallish margins on top and right
 })
-# dpi setting
-dpi <- 72
 # use pngquant to optimize PNG images
 knit_hooks$set(pngquant = hook_pngquant)
 # pngquant isn't available on R-Forge ...
-if (!nzchar(Sys.which("pngquant"))) pngquant <- NULL else pngquant <- "--speed=1 --quality=0-50"
+if (!nzchar(Sys.which("pngquant"))) {
+  pngquant <- NULL 
+  # save space by using a lower resolution
+  dpi <- 50
+} else {
+  pngquant <- "--speed=1 --quality=0-50"
+  dpi <- 72
+}
 # some frequently used HTML expressions
 logfO2 <- "log<i>f</i><sub>O<sub>2</sub></sub>"
 # use lowercase here because these tend to be variable names in the examples
@@ -152,7 +157,7 @@
 
 Some experimental functions are available:
 
-* (**experimental**) using <span style="color:green">`revisit()`</span> to calculate/plot summary statistics of the chemical activities of the species of interest and <span style="color:green">`findit()`</span> to search for combinations of activities of basis species, temperature and/or pressure that optimize those statistics.
+* (**experimental**) using <span style="color:green">`revisit()`</span> to calculate/plot summary statistics of the chemical activities of the species of interest and <span style="color:red">`findit()`</span> to search for combinations of activities of basis species, temperature and/or pressure that optimize those statistics.
 
 # Thermodynamic database and chemical formulas
 
@@ -235,7 +240,7 @@
 
 To calculate the standard molal properties of species and reactions, use <span style="color:green">`subcrt()`</span>.
 ```{marginfigure}
-The inspiration for the name <span style="color:green">`subcrt()`</span>, and the source of the FORTRAN subroutine used to calculate the thermodynamic properties of `r h2o`, is SUPCRT (Johnson et al., 1992).
+The inspiration for the name <span style="color:green">`subcrt()`</span>, and the source of the FORTRAN subroutine used to calculate the thermodynamic properties of H<sub>2</sub>O, is SUPCRT (Johnson et al., 1992).
 ```
 <sup>[- at JOH92]</sup>
 If no reaction coefficients are given, <span style="color:green">`subcrt()`</span> calculates the standard molal properties of invididual species:
@@ -472,10 +477,6 @@
 ```{r data_thermo, message=FALSE}
 ```
 
-## Activity coefficients
-
-XXX Example using <span style="color:green">`nonideal()`</span> ...  Alberty, 2003, Fig. 1.4-1.5 (p. 9-10, 235-236)
-
 # <span style="color:green">`affinity()`</span>, species of interest, and potential diagrams
 
 <span style="color:green">`affinity()`</span> offers calculations of chemical affinity of formation reactions over a configurable range of T, P, and activities of basis species.
@@ -497,7 +498,7 @@
 
 Now, we can use <span style="color:green">`affinity()`</span> to calculate the affinities of the formation reactions of each of the species:
 ```{marginfigure}
-The values returned by <span style="color:green">`affinity()`</span> are dimensionless, i.e. log<sub>10</sub>(*A*/*RT*).
+The values returned by <span style="color:green">`affinity()`</span> are dimensionless, i.e. *A*/2.303*RT*.
 ```
 ```{r affinity}
 affinity()$values
@@ -606,7 +607,7 @@
 ```{r mosaicfun, fig.fullwidth=TRUE, fig.width=9, fig.height=3, small.mar=TRUE, dpi=dpi, out.width="85%", message=FALSE, results="hide", cache=TRUE, fig.cap="Different projections (defined by the basis species) of the same thermodynamic system. The choice between them depends on convenience rather than correctness.", pngquant=pngquant}
 mosaicfun <- function(newvar, T = 200, res = 300) {
   swap.basis("e-", names(newvar))
-  if(names(newvar) == "O2") basis("O2", "gas")
+  if (names(newvar) == "O2") basis("O2", "gas")
   mosaicargs <- c(list(bases), blend=TRUE, pH=list(c(-2, 12, res)), newvar, T=T)
   m1 <- do.call(mosaic, mosaicargs)
   diagram(m1$A.species, lwd = 2, fill = rev(topo.colors(10)))
@@ -669,7 +670,7 @@
 abline(h = 0, lty = 2, lwd = 2)
 ```
 Finally, we use <span style="color:green">`diagram()`</span> to plot the results.
-Although only temperature is shown on the *x*-axis, pH and the activities of CO<sub>2</sub>, H<sub>2</sub>, NH<sub>4</sub><sup>+</sup>, and H<sub>2</sub>S are also varied according to the data in `rb`.
+Although only temperature is shown on the *x* axis, pH and the activities of CO<sub>2</sub>, H<sub>2</sub>, NH<sub>4</sub><sup>+</sup>, and H<sub>2</sub>S are also varied according to the data in `rb`.
 By default, <span style="color:green">`diagram()`</span> attempts to scale the affinities by dividing by the reaction coefficients of a shared basis species (in this case, CO<sub>2</sub>).
 To override that behavior, we set `balance = 1` to plot the affinities of the formation reactions as written (per mole of the species being formed).
 Also, `legend.x=NA` is used to suppress making a legend (so the labels are placed next to the lines instead).
@@ -1196,10 +1197,10 @@
 The experimental relative abundances are plotted as thin horizontal lines with the same style and color as the thicker curved lines calculated for metastable equilibrium.
 With the exception of YNL049C, the overlap between the calculations and experiments looks to be greatest near the middle-left part of the figure.
 The <span style="color:green">`revisit()`</span> function offers some statistical and thermodynamic measures that can quantify this comparison.
-Here, we plot the free energy associated with the information-theoretic "relative entropy" or Kullback-Leibler divergence:
+Here, we plot the information-theoretic free energy Δ*G<sub>inf</sub>* (relative entropy or Kullback-Leibler divergence):
 ```{r yeastplot, eval=FALSE, echo=10}
 ```
-```{r yeastplot, fig.fullwidth=TRUE, fig.width=7.5, fig.height=2.5, small.mar=TRUE, dpi=100, out.width="85%", echo=FALSE, message=FALSE, results="hide", cache=TRUE, fig.cap="ER-to-Golgi proteins: calculations without and with normalization, and free energy difference between experimental and normalization-calculated abundances.", pngquant=pngquant}
+```{r yeastplot, fig.fullwidth=TRUE, fig.width=7.5, fig.height=2.5, small.mar=TRUE, dpi=ifelse(dpi==50, 50, 100), out.width="85%", echo=FALSE, message=FALSE, results="hide", cache=TRUE, fig.cap="ER-to-Golgi proteins: calculations without and with normalization, and free energy difference between experimental and normalization-calculated abundances.", pngquant=pngquant}
 ```
 
 The minimum free energy difference occurs near `r logfO2` = -78.
@@ -1227,7 +1228,7 @@
 aa_Ef <- read.fasta(system.file("extdata/fasta/EF-Tu.aln",
                                 package = "CHNOSZ"), iseq = 1:2)
 ```
-<span style="color:green">`seq2aa()`</span> counts the amino acids in a user-supplied sequence and generates a data frame of the amino acid compoisition:
+<span style="color:green">`seq2aa()`</span> counts the amino acids in a user-supplied sequence and generates a data frame of the amino acid composition:
 ```{marginfigure}
 See also <span style="color:blue">`?count.aa`</span>, which can process both protein and nucleic acid sequences.
 ```
@@ -1357,6 +1358,133 @@
 ```{r bison_transferase, eval=FALSE, echo=12:15}
 ```
 
+# Experimental features
+
+## Activity coefficients
+
+XXX Example using <span style="color:green">`nonideal()`</span> ...  Alberty, 2003, Fig. 1.4-1.5 (p. 9-10, 235-236)
+
+## Optimization of chemical activities
+
+What are the conditions that minimize the standard deviation of calculated activities of species?
+What are the conditions that minimize the energetic difference between measured and calculated abundances?
+These are questions about optimization of chemical activities.
+<span style="color:green">`revisit()`</span> provides calculations and plots of an objective function in 1 or 2 dimensions of activity of basis species, *T*, or *P*.
+```{marginfigure}
+See <span style="color:blue">`?objective`</span> for more information.
+```
+<span style="color:red">`findit()`</span> provides a calculation of an objective function on a higher-dimensional grid.
+
+Amino acids may be produced by high-temperature processes in hydrothermal vents (smokers).
+To what extent do their relative abundances tend toward metastable equilibrium?
+Let's suppose that the major variables are oxygen fugacity and activity of water (`r h2o`) and ammonia (NH<sub>3</sub>).
+Here are the variables and the (very large) range of logarithms of chemical activity of the basis species that we will explore.
+```{r smoker_vars}
+vars <- list(O2 = c(-50, -25), NH3 = c(-15, 10), H2O = c(-15, 10))
+```
+
+Consider the amino acid abundances reported by @FMM_14.
+We identify the amino acids here with their one-letter abbreviations.
+Then, <span style="color:green">`aminoacids()`</span> is used to produce the full names, which in turn are used to search `thermo$obigt` for their formulas.
+<span style="color:green">`makeup()`</span> is used to count the elements in the formulas.
+```{r smoker_aa, message=FALSE}
+aa <- c("D", "T", "S", "E", "G", "A", "K", "H")
+AA <- aminoacids("", aa)
+AA.formula <- do.call(rbind, makeup(info(AA)))
+AA
+```
+
+Let's use the reported concentrations (µM) of amino acids in sample D1441 CW-2.
+```{marginfigure}
+Three amino acids (Val, Phe, Arg) with concentrations below the detection limit of 0.01 µM are not included here.
+```
+The concentrations are converted to logarithmic values (`loga2`).
+We then calculate the total C concentration in the measurements; this uses the amino acid formulas calculated above.
+```{r smoker_uM}
+uM <- c(1.10, 0.70, 3.73, 0.39, 3.04, 1.83, 0.27, 0.21)
+loga2 <- log10(uM * 1e-6)
+nC <- AA.formula[, "C"]
+Ctot <- sum(nC * uM * 1e-6)
+```
+
+Next, set the basis, load the species, and assign the temperature and resolution to be used in <span style="color:green">`affinity()`</span>.
+Also, we assign the objective function to be the root mean square deviation (RMSD) between experimental and calculated logarithms.
+```{r smoker_basis, results="hide"}
+basis("CHNOS")
+species(AA)
+T <- 270
+res <- 80
+objective <- "RMSD"
+```
+
+Then, we set up the figure, calculate chemical affinities in one dimension, and run <span style="color:green">`equilibrate()`</span> with the given total carbon activity.
+<span style="color:green">`revisit()`</span> is used to plot RMSD and to identify the `r logfO2` where it is minimized:
+```{r smoker_plot, eval=FALSE, echo=1:6}
+layout(matrix(1:6, nrow = 2))
+a <- affinity(O2 = c(vars[[1]], res), T = T)
+e <- equilibrate(a, loga.balance = log10(Ctot))
+r <- revisit(e, objective, loga2)
+d.basis <- describe.basis(ibasis = 1:4)
+legend("topleft", d.basis)
+ourfun <- function(ibasis = 1:5, x="bottomright") {
+  a <- affinity(T = T)
+  e <- equilibrate(a, loga.balance = log10(Ctot))
+  revisit(e, objective, loga2, cex = 2.7, pch = 21)
+  text(loga2, unlist(e$loga.equil), aa)
+  d.basis <- describe.basis(ibasis = ibasis)
+  legend(x, d.basis)
+}
+basis("O2", r$xopt)
+ourfun(5)
+a <- affinity(O2 = c(vars[[1]], res), NH3 = c(vars[[2]], res), T = T)
+e <- equilibrate(a, loga.balance = log10(Ctot))
+r <- revisit(e, objective, loga2)
+basis("O2", r$xopt)
+basis("NH3", r$yopt)
+ourfun(c(3, 5))
+findit(vars[1:3], objective, loga2=loga2, loga.balance=log10(Ctot),
+       T=T, res=10, niter=8, rat=0.8)
+ourfun(c(2, 3, 5), "right")
+```
+
+Now, we write a function that calculates the affinities and metasable equilibrium activities at a single condition and uses <span style="color:green">`revisit()`</span> to make a scatter plot.
+The plot includes a 1:1 line (grey) and a trend line calculated using R's `loess.smooth()` (red), and the minimum value of RMSD in the title.
+The function also adds a legend summarizing the optimal conditions.
+```{r smoker_plot, eval=FALSE, echo=7:14}
+```
+
+We set activity of `r logfO2` in the basis (where RMSD was minimized the first call to <span style="color:green">`revisit()`</span>), then use our function to make a scatter plot.
+```{r smoker_plot, eval=FALSE, echo=15:16}
+```
+
+In the next pair of plots, let's use two variables: `r logfO2` and log*a*<sub>NH<sub>3</sub></sub>.
+This time, <span style="color:green">`revisit()`</span> makes a contour diagram of the objective function, highlighting the location of the minimum (star) as well as the minimum trough following the *x* or *y* axis (dotted blue and green lines).
+After that, we set the activities of both of the basis species and then use `ourfun()` to make a scatter plot.
+The legend here shows the two variables that have been optimized; these give RMSD that is lower than in the first calculation:
+```{r smoker_plot, eval=FALSE, echo=17:22}
+```
+
+Finally, we use <span style="color:red">`findit()`</span> to optimize three variables.
+The affinity and equilibrium calculations are integrated into <span style="color:red">`findit()`</span>, so the function call requires the `loga.balance` and `T` arguments.
+Other arguments indicate the grid resolution, factor (ratio) by which variable ranges are reduced in each step, and the number of iterations.
+The result is a series of 2D contour plots of decreasing size, each one constructed for the optimal value of log*a*<sub>`r h2o`</sub> from the previous step.
+```{marginfigure}
+More dimensions require a lower resolution due to limited computational resources.
+```
+While running, <span style="color:red">`findit()`</span> sets the activities of basis species, so we can immediately call `ourfun()` to make make a scatter plot.
+```{marginfigure}
+The text for <span style="color:red">`findit()`</span> is red because it has side effects (changing the activities of basis species).
+```
+```{r smoker_plot, eval=FALSE, echo=23:25}
+```
+
+```{r smoker_plot, fig.fullwidth=TRUE, fig.width=9, fig.height=5, small.mar=TRUE, dpi=100, out.width="85%", echo=FALSE, message=FALSE, results="hide", cache=TRUE, fig.cap="Optimization of a thermodynamic model for relative abundances of amino acids in a 270 °C black smoker fluid using 1, 2, or 3 variables (left to right).", pngquant=pngquant}
+```
+
+The calculation using <span style="color:red">`findit()`</span>, in which the added variable log*a*<sub>`r h2o`</sub> optimizes to ca. -2.2, shows that measured concentrations of 6 amino acids fall within 1--2 log units of the relative abundances in metastable equilibrium.
+According these calculations, two amino acids, serine and threonine, are very far from metastable equilibrium with the others.
+There is a danger of using too many, or unrepresentative variables, but in some systems, calculations of this type may provide insight on processes affecting reactions of organic compounds.
+
 # Thermodynamic data options
 
 ## Source of data: <span style="color:green">`browse.refs()`</span>
@@ -1375,6 +1503,8 @@
 
 ## Tests; errors
 
+e.g. "variables define a transect but their lengths are not all equal"
+
 ## Messages from functions
 
 (Look at messages in calculation above: groups, ER-to-Golgi?)
@@ -1388,7 +1518,7 @@
 # Document history
 
 * 2010-09-30 Initial version.
-* 2011-08-15 Add browse.refs(); modifying database hint changed to help(thermo).
+* 2011-08-15 Add <span style="color:green">`browse.refs()`</span>; modifying database hint changed to <span style="color:blue">`help(thermo)`</span>.
 * 2012-06-16 Add “More activity diagrams”.
 * 2015-05-14 Add warning about internal consistency of thermodynamic data.
 * 2017-02-15 Complete rewrite; switch from Sweave to knitr (Tufte style).

Modified: pkg/CHNOSZ/vignettes/eos-regress.Rmd
===================================================================
--- pkg/CHNOSZ/vignettes/eos-regress.Rmd	2017-02-12 02:14:49 UTC (rev 144)
+++ pkg/CHNOSZ/vignettes/eos-regress.Rmd	2017-02-12 09:43:41 UTC (rev 145)
@@ -29,7 +29,14 @@
 # use pngquant to optimize PNG images
 knitr::knit_hooks$set(pngquant = knitr::hook_pngquant)
 # pngquant isn't available on R-Forge ...
-if (!nzchar(Sys.which("pngquant"))) pngquant <- NULL else pngquant <- "--speed=1 --quality=0-50"
+if (!nzchar(Sys.which("pngquant"))) {
+  pngquant <- NULL 
+  # save space by using a lower resolution
+  dpi <- 50
+} else {
+  pngquant <- "--speed=1 --quality=0-50"
+  dpi <- 72
+}
 ```
 
 
@@ -85,7 +92,7 @@
 Cpdat$P <- convert(Cpdat$P, "bar")
 ```
 
-```{r EOSregress_hide, fig.margin = TRUE, fig.cap = "Heat Capacity of Aqueous Methane", fig.width=3.5, fig.height=3.5, cache=TRUE, results="hide", message=FALSE, echo=FALSE, dpi=72, out.width=672, out.height=336, pngquant=pngquant}
+```{r EOSregress_hide, fig.margin = TRUE, fig.cap = "Heat Capacity of Aqueous Methane", fig.width=3.5, fig.height=3.5, cache=TRUE, results="hide", message=FALSE, echo=FALSE, dpi=dpi, out.width=672, out.height=336, pngquant=pngquant}
 var <- c("invTTheta2", "TXBorn")
 Cplm <- EOSregress(Cpdat, var, T.max=600)
 EOSplot(Cpdat, coefficients=round(Cplm$coefficients, 1))
@@ -166,7 +173,7 @@
 Vcoeffs_database <- EOScoeffs("CH4", "V")
 ```
 
-```{r Vplot_hide, fig.margin=TRUE, results="hide", message=FALSE, echo=FALSE, fig.width=3.5, fig.height=7, fig.cap="Volume of Aqueous Methane", dpi=72, out.width=672, out.height=672, pngquant=pngquant}
+```{r Vplot_hide, fig.margin=TRUE, results="hide", message=FALSE, echo=FALSE, fig.width=3.5, fig.height=7, fig.cap="Volume of Aqueous Methane", dpi=dpi, out.width=672, out.height=672, pngquant=pngquant}
 par(mfrow=c(2, 1))
 # plot 1
 EOSplot(Vdat, coefficients=Vcoeffs)
@@ -212,7 +219,7 @@
 As noted above, $\omega$ for electrolytes is not a constant.
 What happens if we apply the linear model anyway, knowing it's not applicable (especially at high temperature)?
 
-```{r Nalm, fig.margin=TRUE, fig.width=3.5, fig.height=3.5, fig.cap="Heat Capacity of Na$^+$ (Inapplicable: Constant $ω$)", dpi=72, out.width=672, out.height=336, pngquant=pngquant}
+```{r Nalm, fig.margin=TRUE, fig.width=3.5, fig.height=3.5, fig.cap="Heat Capacity of Na$^+$ (Inapplicable: Constant $ω$)", dpi=dpi, out.width=672, out.height=336, pngquant=pngquant}
 var <- c("invTTheta2", "TXBorn")
 Nalm <- EOSregress(Nadat, var, T.max=600)
 EOSplot(Nadat, coefficients=Nalm$coefficients, fun.legend=NULL)
@@ -236,7 +243,7 @@
 Then, we can use an iterative procedure that refines successive guesses of $\omega_{P_r,T_r}$.
 The convergence criterion is measured by the difference in sequential regressed values of $\omega$.
 
-```{r Nawhile, fig.margin=TRUE, fig.width=3.5, fig.height=3.5, fig.cap="Heat Capacity of Na$^+$ (Variable $ω$)", dpi=72, out.width=672, out.height=336, pngquant=pngquant}
+```{r Nawhile, fig.margin=TRUE, fig.width=3.5, fig.height=3.5, fig.cap="Heat Capacity of Na$^+$ (Variable $ω$)", dpi=dpi, out.width=672, out.height=336, pngquant=pngquant}
 diff.omega <- 999
 while(abs(diff.omega) > 1) {
   Nalm1 <- EOSregress(Nadat, var1, omega.PrTr=tail(omega.guess, 1), Z=1)
@@ -258,7 +265,7 @@
 Compared to $C_P^\circ$, the regression of $V^\circ$ is very finicky.
 Given a starting guess of $\omega_{P_r,T_r}$ of 1400000 cm$^3$∙bar/mol, the iteration converges on 1394890 instead of the "true" database value of 1383230 (represented by dashed line in the plot).
 
-```{r NaVolume_hide, fig.margin=TRUE, fig.width=3.5, fig.height=3.5, fig.cap="Volume of Na$^+$ (Variable $ω$)", results="hide", message=FALSE, echo=FALSE, dpi=72, out.width=672, out.height=336, pngquant=pngquant}
+```{r NaVolume_hide, fig.margin=TRUE, fig.width=3.5, fig.height=3.5, fig.cap="Volume of Na$^+$ (Variable $ω$)", results="hide", message=FALSE, echo=FALSE, dpi=dpi, out.width=672, out.height=336, pngquant=pngquant}
 T <- convert(seq(0, 600, 25), "K")
 P <- 1000
 prop.PT <- subcrt("Na+", T=T, P=P, grid="T", convert=FALSE)$out[[1]]

Modified: pkg/CHNOSZ/vignettes/vig.bib
===================================================================
--- pkg/CHNOSZ/vignettes/vig.bib	2017-02-12 02:14:49 UTC (rev 144)
+++ pkg/CHNOSZ/vignettes/vig.bib	2017-02-12 09:43:41 UTC (rev 145)
@@ -445,6 +445,17 @@
   doi       = {10.1038/ncomms2998},
 }
 
+ at Article{FMM_14,
+  author    = {Fuchida, Shigeshi and Mizuno, Yuki and Masuda, Harue and Toki, Tomohiro and Makita, Hiroko},
+  journal   = {Organic Geochemistry},
+  title     = {Concentrations and distributions of amino acids in black and white smoker fluids at temperatures over 200 °C},
+  year      = {2014},
+  volume    = {66},
+  pages     = {98--106},
+  doi       = {10.1016/j.orggeochem.2013.11.008},
+  issn      = {0146-6380},
+}
+
 @Article{MD98,
   author    = {Merino, Enrique and Dewers, Thomas},
   journal   = {Journal of Hydrology},