[CHNOSZ-commits] r141 - in pkg/CHNOSZ: . R demo inst man tests/testthat vignettes

Sat Feb 11 05:34:20 CET 2017

Author: jedick
Date: 2017-02-11 05:34:19 +0100 (Sat, 11 Feb 2017)
New Revision: 141

Modified:
   pkg/CHNOSZ/DESCRIPTION
   pkg/CHNOSZ/R/basis.R
   pkg/CHNOSZ/R/iprotein.R
   pkg/CHNOSZ/R/util.expression.R
   pkg/CHNOSZ/R/util.fasta.R
   pkg/CHNOSZ/demo/bugstab.R
   pkg/CHNOSZ/inst/NEWS
   pkg/CHNOSZ/man/iprotein.Rd
   pkg/CHNOSZ/man/util.fasta.Rd
   pkg/CHNOSZ/tests/testthat/test-iprotein.R
   pkg/CHNOSZ/vignettes/anintro.Rmd
   pkg/CHNOSZ/vignettes/hotspring.Rnw
   pkg/CHNOSZ/vignettes/hotspring.lyx
Log:
anintro.Rmd: add proteins: adding proteins


Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================

--- pkg/CHNOSZ/DESCRIPTION	2017-02-10 15:39:22 UTC (rev 140)
+++ pkg/CHNOSZ/DESCRIPTION	2017-02-11 04:34:19 UTC (rev 141)
@@ -1,6 +1,6 @@
-Date: 2017-02-10
+Date: 2017-02-11
 Package: CHNOSZ
-Version: 1.0.8-30
+Version: 1.0.8-31
 Title: Chemical Thermodynamics and Activity Diagrams
 Author: Jeffrey Dick
 Maintainer: Jeffrey Dick <j3ffdick at gmail.com>

Modified: pkg/CHNOSZ/R/basis.R
===================================================================
--- pkg/CHNOSZ/R/basis.R	2017-02-10 15:39:22 UTC (rev 140)
+++ pkg/CHNOSZ/R/basis.R	2017-02-11 04:34:19 UTC (rev 141)
@@ -94,7 +94,7 @@
 # to load a preset basis definition by keyword
 preset.basis <- function(key=NULL) {
   # the available keywords
-  basis.key <- c("CHNOS", "CHNOS+", "CHNOSe", "CHNOPS+", "MgCHNOPS+", "FeCHNOS", "FeCHNOS+", "CEQ")
+  basis.key <- c("CHNOS", "CHNOS+", "CHNOSe", "CHNOPS+", "MgCHNOPS+", "FeCHNOS", "FeCHNOS+", "QEC")
   # just list the keywords if none is specified
   if(is.null(key)) return(basis.key)
   # delete any previous basis definition
@@ -109,7 +109,7 @@
   else if(ibase==5) species <- c("Mg+2", "CO2", "H2O", "NH3", "H3PO4", "H2S", "e-", "H+")
   else if(ibase==6) species <- c("Fe2O3", "CO2", "H2O", "NH3", "H2S", "oxygen")
   else if(ibase==7) species <- c("Fe2O3", "CO2", "H2O", "NH3", "H2S", "oxygen", "H+")
-  else if(ibase==8) species <- c("cysteine", "glutamic acid", "glutamine", "H2O", "oxygen")
+  else if(ibase==8) species <- c("glutamine", "glutamic acid", "cysteine", "H2O", "oxygen")
   # get the preset logact
   logact <- preset.logact(species)
   # load the species and return the result

Modified: pkg/CHNOSZ/R/iprotein.R
===================================================================
--- pkg/CHNOSZ/R/iprotein.R	2017-02-10 15:39:22 UTC (rev 140)
+++ pkg/CHNOSZ/R/iprotein.R	2017-02-11 04:34:19 UTC (rev 141)
@@ -104,6 +104,8 @@
 }
 
 seq2aa <- function(protein, sequence) {
+  # remove newlines and whitespace
+  sequence <- gsub("\\s", "", gsub("[\r\n]", "", sequence))
   # make a data frame from counting the amino acids in the sequence
   caa <- count.aa(sequence)
   colnames(caa) <- aminoacids(3)
@@ -112,7 +114,7 @@
   ip <- suppressMessages(iprotein(protein))
   # now make the data frame
   po <- strsplit(protein, "_")[[1]]
-  aa <- data.frame(protein=po[1], organism=po[2], ref=NA, abbrv=NA)
+  aa <- data.frame(protein=po[1], organism=po[2], ref=NA, abbrv=NA, stringsAsFactors=FALSE)
   aa <- cbind(aa, chains=1, caa)
   return(aa)
 }
@@ -149,10 +151,10 @@
   return(out)
 }
 
-read.aa <- function(file="protein.csv") {
+read.aa <- function(file="protein.csv", ...) {
   # 20090428 added colClasses here
   # 20140128 added as.is=TRUE (in case numeric values are stored in ref or abbrv column)
-  aa <- read.csv(file, colClasses=c(rep("character", 2), NA, NA, rep("numeric", 21)), as.is=TRUE)
+  aa <- read.csv(file, colClasses=c(rep("character", 2), NA, NA, rep("numeric", 21)), as.is=TRUE, ...)
   if(!identical(colnames(aa), colnames(get("thermo")$protein)))
     stop(paste("format of", file, "is incompatible with thermo$protein"))
   return(aa)

Modified: pkg/CHNOSZ/R/util.expression.R
===================================================================
--- pkg/CHNOSZ/R/util.expression.R	2017-02-10 15:39:22 UTC (rev 140)
+++ pkg/CHNOSZ/R/util.expression.R	2017-02-11 04:34:19 UTC (rev 141)
@@ -74,7 +74,7 @@
   if(property=="pH") return("pH")
   if(property=="pe") return("pe")
   if(property=="IS") return("IS")
-  if(property=="ZC") return(quote(bar(italic(Z))[C]))
+  if(property=="ZC") return(quote(italic(Z)[C]))
   # process each character in the property abbreviation
   prevchar <- character()
   for(i in 1:length(propchar)) {

Modified: pkg/CHNOSZ/R/util.fasta.R
===================================================================
--- pkg/CHNOSZ/R/util.fasta.R	2017-02-10 15:39:22 UTC (rev 140)
+++ pkg/CHNOSZ/R/util.fasta.R	2017-02-11 04:34:19 UTC (rev 141)
@@ -44,12 +44,12 @@
   return(as.numeric(out))
 }
 
-read.fasta <- function(file, i=NULL, ret="count", lines=NULL, ihead=NULL,
+read.fasta <- function(file, iseq=NULL, ret="count", lines=NULL, ihead=NULL,
   start=NULL, stop=NULL, type="protein", id=NULL) {
   # read sequences from a fasta file
   # some of the following code was adapted from 
   # read.fasta in package seqinR
-  # value of 'i' is what sequences to read 
+  # value of 'iseq' is what sequences to read (default is all)
   # value of 'ret' determines format of return value:
   #   count: amino acid composition (same columns as thermo$protein, can be used by add.protein)
   #        or nucleic acid base composition (A-C-G-T)
@@ -80,28 +80,20 @@
     linefun <- function(i1,i2) lines[i1:i2]
   }
   # identify the lines that begin and end each sequence
-  if(is.null(i)) {
-    i <- ihead
-    begin <- i + 1
-    end <- i - 1
-    end <- c(end[-1], nlines)
-  } else {
-    begin <- i + 1
-    iend <- match(i,ihead)
-    # we have to be careful about the last record
-    iend[iend==ihead[length(ihead)]] <- NA
-    end <- ihead[iend+1] - 1
-    end[is.na(end)] <- nlines
-  } 
+  begin <- ihead + 1
+  end <- ihead - 1
+  end <- c(end[-1], nlines)
+  # use all or selected sequences
+  if(is.null(iseq)) iseq <- seq_along(begin)
   # just return the lines from the file
   if(ret=="fas") {
     iline <- numeric()
-    for(i in 1:length(begin)) iline <- c(iline,(begin[i]-1):end[i])
+    for(i in iseq) iline <- c(iline,(begin[i]-1):end[i])
     return(lines[iline])
   }
   # get each sequence from the begin to end lines
   seqfun <- function(i) paste(linefun(begin[i],end[i]),collapse="")
-  sequences <- lapply(1:length(i), seqfun)
+  sequences <- lapply(iseq, seqfun)
   # organism name is from file name
   # (basename minus extension)
   bnf <- strsplit(basename(file),split=".",fixed=TRUE)[[1]][1]
@@ -109,9 +101,9 @@
   # protein/gene name is from header line for entry
   # (strip the ">" and go to the first space)
   missid <- missing(id)
-  if(is.null(id)) id <- as.character(palply("", 1:length(i), function(j) {
+  if(is.null(id)) id <- as.character(palply("", iseq, function(j) {
     # get the text of the line
-    f1 <- linefun(i[j],i[j])
+    f1 <- linefun(ihead[j],ihead[j])
     # stop if the first character is not ">"
     # or the first two characters are "> "
     if(substr(f1,1,1)!=">" | length(grep("^> ",f1)>0))

Modified: pkg/CHNOSZ/demo/bugstab.R
===================================================================
--- pkg/CHNOSZ/demo/bugstab.R	2017-02-10 15:39:22 UTC (rev 140)
+++ pkg/CHNOSZ/demo/bugstab.R	2017-02-11 04:34:19 UTC (rev 141)
@@ -4,8 +4,8 @@
 
 # resolution for plots
 res <- 500
-# basis can be "AA" or "CHNOS"
-basis <- "AA"
+# basis can be "QEC" or "CHNOS"
+basis <- "QEC"
 layout(cbind(matrix(sapply(list(c(1, 2), c(3, 4)), function(x) rep(rep(x, each=3), 3)), nrow=6, byrow=TRUE),
              matrix(rep(c(0, 5, 5, 5, 5, 0), each=4), nrow=6, byrow=TRUE)))
 par(mar=c(3.3, 3.3, 1.5, 1.5), mgp=c(2.1, 0.7, 0), xaxs="i", yaxs="i", las=1, cex=0.9)

Modified: pkg/CHNOSZ/inst/NEWS
===================================================================
--- pkg/CHNOSZ/inst/NEWS	2017-02-10 15:39:22 UTC (rev 140)
+++ pkg/CHNOSZ/inst/NEWS	2017-02-11 04:34:19 UTC (rev 141)
@@ -1,4 +1,4 @@
-CHANGES IN CHNOSZ 1.0.8-30 (2017-02-10)
+CHANGES IN CHNOSZ 1.0.8-31 (2017-02-11)
 ---------------------------------------
 
 DOCUMENTATION:
@@ -25,8 +25,8 @@
 - For the `groups` argument in diagram(), activities are multiplied
   by the balance coefficients before the summation.
 
-- Add "CEQ" as a keyword for preset species in basis() (cysteine,
-  glutamic acid, glutamine, H2O, oxygen).
+- Add "QEC" as a keyword for preset species in basis() (glutamine,
+  glutamic acid, cysteine, H2O, oxygen).
 
 - More flexible parsing of chemical formulas for ZC() and other
   functions; e.g. `ZC(colMeans(protein.formula(1:4)))` now works.
@@ -49,6 +49,15 @@
   marks and lines; this is used in diagram() to redraw the axes on
   filled diagrams.
 
+- Add `...` argument to read.aa() (additional arguments for
+  read.csv()).
+
+- seq2aa() removes newlines and whitespace before counting the
+  letters in the sequence.
+
+- read.fasta(): change argument `i` to `iseq`; this is used to select
+  particular sequences to read from the file.
+
 CLEANUP AND BUG FIXES:
 
 - subcrt() returns `loggam` using the common logarithm; add

Modified: pkg/CHNOSZ/man/iprotein.Rd
===================================================================
--- pkg/CHNOSZ/man/iprotein.Rd	2017-02-10 15:39:22 UTC (rev 140)
+++ pkg/CHNOSZ/man/iprotein.Rd	2017-02-11 04:34:19 UTC (rev 141)
@@ -17,7 +17,7 @@
   aa2eos(aa, state=get("thermo")$opt$state)
   seq2aa(protein, sequence)
   aasum(aa, abundance = 1, average = FALSE, protein = NULL, organism = NULL)
-  read.aa(file = "protein.csv")
+  read.aa(file = "protein.csv", ...)
   add.protein(aa)
 }
 
@@ -31,6 +31,7 @@
   \item{abundance}{numeric, abundances of proteins}
   \item{average}{logical, return the weighted average of amino acid counts?}
   \item{file}{character, path to file with amino acid compositions}
+  \item{...}{additional arguments passed to \code{\link{read.csv}}}
 }
 
 \details{

Modified: pkg/CHNOSZ/man/util.fasta.Rd
===================================================================
--- pkg/CHNOSZ/man/util.fasta.Rd	2017-02-10 15:39:22 UTC (rev 140)
+++ pkg/CHNOSZ/man/util.fasta.Rd	2017-02-11 04:34:19 UTC (rev 141)
@@ -13,7 +13,7 @@
 \usage{
   grep.file(file, pattern = "", y = NULL, ignore.case = TRUE, 
     startswith = ">", lines = NULL, grep = "grep")
-  read.fasta(file, i = NULL, ret = "count", lines = NULL, 
+  read.fasta(file, iseq = NULL, ret = "count", lines = NULL, 
     ihead = NULL, start=NULL, stop=NULL, type="protein", id = NULL)
   count.aa(seq, start=NULL, stop=NULL, type="protein")
   uniprot.aa(protein, start=NULL, stop=NULL)
@@ -27,7 +27,7 @@
   \item{startswith}{character, only lines starting with this expression are matched}
   \item{lines}{list of character, supply the lines here instead of reading them from file}
   \item{grep}{character, name of system \samp{grep} command}
-  \item{i}{numeric, line numbers of sequence headers to read}
+  \item{iseq}{numeric, which sequences to read from the file}
   \item{ret}{character, specification for type of return (count, sequence, or FASTA format)}
   \item{ihead}{numeric, which lines are headers}
   \item{start}{numeric, position in sequence to start counting}
@@ -48,7 +48,7 @@
 If the lines from the file were obtained in a preceding operation, they can be supplied to this function in the \code{lines} argument.
 
 \code{read.fasta} is used to retrieve entries from a FASTA file.
-To read only selected sequences pass the line numbers of the header lines to the function in \code{i} (they can be identified using e.g. \code{grep.file}).
+Use \code{iseq} to select the sequences to read (the default is all sequences).
 The function returns various formats depending on the value of \code{ret}.
 The default \samp{count} returns a data frame of amino acid counts (the data frame can be given to \code{\link{add.protein}} in order to add the proteins to \code{\link{thermo}$protein}), \samp{seq} returns a list of sequences, and \samp{fas} returns a list of lines extracted from the FASTA file, including the headers (this can be used e.g. to generate a new FASTA file with only the selected sequences).
 Similarly to \code{grep.file}, this function utilizes the OS's \samp{grep} on supported operating systems in order to identify the header lines as well as \samp{cat} to read the file, otherwise \code{\link{readLines}} and \R's \code{\link{substr}} are used to read the file and locate the header lines.

Modified: pkg/CHNOSZ/tests/testthat/test-iprotein.R
===================================================================
--- pkg/CHNOSZ/tests/testthat/test-iprotein.R	2017-02-10 15:39:22 UTC (rev 140)
+++ pkg/CHNOSZ/tests/testthat/test-iprotein.R	2017-02-11 04:34:19 UTC (rev 141)
@@ -39,9 +39,12 @@
   expect_equal(formula, lprop$formula)
 })
 
-test_that("amino acid counts taken from a fasta file can be added",{
+test_that("read.fasta() identifies sequences correctly and gives amino acid compositions in the correct format",{
   ffile <- system.file("extdata/fasta/EF-Tu.aln", package="CHNOSZ")
   aa <- read.fasta(ffile)
+  expect_equal(aa[1, ], read.fasta(ffile, 1))
+  # use unlist here so that different row names are not compared
+  expect_equal(unlist(aa[8, ]), unlist(read.fasta(ffile, 8)))
   expect_message(ip1 <- add.protein(aa), "added 8 new protein\\(s\\)")
   expect_message(ip2 <- add.protein(aa), "replaced 8 existing protein\\(s\\)")
   # add.protein should return the correct indices for existing proteins

Modified: pkg/CHNOSZ/vignettes/anintro.Rmd
===================================================================
--- pkg/CHNOSZ/vignettes/anintro.Rmd	2017-02-10 15:39:22 UTC (rev 140)
+++ pkg/CHNOSZ/vignettes/anintro.Rmd	2017-02-11 04:34:19 UTC (rev 141)
@@ -42,7 +42,7 @@
     if (before) par(mar = c(4.2, 4.2, 0.9, 0.9))  # smallish margins on top and right
 })
 # dpi setting
-dpi <- 100
+dpi <- 72
 # use pngquant to optimize PNG images
 knitr::knit_hooks$set(pngquant = knitr::hook_pngquant)
 pngquant <- "--speed=1 --quality=0-50"
@@ -472,7 +472,7 @@
 Here we use <span style="color:red">`basis()`</span> with a keyword to identify a preset basis definition.
 ```{marginfigure}
 Possible keywords are `CHNOS` (including CO<sub>2</sub>, H<sub>2</sub>O, NH<sub>3</sub>, H<sub>2</sub>S, and O<sub>2</sub>), `CHNOS+` (also including H<sup>+</sup>), `CHNOSe` (including H<sup>+</sup>, and *e*<sup>-</sup> instead of O<sub>2</sub>).
-See <span style="color:blue">?basis</span> for more options.
+See <span style="color:blue">`?basis`</span> for more options.
 ```
 ```{r basis_CHNOSZ, results="hide"}
 basis("CHNOS+")
@@ -492,12 +492,12 @@
 The same result (in energetic units) could be obtained using <span style="color:green">`subcrt()`</span>, but <span style="color:green">`affinity()`</span> has the advantage of being able to perform calculations on a grid of *T*, *P*, or activities of basis species.
 Let's choose a set of variables commonly used in aqueous speciation diagrams: Eh and pH.
 To use Eh as a variable, the electron (`e-`) should be in the basis.
-To get the electron in there, we could use a different keyword (<span style="color:red">`basis("CHNOSe")`</span>), or swap oxygen out of the existing basis:
+To put the electron in there, we could use a different keyword (<span style="color:red">`basis("CHNOSe")`</span>), or swap oxygen out of the existing basis:
 ```{r swap_basis}
 swap.basis("O2", "e-")
 ```
 
-The <span style="color:red">swap.basis()</span> changed the basis species and recalculated their activities, but preserved the species of interest.
+The <span style="color:red">`swap.basis()`</span> changed the basis species and recalculated their activities, but preserved the species of interest.
 ```{marginfigure}
 That is, running <span style="color:green">`affinity()`</span>`$values` again would give the same result.
 ```
@@ -609,7 +609,7 @@
 ## *T*, *P*, activity transects
 
 Above, we used evenly-spaced grids of *T*, *P*, and/or chemical activities of basis species; the ranges of variables were given by two or three values (minimum, maximum, and optionally resolution).
-`affinity()` can also perform calculations along a transect, i.e. a particular path along one or more variables.
+<span style="color:green">`affinity()`</span> can also perform calculations along a transect, i.e. a particular path along one or more variables.
 A transect is calculated when there are four or more values assigned to the variable(s).
 Let's use this feature to calculate affinities (negative Gibbs energies) of methanogenesis and biosynthetic reactions in a hydrothermal system.
 Some results of mixing calculations for seawater and vent fluid from the Rainbow hydrothermal field, reported by @SC10, are included in a data file in CHNOSZ:
@@ -634,14 +634,14 @@
 Now we can calculate affinity along the transect of changing temperature and activities of five basis species.
 Each variable is given as a named argument; the name for `NH4+` must be quoted.
 ```{marginfigure}
-A shorter expression would use `do.call()` to construct the argument list: `do.call(affinity, as.list(rb))`
+A shorter expression would use R's `do.call()` to construct the argument list: `do.call(`<span style="color:green">`affinity`</span>`, as.list(rb))`
 ```
 ```{marginfigure}
 The target of the conversion is `G`, or free energy, from `logK`.
 That conversion requires temperature in Kelvin, which is obtained by conversion from °C.
 We finish with a negation (affinity is negative Gibbs energy) and scaling from cal to kcal.
 ```
-Using the `convert()` function in CHNOSZ, we also convert the result from dimensionless values (*A*/2.303*RT*) to kcal/mol.
+Using <span style="color:green">`convert()`</span>, we also convert the result from dimensionless values (*A*/2.303*RT*) to kcal/mol.
 ```{r rainbow_affinity, message=FALSE}
 a <- affinity(T=rb$T, CO2=rb$CO2, H2=rb$H2,
               `NH4+`=rb$`NH4+`, H2S=rb$H2S, pH=rb$pH)
@@ -655,9 +655,9 @@
         col=rainbow(8), lwd=2, legend.x=NA, bg="slategray3")
 abline(h=0, lty=2, lwd=2)
 ```
-Finally, we use `diagram()` to plot the results.
+Finally, we use <span style="color:green">`diagram()`</span> to plot the results.
 Although only temperature is shown on the *x*-axis, pH and the activities of CO<sub>2</sub>, H<sub>2</sub>, NH<sub>4</sub><sup>+</sup>, and H<sub>2</sub>S are also varied according to the data in `rb`.
-By default, `diagram()` attempts to scale the affinities by dividing by the reaction coefficients of a shared basis species (in this case, CO<sub>2</sub>).
+By default, <span style="color:green">`diagram()`</span> attempts to scale the affinities by dividing by the reaction coefficients of a shared basis species (in this case, CO<sub>2</sub>).
 To override that behavior, we set `balance=1` to plot the affinities of the formation reactions as written (per mole of the species being formed).
 Also, `legend.x=NA` is used to suppress making a legend (so the labels are placed next to the lines instead).
 ```{r rainbow_diagram, eval=FALSE}
@@ -665,7 +665,7 @@
 
 ## Buffers
 
-There is one final feature of `affinity()` to notice.
+There is one other feature of <span style="color:green">`affinity()`</span> to mention here.
 Can we go the other direction: calculate the activities of basis species from the activities of the species of interest?
 This question relates to the concept of chemical activity buffers.
 In CHNOSZ there are two ways to perform buffer calculations:
@@ -675,7 +675,7 @@
 * the buffers are active in calculations of affinity of other species
 * use <span style="color:red">`mod.buffer()`</span> to change or add buffers in `thermo$buffer`
 * <span style="color:blue">`demo(buffer)`</span> uses it for mineral buffers (solid lines)
-2. Use the `what` argument of `diagram()` to solve for the activity of the indicated basis species
+2. Use the `what` argument of <span style="color:green">`diagram()`</span> to solve for the activity of the indicated basis species
 * more convenient (the buffers come from the currently defined species of interest), but only a single basis species can be buffered, and it's not used in the calculation of affinity
 * <span style="color:blue">`demo(buffer)`</span> uses it for aqueous organic species as buffers (dotted and dashed lines)
 
@@ -746,7 +746,7 @@
 
 ## Getting from affinity to equilibrium
 
-The `equilibrate()` function in CHNOSZ automatically chooses between two methods for calculating equilibrium.
+The <span style="color:green">`equilibrate()`</span> function in CHNOSZ automatically chooses between two methods for calculating equilibrium.
 ```{marginfigure}
 For more information, see the vignette <span style="color:blue">*Equilibrium in CHNOSZ*</span>.
 ```
@@ -783,7 +783,7 @@
 diagram(a150, add=TRUE, col="red")
 ```
 
-Now we use `equilibrate()` to calculate the activities of species.
+Now we use <span style="color:green">`equilibrate()`</span> to calculate the activities of species.
 Our balancing constraint is that the total activity of C is 10<sup>-3</sup>.
 This shows a hypothetical *metastable equilibrium*; we know that for true equilibrium the total activity of C is affected by pH.
 ```{r bjerrum_2, eval=FALSE}
@@ -809,18 +809,18 @@
 ## Groups of species
 
 Sometimes it is helpful to look at the total concentration (i.e. activity) of groups in species distribution diagrams.
-The `groups` argument of `diagram()` can be used to sum together the activities of species.
+The `groups` argument of <span style="color:green">`diagram()`</span> can be used to sum together the activities of species.
 
 To demonstrate this feature, let's consider the distribution of carbon among organic and inorganic species in the hydrothermal mixing scenario described by @SS98.
 First we define the basis and add two inorganic species.
-The `index.return=FALSE` argument tells `info()` to return the index (number) of the species in the current species definition; these indices are saved for use below:
+The `index.return=TRUE` argument tells <span style="color:green">`species()`</span> to return the index (number) of the species in the current species definition; these indices are saved for use below:
 ```{r groups_basis, results="hide", message=FALSE}
 basis("CHNOS+")
 ii <- species(c("CO2", "HCO3-"), index.return=TRUE)
 ```
 
 Next, we add each group of organic species: C<sub>1</sub>--C<sub>8</sub> alcohols, C<sub>3</sub>--C<sub>8</sub> ketones, C<sub>2</sub>--C<sub>12</sub> carboxylic acids and their corresponding anions, and C<sub>2</sub>--C<sub>8</sub> alkenes.
-We provide `info()` with a set of `ispecies` values to select these species.
+We provide <span style="color:green">`info()`</span> with a set of `ispecies` values to select these species.
 The species in each group are ordered by carbon number in the database, so the set is made from the starting and ending indices using R's `seq()` function, wrapped by `seq2()` to make the code shorter.
 ```{r groups_species, message=FALSE}
 seq2 <- function(x) seq(x[1], x[2])
@@ -836,7 +836,7 @@
 The specific values are for calculations with vent fluids initially set by the fayalite-magnetite-quartz buffer minus 1/2 log*f*<sub>O<sub>2</sub></sub> (FMQ - 1/2).
 ```
 These values were calculated using a speciation and mixing model that is not available in CHNOSZ; however, we can use these intermediate values as input to the "downstream" calculations that are available in CHNOSZ.
-Because of the noise introduced by digitization of the figure, we smooth the data using R's `smooth.spline()` function; the lower *T* limit reflects the absence of data below this temperature in the figure for log*f*<sub>O<sub>2</sub></sub>.
+Because of the noise introduced by digitization of the figure, we smooth the data using R's `smooth.spline()`; the lower *T* limit reflects the absence of data below this temperature in the figure for log*f*<sub>O<sub>2</sub></sub>.
 ```{r groups_data}
 O2dat <- read.csv(system.file(
   "extdata/cpetc/SS98_Fig5a.csv", package="CHNOSZ"))
@@ -848,8 +848,8 @@
 ```
 
 We are ready to calculate affinities and equilibrium activities of the species.
-This calculation utilizes the transect mode of `affinity()`.
-The call to `equilibrate()` runs with the default balance (in this case, CO<sub>2</sub>), with a log activity set to -2.5.
+This calculation utilizes the transect mode of <span style="color:green">`affinity()`</span>.
+The call to <span style="color:green">`equilibrate()`</span> runs with the default balance (in this case, CO<sub>2</sub>), with a log activity set to -2.5.
 ```{marginfigure}
 Actually, the total concentration of carbon depends on the mixing ratio, ranging from about 10<sup>-2.2</sup> (seawater) to 10<sup>-2.6</sup> (vent fluid).
 The ability to vary the activity of the balanced basis species is not yet implemented in CHNOSZ, so a single value is used here.
@@ -892,10 +892,10 @@
 ## Choosing a different balance
 
 How about the choice between balancing constraints?
-Be default, `equilibrate()` and `diagram()` balance reactions on the first basis species that is present in each of the species of interest.
+By default, <span style="color:green">`equilibrate()`</span> and <span style="color:green">`diagram()`</span> balance reactions on the first basis species that is present in each of the species of interest.
 Let's look at some amino acids in a hypothetical metastable equilibrium.
 This calculation is based on one described by @Sho90b for five amino acids, but here we include 20 proteinogenic amino acids, whose names are returned by `aminoacids("")`.
-We use `ZC.col()` to generate colors based on the average oxidation state of carbon of the amino acids (red and blue for relatively reduced and oxidized).
+We use <span style="color:green">`ZC.col()`</span> to generate colors based on the average oxidation state of carbon of the amino acids (red and blue for relatively reduced and oxidized).
 ```{r aminoacids_setup, results="hide", message=FALSE}
 basis("CHNOS")
 basis("CO2", "gas")
@@ -906,7 +906,7 @@
 col <- ZC.col(aa.ZC)
 ```
 
-To make plots using different balance constraints, let's write a simple function that sets the `balance` argument of `diagram()` and adds a title to the plot.
+To make plots using different balance constraints, let's write a simple function that sets the `balance` argument of <span style="color:green">`diagram()`</span> and adds a title to the plot.
 The first plot is the most similar to Figure 4 of Shock (1990), except for the absence of alanine (probably due to different thermodynamic data) and the presence of some other amino acids.
 There, we set `balance=1`, which indicates that moles of species are conserved; this is equivalent to balancing on the amino acid backbone.
 The remaining plots balance on each of the basis species (except for O<sub>2</sub>), then on volume (in the last plot).
@@ -931,12 +931,12 @@
 
 # Proteins
 
-Proteins in CHNOSZ are handled differently from other species.
+Proteins in CHNOSZ are handled a little bit differently from other species.
 Amino acid group additivity is used to obtain the thermodynamic properties of proteins.
 Therefore, CHNOSZ has a data file with amino acid compositions of selected proteins, as well as functions for importing and downloading amino acid sequence data.
 When proteins in CHNOSZ are identified by name, they include an underscore, such as in `LYSC_CHICK` (chicken lysozyme C).
 
-The length and chemical formula of one or more proteins are returned by `protein.length()` and `protein.formula()`.
+The length and chemical formula of one or more proteins are returned by <span style="color:green">`protein.length()`</span> and <span style="color:green">`protein.formula()`</span>.
 We can calculate the formula of the protein, and the per-residue formula, and show that both have the same average oxidation state of carbon:
 ```{r formula_LYSC_CHICK}
 pl <- protein.length("LYSC_CHICK")
@@ -979,8 +979,8 @@
 ```{r protein_Cp, eval=FALSE}
 ```
 
-Note that `subcrt()` has no provision for protein ionization.
-Instead, ionization is handled via `affinity()`, which calls `ionize.aa()` if a charged species is in the basis.
+Note that <span style="color:green">`subcrt()`</span> has no provision for protein ionization.
+Instead, ionization is handled via <span style="color:green">`affinity()`</span>, which calls <span style="color:green">`ionize.aa()`</span> if a charged species is in the basis.
 ```{marginfigure}
 Whether to calculate properties using aqueous or crystalline groups is determined by the value of `thermo\$opt\$state`; if it is changed from its default of `aq` to `cr`, no ionization is possible.
 ```
@@ -1004,16 +1004,16 @@
        col=1:4, lty=1, bty="n", cex=0.9)
 ```
 We calculate the affinities for the same four proteins, using both charged and uncharged sets of basis species to activate and suppress the ionization calculations.
-The ionized calculation returns a series of values (as a function of pH), but there is only one value of affinity returned by the nonionized calculation, so we need to use `as.numeric()` to avoid subtracting non-conformable arrays:
+The ionized calculation returns a series of values (as a function of pH), but there is only one value of affinity returned by the nonionized calculation, so we need to use R's `as.numeric()` to avoid subtracting non-conformable arrays:
 ```{r protein_ionization, eval=FALSE}
 ```
 
-Above, we used the `iprotein` argument of `affinity()` to specify the proteins in the calculation, using their indices as returned by `iprotein()`.
+Above, we used the `iprotein` argument of <span style="color:green">`affinity()`</span> to specify the proteins in the calculation, using their indices returned by <span style="color:green">`iprotein()`</span>.
 ```{marginfigure}
 The `iprotein` index refers to the rownumber of `thermo\$protein`; this is distinct from the `ispecies` index, which refers to the rownumber of `thermo\$species`.
 ```
 That approach utilizes some optimizations that can be realized due to group additivity, and is useful for calculations involving many proteins.
-An alternative, but slower, approach is to identify the proteins to `species()`; this produces results that are equivalent to using the `iprotein` argument:
+An alternative, but slower, approach is to identify the proteins to <span style="color:green">`species()`</span>; this produces results that are equivalent to using the `iprotein` argument:
 <!-- this is needed because the figure above might be cached, preventing the call to basis() there -->
 ```{r basis_CHNOS, echo=FALSE, results="hide"}
 basis("CHNOS")
@@ -1032,10 +1032,10 @@
 
 Let's compare the `r zc` of Rubisco with optimal growth temperature of organisms, as shown in Figure 6a of @Dic14.
 First we read a CSV file with the protein ID's and optimal growth temperature (*T*<sub>opt</sub>); the midpoint of the range of *T*<sub>opt</sub> is used for plotting.
-Then we use `read.fasta()` to read a FASTA file holding the amino acid sequences of the proteins; the function returns a data frame with the amino acid counts.
+Then we use <span style="color:green">`read.fasta()`</span> to read a FASTA file holding the amino acid sequences of the proteins; the function returns a data frame with the amino acid counts.
 To put the proteins in the right order, the IDs in the CSV file are matched to the names of the proteins in the FASTA file.
 Then, in one line, we calculate the formula of the protein, followed by `r zc`.
-Next, point symbols are assigned according to domain (Eukaryota, Bacteria, Archaea); numerals inside the symbols reflect the ordering by *T*<sub>opt</sub> in three temperature ranges (0--35 °C, 37.5--60 °C, and 65--100 °C).
+Next, point symbols are assigned according to domain (Archaea, Bacteria, Eukaryota); numerals inside the symbols reflect the ordering by *T*<sub>opt</sub> in three temperature ranges (0--35 °C, 37.5--60 °C, and 65--100 °C).
 
 ```{r rubisco_ZC, fig.margin=TRUE, fig.width=4, fig.height=4, small.mar=TRUE, dpi=dpi, out.width="100%", echo=FALSE, message=FALSE, fig.cap="Average oxidation state of carbon in Rubisco compared with optimal growth temperature of organisms.", cache=TRUE, pngquant=pngquant}
 datfile <- system.file("extdata/cpetc/rubisco.csv", package="CHNOSZ")
@@ -1059,22 +1059,22 @@
 ```{r rubisco_ZC, eval=FALSE}
 ```
 
-`protein.basis()` returns the stoichiometry of the basis species the formation reaction of the proteins.
-Dividing by `protein.length()` gives the per-residue reaction coefficients.
-Using the set of basis species we have seen before (CO<sub>2</sub>, NH<sub>3</sub>, H<sub>2</sub>S, `r h2o`, `r o2`) there is a correlation between `r zc` and *n*<sub>`r o2`</sub>/residue, as well as a correlation of the latter with *n*<sub>`r h2o`</sub>/residue (left column).
+<span style="color:green">`protein.basis()`</span> returns the stoichiometry of the basis species in the formation reaction of the proteins.
+Dividing by <span style="color:green">`protein.length()`</span> gives the per-residue reaction coefficients (*n*̅).
+Using the set of basis species we have seen before (CO<sub>2</sub>, NH<sub>3</sub>, H<sub>2</sub>S, `r h2o`, `r o2`) there is a correlation between `r zc` and *n*̅<sub>`r o2`</sub>, as well as a correlation of the latter with *n*̅<sub>`r h2o`</sub> (left column).
 ```{marginfigure}
-The calculation of *Z*<sub>C</sub>, which is a sum of elemental ratios, is not affected by the choice of basis species.
+The calculation of *Z*<sub>C</sub>, which sums elemental ratios, is not affected by the choice of basis species.
 ```
-The "CEQ" keyword to <span style="color:red">basis()</span> loads basis species with a particular combination of three amino acids (cysteine, glutamic acid, glutamine, `r h2o`, `r o2`) that strengthens the relationship between `r zc` and *n*<sub>`r o2`</sub>/residue, but weakens that between *n*<sub>`r o2`</sub>/residue and *n*<sub>`r h2o`</sub>/residue (right column).
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/chnosz -r 141