From noreply at r-forge.r-project.org Sat Feb 1 09:39:10 2014 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Sat, 1 Feb 2014 09:39:10 +0100 (CET) Subject: [CHNOSZ-commits] r65 - in pkg/CHNOSZ: . R inst vignettes Message-ID: <20140201083910.992BC180153@r-forge.r-project.org> Author: jedick Date: 2014-02-01 09:39:09 +0100 (Sat, 01 Feb 2014) New Revision: 65 Modified: pkg/CHNOSZ/DESCRIPTION pkg/CHNOSZ/R/species.R pkg/CHNOSZ/R/util.data.R pkg/CHNOSZ/inst/NEWS pkg/CHNOSZ/vignettes/hotspring.Rnw pkg/CHNOSZ/vignettes/hotspring.lyx Log: more additions to hotspring.Rnw Modified: pkg/CHNOSZ/DESCRIPTION =================================================================== --- pkg/CHNOSZ/DESCRIPTION 2014-01-30 16:20:32 UTC (rev 64) +++ pkg/CHNOSZ/DESCRIPTION 2014-02-01 08:39:09 UTC (rev 65) @@ -1,6 +1,6 @@ -Date: 2014-01-30 +Date: 2014-02-01 Package: CHNOSZ -Version: 1.0.3-2 +Version: 1.0.3-3 Title: Chemical Thermodynamics and Activity Diagrams Author: Jeffrey Dick Maintainer: Jeffrey Dick Modified: pkg/CHNOSZ/R/species.R =================================================================== --- pkg/CHNOSZ/R/species.R 2014-01-30 16:20:32 UTC (rev 64) +++ pkg/CHNOSZ/R/species.R 2014-02-01 08:39:09 UTC (rev 65) @@ -31,7 +31,8 @@ # can cause problems in situations where zeros are needed # (manifests as issue in longex("phosphate"), where which.balance() # identifies H2O as conserved component) - out <- zapsmall(nbasis) + # 20140201 set digits (to R default) because getOption("digits") is changed in knitr + out <- zapsmall(nbasis, digits=7) # add names of species and basis species colnames(out) <- colnames(tbmat) # add names of species only if it was a character argument Modified: pkg/CHNOSZ/R/util.data.R =================================================================== --- pkg/CHNOSZ/R/util.data.R 2014-01-30 16:20:32 UTC (rev 64) +++ pkg/CHNOSZ/R/util.data.R 2014-02-01 08:39:09 UTC (rev 65) @@ -154,7 +154,11 @@ thermo$obigt <- to1 
rownames(thermo$obigt) <- 1:nrow(thermo$obigt) assign("thermo", thermo, "CHNOSZ") - msgout("add.obigt: ", file, "\n") + # message about file, if file argument is missing (default) + if(missing(file)) { + msgout("add.obigt: using default file:\n") + msgout(file, "\n") + } msgout("add.obigt: read ", length(does.exist), " rows; made ", nexist, " replacements, ", nrow(to2), " additions, units = ", E.units, "\n") msgout("add.obigt: use data(thermo) to restore default database\n") Modified: pkg/CHNOSZ/inst/NEWS =================================================================== --- pkg/CHNOSZ/inst/NEWS 2014-01-30 16:20:32 UTC (rev 64) +++ pkg/CHNOSZ/inst/NEWS 2014-02-01 08:39:09 UTC (rev 65) @@ -1,4 +1,4 @@ -CHANGES IN CHNOSZ 1.0.3-2 (2014-01-30) +CHANGES IN CHNOSZ 1.0.3-3 (2014-02-01) -------------------------------------- - Add files with average amino acid compositions of proteins from Bison Modified: pkg/CHNOSZ/vignettes/hotspring.Rnw =================================================================== --- pkg/CHNOSZ/vignettes/hotspring.Rnw 2014-01-30 16:20:32 UTC (rev 64) +++ pkg/CHNOSZ/vignettes/hotspring.Rnw 2014-02-01 08:39:09 UTC (rev 65) @@ -10,6 +10,7 @@ \geometry{verbose,tmargin=2.5cm,bmargin=2.5cm,lmargin=2.5cm,rmargin=2.5cm} \usepackage{color} \usepackage{babel} +\usepackage{amsbsy} \usepackage{amssymb} \usepackage[numbers]{natbib} \usepackage[unicode=true,pdfusetitle, @@ -35,7 +36,7 @@ library(knitr) ## set global chunk options opts_chunk$set(fig.path='figure/hotspring-', cache.path='cache/hotspring-', fig.align='center', fig.show='hold', par=TRUE) -## set code/output width to be 80 +## set code/output width to be 60 options(width=80) ## tune details of base graphics (http://yihui.name/knitr/hooks) knit_hooks$set(par=function(before, options, envir){ @@ -47,20 +48,35 @@ \title{Hot-spring proteins in CHNOSZ} \maketitle -This is based on R code in the Supporting Information for the papers. 
-All code used is shown in the blocks, but much of the text output -(particularly in the figure-generating code) has been suppressed. + +\section{Introduction} + +The intention of this document is to demonstrate the calculations +described in two recent papers \citep{DS11,DS13} dealing with the +distribution and abundances of proteins in ``Bison Pool'', a hot +spring in Yellowstone National Park. The calculations use metastable +equilibrium to interrelate the compositions of proteins (from metagenomic +data) with environmental conditions, particularly pH, temperature, +and redox chemistry. This document is focused on the details of the +calculations and has little in the way of introduction of concepts +or interpretation and discussion; please see the papers for more details. + +There is no hidden code in this document; all code used to make the +figures is shown in the blocks, but much of the text output (particularly +in the figure-generating code) has been suppressed. The R code here +is based on the Supporting Information for the papers, with modifications. In order to keep the code here as short and efficient as possible, some aspects of the published figures including manual labeling and higher resolution are not all reproduced; these changes are noted -where possible. Also, some tricks (e.g. using ``xtabs'' to create -the BLAST frequency table, and ``revisit'' to calculate the DGtr) -have been introduced in order to streamline the code. +where possible. Also, some tricks (e.g. using \texttt{xtabs()} to +create the BLAST frequency table, and \texttt{revisit()} to calculate +the $\Delta G_{tr}$) have been introduced here in order to streamline +the code. Load CHNOSZ and the thermodynamic database. 
The thermodynamic data -for the methionine sidechain group used in the paper has been superseded -in CHNOSZ\_0.9-9 \citep{LD12}, but the older dataset is still available -using \texttt{add.obigt()}: +for the methionine sidechain group used in the 2011 paper \citep{DS11} +was updated in CHNOSZ\_0.9-9 \citep{LD12}, but the older dataset +is still available using \texttt{add.obigt()}. <>= library(CHNOSZ) @@ -68,68 +84,88 @@ add.obigt() @ -First the shared constants +Some values that are shared among different calculations: measured +temperature ($^{\circ}$C) and pH. These are representative values +only; the actual values are not constant but vary due to water flow, +weather, animals, etc. -<>= -# names of the sampling locations -sites <- c("N", "S", "R", "Q", "P") -sitenames <- paste("bison", sites, sep="") -# the measured temperatures (degrees C) and pHs +<>= bison.T <- c(93.3, 79.4, 67.5, 65.3, 57.1) bison.pH <- c(7.350, 7.678, 7.933, 7.995, 8.257) @ -Load the proteins +Now let's plot the measured temperature and pH in the hot spring. +Distances (in meters) of the sampling sites are measured from the +source of the hot spring. The \texttt{Tfun()} and \texttt{pHfun()} +are also used further below. +<>= +distance <- c(0, 6, 11, 14, 22) +par(mfrow=c(1, 2), mar=c(4, 4, 3, 2)) +xpoints <- seq(0, 22, length.out=128) +# T plot +plot(distance, bison.T, xlab="distance, m", ylab=axis.label("T")) +Tfun <- splinefun(distance, bison.T, method="mono") +lines(xpoints, Tfun(xpoints)) + +# pH plot +plot(distance, bison.pH, xlab="distance, m", ylab="pH") +pHfun <- splinefun(distance, bison.pH, method="mono") +lines(xpoints, pHfun(xpoints)) +@ + +\href{http://dx.doi.org/10.1371/journal.pone.0022782.g005}{DOI link} +to original figure (panels A and B). The length of \texttt{xpoints} +here, 128, is the resolution used for the figures in the 2011 paper +(defined in the \texttt{mkargs()} function of the Supporting Information). + + +\section{General setup} + +Load the proteins. 
These are ``model proteins'', i.e. average amino +acid compositions of sequences classified according to their functional +annotation or by their phylum association, at each of the five sampling +sites located in the hot spring. + <>= # read the amino acid compositions aa.annot <- read.aa(system.file("extdata/protein/DS11.csv", package="CHNOSZ")) aa.phyla <- read.aa(system.file("extdata/protein/DS13.csv", package="CHNOSZ")) @ -Setup names, colors, line types +Here are the site names for the sampling locations (also referred +to as sites 1--5). +<>= +sites <- c("N", "S", "R", "Q", "P") +sitenames <- paste("bison", sites, sep="") +@ + +Here are the classifications according to functional annotation: + +<>= +classes <- unique(aa.annot$protein) +classes +@ + +Here are names of phyla and colors and line types used for plotting +(colors based on Figure 2 of Wu and Eisen, 2008 \citep{WE08}, with +modifications): + <>= -# the annotation-derived classifications -classes <- unique(aa.annot$protein) # the names of the phyla in alphabetical order (Deinococcus-Thermus at end) phyla.abc <- sort(unique(aa.phyla$organism))[c(1:7,9:11,8)] # an abbreviation for Dein.-Thermus phyla.abbrv <- phyla.abc phyla.abbrv[[11]] <- "Dein.-Thermus" -# colors used to identify the phyla in the barchart, based on Wu and Eisen, 2008 with modifications phyla.cols <- c("#f48ba5", "#f2692f", "#cfdd2a", "#962272", "#87c540", "#66c3a2", "#12a64a", "#f58656", "#ee3237", "#25b7d5", "#3953a4") -# line types used to identify the phyla phyla.lty <- c(1:6, 1:5) +phyla.abbrv @ -Function to setup the basis species -<>= -# function to load basis species -setup.basis <- function() { - basis(c("HCO3-", "H2O", "NH3", "HS-", "H2", "H+")) - basis(c("HCO3-", "NH3", "HS-", "H+"), c(-3, -4, -7, -7.933)) -} -@ - -Function to calculate $\log a_{\mathrm{H_{2}}}$, according to Equation -(2) in \citep{DS13}. 
- -<>= -get.logaH2 <- function(T) -11 + T * 3/40 -@ - -Set the temperature limits (\texttt{Tlim}) over which to perform the -calculations. - -<>= -Tlim <- c(50, 100) -@ - - \section{Average oxidation state of carbon} The average oxidation state of carbon ($\overline{Z}_{\mathrm{C}}$) @@ -187,20 +223,207 @@ does not appear in plot 1 here. -\section{Comparison old and new {[}Met{]}} +\section{Relative stabilities of ``overall'' model proteins} + +\subsection{Formation reactions of proteins from basis species} + +Function to setup the basis species. The basis species consist of +$\mathrm{HCO_{3}^{-}}$, $\mathrm{H_{2}O}$, $\mathrm{NH_{3}}$, $\mathrm{HS^{-}}$, +$\mathrm{H_{2}}$ and $\mathrm{H^{+}}$ (all aqueous species except +for $\mathrm{H_{2}O}$ liquid). + +<>= +setup.basis <- function() { + basis(c("HCO3-", "H2O", "NH3", "HS-", "H2", "H+")) + basis(c("HCO3-", "NH3", "HS-", "H+"), c(-3, -4, -7, -7.933)) +} +@ + +Set up the basis species and the species. Here, we add the proteins +using the previously read amino acid compositions (\texttt{aa.annot}), +and we save their index number (\texttt{ip.annot}) for use later. +For now, we load the species corresponding to the ``overall'' model +proteins (the first 5 in aa.annot). + +<>= +setup.basis() +ip.annot <- add.protein(aa.annot) +species("overall", sitenames) +@ + +The first 6 columns there represent the stoichiometry of the reactions +to form the proteins from the basis species. The reactions can be +divided by the lengths (number of amino acid residues) of the proteins +to write per-residue formation reactions. + +<>= +pl <- protein.length(ip.annot[1:5]) +mysp <- species() +mysp[, 1:6]/pl +@ + +Note e.g. the higher coefficient on $\mathrm{H_{2}}$ for sites 1 +and 2; increasing the activity of $\mathrm{H_{2}}$ (more reducing +conditions) has a relatively more favorable mass-action effect on +the formation of the proteins at the higher temperature sites. 
+ + +\subsection{Chemical affinities along a chemical gradient} + +Function to calculate $\log a_{\mathrm{H_{2}}}$ as a linear equation +in $T$. This was used to fit the spatial distribution of proteins +in the 2011 paper \citep{DS11} (also shown as Equation 2 in 2013 +\citep{DS13}). +\begin{equation} +\log a_{\mathrm{H_{2_{\left(aq\right)}}}}=-11+3/40\times T\mathrm{\left(^{\circ}C\right)}\label{eq:logaH2} +\end{equation} + + +<>= +get.logaH2 <- function(T) -11 + T * 3/40 +@ + +Calculate the residue-normalized chemical affinities of the formation +reactions of the overall model proteins. First set activities of the +proteins equal to unity (logarithm of activity equal to zero). Then +calculate affinities per mole of protein for the temperature, pH and +$\log a_{\mathrm{H_{2}}_{\left(aq\right)}}$ of each site. Use the +lengths of the model proteins to calculate the affinities per residue. + +<>= +species(1:5, 0) +a <- affinity(T=bison.T, pH=bison.pH, H2=get.logaH2(bison.T)) +a.res <- t(as.data.frame(a$values))/pl +a.res +@ + +\href{http://dx.doi.org/10.1371/journal.pone.0022782.t005}{DOI link} +to original table. + +The affinities are expressed as dimensionless values, i.e. $\boldsymbol{A}/2.303RT$ +where $\boldsymbol{A}$, $R$ and $T$ stand for chemical affinity, +gas constant, and temperature in Kelvin. The affinities are all negative, +but show a progression from higher for protein (row) 1 at the conditions +of sites (columns) 1 and 2 to higher for protein (row) 4 at the conditions +of sites (columns) 3 to 5. + +<>= +apply(a.res, 2, which.max) +@ + +To go from residue-normalized affinities to the actual progression +of relative stabilities of the proteins (i.e. size-adjusted affinities), +we subtract the logarithm of the protein length from the per-residue +affinities (also see description following Eq. 19 in Ref. \citep{DS11}). 
+ +<>= +a.res <- a.res - log10(pl) +apply(a.res, 2, which.max) +@ + +This 1-2-4 progression of stabilities is visualized below, and for +some groups of proteins, an update to the thermodynamic properties +of the methionine sidechain causes site 3 also to become stable. + + +\subsection{Relative stabilities along a chemical gradient} + +Set the temperature limits (\texttt{Tlim}) over which to perform the +calculations. + +<>= +Tlim <- c(50, 100) +@ + +Make two plots here. (1) The metastable equilibrium predominance diagram +of the overall model proteins as a function of temperature and logarithm +of activity of hydrogen. The stability fields for the proteins from +the higher temperatures are at higher activities of hydrogen in the +diagram. The dotted line passes through the stability fields of the +model proteins at approximately the actual environmental temperatures. +(2) Combine the gradients of temperature, pH and hydrogen activity +to calculate the metastable equilibrium activities of the proteins. +The total activity of residues is set by reference activities of the +proteins equal to $10^{-3}$. In order to label the $x$-axis ``distance'', +modify a couple of entries in the list returned by \texttt{affinity()} +(\texttt{vars}, \texttt{vals}); otherwise the $x$-axis would represent +temperature (the first variable in the argument list to \texttt{affinity()}). + +<>= +par(mfrow=c(1, 2)) +# first plot +a <- affinity(T=Tlim, H2=c(-7, -4)) +diagram(a, fill=NULL, names=1:5, normalize=TRUE) +lines(Tlim, get.logaH2(Tlim), lty=3) +# second plot +species(1:5, -3) +xT <- Tfun(xpoints) +xpH <- pHfun(xpoints) +xH2 <- get.logaH2(xT) +a <- affinity(T=xT, pH=xpH, H2=xH2) +a$vars[1] <- "distance, m" +a$vals[[1]] <- xpoints +e <- equilibrate(a, normalize=TRUE) +diagram(e, legend.x=NULL) +legend("bottom", lty=1:5, legend=1:5, bty="n", cex=0.6) +@ + +\href{http://dx.doi.org/10.1371/journal.pone.0022782.g005}{DOI link} +to original figure (panels B and F). 
+ +\clearpage + +Plot the equilibrium degrees of formation as a function of distance +for different classes of proteins. Calculate the affinities for all +of the proteins. For each group of three, make strip charts using +the \texttt{strip()} function in CHNOSZ. The heights of the bars represent +the relative abundances of the model proteins. The five steps of the +color code go from site 1 (red) to site 5 (blue). + +<>= +loadclass <- function(class) { + species(delete=TRUE) + species(rep(class, each=5), rep(sitenames, length(class))) +} +xclasses <- c("overall", "transferase", "transport", "synthetase", "membrane", "permease") +loadclass(xclasses) +a <- affinity(T=xT, pH=xpH, H2=xH2) +a$vars[1] <- "distance, m" +a$vals[[1]] <- xpoints +col <- c("red", "orange", "yellow", "green", "blue") +par(mfrow=c(1, 2), mar=c(4, 4, 1, 1)) +for(i in 1:2) { + ispecies <- lapply((1:3)+(i-1)*3, function(x) {1:5+(x-1)*5} ) + names(ispecies) <- xclasses[(1:3)+(i-1)*3] + strip(a = a, ispecies = ispecies, col = col, xticks = distance, cex.names = 1) +} +@ + +\href{http://dx.doi.org/10.1371/journal.pone.0022782.g007}{DOI link} +to original figure. + +\clearpage + + +\section{Comparing old and new methionine sidechain parameters} + Make some $T-\log a_{\mathrm{H_{2}}}$ protein metastable equilibrium predominance diagrams using old and new methionine sidechain {[}Met{]} -parameters. +parameters. When this is run, the old {[}Met{]} is in the database +(since \texttt{add.obigt()} was called above), so in the second row +(j=2), we reset the database to use the new {[}Met{]}. 
<>= par(mfrow=c(2, 3)) for(j in 1:2) { # use old [Met] for first row and new [Met] for second row - if(j==1) add.obigt(force=TRUE) else data(thermo) + if(j==2) { + data(thermo) + ip.annot <- add.protein(aa.annot) + } # setup basis species and proteins setup.basis() - ip.annot <- add.protein(aa.annot) # make the plots for(annot in c("overall", "transferase", "synthase")) { ip <- ip.annot[aa.annot$protein==annot] @@ -214,22 +437,21 @@ } @ -DOI link to original figure: \href{http://10.1371/journal.pone.0072395.g002}{link}. -The original was made with higher resolution (256) and some labels -were manually placed. +\href{http://dx.doi.org/10.1371/journal.pone.0072395.g002}{DOI link} to original +figure. The original was made with higher resolution (256) and some +labels were manually placed. -\section{Relative abundance calculation} +\section{Relative abundance calculations} Function to return the fractional abundances based on BLAST counts, -stored in the 'ref' column of aa.phyla. +stored in the \texttt{ref} column of \texttt{aa.phyla}. <>= alpha.blast <- function() { out <- xtabs(ref ~ protein + organism, aa.phyla) - # put it in correct order + # put it in correct order, then turn counts into fractions out <- out[c(1,5:2), c(1:7,9:11,8)] - # turn counts into fractions (sum=1) out <- out/rowSums(out) return(as.data.frame(out)) } @@ -237,23 +459,22 @@ Function to calculate metastable equilibrium degrees of formation of proteins (normalized to residues) as a function of $\log a_{\mathrm{H_{2}}}$ -for a specified location. Note: in the call to affinity, change 101 -to 1001 to reproduce calculations in paper. +for a specified location. Note: in the call to \texttt{affinity()}, +increase the resolution from 101 to 1001 to reproduce the calculations +in the paper. 
<>= alpha.equil <- function(i=1) { - # which rows of the table correspond to this location + # order the names and counts to go with the alphabetical phylum list iloc <- which(aa.phyla$protein==sitenames[i]) - # order the names and counts to go with the alphabetical phylum list iloc <- iloc[order(match(aa.phyla$organism[iloc], phyla.abc))] # set up basis species, with pH specific for this location setup.basis() basis("pH", bison.pH[i]) - # calculate affinities of reactions to form the proteins over the logaH2 range + # calculate metastable equilibrium activities of the residues a <- affinity(H2=c(-11, -1, 101), T=bison.T[i], iprotein=ip.phyla[iloc]) - # calculate the metastable equilibrium activities of the residues e <- equilibrate(a, loga.balance=0, normalize=TRUE, stay.normal=TRUE) - # remove the logarithms to get relative abundances (sum=1 because loga.balance=0) + # remove the logarithms to get relative abundances a.residue <- 10^sapply(e$loga.equil, c) colnames(a.residue) <- aa.phyla$organism[iloc] # the BLAST profile @@ -269,18 +490,18 @@ Now we're ready to make a plot! We start by adding the proteins with amino acid composition that were read from the file. Then plot the degrees of formation as a function of $\log a_{\mathrm{H_{2}}}$ at -sites 1, 3 and 5, but record the optimal values of $\log a_{\mathrm{H_{2}}}$ -at all 5 sites. +sites 1, 3 and 5, but record the results (including calculated relative +abundances and optimal values of $\log a_{\mathrm{H_{2}}}$) at all +5 sites. 
<>= ip.phyla <- add.protein(aa.phyla) layout(matrix(1:6, ncol=3), heights=c(2, 1)) -# keep track of the optimal logaH2 -logaH2.opt <- numeric() +equil.results <- list() for(i in 1:5) { # get the equilibrium degrees of formation and the optimal logaH2 ae <- alpha.equil(i) - logaH2.opt <- c(logaH2.opt, ae$logaH2.opt) + equil.results[[i]] <- ae if(i %in% c(1, 3, 5)) { iphy <- match(colnames(ae$alpha), phyla.abc) # top row: equilibrium degrees of formation @@ -309,22 +530,15 @@ \href{http://dx.doi.org/10.1371/journal.pone.0072395.g003}{DOI link} to published version of the figure. +\clearpage + \section{Activity of hydrogen comparison} -Now that we have computed the activities of hydrogen that minimize -the Gibbs energy of transformation between the metastable equilibrium -of model proteins and observed phylum abundances, let's plot them -and also compare them with the field measurements of varioius redox -proxies and with the linear equation used to fit the spatial distribution -of proteins from the 2011 study \citep{DS11} (reprinted as Equation -2 in 2013 \citep{DS13}): -\begin{equation} -\log a_{\mathrm{H_{2_{\left(aq\right)}}}}=-11+3/40\times T\mathrm{\left(^{\circ}C\right)}\label{eq:logaH2} -\end{equation} +Let's compare the computed activities of hydrogen with various redox +indicators measured in the field. - \subsection{Conversion of field measurements} @@ -333,10 +547,9 @@ Oxidation-reduction potential (ORP), measured in the field using a portable probe and pH/voltmeter, can be converted to Eh by adding the potential of the reference electrode, in this case silver-silver -chloride (Ag/AgCl) in saturated KCl. Lacking the high-temperature -potentials of this electrode, the following equation from Ref. \citep{BPJ85} -for Ag/AgCl (1M KCl) is used, with temperature in degrees Celsius -and potential in volts. +chloride (Ag/AgCl) in saturated KCl. As an approximation, the following +equation from Ref. 
\citep{BPJ85} for Ag/AgCl (1M KCl) is used, with +temperature in $^{\circ}$C and potential in volts. <>= E.AgAgCl <- function(T) { @@ -344,10 +557,9 @@ } @ -Together with ORP, temperature and pH will all be needed. Data from -Bison Pool and another hot spring shown in Fig. 9 of Ref. \citep{DS11} -are both included, but for simplicity here they are lumped together. -The ORP values have units of mV. +Data from Bison Pool and another hot spring shown in Fig. 9 of Ref. +\citep{DS11} are both included, but for simplicity here they are +lumped together. The ORP values have units of mV. <>= T.ORP <- c(93.9, 87.7, 75.7, 70.1, 66.4, 66.2) @@ -361,14 +573,14 @@ \[ 2e^{-}+2\mathrm{H^{+}}\rightleftharpoons\mathrm{H_{2}}_{\left(aq\right)} \] -to calculate $\log a_{\mathrm{H_{2}}_{\left(aq\right)}}$. The law -of mass action is the condition where the activity quotient ($Q$) -of the reaction is equal to the equilibrium constant ($K$). +to calculate $\log a_{\mathrm{H_{2}}_{\left(aq\right)}}$. The ``law +of mass action'' is the equality between the equilibrium constant +($K$) and the activity product ($Q$) of the species in the reaction. 
-<>= +<>= Eh <- ORP/1000 + E.AgAgCl(T.ORP) pe <- convert(Eh, "pe", T=convert(T.ORP, "K")) -logK.ORP <- subcrt(c("e-", "H+", "H2"), c(-2, -2, 1), c("aq", "aq", "aq"), T=T.ORP)$out$logK +logK.ORP <- subcrt(c("e-", "H+", "H2"), c(-2, -2, 1), T=T.ORP)$out$logK logaH2.ORP <- logK.ORP - 2*pe - 2*pH.ORP @ @@ -376,9 +588,8 @@ \paragraph{Sulfide/Sulfate} For sulfide/sulfate, assign activities that are equal to concentrations -(in molal units) measured in the field season of 2005, when the biofilm -samples were collected for metagenomic sequencing, and use the law -of mass action for +(in molal units) measured in the field season of 2005, and use the +law of mass action for \[ \mathrm{HS^{-}}+4\mathrm{H_{2}O}\rightleftharpoons\mathrm{SO_{4}}^{-2}+\mathrm{H^{+}}+4\mathrm{H_{2}}_{\left(aq\right)} \] @@ -386,11 +597,10 @@ spectrophotometrically at the hot spring, and sulfate was determined using ion chromatography on water samples returned from the field. -<>= +<>= loga.HS <- log10(c(4.77e-6, 2.03e-6, 3.12e-7, 4.68e-7, 2.18e-7)) loga.SO4 <- log10(c(2.10e-4, 2.03e-4, 1.98e-4, 2.01e-4, 1.89e-4)) -logK.S <- subcrt(c("HS-", "H2O", "SO4-2", "H+", "H2"), c(-1, -4, 1, 1, 4), - state=c("aq", "liq", "aq", "aq", "aq"), T=bison.T)$out$logK +logK.S <- subcrt(c("HS-", "H2O", "SO4-2", "H+", "H2"), c(-1, -4, 1, 1, 4), T=bison.T)$out$logK logaH2.S <- (logK.S + bison.pH - loga.SO4 + loga.HS) / 4 @ @@ -402,29 +612,28 @@ \[ 0.5\mathrm{O_{2}}_{\left(aq\right)}+\mathrm{H_{2}}_{\left(aq\right)}\rightleftharpoons\mathrm{H_{2}O}\,. \] -Convert the dissolved oxygen concentrations, in mg/L, to molarities -(mol/L) and use these values to set the activity of $\mathrm{O_{2}}_{\left(aq\right)}$. +Convert the dissolved oxygen concentrations (mg/L) to molarities (mol/L) +to set the activity of $\mathrm{O_{2}}_{\left(aq\right)}$. 
-<>= +<>= DO <- c(0.173, 0.776, 0.9, 1.6, 2.8) logaO2 <- log10(DO/1000/32) -logK <- subcrt(c("O2", "H2", "H2O"), c(-0.5, -1, 1), c("aq", "aq", "liq"), T=bison.T)$out$logK +logK <- subcrt(c("O2", "H2", "H2O"), c(-0.5, -1, 1), T=bison.T)$out$logK logaH2.O <- 0 - 0.5*logaO2 - logK @ \paragraph{Plot it!} -The points in the left plot represent proxies for redox potential -based on ORP, sulfur and oxygen measurements, while the points in -in right plot are the optimal values of $\log a_{\mathrm{H_{2}}}$ -for the microbial phylum abundance distribution. Notably, the various -redox indicators are not in equilibrium with each other (the $\log a_{\mathrm{H_{2}}}$ -values are different), but they all show an increase in apparent $\log a_{\mathrm{H_{2}}}$ -with temperature. The dotted line in both plots portrays Equation -(\ref{eq:logaH2}). +The points in the left plot represent field-based indicators for redox +potential, while the points in the right plot are the optimal values +of $\log a_{\mathrm{H_{2}}}$ for the microbial phylum abundance model. +The various redox indicators are not in equilibrium with each other +(the $\log a_{\mathrm{H_{2}}}$ values are different), but they all +show an increase in apparent $\log a_{\mathrm{H_{2}}}$ with temperature. +The dotted line in both plots portrays Equation (\ref{eq:logaH2}). 
-<>= +<>= # 2011 plot plot(Tlim, get.logaH2(Tlim), xlim=Tlim, ylim=c(-45,0), xlab=axis.label("T"), ylab=axis.label("H2"), type="l", lty=3) @@ -440,8 +649,8 @@ # 2013 plot plot(Tlim, get.logaH2(Tlim), xlim=Tlim, ylim=c(-11,-2), xlab=axis.label("T"), ylab=axis.label("H2"), type="l", lty=3) -lines(bison.T, logaH2.opt, lty=2) -points(bison.T, logaH2.opt, pch=21, bg="white") +lines(bison.T, sapply(equil.results, "[", "logaH2.opt"), lty=2) +points(bison.T, sapply(equil.results, "[", "logaH2.opt"), pch=21, bg="white") text(90, -5.3, "Equation 2") text(66, -9, "optimal parameterization\nfor observed\nphylum abundances", adj=0) @ @@ -452,6 +661,78 @@ \section{Relative abundance comparison} +Let's make some plots comparing the relative abundances calculated +above (stored in \texttt{equil.results}) with the abundances from +BLAST counts. We'll also store the optimal models in \texttt{equil.opt}. + +<>= +layout(matrix(c(1, 2, 3, 4, 5, 6), nrow=2, byrow=TRUE), widths=c(2, 2, 2)) +par(mar=c(2.5, 0, 2.5, 0)) +plot.new() +legend("topright", pch=0:11, legend=phyla.abbrv, bty="n", cex=1.5) +lim <- c(-6, -0.5) +equil.opt <- a.blast <- alpha.blast() +for(iloc in 1:5) { + a.equil <- equil.results[[iloc]] + iopt <- match(a.equil$logaH2.opt, a.equil$H2vals) + ae.opt <- a.equil$alpha[iopt, ] + # which are these phyla in the alphabetical list of phyla + iphy <- match(names(ae.opt), phyla.abc) + equil.opt[iloc, iphy] <- ae.opt + mar <- c(2.5, 4.0, 2.5, 1) + thermo.plot.new(xlab=expression(log[2]*alpha[obs]), ylab=expression(log[2]*alpha[equil]), + xlim=lim, ylim=lim, mar=mar, cex=1, yline=1.5) + # add points and 1:1 line + points(log2(a.blast[iloc, iphy]), log2(ae.opt), pch=iphy-1) + lines(lim, lim, lty=2) + title(main=paste("site", iloc)) + # within-plot legend: DGtr + DGexpr <- as.expression(quote(Delta*italic(G[tr])/italic(RT) == phantom())) + DGval <- format(round(2.303*a.equil$DGtr[iopt], 3), nsmall=3) + legend("bottomright", bty="n", legend=c(DGexpr, DGval)) +} +@ + 
+\href{http://dx.doi.org/10.1371/journal.pone.0072395.g005}{DOI link} +to original figure. + +\clearpage + +Finally, a comparison in the form of two barplots. + +<>= +par(mar=c(4, 4, 3, 0), mgp=c(1.8, 0.7, 0)) +par(mfrow=c(1, 3), cex=1) +# make the blast plot +ab <- alpha.blast() +rownames(ab) <- 1:5 +barplot(t(ab), col=phyla.cols, ylab=NULL, xlab="site", axes=TRUE, cex.axis=0.8, cex.names=0.8, las=1) +mtext(expression(alpha[obs]), 2, 2, cex=1.1*par("cex")) +title(main="BLAST profile", cex.main=0.8) +# make the equilibrium plot +rownames(equil.opt) <- 1:5 +barplot(t(equil.opt), col=phyla.cols, ylab=NULL, xlab="site", axes=TRUE, cex.axis=0.8, + cex.names=0.8, las=1) +mtext(expression(alpha[equil]), 2, 2, cex=1.1*par("cex")) +title(main="metastable\nequilibrium", cex.main=0.8) +# add legend +par(mar=c(4, 1, 3, 0)) +plot.new() +legend("bottomleft", legend=rev(phyla.abbrv), fill=rev(phyla.cols), bty="n", cex=0.7) +@ + +\href{http://dx.doi.org/10.1371/journal.pone.0072395.g006}{DOI link} +to original figure. + + +\section{Document history} +\begin{itemize} +\item 2011-08-23 Initial version (CHNOSZ\_0.9-7) +\item 2012-01-07 Use \texttt{add.obigt()} for superseded properties of methionine +sidechain group. +\item 2014-02-01 Include results from 2013 paper and use knitr instead of +Sweave. 
+\end{itemize} \bibliographystyle{unsrtnat} \bibliography{vig} Modified: pkg/CHNOSZ/vignettes/hotspring.lyx =================================================================== --- pkg/CHNOSZ/vignettes/hotspring.lyx 2014-01-30 16:20:32 UTC (rev 64) +++ pkg/CHNOSZ/vignettes/hotspring.lyx 2014-02-01 08:39:09 UTC (rev 65) @@ -95,7 +95,7 @@ \begin_layout Standard \begin_inset ERT -status open +status collapsed \begin_layout Plain Layout @@ -120,7 +120,7 @@ \begin_layout Plain Layout -## set code/output width to be 80 +## set code/output width to be 60 \end_layout \begin_layout Plain Layout @@ -163,40 +163,74 @@ Hot-spring proteins in CHNOSZ \end_layout +\begin_layout Section +Introduction +\end_layout + \begin_layout Standard -This is based on R code in the Supporting Information for the papers. - All code used is shown in the blocks, but much of the text output (particularly - in the figure-generating code) has been suppressed. - In order to keep the code here as short and efficient as possible, some - aspects of the published figures including manual labeling and higher resolutio -n are not all reproduced; these changes are noted where possible. - Also, some tricks (e.g. - using -\begin_inset Quotes eld -\end_inset +The intention of this document is to demonstrate the calculations described + in two recent papers +\begin_inset CommandInset citation +LatexCommand citep +key "DS11,DS13" -xtabs -\begin_inset Quotes erd \end_inset - to create the BLAST frequency table, and + dealing with the distribution and abundances of proteins in \begin_inset Quotes eld \end_inset -revisit +Bison Pool \begin_inset Quotes erd \end_inset - to calculate the DGtr) have been introduced in order to streamline the - code. +, a hot spring in Yellowstone National Park. + The calculations use metastable equilibrium to interrelate the compositions + of proteins (from metagenomic data) with environmental conditions, particularly + pH, temperature, and redox chemistry. 
+ This document is focused on the details of the calculations and has little + in the way of introduction of concepts or interpretation and discussion; + please see the papers for more details. \end_layout \begin_layout Standard +There is no hidden code in this document; all code used to make the figures + is shown in the blocks, but much of the text output (particularly in the + figure-generating code) has been suppressed. + The R code here is based on the Supporting Information for the papers, + with modifications. + In order to keep the code here as short and efficient as possible, some + aspects of the published figures including manual labeling and higher resolutio +n are not all reproduced; these changes are noted where possible. + Also, some tricks (e.g. + using +\family typewriter +xtabs() +\family default + to create the BLAST frequency table, and +\family typewriter +revisit() +\family default + to calculate the +\begin_inset Formula $\Delta G_{tr}$ +\end_inset + +) have been introduced here in order to streamline the code. +\end_layout + +\begin_layout Standard Load CHNOSZ and the thermodynamic database. [TRUNCATED] To get the complete diff run: svnlook diff /svnroot/chnosz -r 65