[CHNOSZ-commits] r168 - in pkg/CHNOSZ: . R demo inst man tests/testthat vignettes

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Feb 21 15:34:27 CET 2017


Author: jedick
Date: 2017-02-21 15:34:27 +0100 (Tue, 21 Feb 2017)
New Revision: 168

Modified:
   pkg/CHNOSZ/DESCRIPTION
   pkg/CHNOSZ/NAMESPACE
   pkg/CHNOSZ/R/add.protein.R
   pkg/CHNOSZ/R/affinity.R
   pkg/CHNOSZ/R/diagram.R
   pkg/CHNOSZ/R/info.R
   pkg/CHNOSZ/R/protein.info.R
   pkg/CHNOSZ/R/util.affinity.R
   pkg/CHNOSZ/R/util.protein.R
   pkg/CHNOSZ/demo/buffer.R
   pkg/CHNOSZ/demo/ionize.R
   pkg/CHNOSZ/demo/protein.equil.R
   pkg/CHNOSZ/inst/NEWS
   pkg/CHNOSZ/man/add.protein.Rd
   pkg/CHNOSZ/man/data.Rd
   pkg/CHNOSZ/man/diagram.Rd
   pkg/CHNOSZ/man/examples.Rd
   pkg/CHNOSZ/man/extdata.Rd
   pkg/CHNOSZ/man/info.Rd
   pkg/CHNOSZ/man/ionize.aa.Rd
   pkg/CHNOSZ/man/protein.Rd
   pkg/CHNOSZ/man/protein.info.Rd
   pkg/CHNOSZ/man/util.protein.Rd
   pkg/CHNOSZ/tests/testthat/test-add.protein.R
   pkg/CHNOSZ/tests/testthat/test-affinity.R
   pkg/CHNOSZ/tests/testthat/test-ionize.aa.R
   pkg/CHNOSZ/tests/testthat/test-protein.info.R
   pkg/CHNOSZ/tests/testthat/test-util.affinity.R
   pkg/CHNOSZ/vignettes/anintro.Rmd
   pkg/CHNOSZ/vignettes/equilibrium.Rnw
   pkg/CHNOSZ/vignettes/equilibrium.lyx
   pkg/CHNOSZ/vignettes/vig.bib
Log:
protein.info() renamed to pinfo()


Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================
--- pkg/CHNOSZ/DESCRIPTION	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/DESCRIPTION	2017-02-21 14:34:27 UTC (rev 168)
@@ -1,6 +1,6 @@
-Date: 2017-02-20
+Date: 2017-02-21
 Package: CHNOSZ
-Version: 1.0.8-57
+Version: 1.0.8-58
 Title: Chemical Thermodynamics and Activity Diagrams
 Author: Jeffrey Dick
 Maintainer: Jeffrey Dick <j3ffdick at gmail.com>

Modified: pkg/CHNOSZ/NAMESPACE
===================================================================
--- pkg/CHNOSZ/NAMESPACE	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/NAMESPACE	2017-02-21 14:34:27 UTC (rev 168)
@@ -13,7 +13,7 @@
   "plot.new", "plot.window", "points", "rect", "text", "title")
 importFrom("stats", "D", "as.formula", "cor", "lm", "loess.smooth",
   "na.omit", "predict.lm", "qqline", "qqnorm", "sd", "splinefun",
-  "uniroot")
+  "uniroot", "median")
 importFrom("utils", "browseURL", "capture.output", "combn", "demo",
   "example", "head", "installed.packages", "read.csv", "tail",
   "write.csv", "write.table")

Modified: pkg/CHNOSZ/R/add.protein.R
===================================================================
--- pkg/CHNOSZ/R/add.protein.R	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/R/add.protein.R	2017-02-21 14:34:27 UTC (rev 168)
@@ -3,65 +3,10 @@
 # reorganize protein functions 20120513
 
 # add.protein - add amino acid counts to thermo$protein (returns iprotein)
-# aa2eos - perform group additivity calculations
 # seq2aa - calculate amino acid counts from a sequence
 # aasum - combine amino acid counts (sum, average, or weighted sum by abundance)
 # read.aa - read amino acid counts from a file
 
-aa2eos <- function(aa, state=get("thermo")$opt$state) {
-  # display and return the properties of
-  # proteins calculated from amino acid composition
-  # the names of the protein backbone groups depend on the state
-  # [UPBB] for aq or [PBB] for cr
-  if(state=="aq") bbgroup <- "UPBB" else bbgroup <- "PBB"
-  # names of the AABB, sidechain and protein backbone groups
-  groups <- c("AABB", colnames(aa)[6:25], bbgroup)
-  # put brackets around the group names
-  groups <- paste("[", groups, "]", sep="")
-  # the rownumbers of the groups in thermo$obigt
-  groups_state <- paste(groups, state)
-  obigt <- get("thermo")$obigt
-  obigt_state <- paste(obigt$name, obigt$state)
-  igroup <- match(groups_state, obigt_state)
-  # the properties are in columns 8-20 of thermo$obigt
-  groupprops <- obigt[igroup, 8:20]
-  # the elements in each of the groups
-  groupelements <- i2A(igroup)
-  # a function to work on a single row of aa
-  eosfun <- function(aa) {
-    # numbers of groups: chains [=AABB], sidechains, protein backbone
-    nchains <- as.numeric(aa[, 5])
-    length <- sum(as.numeric(aa[, 6:25]))
-    npbb <- length - nchains
-    ngroups <- c(nchains, as.numeric(aa[, 6:25]), npbb)
-    # the actual adding and multiplying of thermodynamic properties
-    # hmm. seems like we have to split up the multiplication/transposition
-    # operations to get the result into multiple columns. 20071213
-    eos <- t(data.frame(colSums(groupprops * ngroups)))
-    # to get the formula, add up and round the group compositions 20090331
-    f.in <- round(colSums(groupelements * ngroups), 3)
-    # take out any elements that don't appear (sometimes S)
-    f.in <- f.in[f.in!=0]
-    # turn it into a formula
-    f <- as.chemical.formula(f.in)
-    # now the species name
-    name <- paste(aa$protein, aa$organism, sep="_")
-    # make some noise for the user
-    message("aa2eos: found ", appendLF=FALSE)
-    message(name, " (", f, ", ", appendLF=FALSE)
-    message(round(length, 3), " residues)")
-    ref <- aa$ref
-    header <- data.frame(name=name, abbrv=NA, formula=f, state=state, ref1=ref, ref2=NA, date=NA, stringsAsFactors=FALSE)
-    eosout <- cbind(header, eos)
-    return(eosout)
-  }
-  # loop over each row of aa
-  out <- lapply(1:nrow(aa), function(i) eosfun(aa[i, ]))
-  out <- do.call(rbind, out)
-  rownames(out) <- NULL
-  return(out)
-}
-
 seq2aa <- function(protein, sequence) {
   # remove newlines and whitespace
   sequence <- gsub("\\s", "", gsub("[\r\n]", "", sequence))
@@ -70,7 +15,7 @@
   colnames(caa) <- aminoacids(3)
   # a protein with no amino acids is sort of boring
   if(all(caa==0)) stop("no characters match an amino acid")
-  ip <- suppressMessages(protein.info(protein))
+  ip <- pinfo(protein)
   # now make the data frame
   po <- strsplit(protein, "_")[[1]]
   aa <- data.frame(protein=po[1], organism=po[2], ref=NA, abbrv=NA, stringsAsFactors=FALSE)
@@ -127,7 +72,7 @@
     stop("the value of 'aa' is not a data frame with the same columns as thermo$protein")
   # find any protein IDs that are duplicated
   po <- paste(aa$protein, aa$organism, sep="_")
-  ip <- suppressMessages(protein.info(po))
+  ip <- pinfo(po)
   ipdup <- !is.na(ip)
   # now we're ready to go
   tp.new <- thermo$protein
@@ -142,7 +87,7 @@
   thermo$protein <- tp.new
   assign("thermo", thermo, "CHNOSZ")
   # return the new rownumbers
-  ip <- protein.info(po)
+  ip <- pinfo(po)
   # make some noise
   if(!all(ipdup)) message("add.protein: added ", nrow(aa)-sum(ipdup), " new protein(s) to thermo$protein")
   if(any(ipdup)) message("add.protein: replaced ", sum(ipdup), " existing protein(s) in thermo$protein")

Modified: pkg/CHNOSZ/R/affinity.R
===================================================================
--- pkg/CHNOSZ/R/affinity.R	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/R/affinity.R	2017-02-21 14:34:27 UTC (rev 168)
@@ -49,7 +49,8 @@
     # is needed here
     if(!is.null(iprotein)) {
       # check all proteins are available
-      if(!all(iprotein %in% 1:nrow(thermo$protein))) stop("some value(s) in iprotein not in rownumbers of thermo$protein")
+      if(any(is.na(iprotein))) stop("`iprotein` has some NA values")
+      if(!all(iprotein %in% 1:nrow(thermo$protein))) stop("some value(s) of `iprotein` are not rownumbers of thermo$protein")
       # add protein residues to the species list
       resnames <- c("H2O",aminoacids(3))
       # residue activities set to zero;

Modified: pkg/CHNOSZ/R/diagram.R
===================================================================
--- pkg/CHNOSZ/R/diagram.R	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/R/diagram.R	2017-02-21 14:34:27 UTC (rev 168)
@@ -22,7 +22,7 @@
   # colors
   col=par("col"), col.names=par("col"), fill=NULL, 
   # labels
-  names=NULL, main=NULL, legend.x=NA, format.names=TRUE,
+  names=NULL, main=NULL, legend.x=NA, format.names=TRUE, adj=0.5, dy=0,
   # plotting controls
   add=FALSE, plot.it=TRUE, tplot=TRUE, ...
 ) {
@@ -242,17 +242,37 @@
       if(!add & !is.null(legend.x)) {
         # 20120521: use legend.x=NA to label lines rather than make legend
         if(is.na(legend.x)) {
+          maxvals <- do.call(pmax, pv)
           for(i in 1:length(plotvals)) {
+            # y-values for this line
             myvals <- as.numeric(plotvals[[i]])
             # don't take values that lie close to or above the top of plot
             myvals[myvals > ylim[1] + 0.95*diff(ylim)] <- ylim[1]
-            imax <- which.max(myvals)
-            # put labels on the maximum of the line, but avoid the sides of the plot
-            adj <- 0.5
-            if(xvalues[imax] > xlim[1] + 0.8*diff(xlim)) adj <- 1
-            if(xvalues[imax] < xlim[1] + 0.2*diff(xlim)) adj <- 0
-            # also include y-adjustment (labels bottom-aligned with the line)
-            text(xvalues[imax], plotvals[[i]][imax], labels=names[i], adj=c(adj, 0))
+            # the starting x-adjustment
+            thisadj <- adj
+            # if this line has any of the overall maximum values, use only those values
+            # (useful for labeling straight-line affinity comparisons 20170221)
+            is.max <- myvals==maxvals
+            if(any(is.max) & plotvar != "alpha") {
+              # put labels on the median x-position
+              imax <- median(which(is.max))
+            } else {
+              # put labels on the maximum of the line
+              # (useful for labeling alpha plots)
+              imax <- which.max(myvals)
+              # try to avoid the sides of the plot; take care of reversed x-axis
+              if(missing(adj)) {
+                if(sign(diff(xlim)) > 0) {
+                  if(xvalues[imax] > xlim[1] + 0.8*diff(xlim)) thisadj <- 1
+                  if(xvalues[imax] < xlim[1] + 0.2*diff(xlim)) thisadj <- 0
+                } else {
+                  if(xvalues[imax] > xlim[1] + 0.2*diff(xlim)) thisadj <- 0
+                  if(xvalues[imax] < xlim[1] + 0.8*diff(xlim)) thisadj <- 1
+                }
+              }
+            }
+            # also include y-offset (dy) and y-adjustment (labels bottom-aligned with the line)
+            text(xvalues[imax], plotvals[[i]][imax] + dy, labels=names[i], adj=c(thisadj, 0))
           }
         } else legend(x=legend.x, lty=lty, legend=names, col=col, cex=cex.names, lwd=lwd, ...)
       }

Modified: pkg/CHNOSZ/R/info.R
===================================================================
--- pkg/CHNOSZ/R/info.R	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/R/info.R	2017-02-21 14:34:27 UTC (rev 168)
@@ -31,7 +31,7 @@
   # since thermo$obigt$abbrv contains NAs, convert NA results to FALSE
   matches.species[is.na(matches.species)] <- FALSE
   # turn it in to no match if it's a protein in the wrong state
-  ip <- suppressMessages(protein.info(species))
+  ip <- pinfo(species)
   if(any(matches.species) & !is.na(ip) & !is.null(state)) {
     matches.state <- matches.species & grepl(state, thermo$obigt$state)
     if(!any(matches.state)) matches.species <- FALSE
@@ -44,10 +44,8 @@
       if(!is.na(ip)) {
         # here we use a default state from thermo$opt$state
         if(is.null(state)) state <- thermo$opt$state
-        # retrieve the amino acid composition
-        aa <- protein.info(ip)
         # add up protein properties
-        eos <- aa2eos(aa, state)
+        eos <- protein.obigt(ip, state=state)
         # the real assignment work 
         nrows <- suppressMessages(mod.obigt(eos))
         thermo <- get("thermo", "CHNOSZ")

Modified: pkg/CHNOSZ/R/protein.info.R
===================================================================
--- pkg/CHNOSZ/R/protein.info.R	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/R/protein.info.R	2017-02-21 14:34:27 UTC (rev 168)
@@ -1,13 +1,14 @@
 # CHNOSZ/protein.info.R
 
 # calculate formulas and summarize properties of proteins
-# protein.info: find rownumber in thermo$protein
+# pinfo: find rownumber in thermo$protein
+# protein.length: lengths of the indicated proteins
 # protein.formula: chemical makeup of the indicated proteins
-# protein.length: lengths of the indicated proteins
+# protein.obigt: perform group additivity calculations
 # protein.basis: coefficients of basis species in formation reactions of [ionized] proteins [residues]
 # protein.equil: step-by-step example of protein equilibrium calculation
 
-protein.info <- function(protein, organism=NULL, residue=FALSE) {
+pinfo <- function(protein, organism=NULL, residue=FALSE) {
   # return the `protein` (possibly per residue) for:
   #   dataframe `protein`
   # return the rownumber(s) of thermo$protein for:
@@ -15,30 +16,24 @@
   #   character `protein` and `organism`, e.g. 'LYSC', 'CHICK'
   # return the row(s) of thermo$protein (possibly per residue) for:
   #   numeric `protein` (the rownumber itself)
-  thermo <- get("thermo")
+  t_p <- get("thermo")$protein
   if(is.data.frame(protein)) out <- protein
   if(is.numeric(protein)) {
     # drop NA matches to thermo$protein
-    iproteins <- 1:nrow(thermo$protein)
+    iproteins <- 1:nrow(t_p)
     protein[!protein %in% iproteins] <- NA
     # get amino acid counts
-    out <- thermo$protein[protein, ]
+    out <- t_p[protein, ]
   }
   if(is.data.frame(protein) | is.numeric(protein)) {
     # compute per-residue counts if requested
     if(residue) out[, 5:25] <- out[, 5:25]/rowSums(out[, 6:25])
   } else {
-    # from here we'll search by protein/organism pairs
-    tp.po <- paste(thermo$protein$protein, thermo$protein$organism, sep="_")
-    if(is.null(organism)) my.po <- protein
-    else my.po <- paste(protein, organism, sep="_")
-    iprotein <- match(my.po, tp.po)
-    # tell the user about NA's
-    if(any(is.na(iprotein))) {
-      nNA <- sum(is.na(iprotein))
-      if(nNA==1) ptext <- "" else ptext <- "s"
-      message("iprotein: ", sum(is.na(iprotein)), " protein", ptext, " not matched")
-    }
+    # search for protein or protein_organism in thermo$protein
+    t_p_names <- paste(t_p$protein, t_p$organism, sep="_")
+    if(is.null(organism)) my_names <- protein
+    else my_names <- paste(protein, organism, sep="_")
+    iprotein <- match(my_names, t_p_names)
     out <- iprotein
   }
   out
@@ -46,7 +41,7 @@
 
 protein.formula <- function(protein, organism=NULL, residue=FALSE) {
   # return a matrix with chemical formulas of proteins
-  aa <- protein.info(protein.info(protein, organism))
+  aa <- pinfo(pinfo(protein, organism))
   rf <- group.formulas()
   out <- as.matrix(aa[, 5:25]) %*% as.matrix(rf)
   if(residue) out <- out / rowSums(aa[, 6:25])
@@ -56,18 +51,73 @@
 
 protein.length <- function(protein, organism=NULL) {
   # calculate the length(s) of proteins
-  aa <- protein.info(protein.info(protein, organism))
+  aa <- pinfo(pinfo(protein, organism))
   # use rowSums on the columns containing amino acid counts
   pl <- as.numeric(rowSums(aa[, 6:25]))
   return(pl)
 }
 
+protein.obigt <- function(protein, organism=NULL, state=get("thermo")$opt$state) {
+  # display and return the properties of
+  # proteins calculated from amino acid composition
+  aa <- pinfo(pinfo(protein, organism))
+  # the names of the protein backbone groups depend on the state
+  # [UPBB] for aq or [PBB] for cr
+  if(state=="aq") bbgroup <- "UPBB" else bbgroup <- "PBB"
+  # names of the AABB, sidechain and protein backbone groups
+  groups <- c("AABB", colnames(aa)[6:25], bbgroup)
+  # put brackets around the group names
+  groups <- paste("[", groups, "]", sep="")
+  # the rownumbers of the groups in thermo$obigt
+  groups_state <- paste(groups, state)
+  obigt <- get("thermo")$obigt
+  obigt_state <- paste(obigt$name, obigt$state)
+  igroup <- match(groups_state, obigt_state)
+  # the properties are in columns 8-20 of thermo$obigt
+  groupprops <- obigt[igroup, 8:20]
+  # the elements in each of the groups
+  groupelements <- i2A(igroup)
+  # a function to work on a single row of aa
+  eosfun <- function(aa) {
+    # numbers of groups: chains [=AABB], sidechains, protein backbone
+    nchains <- as.numeric(aa[, 5])
+    length <- sum(as.numeric(aa[, 6:25]))
+    npbb <- length - nchains
+    ngroups <- c(nchains, as.numeric(aa[, 6:25]), npbb)
+    # the actual adding and multiplying of thermodynamic properties
+    # hmm. seems like we have to split up the multiplication/transposition
+    # operations to get the result into multiple columns. 20071213
+    eos <- t(data.frame(colSums(groupprops * ngroups)))
+    # to get the formula, add up and round the group compositions 20090331
+    f.in <- round(colSums(groupelements * ngroups), 3)
+    # take out any elements that don't appear (sometimes S)
+    f.in <- f.in[f.in!=0]
+    # turn it into a formula
+    f <- as.chemical.formula(f.in)
+    # now the species name
+    name <- paste(aa$protein, aa$organism, sep="_")
+    # make some noise for the user
+    message("protein.obigt: found ", appendLF=FALSE)
+    message(name, " (", f, ", ", appendLF=FALSE)
+    message(round(length, 3), " residues)")
+    ref <- aa$ref
+    header <- data.frame(name=name, abbrv=NA, formula=f, state=state, ref1=ref, ref2=NA, date=NA, stringsAsFactors=FALSE)
+    eosout <- cbind(header, eos)
+    return(eosout)
+  }
+  # loop over each row of aa
+  out <- lapply(1:nrow(aa), function(i) eosfun(aa[i, ]))
+  out <- do.call(rbind, out)
+  rownames(out) <- NULL
+  return(out)
+}
+
 protein.basis <- function(protein, T=25, normalize=FALSE) {
   # 20090902 calculate the coefficients of basis species in reactions
   # to form proteins (possibly per normalized by length) listed in protein
   # 20120528 renamed protein.basis from residue.info ...
   # what are the elemental compositions of the proteins
-  aa <- protein.info(protein.info(protein))
+  aa <- pinfo(pinfo(protein))
   pf <- protein.formula(aa)
   # what are the coefficients of the basis species in the formation reactions
   sb <- species.basis(pf)
@@ -94,7 +144,7 @@
   message("protein.equil: temperature from argument is ", T, " degrees C")
   TK <- convert(T, "K")
   # get the amino acid compositions of the proteins
-  aa <- protein.info(protein.info(protein))
+  aa <- pinfo(pinfo(protein))
   # get some general information about the proteins
   pname <- paste(aa$protein, aa$organism, sep="_")
   plength <- protein.length(aa)

Modified: pkg/CHNOSZ/R/util.affinity.R
===================================================================
--- pkg/CHNOSZ/R/util.affinity.R	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/R/util.affinity.R	2017-02-21 14:34:27 UTC (rev 168)
@@ -162,7 +162,7 @@
       isprotein <- grepl("_", myspecies$name)
       if(any(isprotein)) {
         # the rownumbers in thermo$protein
-        ip <- protein.info(myspecies$name[isprotein])
+        ip <- pinfo(myspecies$name[isprotein])
         # get the affinity of ionization
         iHplus <- match("H+", rownames(mybasis))
         # as.numeric is needed in case the logact column is character mode
@@ -420,7 +420,7 @@
   # initialize output list
   out <- vector("list", length(iprotein))
   # get aa from iprotein
-  aa <- protein.info(iprotein)
+  aa <- pinfo(iprotein)
   # calculate the values of A/2.303RT as a function of T-P-pH
   A <- ionize.aa(aa=aa, property="A", T=TPpH$T, P=TPpH$P, pH=TPpH$pH)
   if(transect) {

Modified: pkg/CHNOSZ/R/util.protein.R
===================================================================
--- pkg/CHNOSZ/R/util.protein.R	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/R/util.protein.R	2017-02-21 14:34:27 UTC (rev 168)
@@ -37,7 +37,7 @@
             M.cp(Ti),N.cp(Ti),P.cp(Ti),Q.cp(Ti),R.cp(Ti),
             S.cp(Ti),T.cp(Ti),V.cp(Ti),W.cp(Ti),Y.cp(Ti))
     # get the protein composition
-    tt <- protein.info(protein.info(protein))[,6:25]
+    tt <- pinfo(pinfo(protein))[,6:25]
     cnew <- c(cnew, sum(cp * as.numeric(tt)) + sum(as.numeric(tt)) * UPBB.cp(Ti))
   }
   return(cnew)

Modified: pkg/CHNOSZ/demo/buffer.R
===================================================================
--- pkg/CHNOSZ/demo/buffer.R	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/demo/buffer.R	2017-02-21 14:34:27 UTC (rev 168)
@@ -13,7 +13,7 @@
   basis("H2", buffer)
   a <- affinity(T=xlim, P=300, return.buffer=TRUE, exceed.Ttr=TRUE)
   lines(a$vals[[1]], a$H2, col=3, lwd=2)
-  text(a$vals[[1]][ixlab], a$H2[ixlab], buffer)
+  text(a$vals[[1]][ixlab], a$H2[ixlab] + 0.2, buffer)
 }
 bufferline("FeFeO", 20)
 bufferline("QFM", 38)

Modified: pkg/CHNOSZ/demo/ionize.R
===================================================================
--- pkg/CHNOSZ/demo/ionize.R	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/demo/ionize.R	2017-02-21 14:34:27 UTC (rev 168)
@@ -1,5 +1,5 @@
 ## ionize.aa(): Contour plots of net charge and ionization properties of LYSC_CHICK
-aa <- protein.info("LYSC_CHICK")
+aa <- pinfo(pinfo("LYSC_CHICK"))
 pH <- seq(0, 14, 0.2)
 T <- seq(0, 200, 2)
 val <- expand.grid(pH=pH, T=T)

Modified: pkg/CHNOSZ/demo/protein.equil.R
===================================================================
--- pkg/CHNOSZ/demo/protein.equil.R	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/demo/protein.equil.R	2017-02-21 14:34:27 UTC (rev 168)
@@ -1,6 +1,6 @@
 ## steps in calculation of chemical activities of two proteins
 ## in metastable equilibrium, after Dick and Shock, 2011
-protein <- protein.info(c("CSG_METVO", "CSG_METJA"))
+protein <- pinfo(c("CSG_METVO", "CSG_METJA"))
 # clear out amino acid residues loaded by the example above
 # ( in affinity(iprotein=ip) )
 data(thermo)

Modified: pkg/CHNOSZ/inst/NEWS
===================================================================
--- pkg/CHNOSZ/inst/NEWS	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/inst/NEWS	2017-02-21 14:34:27 UTC (rev 168)
@@ -1,4 +1,4 @@
-CHANGES IN CHNOSZ 1.0.8-57 (2017-02-20)
+CHANGES IN CHNOSZ 1.0.8-58 (2017-02-21)
 ---------------------------------------
 
 DOCUMENTATION:
@@ -39,6 +39,9 @@
 - Chemical formulas that are part of the axis labels (log activity or
   fugacity) now have formatting applied.
 
+- Add arguments `adj` and `dy` for x-alignment and y-offset of line
+  labels.
+
 NEW FEATURES:
 
 - Add ZC.col() for generating a red-grey-blue color scale from
@@ -118,10 +121,10 @@
 - Rename browse.refs() to thermo.refs(); remove URL browsing (except for
   summary table).
 
-- Rename iprotein() to protein.info(), replacing the previous function
-  of the same name.
+- New function pinfo() merges functionality of old iprotein() and
+  ip2aa(), which have been removed (along with protein.info()).
 
-- Merge ip2aa() with protein.info().
+- Rename aa2eos() to protein.obigt().
 
 CHANGES IN CHNOSZ 1.0.8 (2016-05-28)
 ------------------------------------

Modified: pkg/CHNOSZ/man/add.protein.Rd
===================================================================
--- pkg/CHNOSZ/man/add.protein.Rd	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/man/add.protein.Rd	2017-02-21 14:34:27 UTC (rev 168)
@@ -1,55 +1,52 @@
 \name{add.protein}
-\alias{aa2eos}
+\alias{add.protein}
 \alias{seq2aa}
-\alias{aasum}
 \alias{read.aa}
-\alias{add.protein}
+\alias{aasum}
 \title{Amino Acid Compositions of Proteins}
 \description{
-  Functions to identify proteins, get and set amino acid compositions, and calculate thermodynamic properties from group additivity.
+  Functions to get amino acid compositions and add them to the protein list for use by other functions.
 }
 
 \usage{
-  aa2eos(aa, state=get("thermo")$opt$state)
+  add.protein(aa)
   seq2aa(protein, sequence)
-  aasum(aa, abundance = 1, average = FALSE, protein = NULL, organism = NULL)
   read.aa(file = "protein.csv", ...)
-  add.protein(aa)
+  aasum(aa, abundance = 1, average = FALSE, protein = NULL, organism = NULL)
 }
 
 \arguments{
-  \item{protein}{character, name of protein; numeric, indices of proteins (rownumbers of \code{\link{thermo}$protein})}
-  \item{organism}{character, name of organism}
   \item{aa}{data frame, amino acid composition in the format of \code{thermo$protein}}
-  \item{state}{character, physical state}
+  \item{protein}{character, name of protein; numeric, indices of proteins (rownumbers of \code{\link{thermo}$protein})}
   \item{sequence}{character, protein sequence}
-  \item{abundance}{numeric, abundances of proteins}
-  \item{average}{logical, return the weighted average of amino acid counts?}
   \item{file}{character, path to file with amino acid compositions}
   \item{...}{additional arguments passed to \code{\link{read.csv}}}
+  \item{abundance}{numeric, abundances of proteins}
+  \item{average}{logical, return the weighted average of amino acid counts?}
+  \item{organism}{character, name of organism}
 }
 
 \details{
-  A \samp{protein} in CHNOSZ is defined by a name and by the counts of amino acids, stored in \code{\link{thermo}$protein}. The purpose of the functions described here is to identify proteins and work with their amino acid compositions. From the amino acid compositions, the thermodynamic properties of the proteins can be estimated (Dick et al., 2006) for use in other functions in the package. 
+A \samp{protein} in CHNOSZ is defined by its identifying information and the amino acid composition, stored in \code{\link{thermo}$protein}.
+The names of proteins in CHNOSZ are distinguished from those of other chemical species by having an underscore character ("_") that separates two identifiers, referred to as the \code{protein} and \code{organism}.
+An example is \samp{LYSC_CHICK}. 
+The purpose of the functions described here is to identify proteins and work with their amino acid compositions.
+From the amino acid compositions, the thermodynamic properties of the proteins can be estimated by group additivity.
 
-  Often, the names of proteins are sufficient to set up calculations using functions such as \code{\link{subcrt}} or \code{\link{species}}. The names of proteins in CHNOSZ are distinguished from those of other chemical species by having an underscore character ("_") that separates two identifiers, referred to as the \code{protein} and \code{organism} (but any other meaning can be attached to these names). An example is \samp{LYSC_CHICK}. 
+\code{seq2aa} returns a data frame of amino acid composition, in the format of \code{thermo$protein}, corresponding to the provided \code{sequence}.
+Here, the \code{protein} argument indicates the name of the protein with an underscore (e.g. \samp{LYSC_CHICK}).
 
-  The first function provides low-level operations:
+\code{aasum} returns a data frame representing the sum of amino acid compositions in the rows of the input \code{aa} data frame.
+The amino acid compositions are multiplied by the indicated \code{abundance}; that argument is recycled to match the number of rows of \code{aa}.
+If \code{average} is TRUE the final sum is divided by the number of input compositions.
+The name used in the output is taken from the first row of \code{aa} or from \code{protein} and \code{organism} if they are specified.
+This function is useful for calculating bulk amino acid compositions in stress response experiments or localization studies; see \code{\link{read.expr}} for examples of its use.
 
-  \code{aa2eos} calculates the thermodynamic properties and equations-of-state parameters for the completely nonionized proteins using group additivity with parameters taken from Dick et al., 2006 (aqueous proteins) and LaRowe and Dick, 2012 (crystalline proteins and revised aqueous methionine sidechain group). The return value is a data frame in the same format as \code{thermo$obigt}. \code{state} indicates the physical state for the parameters used in the calculation (\samp{aq} or \samp{cr}).
-
-  The remaining functions are more likely to be called directly by the user:
-
-  \code{seq2aa} returns a data frame of amino acid composition, in the format of \code{thermo$protein}, corresponding to the provided \code{sequence}. Here, the \code{protein} argument indicates the name of the protein with an underscore (e.g. \samp{LYSC_CHICK}).
-
-  \code{aasum} returns a data frame representing the sum of amino acid compositions in the rows of the input \code{aa} data frame. The amino acid compositions are multiplied by the indicated \code{abundance}; that argument is recycled to match the number of rows of \code{aa}. If \code{average} is TRUE the final sum is divided by the number of input compositions. The name used in the output is taken from the first row of \code{aa} or from \code{protein} and \code{organism} if they are specified. This function is useful for calculating bulk amino acid compositions in stress response experiments or localization studies; see \code{\link{read.expr}} for examples of its use.
-
 \code{read.aa} returns a data frame of amino acid composition based on the contents of the indicated \code{file}, which should be a CSV file with the same column names as \code{thermo$protein}.
 
-\code{add.protein} completes the loop; any amino acid composition returned by the \code{*aa} functions described above can be added to \code{thermo$protein} using this function to be made available to other functions in the package.
+Given amino acid compositions returned by the \code{*aa} functions described above, \code{add.protein} adds them to \code{thermo$protein} for use by other functions in CHNOSZ.
 The amino acid compositions of proteins in \code{aa} with the same name as one in \code{thermo$protein} are replaced.
 The value returned by this function is the rownumbers of \code{thermo$protein} that are added and/or replaced.
-
 }
 
 \examples{
@@ -60,22 +57,20 @@
 ip <- add.protein(aa)
 stopifnot(protein.length(ip)==10)
 # the chemical formula of this peptide
-stopifnot(as.chemical.formula(protein.formula(ip))=="C41H69N11O18")
+as.chemical.formula(protein.formula(ip)) # "C41H69N11O18"
 # we can also calculate a formula without using add.protein
-as.chemical.formula(protein.formula(seq2aa("pentapeptide_test", "ANLSG")))
+aa <- seq2aa("pentapeptide_test", "ANLSG")
+as.chemical.formula(protein.formula(aa))
 }
 
 \seealso{
-\code{\link{read.fasta}} and \code{\link{uniprot.aa}} for getting amino acid compositions from a FASTA file or downloading them from UniProt, and \code{\link{more.aa}} for getting amino acid compositions for model organisms from additional data files in the \code{\link{extdata}/protein} directory.
+\code{\link{read.fasta}}, \code{\link{uniprot.aa}}, \code{\link{more.aa}} for other ways of getting amino acid compositions.
 
-\code{\link{protein.info}} for protein-level functions (chemical formulas, summaries of reaction coefficients and energies), and \code{\link{read.expr}} for working with protein abundance and subcellular localization data.
+\code{\link{pinfo}} for protein-level functions (length, chemical formulas, reaction coefficients of basis species).
 
-Examples of stability calculations for proteins are in \code{\link{protein}}.
-}
+\code{\link{read.expr}} for working with protein abundance and subcellular localization data.
 
-\references{
-  Dick, J. M., LaRowe, D. E. and Helgeson, H. C. (2006) Temperature, pressure, and electrochemical constraints on protein speciation: Group additivity calculation of the standard molal thermodynamic properties of ionized unfolded proteins. \emph{Biogeosciences} \bold{3}, 311--336. \url{http://dx.doi.org/10.5194/bg-3-311-2006}
-
+\code{\link{protein}} for examples of affinity calculations and diagrams.
 }
 
 \concept{Protein thermodynamic modeling}

Modified: pkg/CHNOSZ/man/data.Rd
===================================================================
--- pkg/CHNOSZ/man/data.Rd	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/man/data.Rd	2017-02-21 14:34:27 UTC (rev 168)
@@ -166,7 +166,7 @@
     }
 
     \item \code{thermo$protein}
-    Data frame of amino acid compositions of selected proteins. Many of the compositions were taken from the SWISS-PROT/UniProt online database (Boeckmann et al., 2003) and the protein and organism names usually follow the conventions adopted there. In some cases different isoforms of proteins are identified using modifications of the protein names; for example, \samp{MOD5.M} and \code{MOD5.N} proteins of \samp{YEAST} denote the mitochondrial and nuclear isoforms of this protein. See \code{\link{protein.info}} to search this data frame by protein name, and other functions to work with the amino acid compositions.
+    Data frame of amino acid compositions of selected proteins. Many of the compositions were taken from the SWISS-PROT/UniProt online database (Boeckmann et al., 2003) and the protein and organism names usually follow the conventions adopted there. In some cases different isoforms of proteins are identified using modifications of the protein names; for example, \samp{MOD5.M} and \code{MOD5.N} proteins of \samp{YEAST} denote the mitochondrial and nuclear isoforms of this protein. See \code{\link{pinfo}} to search this data frame by protein name, and other functions to work with the amino acid compositions.
     \tabular{lll}{
       \code{protein} \tab character \tab Identification of protein\cr
       \code{organism} \tab character \tab Identification of organism\cr

Modified: pkg/CHNOSZ/man/diagram.Rd
===================================================================
--- pkg/CHNOSZ/man/diagram.Rd	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/man/diagram.Rd	2017-02-21 14:34:27 UTC (rev 168)
@@ -15,7 +15,7 @@
     cex=par("cex"), cex.names=1, cex.axis=par("cex"),
     lty=NULL, lwd=par("lwd"), dotted=NULL,
     col=par("col"), col.names=par("col"), fill=NULL,
-    names=NULL, main=NULL, legend.x=NA, format.names=TRUE,
+    names=NULL, main=NULL, legend.x=NA, format.names=TRUE, adj=0.5, dy=0,
     add=FALSE, plot.it=TRUE, tplot=TRUE, ...)
   strip(affinity, ispecies = NULL, col = NULL, ns = NULL, 
     xticks = NULL, ymin = -0.2, xpad = 1, cex.names = 0.7)
@@ -52,6 +52,8 @@
   \item{main}{character, a main \code{\link{title}} for the plot; \code{NULL} means to plot no title}
   \item{legend.x}{character, description of legend placement passed to \code{\link{legend}}}
   \item{format.names}{logical, apply formatting to chemical formulas?}
+  \item{adj}{numeric, x-alignment of line labels (used as the first element of \code{adj} in \code{\link{text}})}
+  \item{dy}{numeric, y offset for line labels}
   \item{add}{logical, add to current plot?}
   \item{plot.it}{logical, make a plot?}
   \item{tplot}{logical, set up plot with \code{\link{thermo.plot.new}}?}

Modified: pkg/CHNOSZ/man/examples.Rd
===================================================================
--- pkg/CHNOSZ/man/examples.Rd	2017-02-20 12:27:24 UTC (rev 167)
+++ pkg/CHNOSZ/man/examples.Rd	2017-02-21 14:34:27 UTC (rev 168)
@@ -80,8 +80,9 @@
 
 \examples{
 \dontshow{data(thermo)}
+\dontshow{opar <- par(no.readonly=TRUE)}
 demos(c("ORP", "NaCl"))
-\dontshow{par(thermo$opar)}
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/chnosz -r 168


More information about the CHNOSZ-commits mailing list