[CHNOSZ-commits] r221 - in pkg/CHNOSZ: . R demo inst inst/extdata/protein man tests/testthat vignettes

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Sep 26 16:23:07 CEST 2017


Author: jedick
Date: 2017-09-26 16:23:06 +0200 (Tue, 26 Sep 2017)
New Revision: 221

Added:
   pkg/CHNOSZ/R/yeast.aa.R
   pkg/CHNOSZ/man/yeast.aa.Rd
   pkg/CHNOSZ/tests/testthat/test-yeast.aa.R
Removed:
   pkg/CHNOSZ/R/more.aa.R
   pkg/CHNOSZ/inst/extdata/protein/Eco.csv.xz
   pkg/CHNOSZ/man/more.aa.Rd
   pkg/CHNOSZ/tests/testthat/test-more.aa.R
Modified:
   pkg/CHNOSZ/DESCRIPTION
   pkg/CHNOSZ/NAMESPACE
   pkg/CHNOSZ/R/examples.R
   pkg/CHNOSZ/R/info.R
   pkg/CHNOSZ/demo/revisit.R
   pkg/CHNOSZ/demo/wjd.R
   pkg/CHNOSZ/demo/yeastgfp.R
   pkg/CHNOSZ/inst/NEWS
   pkg/CHNOSZ/man/add.protein.Rd
   pkg/CHNOSZ/man/extdata.Rd
   pkg/CHNOSZ/man/read.expr.Rd
   pkg/CHNOSZ/tests/testthat/test-wjd.R
   pkg/CHNOSZ/vignettes/anintro.Rmd
Log:
remove Eco.csv.xz and rename more.aa() to yeast.aa()


Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================
--- pkg/CHNOSZ/DESCRIPTION	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/DESCRIPTION	2017-09-26 14:23:06 UTC (rev 221)
@@ -1,6 +1,6 @@
 Date: 2017-09-26
 Package: CHNOSZ
-Version: 1.1.0-19
+Version: 1.1.0-20
 Title: Thermodynamic Calculations for Geobiochemistry
 Author: Jeffrey Dick
 Maintainer: Jeffrey Dick <j3ffdick at gmail.com>

Modified: pkg/CHNOSZ/NAMESPACE
===================================================================
--- pkg/CHNOSZ/NAMESPACE	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/NAMESPACE	2017-09-26 14:23:06 UTC (rev 221)
@@ -13,7 +13,7 @@
   "describe.property", "describe.basis", "equilibrate",
   "aminoacids", "ZC.col",
   "pinfo", "protein.length", "protein.formula",
-  "read.fasta", "protein.basis", "yeastgfp", "more.aa", "add.protein",
+  "read.fasta", "protein.basis", "yeastgfp", "yeast.aa", "add.protein",
   "unitize", "revisit", "seq2aa", "findit",
   "thermo.refs", "mod.obigt", "today",
 # examples

Modified: pkg/CHNOSZ/R/examples.R
===================================================================
--- pkg/CHNOSZ/R/examples.R	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/R/examples.R	2017-09-26 14:23:06 UTC (rev 221)
@@ -10,7 +10,7 @@
     "util.fasta", "util.formula", "util.matrix", "util.misc", "util.seq", "util.units",
     "util.water", "taxonomy", "info", "protein.info", "hkf", "water", "IAPWS95", "subcrt",
     "makeup", "basis", "swap.basis", "species", "affinity", "equil.boltzmann", 
-    "diagram", "buffer", "nonideal", "add.protein", "protein", "ionize.aa", "more.aa", "read.expr",
+    "diagram", "buffer", "nonideal", "add.protein", "protein", "ionize.aa", "yeast.aa", "read.expr",
     "anim", "objective", "revisit", "transfer", "EOSregress", "wjd")
   plot.it <- FALSE
   if(is.character(do.png))

Modified: pkg/CHNOSZ/R/info.R
===================================================================
--- pkg/CHNOSZ/R/info.R	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/R/info.R	2017-09-26 14:23:06 UTC (rev 221)
@@ -18,10 +18,8 @@
       nrow(thermo$element), ", buffers: ", length(unique(thermo$buffers$name)), sep=""))
     message(paste("number of proteins in thermo$protein is", nrow(thermo$protein), "from",
       length(unique(thermo$protein$organism)), "organisms"))
-    # print information about SGD.csv, ECO.csv, HUM.csv
-    more.aa(organism="Sce")
-    more.aa(organism="Eco")
-    #pdata.aa(organism="HUM")
+    # print information about Sce.csv
+    yeast.aa()
     # print information about yeastgfp.csv
     yeastgfp()
     return()

Deleted: pkg/CHNOSZ/R/more.aa.R
===================================================================
--- pkg/CHNOSZ/R/more.aa.R	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/R/more.aa.R	2017-09-26 14:23:06 UTC (rev 221)
@@ -1,66 +0,0 @@
-# CHNOSZ/more.aa.R
-# get amino acid compositions of proteins from model organisms
-# (Eco.csv or Sce.csv)
-
-more.aa <- function(protein=NULL, organism) {
-  # return the composition of one or more proteins from
-  # a "model organism", E. coli (Eco) or S. cerevisiae (Sce)
-  # extracted from get.protein 20120519
-  datapath <- paste("extdata/protein/", organism, ".csv.xz", sep="")
-  datafile <- system.file(datapath, package="CHNOSZ")
-  if(datafile=="") stop(paste("missing", datapath))
-  mydata <- read.csv(datafile, as.is=TRUE)
-  # if protein is not supplied, just give some information about the datafile
-  if(is.null(protein)) {
-    message("more.aa: ", datapath, " has data for ", nrow(mydata), " proteins")
-    return(invisible())
-  }
-  # which columns to search for matches
-  if(organism=="Sce") searchcols <- c("ORF", "SGDID", "GENE")
-  else if(organism=="Eco") searchcols <- c("protein", "abbrv")
-  # which columns have the amino acids, in the order of thermo$protein 
-  iaa <- match(toupper(aminoacids(3)), toupper(colnames(mydata)))
-  # iterate over a list
-  waslist <- TRUE
-  out <- list()
-  if(!is.list(protein)) {
-    waslist <- FALSE
-    protein <- list(protein)
-  }
-  for(i in 1:length(protein)) {
-    # find the matches
-    imatch <- rep(NA, length(protein[[i]]))
-    for(cname in searchcols) {
-      icol <- match(cname, colnames(mydata))
-      if(is.na(icol)) next
-      iimatch <- match(protein[[i]], mydata[, icol])
-      imatch[!is.na(iimatch)] <- iimatch[!is.na(iimatch)]
-    }
-    # report and remember the unsuccessful matches
-    if(all(is.na(imatch))) stop("no proteins found!")
-    inotmatch <- which(is.na(imatch)) 
-    if(length(inotmatch) > 0) {
-      if(length(inotmatch)==1) verb <- " was" else verb <- " were"
-      message("more.aa: ", paste(protein[[i]][inotmatch], collapse=" "), verb, " not matched")
-    }
-    aa <- data.frame(mydata[imatch, iaa])
-    # add the identifying columns
-    if(organism=="Sce") ref <- mydata$SGDID[imatch]
-    else ref <- rep(NA, length(protein[[i]]))
-    if(organism=="Sce") abbrv <- mydata$GENE[imatch]
-    else abbrv <- rep(NA, length(protein[[i]]))
-    chains <- rep(1, length(protein[[i]]))
-    chains[inotmatch] <- NA
-    org <- rep(organism[[1]], length(protein[[i]]))
-    precols <- data.frame(protein[[i]], organism=org, ref, abbrv, chains, stringsAsFactors=FALSE)
-    colnames(precols)[1] <- "protein"
-    colnames(aa) <- aminoacids(3)
-    aa <- cbind(precols, aa)
-    out <- c(out, list(aa))
-  }
-  # done!
-  if(!waslist) return(out[[1]])
-  else return(out)
-}
-
-

Copied: pkg/CHNOSZ/R/yeast.aa.R (from rev 220, pkg/CHNOSZ/R/more.aa.R)
===================================================================
--- pkg/CHNOSZ/R/yeast.aa.R	                        (rev 0)
+++ pkg/CHNOSZ/R/yeast.aa.R	2017-09-26 14:23:06 UTC (rev 221)
@@ -0,0 +1,61 @@
+# CHNOSZ/yeast.aa.R
+# get amino acid compositions of proteins from Saccharomyces cerevisiae
+
+yeast.aa <- function(protein=NULL) {
+  # return the composition of one or more proteins from S. cerevisiae (Sce)
+  # extracted from get.protein 20120519
+  datapath <- paste("extdata/protein/Sce.csv.xz", sep="")
+  datafile <- system.file(datapath, package="CHNOSZ")
+  if(datafile=="") stop(paste("missing", datapath))
+  mydata <- read.csv(datafile, as.is=TRUE)
+  # if protein is not supplied, just give some information about the datafile
+  if(is.null(protein)) {
+    message("yeast.aa: ", datapath, " has data for ", nrow(mydata), " proteins")
+    return(invisible())
+  }
+  # which columns to search for matches
+  searchcols <- c("ORF", "SGDID", "GENE")
+  # which columns have the amino acids, in the order of thermo$protein 
+  iaa <- match(toupper(aminoacids(3)), toupper(colnames(mydata)))
+  # iterate over a list
+  waslist <- TRUE
+  out <- list()
+  if(!is.list(protein)) {
+    waslist <- FALSE
+    protein <- list(protein)
+  }
+  for(i in 1:length(protein)) {
+    # find the matches
+    imatch <- rep(NA, length(protein[[i]]))
+    for(cname in searchcols) {
+      icol <- match(cname, colnames(mydata))
+      if(is.na(icol)) next
+      iimatch <- match(protein[[i]], mydata[, icol])
+      imatch[!is.na(iimatch)] <- iimatch[!is.na(iimatch)]
+    }
+    # report and remember the unsuccessful matches
+    if(all(is.na(imatch))) stop("no proteins found!")
+    inotmatch <- which(is.na(imatch)) 
+    if(length(inotmatch) > 0) {
+      if(length(inotmatch)==1) verb <- " was" else verb <- " were"
+      message("yeast.aa: ", paste(protein[[i]][inotmatch], collapse=" "), verb, " not matched")
+    }
+    aa <- data.frame(mydata[imatch, iaa])
+    # add the identifying columns
+    ref <- mydata$SGDID[imatch]
+    abbrv <- mydata$GENE[imatch]
+    chains <- rep(1, length(protein[[i]]))
+    chains[inotmatch] <- NA
+    org <- rep("Sce", length(protein[[i]]))
+    precols <- data.frame(protein[[i]], organism=org, ref, abbrv, chains, stringsAsFactors=FALSE)
+    colnames(precols)[1] <- "protein"
+    colnames(aa) <- aminoacids(3)
+    aa <- cbind(precols, aa)
+    out <- c(out, list(aa))
+  }
+  # done!
+  if(!waslist) return(out[[1]])
+  else return(out)
+}
+
+

Modified: pkg/CHNOSZ/demo/revisit.R
===================================================================
--- pkg/CHNOSZ/demo/revisit.R	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/demo/revisit.R	2017-09-26 14:23:06 UTC (rev 221)
@@ -5,7 +5,7 @@
 loc <- "cell.periphery"
 y <- yeastgfp(loc)
 # get the amino acid compositions of the proteins
-aa <- more.aa(y$protein, "Sce")
+aa <- yeast.aa(y$protein)
 # don't use those with NA abundance or sequence
 ina <- is.na(y$abundance) | is.na(aa$chains)
 aa <- aa[!ina, ]

Modified: pkg/CHNOSZ/demo/wjd.R
===================================================================
--- pkg/CHNOSZ/demo/wjd.R	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/demo/wjd.R	2017-09-26 14:23:06 UTC (rev 221)
@@ -36,7 +36,7 @@
 # get the proteins in the requested location
 y <- yeastgfp("cell.periphery")
 # get the amino acid compositions of the proteins
-aa <- more.aa(y$protein, "Sce")
+aa <- yeast.aa(y$protein)
 # don't use those with NA abundance or sequence
 ina <- is.na(y$abundance) | is.na(aa$chains)
 aa <- aa[!ina, ]

Modified: pkg/CHNOSZ/demo/yeastgfp.R
===================================================================
--- pkg/CHNOSZ/demo/yeastgfp.R	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/demo/yeastgfp.R	2017-09-26 14:23:06 UTC (rev 221)
@@ -18,7 +18,7 @@
 # get protein names and abundances in each location
 gfp <- yeastgfp(names)
 # get amino acid compositions of proteins
-aa <- more.aa(gfp$protein, "Sce")
+aa <- yeast.aa(gfp$protein)
 # calculate average amino acid compositions 
 for(i in 1:length(names)) {
   avgaa <- aasum(aa[[i]], gfp$abundance[[i]], average=TRUE, protein=names[i])
@@ -53,7 +53,7 @@
 ## This figure is similar to Fig. 3 of Dick (2009). 
 locations <- yeastgfp()
 gfp <- yeastgfp(locations)
-aa <- more.aa(gfp$protein, "Sce")
+aa <- yeast.aa(gfp$protein)
 for(i in 1:length(locations)) {
   avgaa <- aasum(aa[[i]], gfp$abundance[[i]], average=TRUE, protein=locations[i])
   add.protein(avgaa)

Modified: pkg/CHNOSZ/inst/NEWS
===================================================================
--- pkg/CHNOSZ/inst/NEWS	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/inst/NEWS	2017-09-26 14:23:06 UTC (rev 221)
@@ -1,4 +1,4 @@
-CHANGES IN CHNOSZ 1.1.0-19 (2017-09-26)
+CHANGES IN CHNOSZ 1.1.0-20 (2017-09-26)
 ---------------------------------------
 
 MAJOR CHANGES:
@@ -60,6 +60,8 @@
 - To save space, taxid_names.csv has been trimmed to hold only those
   taxids listed in extdata/bison/gi.taxid.txt.
 
+- Remove Eco.csv.xz and rename more.aa() to yeast.aa().
+
 CHANGES IN CHNOSZ 1.1.0 (2017-05-04)
 ------------------------------------
 

Deleted: pkg/CHNOSZ/inst/extdata/protein/Eco.csv.xz
===================================================================
(Binary files differ)

Modified: pkg/CHNOSZ/man/add.protein.Rd
===================================================================
--- pkg/CHNOSZ/man/add.protein.Rd	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/man/add.protein.Rd	2017-09-26 14:23:06 UTC (rev 221)
@@ -59,7 +59,7 @@
 }
 
 \seealso{
-\code{\link{read.fasta}}, \code{\link{uniprot.aa}}, \code{\link{more.aa}} for other ways of getting amino acid compositions.
+\code{\link{read.fasta}}, \code{\link{uniprot.aa}}, \code{\link{yeast.aa}} for other ways of getting amino acid compositions.
 
 \code{\link{pinfo}} for protein-level functions (length, chemical formulas, reaction coefficients of basis species).
 

Modified: pkg/CHNOSZ/man/extdata.Rd
===================================================================
--- pkg/CHNOSZ/man/extdata.Rd	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/man/extdata.Rd	2017-09-26 14:23:06 UTC (rev 221)
@@ -44,16 +44,12 @@
   }
 
   Files in \code{protein} contain amino acid compositions for proteins.
-  See \code{\link{more.aa}} and \code{\link{read.expr}} for examples that use these files.
+  See \code{\link{yeast.aa}} and \code{\link{read.expr}} for examples that use these files.
   \itemize{
     \item \code{Sce.csv.xz}
       Data frame of amino acid composition of 6716 proteins from the \emph{Saccharomyces} Genome Database (SGD).
       Values in the first three columns are the \code{ORF} names of proteins, \code{SGDID}, and \code{GENE} names. The remaining twenty columns (\code{ALA}..\code{VAL}) contain the numbers of the respective amino acids in each protein.
       The sources of data for \samp{Sce.csv} are the files \samp{protein_properties.tab} and \samp{SGD_features.tab} (for the gene names), downloaded from \url{http://www.yeastgenome.org} on 2013-08-24.
-    \item \code{Eco.csv.xz}
-      Amino acid compositions of 4407 proteins in \emph{Escherichia coli} strain K12.
-      Format is the one used \code{\link{thermo}$protein}, with columns \samp{protein} holding the gene name, \samp{organism} set to \samp{ECOLI}, and \code{abbrv} holding the UniProt ID.
-      The source of data is the file \samp{ECOLI.fas} downloaded from the HAMAP (High-quality Automated and Manual Annotation of microbial Proteomes system) FTP site (Gattiker et al., 2003) on 2010-09-25 (old URL: ftp://ftp.expasy.org/databases/hamap/complete_proteomes/fasta/bacteria).
     \item \code{DS11.csv}, \code{DS13.csv}
       These two files contain amino acid compositions of metagenomically encoded proteins, averaged together according to functional annotation (DS11) or taxonomic affiliation (DS13).
       The data are from Dick and Shock, 2011 and 2013.

Deleted: pkg/CHNOSZ/man/more.aa.Rd
===================================================================
--- pkg/CHNOSZ/man/more.aa.Rd	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/man/more.aa.Rd	2017-09-26 14:23:06 UTC (rev 221)
@@ -1,48 +0,0 @@
-\encoding{UTF-8}
-\name{more.aa}
-\alias{more.aa}
-\title{Proteins from Model Organisms}
-\description{
-  Retrieve the amino acid compositions of one or more proteins from \emph{Escherichia coli} or \emph{Saccharomyces cerevisiae}.
-}
-
-\usage{
-  more.aa(protein = NULL, organism)
-}
-
-\arguments{
-  \item{protein}{character, name of protein}
-  \item{organism}{character, name of organism (\samp{Eco} or \samp{Sce})}
-}
-
-\details{
-\code{more.aa} retrieves the amino acid composition(s) of the indicated proteins in either \emph{Escherichia coli} or \emph{Saccharomyces cerevisiae}.
-The value of \code{organism} can be one of \samp{Eco} or \samp{Sce}.
-The calculation depends on the data files \code{\link{extdata}/protein/Eco.csv.xz} and \code{Sce.csv.xz}, which contain the amino acid compositions of the proteins.
-The \code{protein} argument should be a vector or a list of vectors of one or more Ordered Locus Names (OLN) or Open Reading Frame (ORF) names that are found in these files.
-The output data frame contains rows with NA compositions for names that are not matched.
-}
-
-\value{
-A data frame, or list of data frames, containing the amino acid composition(s) of the specified protein(s) in the format of \code{\link{thermo}$protein}.
-}
-
-\seealso{
-\code{\link{extdata}} describes the sources of compositional data for the proteins. Other examples of usage of \code{more.aa} are shown for \code{\link{read.expr}}.
-}
-
-
-\examples{
-\dontshow{data(thermo)}
-# the first 13 names in UniProt for "aminotransferase ecoli"
-at.ecoli <- c("BIOA", "ARNB", "SERC", "AAT", "TYRB", "ARGD", 
-  "ILVE", "ALAA", "ALAC", "YBDL", "AVTA", "GLMS", "PUUE")
-# get the amino acid compositions
-# note that ALAA and ALAC are not matched
-at.aa <- more.aa(at.ecoli, "Eco")
-# what are their formulas and oxidation states
-protein.formula(at.aa)
-ZC(protein.formula(at.aa))
-}
-
-\concept{Protein thermodynamic modeling}

Modified: pkg/CHNOSZ/man/read.expr.Rd
===================================================================
--- pkg/CHNOSZ/man/read.expr.Rd	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/man/read.expr.Rd	2017-09-26 14:23:06 UTC (rev 221)
@@ -42,13 +42,13 @@
 For \code{yeastgfp}, if \code{location} is NULL, the function returns the names of all known locations, and if the length of \code{location} is >1, the \code{protein} and \code{abundance} values are lists of the results for each location.
 }
 
-\seealso{ \code{\link{more.aa}} for getting the amino acid compositions of the proteins. }
+\seealso{ \code{\link{yeast.aa}} for getting the amino acid compositions of the proteins. }
 
 \examples{\dontshow{data(thermo)}
 ## overall oxidation state of proteins exclusively localized 
 ## to cytoplasm of S. cerevisiae with/without abundance weighting
 y <- yeastgfp("cytoplasm")
-aa <- more.aa(y$protein, "Sce")
+aa <- yeast.aa(y$protein)
 aaavg <- aasum(aa, average=TRUE)
 ZC(protein.formula(aaavg))
 # the average composition weighted by abundance
@@ -61,17 +61,17 @@
 # read all protein names and abundances in ID and emPAI columns
 # (emPAI - exponentially modified protein abundance index)
 expr <- read.expr(file, "ID", "emPAI")
-# scatter plot of average oxidation state and emPAI
-aa <- more.aa(expr$protein, "Eco")
-pf <- protein.formula(aa)
-zc <- ZC(pf)
-# note we specify ylim here that excludes some high-emPAI values
-plot(zc, expr$abundance, xlab=expr.property("ZC"), ylim=c(0, 90), ylab="emPAI",
-  main="Proteins in E. coli cytosol\nAbundance vs oxidation state of carbon")
-legend("topleft", pch=1, legend="Ishihama et al., 2008")
-# what if we just want kinases?
-# "description" is the name of the column where we search for "kinase"
-expr.kinase <- read.expr(file, "ID", "emPAI", list(description="kinase"))
+## scatter plot of average oxidation state and emPAI
+#aa <- more.aa(expr$protein, "Eco")
+#pf <- protein.formula(aa)
+#zc <- ZC(pf)
+## note we specify ylim here that excludes some high-emPAI values
+#plot(zc, expr$abundance, xlab=expr.property("ZC"), ylim=c(0, 90), ylab="emPAI",
+#  main="Proteins in E. coli cytosol\nAbundance vs oxidation state of carbon")
+#legend("topleft", pch=1, legend="Ishihama et al., 2008")
+## what if we just want kinases?
+## "description" is the name of the column where we search for "kinase"
+#expr.kinase <- read.expr(file, "ID", "emPAI", list(description="kinase"))
 
 ## potential fields for overall protein compositions 
 ## transcriptionally induced and repressed in aerobic
@@ -87,11 +87,11 @@
   "Clim.anaerobic.down", "Clim.anaerobic.up")
 file <- system.file("extdata/abundance/TBD+05.csv", package="CHNOSZ")
 dat <- read.csv(file, as.is=TRUE)
-# more.aa: get the amino acid compositions
+# yeast.aa: get the amino acid compositions
 # aasum: average them together
 for(thisexpt in expt) {
   p <- dat$protein[dat[, thisexpt]]
-  aa <- more.aa(p, "Sce")
+  aa <- yeast.aa(p)
   aa <- aasum(aa, average=TRUE, protein=thisexpt)
   add.protein(aa)
 }

Copied: pkg/CHNOSZ/man/yeast.aa.Rd (from rev 220, pkg/CHNOSZ/man/more.aa.Rd)
===================================================================
--- pkg/CHNOSZ/man/yeast.aa.Rd	                        (rev 0)
+++ pkg/CHNOSZ/man/yeast.aa.Rd	2017-09-26 14:23:06 UTC (rev 221)
@@ -0,0 +1,47 @@
+\encoding{UTF-8}
+\name{yeast.aa}
+\alias{yeast.aa}
+\title{Proteins from Saccharomyces cerevisiae}
+\description{
+  Retrieve the amino acid compositions of one or more proteins from \emph{Saccharomyces cerevisiae}.
+}
+
+\usage{
+  yeast.aa(protein = NULL)
+}
+
+\arguments{
+  \item{protein}{character, name of protein}
+}
+
+\details{
+\code{yeast.aa} retrieves the amino acid composition(s) of the indicated proteins in \emph{Saccharomyces cerevisiae}.
+The calculation depends on the data file \code{\link{extdata}/protein/Sce.csv.xz}, which contains the amino acid compositions of the proteins.
+The \code{protein} argument should be a vector or a list of vectors of one or more SGD IDs, Open Reading Frame (ORF) names, or gene names that are found in this file.
+The output data frame contains rows with NA compositions for names that are not matched.
+}
+
+\value{
+A data frame, or list of data frames, containing the amino acid composition(s) of the specified protein(s) in the format of \code{\link{thermo}$protein}.
+}
+
+\seealso{
+\code{\link{extdata}} describes the sources of compositional data for the proteins. Other examples of usage of \code{yeast.aa} are shown for \code{\link{read.expr}}.
+}
+
+
+\examples{
+\dontshow{data(thermo)}
+# the first few names in UniProt for "aminotransferase yeast"
+genes <- c("AATC", "ARO8", "BCA1", "AMPL", "BCA2", "ARO9")
+# the corresponding ORF names
+ORF <- c("YLR027C", "YGL202W", "YHR208W", "YKL103C", "YJR148W", "YHR137W")
+# we only match two of them by gene name, but all by ORF name
+aa <- yeast.aa(genes)
+aa <- yeast.aa(ORF)
+# what are their formulas and average oxidation states of carbon
+protein.formula(aa)
+ZC(protein.formula(aa))
+}
+
+\concept{Protein thermodynamic modeling}

Deleted: pkg/CHNOSZ/tests/testthat/test-more.aa.R
===================================================================
--- pkg/CHNOSZ/tests/testthat/test-more.aa.R	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/tests/testthat/test-more.aa.R	2017-09-26 14:23:06 UTC (rev 221)
@@ -1,7 +0,0 @@
-context("more.aa")
-
-test_that("unmatched proteins produce a message and NA row", {
-  expect_message(more.aa(c("ACCA", "XXX"), "Eco"), "XXX was not matched")
-  expect_equal(nrow(more.aa(c("ACCA", "XXX"), "Eco")), 2)
-})
-

Modified: pkg/CHNOSZ/tests/testthat/test-wjd.R
===================================================================
--- pkg/CHNOSZ/tests/testthat/test-wjd.R	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/tests/testthat/test-wjd.R	2017-09-26 14:23:06 UTC (rev 221)
@@ -23,7 +23,7 @@
   # use proteins in the lipid particle (n=19)
   y <- yeastgfp("lipid.particle")
   # get the amino acid compositions of the proteins
-  aa <- more.aa(y$protein, "Sce")
+  aa <- yeast.aa(y$protein)
   # don't use those with NA abundance or sequence (leaves n=17)
   ina <- is.na(y$abundance) | is.na(aa$chains)
   aa <- aa[!ina, ]

Copied: pkg/CHNOSZ/tests/testthat/test-yeast.aa.R (from rev 220, pkg/CHNOSZ/tests/testthat/test-more.aa.R)
===================================================================
--- pkg/CHNOSZ/tests/testthat/test-yeast.aa.R	                        (rev 0)
+++ pkg/CHNOSZ/tests/testthat/test-yeast.aa.R	2017-09-26 14:23:06 UTC (rev 221)
@@ -0,0 +1,7 @@
+context("yeast.aa")
+
+test_that("unmatched proteins produce a message and NA row", {
+  expect_message(yeast.aa(c("YLR027C", "XXX")), "XXX was not matched")
+  expect_equal(nrow(yeast.aa(c("YLR027C", "XXX"))), 2)
+})
+

Modified: pkg/CHNOSZ/vignettes/anintro.Rmd
===================================================================
--- pkg/CHNOSZ/vignettes/anintro.Rmd	2017-09-26 13:42:26 UTC (rev 220)
+++ pkg/CHNOSZ/vignettes/anintro.Rmd	2017-09-26 14:23:06 UTC (rev 221)
@@ -1225,7 +1225,7 @@
 
 Next, we get the amino acid compositions of the proteins and add them to `thermo$protein`:
 ```{r add_protein_yeast, message=FALSE}
-aa <- more.aa(y$protein[!ina], "Sce")
+aa <- yeast.aa(y$protein[!ina])
 ip <- add.protein(aa)
 ```
 
@@ -1362,10 +1362,9 @@
 file <- system.file("extdata/protein/DS11.csv", package = "CHNOSZ")
 aa_bison <- read.csv(file, as.is = TRUE, nrows = 5)
 ```
-<span style="color:green">`more.aa()`</span> retrieves amino acid composition of proteins in *Saccharomyces cerevisiae* and *Escherichia coli* from data files that are included with CHNOSZ:
-```{r more_aa}
-aa_YML020W <- more.aa("YML020W", "Sce")
-aa_ILVE <- more.aa("ILVE", "Eco")
+<span style="color:green">`yeast.aa()`</span> retrieves amino acid composition of proteins in *Saccharomyces cerevisiae* from a data file that is included with CHNOSZ:
+```{r yeast_aa}
+aa_YML020W <- yeast.aa("YML020W")
 ```
 <span style="color:green">`read.fasta()`</span> reads a FASTA file and returns the amino acid compositions of the sequences.
 The `iseq` argument can be used to read those sequences from the file:
@@ -1446,7 +1445,7 @@
 
 These amino acid compositions can be processed using functions such as <span style="color:green">`protein.length()`</span> and <span style="color:green">`protein.formula()`</span>:
 ```{r protein_length}
-myaa <- rbind(aa_YML020W, aa_ILVE, aa_Ef, aa_PRIO)
+myaa <- rbind(aa_YML020W, aa_Ef, aa_PRIO)
 protein.length(myaa)
 ```
 



More information about the CHNOSZ-commits mailing list