[CHNOSZ-commits] r231 - in pkg/CHNOSZ: . R inst inst/extdata/abundance man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sat Sep 30 04:19:20 CEST 2017


Author: jedick
Date: 2017-09-30 04:19:19 +0200 (Sat, 30 Sep 2017)
New Revision: 231

Added:
   pkg/CHNOSZ/R/yeast.R
   pkg/CHNOSZ/man/yeast.Rd
Removed:
   pkg/CHNOSZ/R/read.expr.R
   pkg/CHNOSZ/R/yeast.aa.R
   pkg/CHNOSZ/inst/extdata/abundance/ISR+08.csv.xz
   pkg/CHNOSZ/man/read.expr.Rd
   pkg/CHNOSZ/man/yeast.aa.Rd
Modified:
   pkg/CHNOSZ/DESCRIPTION
   pkg/CHNOSZ/NAMESPACE
   pkg/CHNOSZ/R/examples.R
   pkg/CHNOSZ/inst/NEWS
   pkg/CHNOSZ/man/add.protein.Rd
   pkg/CHNOSZ/man/extdata.Rd
Log:
remove read.expr() and ISR+08.csv


Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================
--- pkg/CHNOSZ/DESCRIPTION	2017-09-29 12:37:06 UTC (rev 230)
+++ pkg/CHNOSZ/DESCRIPTION	2017-09-30 02:19:19 UTC (rev 231)
@@ -1,6 +1,6 @@
-Date: 2017-09-29
+Date: 2017-09-30
 Package: CHNOSZ
-Version: 1.1.0-29
+Version: 1.1.0-30
 Title: Thermodynamic Calculations for Geobiochemistry
 Author: Jeffrey Dick
 Maintainer: Jeffrey Dick <j3ffdick at gmail.com>

Modified: pkg/CHNOSZ/NAMESPACE
===================================================================
--- pkg/CHNOSZ/NAMESPACE	2017-09-29 12:37:06 UTC (rev 230)
+++ pkg/CHNOSZ/NAMESPACE	2017-09-30 02:19:19 UTC (rev 231)
@@ -30,7 +30,7 @@
   "getrank", "parent", "sciname", "allparents", "getnodes", "getnames",
   "protein.obigt", "hkf", "cgl", "which.pmax",
   "equil.boltzmann", "equil.reaction", "find.tp",
-  "ionize.aa", "MP90.cp", "aasum", "read.expr",
+  "ionize.aa", "MP90.cp", "aasum",
   "anim.carboxylase",
   "qqr", "RMSD", "CVRMSD", "spearman", "DGmix", "DDGmix", "DGtr",
   "ratlab",

Modified: pkg/CHNOSZ/R/examples.R
===================================================================
--- pkg/CHNOSZ/R/examples.R	2017-09-29 12:37:06 UTC (rev 230)
+++ pkg/CHNOSZ/R/examples.R	2017-09-30 02:19:19 UTC (rev 231)
@@ -10,7 +10,7 @@
     "util.fasta", "util.formula", "util.matrix", "util.misc", "util.seq", "util.units",
     "util.water", "taxonomy", "info", "protein.info", "hkf", "water", "IAPWS95", "subcrt",
     "makeup", "basis", "swap.basis", "species", "affinity", "equil.boltzmann", 
-    "diagram", "buffer", "nonideal", "add.protein", "protein", "ionize.aa", "yeast.aa", "read.expr",
+    "diagram", "buffer", "nonideal", "add.protein", "protein", "ionize.aa", "yeast.aa",
     "anim", "objective", "revisit", "EOSregress", "wjd")
   plot.it <- FALSE
   if(is.character(do.png))

Deleted: pkg/CHNOSZ/R/read.expr.R
===================================================================
--- pkg/CHNOSZ/R/read.expr.R	2017-09-29 12:37:06 UTC (rev 230)
+++ pkg/CHNOSZ/R/read.expr.R	2017-09-30 02:19:19 UTC (rev 231)
@@ -1,90 +0,0 @@
-# CHNOSZ/read.expr.R
-# protein abundance and localization data from experiments
-# yeastgfp: protein localization and abundance from yeastgfp.csv
-# read.expr: protein abundance from other files (including in extdata/abundance)
-
-yeastgfp <- function(location=NULL, exclusive=TRUE) {
-  # return a list of ORFs and protein abundances for a subcellular location
-  # using data from the YeastGFP project 
-  # (yeastgfp.csv data file added to CHNOSZ_0.8, 20090422)
-  ypath <- "extdata/abundance/yeastgfp.csv.xz"
-  yfile <- system.file(ypath, package="CHNOSZ")
-  # yeastgfp preprocessing
-  ygfp <- read.csv(yfile)
-  # convert factors to numeric w/o NA coercion warnings
-  ygfp$abundance <- as.numeric.nowarn(as.character(ygfp$abundance))
-  # if location is NULL, just report on the content of the file
-  # and return the names of the locations
-  if(is.null(location)) {
-    message("yeastgfp: ", ypath, " has ", nrow(ygfp), " localizations and ",
-      length(ygfp$abundance[!is.na(ygfp$abundance)]), " abundances")
-    return(invisible(colnames(ygfp)[6:28]))
-  }
-  # iterate over multiple locations
-  out <- list()
-  for(i in 1:length(location)) {
-    # what location do we want?
-    ncol <- match(location[i], colnames(ygfp)[6:28]) + 5
-    if(is.na(ncol)) ncol <- agrep(location[i], colnames(ygfp)[6:28])[1] + 5
-    if(is.na(ncol)) stop(paste(location[i], "is not one of the subcellular locations in", ypath))
-    thisygfp <- ygfp[, ncol]
-    if(exclusive) {
-      # find the number of localizations of each ORF
-      localizations <- numeric(nrow(ygfp))
-      for(j in 6:28) localizations <- localizations + as.logical(ygfp[,j])
-      if(all(localizations[thisygfp] > 1)) message("yeastgfp: no exclusive localization found for ",location[i],
-        " ... using non-exclusive localizations",sep="")
-      else thisygfp <- thisygfp & ! localizations > 1
-    }
-    protein <- as.character(ygfp$yORF[thisygfp])
-    abundance <- ygfp$abundance[thisygfp]
-    if(length(location)==1) out <- list(protein=protein, abundance=abundance)
-    else {
-      out$protein <- c(out$protein, list(protein))
-      out$abundance <- c(out$abundance, list(abundance))
-    }
-  }
-  return(out)
-}
-
-read.expr <- function(file, idcol, abundcol, filter=NULL) {
-  ## read protein expression data from files
-  ## extracted from findit.R 20100926 jmd
-  ## file: the name of the file with sequence ids and abundance data
-  ## idcol: the column of the data file that has the sequence ids
-  ## abundcol: the column of the data file that has the abundances
-  ## filter: optional column names/search terms to filter the results
-  # the name of the data file
-  edata <- read.csv(file, stringsAsFactors=FALSE, check.names=FALSE)
-  # which columns for IDs and abundances
-  if(is.numeric(idcol)) iid <- idcol else iid <- match(idcol, colnames(edata))
-  if(is.na(iid)) stop("unidentified protein ID column in", file)
-  if(is.numeric(abundcol)) ia <- abundcol else ia <- match(abundcol, colnames(edata))
-  if(is.na(ia)) stop("unidentified protein abundance column in", file)
-  # first clean up the data file: duplicated sequences ids
-  idup <- duplicated(edata[, iid])
-  edata <- edata[!idup, ]
-  # remove NA sequence ids
-  ina <- is.na(edata[, iid])
-  edata <- edata[!ina, ]
-  # apply a filter if requested
-  if(!is.null(filter)) {
-    ifilter <- 1:nrow(edata)
-    for(i in 1:length(filter)) ifilter <- intersect(ifilter, grep(filter[[i]], edata[, names(filter)[[i]]]))
-    edata <- edata[unique(ifilter), ]
-  }
-  # that should be it
-  protein <- edata[, iid]
-  abundance <- edata[, ia]
-  return(list(protein=protein, abundance=abundance))
-#  # take their logarithms if they're not already taken
-#  if(missing(is.log)) {
-#    # make a guess: if the column name has "log" don't take the logarithm
-#    if(length(grep("log",colnames(edata)[ia]))==0) loga.target <- log10(loga.target)
-#  } else if(!is.log) loga.target <- log10(loga.target)
-#  # scale the abundances so that total activity of residues is unity
-#  if(!is.null(loga.total)) {
-#    pl <- rowSums(p[,6:25])
-#    loga.target <- unitize(loga.target,pl,loga.total)
-#  }
-}

Copied: pkg/CHNOSZ/R/yeast.R (from rev 230, pkg/CHNOSZ/R/yeast.aa.R)
===================================================================
--- pkg/CHNOSZ/R/yeast.R	                        (rev 0)
+++ pkg/CHNOSZ/R/yeast.R	2017-09-30 02:19:19 UTC (rev 231)
@@ -0,0 +1,104 @@
+# CHNOSZ/yeast.R
+# get amino acid compositions of proteins from Saccharomyces cerevisiae
+
+yeast.aa <- function(protein=NULL) {
+  # return the composition of one or more proteins from S. cerevisiae (Sce)
+  # extracted from get.protein 20120519
+  datapath <- paste("extdata/protein/Sce.csv.xz", sep="")
+  datafile <- system.file(datapath, package="CHNOSZ")
+  if(datafile=="") stop(paste("missing", datapath))
+  mydata <- read.csv(datafile, as.is=TRUE)
+  # if protein is not supplied, just give some information about the datafile
+  if(is.null(protein)) {
+    message("yeast.aa: ", datapath, " has data for ", nrow(mydata), " proteins")
+    return(invisible())
+  }
+  # which columns to search for matches
+  searchcols <- c("ORF", "SGDID", "GENE")
+  # which columns have the amino acids, in the order of thermo$protein 
+  iaa <- match(toupper(aminoacids(3)), toupper(colnames(mydata)))
+  # iterate over a list
+  waslist <- TRUE
+  out <- list()
+  if(!is.list(protein)) {
+    waslist <- FALSE
+    protein <- list(protein)
+  }
+  for(i in 1:length(protein)) {
+    # find the matches
+    imatch <- rep(NA, length(protein[[i]]))
+    for(cname in searchcols) {
+      icol <- match(cname, colnames(mydata))
+      if(is.na(icol)) next
+      iimatch <- match(protein[[i]], mydata[, icol])
+      imatch[!is.na(iimatch)] <- iimatch[!is.na(iimatch)]
+    }
+    # report and remember the unsuccessful matches
+    if(all(is.na(imatch))) stop("no proteins found!")
+    inotmatch <- which(is.na(imatch)) 
+    if(length(inotmatch) > 0) {
+      if(length(inotmatch)==1) verb <- " was" else verb <- " were"
+      message("yeast.aa: ", paste(protein[[i]][inotmatch], collapse=" "), verb, " not matched")
+    }
+    aa <- data.frame(mydata[imatch, iaa])
+    # add the identifying columns
+    ref <- mydata$SGDID[imatch]
+    abbrv <- mydata$GENE[imatch]
+    chains <- rep(1, length(protein[[i]]))
+    chains[inotmatch] <- NA
+    org <- rep("Sce", length(protein[[i]]))
+    precols <- data.frame(protein[[i]], organism=org, ref, abbrv, chains, stringsAsFactors=FALSE)
+    colnames(precols)[1] <- "protein"
+    colnames(aa) <- aminoacids(3)
+    aa <- cbind(precols, aa)
+    out <- c(out, list(aa))
+  }
+  # done!
+  if(!waslist) return(out[[1]])
+  else return(out)
+}
+
+# yeastgfp: protein localization and abundance from yeastgfp.csv
+yeastgfp <- function(location=NULL, exclusive=TRUE) {
+  # return a list of ORFs and protein abundances for a subcellular location
+  # using data from the YeastGFP project 
+  # (yeastgfp.csv data file added to CHNOSZ_0.8, 20090422)
+  ypath <- "extdata/abundance/yeastgfp.csv.xz"
+  yfile <- system.file(ypath, package="CHNOSZ")
+  # yeastgfp preprocessing
+  ygfp <- read.csv(yfile)
+  # convert factors to numeric w/o NA coercion warnings
+  ygfp$abundance <- as.numeric.nowarn(as.character(ygfp$abundance))
+  # if location is NULL, just report on the content of the file
+  # and return the names of the locations
+  if(is.null(location)) {
+    message("yeastgfp: ", ypath, " has ", nrow(ygfp), " localizations and ",
+      length(ygfp$abundance[!is.na(ygfp$abundance)]), " abundances")
+    return(invisible(colnames(ygfp)[6:28]))
+  }
+  # iterate over multiple locations
+  out <- list()
+  for(i in 1:length(location)) {
+    # what location do we want?
+    ncol <- match(location[i], colnames(ygfp)[6:28]) + 5
+    if(is.na(ncol)) ncol <- agrep(location[i], colnames(ygfp)[6:28])[1] + 5
+    if(is.na(ncol)) stop(paste(location[i], "is not one of the subcellular locations in", ypath))
+    thisygfp <- ygfp[, ncol]
+    if(exclusive) {
+      # find the number of localizations of each ORF
+      localizations <- numeric(nrow(ygfp))
+      for(j in 6:28) localizations <- localizations + as.logical(ygfp[,j])
+      if(all(localizations[thisygfp] > 1)) message("yeastgfp: no exclusive localization found for ",location[i],
+        " ... using non-exclusive localizations",sep="")
+      else thisygfp <- thisygfp & ! localizations > 1
+    }
+    protein <- as.character(ygfp$yORF[thisygfp])
+    abundance <- ygfp$abundance[thisygfp]
+    if(length(location)==1) out <- list(protein=protein, abundance=abundance)
+    else {
+      out$protein <- c(out$protein, list(protein))
+      out$abundance <- c(out$abundance, list(abundance))
+    }
+  }
+  return(out)
+}

Deleted: pkg/CHNOSZ/R/yeast.aa.R
===================================================================
--- pkg/CHNOSZ/R/yeast.aa.R	2017-09-29 12:37:06 UTC (rev 230)
+++ pkg/CHNOSZ/R/yeast.aa.R	2017-09-30 02:19:19 UTC (rev 231)
@@ -1,61 +0,0 @@
-# CHNOSZ/yeast.aa.R
-# get amino acid compositions of proteins from Saccharomyces cerevisiae
-
-yeast.aa <- function(protein=NULL) {
-  # return the composition of one or more proteins from S. cerevisiae (Sce)
-  # extracted from get.protein 20120519
-  datapath <- paste("extdata/protein/Sce.csv.xz", sep="")
-  datafile <- system.file(datapath, package="CHNOSZ")
-  if(datafile=="") stop(paste("missing", datapath))
-  mydata <- read.csv(datafile, as.is=TRUE)
-  # if protein is not supplied, just give some information about the datafile
-  if(is.null(protein)) {
-    message("yeast.aa: ", datapath, " has data for ", nrow(mydata), " proteins")
-    return(invisible())
-  }
-  # which columns to search for matches
-  searchcols <- c("ORF", "SGDID", "GENE")
-  # which columns have the amino acids, in the order of thermo$protein 
-  iaa <- match(toupper(aminoacids(3)), toupper(colnames(mydata)))
-  # iterate over a list
-  waslist <- TRUE
-  out <- list()
-  if(!is.list(protein)) {
-    waslist <- FALSE
-    protein <- list(protein)
-  }
-  for(i in 1:length(protein)) {
-    # find the matches
-    imatch <- rep(NA, length(protein[[i]]))
-    for(cname in searchcols) {
-      icol <- match(cname, colnames(mydata))
-      if(is.na(icol)) next
-      iimatch <- match(protein[[i]], mydata[, icol])
-      imatch[!is.na(iimatch)] <- iimatch[!is.na(iimatch)]
-    }
-    # report and remember the unsuccessful matches
-    if(all(is.na(imatch))) stop("no proteins found!")
-    inotmatch <- which(is.na(imatch)) 
-    if(length(inotmatch) > 0) {
-      if(length(inotmatch)==1) verb <- " was" else verb <- " were"
-      message("yeast.aa: ", paste(protein[[i]][inotmatch], collapse=" "), verb, " not matched")
-    }
-    aa <- data.frame(mydata[imatch, iaa])
-    # add the identifying columns
-    ref <- mydata$SGDID[imatch]
-    abbrv <- mydata$GENE[imatch]
-    chains <- rep(1, length(protein[[i]]))
-    chains[inotmatch] <- NA
-    org <- rep("Sce", length(protein[[i]]))
-    precols <- data.frame(protein[[i]], organism=org, ref, abbrv, chains, stringsAsFactors=FALSE)
-    colnames(precols)[1] <- "protein"
-    colnames(aa) <- aminoacids(3)
-    aa <- cbind(precols, aa)
-    out <- c(out, list(aa))
-  }
-  # done!
-  if(!waslist) return(out[[1]])
-  else return(out)
-}
-
-

Modified: pkg/CHNOSZ/inst/NEWS
===================================================================
--- pkg/CHNOSZ/inst/NEWS	2017-09-29 12:37:06 UTC (rev 230)
+++ pkg/CHNOSZ/inst/NEWS	2017-09-30 02:19:19 UTC (rev 231)
@@ -1,4 +1,4 @@
-CHANGES IN CHNOSZ 1.1.0-28 (2017-09-29)
+CHANGES IN CHNOSZ 1.1.0-30 (2017-09-30)
 ---------------------------------------
 
 MAJOR CHANGES:
@@ -73,11 +73,16 @@
 - To save space, taxid_names.csv has been trimmed to hold only those
   taxids listed in extdata/bison/gi.taxid.txt.
 
+CLEANUP:
+
 - Remove Eco.csv.xz and rename more.aa() to yeast.aa().
 
 - Remove transfer() and associated functions draw.transfer(), apc(), and
   feldspar().
 
+- Remove read.expr() and extdata/abundance/ISR+08.csv (protein abundance
+  in E. coli).
+
 CHANGES IN CHNOSZ 1.1.0 (2017-05-04)
 ------------------------------------
 

Deleted: pkg/CHNOSZ/inst/extdata/abundance/ISR+08.csv.xz
===================================================================
(Binary files differ)

Modified: pkg/CHNOSZ/man/add.protein.Rd
===================================================================
--- pkg/CHNOSZ/man/add.protein.Rd	2017-09-29 12:37:06 UTC (rev 230)
+++ pkg/CHNOSZ/man/add.protein.Rd	2017-09-30 02:19:19 UTC (rev 231)
@@ -63,8 +63,6 @@
 
 \code{\link{pinfo}} for protein-level functions (length, chemical formulas, reaction coefficients of basis species).
 
-\code{\link{read.expr}} for working with protein abundance and subcellular localization data.
-
 \code{\link{protein}} for examples of affinity calculations and diagrams.
 }
 

Modified: pkg/CHNOSZ/man/extdata.Rd
===================================================================
--- pkg/CHNOSZ/man/extdata.Rd	2017-09-29 12:37:06 UTC (rev 230)
+++ pkg/CHNOSZ/man/extdata.Rd	2017-09-30 02:19:19 UTC (rev 231)
@@ -12,8 +12,7 @@
 
   Files in \code{abundance} contain protein abundance and microbial occurrence data:
   \itemize{
-    \item \code{TBD+05.csv} lists genes with transcriptomic expression changes in carbon limitation stress response experiments in yeast (Tai et al., 2005). See \code{\link{read.expr}} for an example that uses this file.
-    \item \code{ISR+08.csv} has columns excerpted from Additional File 2 of Ishihama et al. (2008) for protein abundances in \emph{E. coli} cytosol. The columns in this file are ID (Swiss-Prot ID), accession (Swiss-Prot accession), emPAI (exponentially modified protein abundance index), copynumber (emPAI-derived copy number/cell), GRAVY (Kyte-Doolittel), FunCat (FunCat class description), PSORT (PSORT localisation), ribosomal (yes/no). See \code{\link{read.expr}} for an example that uses this file.
+    \item \code{TBD+05.csv} lists genes with transcriptomic expression changes in carbon limitation stress response experiments in yeast (Tai et al., 2005). See \code{\link{yeast.aa}} for an example that uses this file.
     \item \code{yeastgfp.csv.xz} Has 28 columns; the names of the first five are \code{yORF}, \code{gene name}, \code{GFP tagged?}, \code{GFP visualized?}, and \code{abundance}. The remaining columns correspond to the 23 subcellular localizations considered in the YeastGFP project (Huh et al., 2003 and Ghaemmaghami et al., 2003) and hold values of either \code{T} or \code{F} for each protein. \samp{yeastgfp.csv} was downloaded on 2007-02-01 from http://yeastgfp.ucsf.edu using the Advanced Search, setting options to download the entire dataset and to include localization table and abundance, sorted by orf number. See \code{\link{yeastgfp}} and \code{demo("yeastgfp")} for examples that use this file.
     \item \code{microbes.csv} has data for microbial occurrence (i.e. relative enrichement) in colorectal cancer and normal tissue. The file is from the Supporting Information of Dick (2016). This file is used by \code{demo("bugstab")}.
   }
@@ -44,12 +43,12 @@
   }
 
   Files in \code{protein} contain amino acid compositions for proteins.
-  See \code{\link{yeast.aa}} and \code{\link{read.expr}} for examples that use these files.
   \itemize{
     \item \code{Sce.csv.xz}
       Data frame of amino acid composition of 6716 proteins from the \emph{Saccharomyces} Genome Database (SGD).
       Values in the first three columns are the \code{ORF} names of proteins, \code{SGDID}, and \code{GENE} names. The remaining twenty columns (\code{ALA}..\code{VAL}) contain the numbers of the respective amino acids in each protein.
       The sources of data for \samp{Sce.csv} are the files \samp{protein_properties.tab} and \samp{SGD_features.tab} (for the gene names), downloaded from \url{http://www.yeastgenome.org} on 2013-08-24.
+      See \code{\link{yeast.aa}} for an example.
     \item \code{DS11.csv}, \code{DS13.csv}
       These two files contain amino acid compositions of metagenomically encoded proteins, averaged together according to functional annotation (DS11) or taxonomic affiliation (DS13).
       The data are from Dick and Shock, 2011 and 2013.
@@ -129,8 +128,6 @@
 
 Hnědkovský, L. and Wood, R. H. (1997) Apparent molar heat capacities of aqueous solutions of CH4, CO2, H2S, and NH3 at temperatures from 304 K to 704 K at a pressure of 28 MPa. \emph{J. Chem. Thermodyn.} \bold{29}, 731--747. \url{https://doi.org/10.1006/jcht.1997.0192}
 
-Ishihama, Y., Schmidt, T., Rappsilber, J., Mann, M., Hartl, F. U., Kerner, M. J. and Frishman, D. (2008) Protein abundance profiling of the \emph{Escherichia coli} cytosol. \emph{BMC Genomics} \bold{9}:102. \url{https://doi.org/10.1186/1471-2164-9-102}
-
 Joint Genome Institute (2007) Bison Pool Environmental Genome. Protein sequence files downloaded from IMG/M (\url{http://img.jgi.doe.gov/cgi-bin/m/main.cgi?section=FindGenomes&page=findGenomes})
 
 Privalov, P. L. and Makhatadze, G. I. (1990) Heat capacity of proteins. II. Partial molar heat capacity of the unfolded polypeptide chain of proteins: Protein unfolding effects. \emph{J. Mol. Biol.} \bold{213}, 385--391. \url{https://doi.org/10.1016/S0022-2836(05)80198-6}

Deleted: pkg/CHNOSZ/man/read.expr.Rd
===================================================================
--- pkg/CHNOSZ/man/read.expr.Rd	2017-09-29 12:37:06 UTC (rev 230)
+++ pkg/CHNOSZ/man/read.expr.Rd	2017-09-30 02:19:19 UTC (rev 231)
@@ -1,119 +0,0 @@
-\encoding{UTF-8}
-\name{read.expr}
-\alias{yeastgfp}
-\alias{read.expr}
-
-\title{Experimental Data for Protein Abundances and Localizations}
-
-\description{
-Get abundance data from a protein expression experiment and add the proteins to the current list of proteins.
-Retrieve the amino acid compositions of proteins with localizations and abundances taken from the YeastGFP project.
-}
-
-\usage{
-  yeastgfp(location, exclusive = TRUE)
-  read.expr(file, idcol, abundcol, filter=NULL)
-}
-
-\arguments{
-  \item{location}{character, name of subcellular location (compartment)}
-  \item{exclusive}{logical, report only proteins exclusively localized to a compartment?}
-  \item{file}{character, name of file with sequence IDs and abundance data}
-  \item{idcol}{character, name of the column with sequence IDs}
-  \item{abundcol}{character, name of the column with abundances}
-  \item{filter}{list, optional filters to apply}
-}
-
-\details{
-\code{read.expr} and \code{yeastgfp} read data files stored in \code{\link{extdata}/abundance} to retrieve identities and possibly abundances of proteins in certain conditions.
-
-\code{yeastgfp} returns the identities and abundances of proteins with the requested subcellular localization(s) (specified in \code{location}) using data from the YeastGFP project that is stored in \code{\link{extdata}/abundance/yeastgfp.csv.xz}.
-If \code{exclusive} is \code{FALSE}, the function grabs all proteins that are localized to a compartment even if they are also localized to other compartments.
-If \code{exclusive} is \code{TRUE} (the default), only those proteins that are localized exclusively to the requested compartments are identified, unless there are no such proteins, then the non-exclusive localizations are used (applies to the \samp{bud} localization).
-
-\code{read.expr} reads a \code{file} (CSV format) that contains protein sequence names or IDs and protein abundance data.
-\code{idcol} and \code{abundcol} are either the names of the columns holding the sequence IDs and protein abundances, or numeric values indicating the column numbers where these data are found.
-The column indicated by \code{abundcol} might not actually be abundance (it is likely to be abundance ratios).
-The data can be filtered to only include records that contain the term in the named argument \code{filter}, the name of which indicates the column to apply the filter to.
-}
-
-\value{
-Each of these functions returns a list with elements named \code{protein} (names of proteins) and \code{abundance} (counts or concentrations without any conversion from the units in the data file).
-For \code{yeastgfp}, if \code{location} is NULL, the function returns the names of all known locations, and if the length of \code{location} is >1, the \code{protein} and \code{abundance} values are lists of the results for each location.
-}
-
-\seealso{ \code{\link{yeast.aa}} for getting the amino acid compositions of the proteins. }
-
-\examples{\dontshow{data(thermo)}
-## overall oxidation state of proteins exclusively localized 
-## to cytoplasm of S. cerevisiae with/without abundance weighting
-y <- yeastgfp("cytoplasm")
-aa <- yeast.aa(y$protein)
-aaavg <- aasum(aa, average=TRUE)
-ZC(protein.formula(aaavg))
-# the average composition weighted by abundance
-waaavg <- aasum(aa, abundance=y$abundance, average=TRUE)
-ZC(protein.formula(waaavg))
-
-## read.expr using one of the provided data files,
-## from Ishihama et al., 2008
-file <- system.file("extdata/abundance/ISR+08.csv.xz", package="CHNOSZ")
-# read all protein names and abundances in ID and emPAI columns
-# (emPAI - exponentially modified protein abundance index)
-expr <- read.expr(file, "ID", "emPAI")
-## scatter plot of average oxidation state and emPAI
-#aa <- more.aa(expr$protein, "Eco")
-#pf <- protein.formula(aa)
-#zc <- ZC(pf)
-## note we specify ylim here that excludes some high-emPAI values
-#plot(zc, expr$abundance, xlab=expr.property("ZC"), ylim=c(0, 90), ylab="emPAI",
-#  main="Proteins in E. coli cytosol\nAbundance vs oxidation state of carbon")
-#legend("topleft", pch=1, legend="Ishihama et al., 2008")
-## what if we just want kinases?
-## "description" is the name of the column where we search for "kinase"
-#expr.kinase <- read.expr(file, "ID", "emPAI", list(description="kinase"))
-
-## potential fields for overall protein compositions 
-## transcriptionally induced and repressed in aerobic
-## and anaerobic carbon limitation
-## (experiments of Tai et al., 2005)
-# the activities of ammonium and sulfate used here
-# are similar to the non-growth-limiting concentrations
-# used by Boer et al., 2003
-basis(c("glucose", "H2O", "NH4+", "hydrogen", "SO4-2", "H+"),
-  c(-1, 0, -1.3, 999, -1.4, -7))
-# the names of the experiments in TBD+05.csv
-expt <- c("Clim.aerobic.down", "Clim.aerobic.up",
-  "Clim.anaerobic.down", "Clim.anaerobic.up")
-file <- system.file("extdata/abundance/TBD+05.csv", package="CHNOSZ")
-dat <- read.csv(file, as.is=TRUE)
-# yeast.aa: get the amino acid compositions
-# aasum: average them together
-for(thisexpt in expt) {
-  p <- dat$protein[dat[, thisexpt]]
-  aa <- yeast.aa(p)
-  aa <- aasum(aa, average=TRUE, protein=thisexpt)
-  add.protein(aa)
-}
-species(expt, "Sce")
-a <- affinity(C6H12O6=c(-30, 0), H2=c(-20, 0))
-d <- diagram(a, normalize=TRUE, fill=NULL)
-title(main=paste("Formation potential of proteins associated with\n",
-  "transcriptional response to carbon limitation in yeast"))
-# the affinity of formation favors the proteins upregulated 
-# by carbon limitation at low chemical potentials of C6H12O6 ...
-stopifnot(c(d$predominant[1,1], d$predominant[1,128])==grep("up", expt))
-# ... and favors proteins downregulated by aerobic conditions
-# at high hydrogen fugacities
-stopifnot(c(d$predominant[128, 128], d$predominant[128, 1])==grep("down", expt))
-}
-
-\references{
-Boer, V. M., de Winde, J. H., Pronk, J. T. and Piper, M. D. W. (2003) The genome-wide transcriptional responses of \emph{Saccharomyces cerevisiae} grown on glucose in aerobic chemostat cultures limited for carbon, nitrogen, phosphorus, or sulfur. \emph{J. Biol. Chem.} \bold{278}, 3265--3274. \url{https://doi.org/10.1074/jbc.M209759200}
-
-Ishihama, Y., Schmidt, T., Rappsilber, J., Mann, M., Hartl, F. U., Kerner, M. J. and Frishman, D. (2008) Protein abundance profiling of the \emph{Escherichia coli} cytosol. \emph{BMC Genomics} \bold{9}:102. \url{https://doi.org/10.1186/1471-2164-9-102}
-
-Tai, S. L., Boer, V. M., Daran-Lapujade, P., Walsh, M. C., de Winde, J. H., Daran, J.-M. and Pronk, J. T. (2005) Two-dimensional transcriptome analysis in chemostat cultures: Combinatorial effects of oxygen availability and macronutrient limitation in \emph{Saccharomyces cerevisiae}. \emph{J. Biol. Chem.} \bold{280}, 437--447. \url{https://doi.org/10.1074/jbc.M410573200}
-}
-
-\concept{Protein thermodynamic modeling}

Copied: pkg/CHNOSZ/man/yeast.Rd (from rev 230, pkg/CHNOSZ/man/yeast.aa.Rd)
===================================================================
--- pkg/CHNOSZ/man/yeast.Rd	                        (rev 0)
+++ pkg/CHNOSZ/man/yeast.Rd	2017-09-30 02:19:19 UTC (rev 231)
@@ -0,0 +1,108 @@
+\encoding{UTF-8}
+\name{yeast}
+\alias{yeast.aa}
+\alias{yeastgfp}
+\title{Composition, Localization, and Abundances of Proteins in Yeast}
+\description{
+  Retrieve the amino acid compositions of one or more proteins from \emph{Saccharomyces cerevisiae} and get localizations and abundances reported by the YeastGFP project.
+}
+
+\usage{
+  yeast.aa(protein = NULL)
+  yeastgfp(location = NULL, exclusive = TRUE)
+
+}
+
+\arguments{
+  \item{protein}{character, name of protein}
+  \item{location}{character, name of subcellular location (compartment)}
+  \item{exclusive}{logical, report only proteins exclusively localized to a compartment?}
+}
+
+\details{
+\code{yeast.aa} retrieves the amino acid composition(s) of the indicated proteins in \emph{Saccharomyces cerevisiae}.
+The calculation depends on the data file \code{\link{extdata}/protein/Sce.csv.xz}, which contains the amino acid compositions of the proteins.
+The \code{protein} argument should be a vector or a list of vectors of one or more SGD IDs, Open Reading Frame (ORF) names, or gene names that are found in this file.
+The output data frame contains rows with NA compositions for names that are not matched.
+
+\code{yeastgfp} returns the identities and abundances of proteins with the requested subcellular localization(s) (specified in \code{location}) using data from the YeastGFP project that is stored in \code{\link{extdata}/abundance/yeastgfp.csv.xz}.
+If \code{exclusive} is \code{FALSE}, the function grabs all proteins that are localized to a compartment even if they are also localized to other compartments.
+If \code{exclusive} is \code{TRUE} (the default), only those proteins that are localized exclusively to the requested compartments are identified; if there are no such proteins, the non-exclusive localizations are used instead (applies to the \samp{bud} localization).
+}
+
+\value{
+For \code{yeast.aa}, a data frame, or list of data frames, containing the amino acid composition(s) of the specified protein(s) in the format of \code{\link{thermo}$protein}.
+
+For \code{yeastgfp}, a list with elements named \code{protein} (names of proteins) and \code{abundance} (counts or concentrations without any conversion from the units in the data file).
+If \code{location} is NULL, \code{yeastgfp} returns the names of all known locations, and if the length of \code{location} is >1, the \code{protein} and \code{abundance} values are lists of the results for each location.
+}
+
+\seealso{
+\code{\link{demos}("yeastgfp")}
+}
+
+\examples{
+\dontshow{data(thermo)}
+# the first few names in UniProt for "aminotransferase yeast"
+genes <- c("AATC", "ARO8", "BCA1", "AMPL", "BCA2", "ARO9")
+# the corresponding ORF names
+ORF <- c("YLR027C", "YGL202W", "YHR208W", "YKL103C", "YJR148W", "YHR137W")
+# we only match two of them by gene name, but all by ORF name
+aa <- yeast.aa(genes)
+aa <- yeast.aa(ORF)
+# what are their formulas and average oxidation states of carbon
+protein.formula(aa)
+ZC(protein.formula(aa))
+
+## potential fields for overall protein compositions 
+## transcriptionally induced and repressed in aerobic
+## and anaerobic carbon limitation
+## (experiments of Tai et al., 2005)
+# the activities of ammonium and sulfate used here
+# are similar to the non-growth-limiting concentrations
+# used by Boer et al., 2003
+basis(c("glucose", "H2O", "NH4+", "hydrogen", "SO4-2", "H+"),
+  c(-1, 0, -1.3, 999, -1.4, -7))
+# the names of the experiments in TBD+05.csv
+expt <- c("Clim.aerobic.down", "Clim.aerobic.up",
+  "Clim.anaerobic.down", "Clim.anaerobic.up")
+file <- system.file("extdata/abundance/TBD+05.csv", package="CHNOSZ")
+dat <- read.csv(file, as.is=TRUE)
+# yeast.aa: get the amino acid compositions
+# aasum: average them together
+for(thisexpt in expt) {
+  p <- dat$protein[dat[, thisexpt]]
+  aa <- yeast.aa(p)
+  aa <- aasum(aa, average=TRUE, protein=thisexpt)
+  add.protein(aa)
+}
+species(expt, "Sce")
+a <- affinity(C6H12O6=c(-30, 0), H2=c(-20, 0))
+d <- diagram(a, normalize=TRUE, fill=NULL)
+title(main=paste("Formation potential of proteins associated with\n",
+  "transcriptional response to carbon limitation in yeast"))
+# the affinity of formation favors the proteins upregulated 
+# by carbon limitation at low chemical potentials of C6H12O6 ...
+stopifnot(c(d$predominant[1,1], d$predominant[1,128])==grep("up", expt))
+# ... and favors proteins downregulated by aerobic conditions
+# at high hydrogen fugacities
+stopifnot(c(d$predominant[128, 128], d$predominant[128, 1])==grep("down", expt))
+
+## overall oxidation state of proteins exclusively localized 
+## to cytoplasm of S. cerevisiae with/without abundance weighting
+y <- yeastgfp("cytoplasm")
+aa <- yeast.aa(y$protein)
+aaavg <- aasum(aa, average=TRUE)
+ZC(protein.formula(aaavg))
+# the average composition weighted by abundance
+waaavg <- aasum(aa, abundance=y$abundance, average=TRUE)
+ZC(protein.formula(waaavg))
+}
+
+\references{
+Boer, V. M., de Winde, J. H., Pronk, J. T. and Piper, M. D. W. (2003) The genome-wide transcriptional responses of \emph{Saccharomyces cerevisiae} grown on glucose in aerobic chemostat cultures limited for carbon, nitrogen, phosphorus, or sulfur. \emph{J. Biol. Chem.} \bold{278}, 3265--3274. \url{https://doi.org/10.1074/jbc.M209759200}
+
+Tai, S. L., Boer, V. M., Daran-Lapujade, P., Walsh, M. C., de Winde, J. H., Daran, J.-M. and Pronk, J. T. (2005) Two-dimensional transcriptome analysis in chemostat cultures: Combinatorial effects of oxygen availability and macronutrient limitation in \emph{Saccharomyces cerevisiae}. \emph{J. Biol. Chem.} \bold{280}, 437--447. \url{https://doi.org/10.1074/jbc.M410573200}
+}
+
+\concept{Protein thermodynamic modeling}

Deleted: pkg/CHNOSZ/man/yeast.aa.Rd
===================================================================
--- pkg/CHNOSZ/man/yeast.aa.Rd	2017-09-29 12:37:06 UTC (rev 230)
+++ pkg/CHNOSZ/man/yeast.aa.Rd	2017-09-30 02:19:19 UTC (rev 231)
@@ -1,47 +0,0 @@
-\encoding{UTF-8}
-\name{yeast.aa}
-\alias{yeast.aa}
-\title{Proteins from Model Organisms}
-\description{
-  Retrieve the amino acid compositions of one or more proteins from \emph{Saccharomyces cerevisiae}.
-}
-
-\usage{
-  yeast.aa(protein = NULL)
-}
-
-\arguments{
-  \item{protein}{character, name of protein}
-}
-
-\details{
-\code{yeast.aa} retrieves the amino acid composition(s) of the indicated proteins in \emph{Saccharomyces cerevisiae}.
-The calculation depends on the data file \code{\link{extdata}/protein/Sce.csv.xz}, which contains the amino acid compositions of the proteins.
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/chnosz -r 231


More information about the CHNOSZ-commits mailing list