[CHNOSZ-commits] r771 - in pkg/CHNOSZ: . R inst inst/tinytest man vignettes

Thu Mar 2 11:40:47 CET 2023

Author: jedick
Date: 2023-03-02 11:40:47 +0100 (Thu, 02 Mar 2023)
New Revision: 771

Modified:
   pkg/CHNOSZ/DESCRIPTION
   pkg/CHNOSZ/NAMESPACE
   pkg/CHNOSZ/R/util.fasta.R
   pkg/CHNOSZ/R/util.seq.R
   pkg/CHNOSZ/inst/CHECKLIST
   pkg/CHNOSZ/inst/NEWS.Rd
   pkg/CHNOSZ/inst/tinytest/test-util.seq.R
   pkg/CHNOSZ/man/add.protein.Rd
   pkg/CHNOSZ/man/util.fasta.Rd
   pkg/CHNOSZ/man/util.seq.Rd
   pkg/CHNOSZ/vignettes/anintro.Rmd
Log:
Remove uniprot.aa(), nucleic.formula(), and nucleic.complement()


Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================

--- pkg/CHNOSZ/DESCRIPTION	2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/DESCRIPTION	2023-03-02 10:40:47 UTC (rev 771)
@@ -1,6 +1,6 @@
 Date: 2023-03-02
 Package: CHNOSZ
-Version: 1.9.9-62
+Version: 1.9.9-63
 Title: Thermodynamic Calculations and Diagrams for Geochemistry
 Authors@R: c(
     person("Jeffrey", "Dick", , "j3ffdick at gmail.com", role = c("aut", "cre"),

Modified: pkg/CHNOSZ/NAMESPACE
===================================================================
--- pkg/CHNOSZ/NAMESPACE	2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/NAMESPACE	2023-03-02 10:40:47 UTC (rev 771)
@@ -20,7 +20,7 @@
   "mass", "entropy", "GHS", "water",
   "i2A",
   "dPdTtr", "Ttr",
-  "count.aa", "nucleic.complement", "nucleic.formula",
+  "count.aa",
   "rho.IAPWS95", "IAPWS95", "water.AW90", "WP02.auxiliary", "water.IAPWS95",
   "getrank", "parent", "sciname", "allparents", "getnodes", "getnames",
   "protein.OBIGT", "which.pmax",
@@ -41,7 +41,7 @@
   "checkEOS", "checkGHS", "check.OBIGT",
   "basis.elements", "element.mu", "ibasis",
   "water.SUPCRT92",
-  "nonideal", "uniprot.aa",
+  "nonideal",
 # added 20170301 or later
   "GHS_Tr", "calculateDensity", "calculateGibbsOfWater",
   "calculateEpsilon", "calculateQ", "water.DEW", "Berman",

Modified: pkg/CHNOSZ/R/util.fasta.R
===================================================================
--- pkg/CHNOSZ/R/util.fasta.R	2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/R/util.fasta.R	2023-03-02 10:40:47 UTC (rev 771)
@@ -105,50 +105,6 @@
   } else return(sequences)
 }
 
-uniprot.aa <- function(protein, start=NULL, stop=NULL) {
-  # Download protein sequence information from UniProt
-  iprotein <- numeric()
-  # Construct the initial URL
-  proteinURL <- paste("https://www.uniprot.org/uniprot/", protein, sep="")
-  message("uniprot.aa: trying ", proteinURL, " ...", appendLF=FALSE)
-  # Try loading the URL, hiding any warnings
-  oldopt <- options(warn=-1)
-  URLstuff <- try(readLines(proteinURL),TRUE)
-  options(oldopt)
-  if(inherits(URLstuff, "try-error")) {
-    message(" ::: FAILED :::")
-    return(NA)
-  }
-  # 20091102: Look for a link to a fasta file
-  link <- grep("/uniprot/.*fasta", URLstuff)
-  if(length(link) > 0) linkline <- URLstuff[[link[1]]]
-  else {
-    message(" ::: FAILED :::")
-    return(NA)
-  }
-  # Extract accession number from the link
-  linkhead <- strsplit(linkline, ".fasta", fixed=TRUE)[[1]][1]
-  accession.number <- tail(strsplit(linkhead, "/uniprot/", fixed=TRUE)[[1]], 1)
-  message(" accession ", accession.number, " ...")
-  # Now download the fasta file
-  fastaURL <- paste("https://www.uniprot.org/uniprot/", accession.number, ".fasta", sep="")
-  URLstuff <- readLines(fastaURL)
-  # Get the header information / show  the user
-  header <- URLstuff[[1]]
-  header3 <- strsplit(header, "|", fixed=TRUE)[[1]][3]
-  headerP_O <- strsplit(header3, " ")[[1]][1]
-  header.id <- strsplit(header, headerP_O)[[1]][1]
-  header.id <- substr(header.id, 2, nchar(header.id)-1)
-  header.organism <- strsplit(headerP_O, "_")[[1]][2]
-  message(paste0(header), appendLF=FALSE)
-  # 20130206 Use read.fasta with lines, start, stop arguments
-  aa <- read.fasta(file="", lines=URLstuff, start=start, stop=stop)
-  message(" (length ", sum(aa[1, 6:25]), ")", sep="")
-  aa$protein <- header.id
-  aa$organism <- header.organism
-  return(aa)
-}
-
 count.aa <- function(seq, start=NULL, stop=NULL, type="protein") {
   # Count amino acids or DNA bases in one or more sequences given as elements of the list seq
   if(type=="protein") letts <- aminoacids(1)

Modified: pkg/CHNOSZ/R/util.seq.R
===================================================================
--- pkg/CHNOSZ/R/util.seq.R	2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/R/util.seq.R	2023-03-02 10:40:47 UTC (rev 771)
@@ -31,45 +31,3 @@
   else if(nchar=="Z") return(aacharged[iaa])
 }
 
-nucleic.formula <- function(nucleic=NULL) {
-  # Compute the formula, e.g.
-  # DNA <- count.aa(list("AGCT", "TTTT"), type="DNA")  # a dataframe of counts
-  # nf <- nucleic.formula(DNA)  # a series of formulas
-  # FIXME: This only adds the formulas of the nucleobases; dehydration and phosphorylation are not yet accounted for!
-  # 20090926 jmd
-  letts <- c("A", "C", "G", "T", "U")
-  names <- c("adenine", "cytosine", "guanine", "thymine", "uracil")
-  # The locations of the letters in the data frame
-  i.lett <- match(letts, colnames(nucleic))
-  # We'll normally have at least one NA (U or A for DNA or RNA)
-  ina <- is.na(i.lett)
-  # The species indices of the bases, in the order appearing above
-  i.base <- suppressMessages(info(names[!ina]))
-  # The chemical formula of bases
-  f.base <- get("thermo", CHNOSZ)$OBIGT$formula[i.base]
-  # Loop over the base counts
-  f.out <- character()
-  for(i in 1:nrow(nucleic)) {
-    # Use makeup() with multipliers and sum=TRUE  20120119 jmd
-    f <- as.chemical.formula(makeup(f.base, multiplier=as.numeric(nucleic[i, i.lett[!ina]]), sum=TRUE))
-    f.out <- c(f.out, f)
-  }
-  return(f.out)
-}
-
-nucleic.complement <- function(nucleic=NULL, type="DNA") {
-  # Return the nucleobase complement
-  # nucleic.complement(nucleic, "DNA")  # DNA complement
-  # nucleic.complement(nucleic, "RNA")  # RNA complement
-  # The reference sequence, and its DNA and RNA complements
-  ref <- c("A", "C", "G", "T", "U")
-  DNA <- c("T", "G", "C", "A", "A")
-  RNA <- c("U", "G", "C", "A", "A")
-  iref <- match(colnames(nucleic), ref)
-  i.base <- which(!is.na(iref))
-  colnames(nucleic)[i.base] <- get(type)[iref[i.base]]
-  # Be nice and re-alphabetize the columns
-  o.base <- order(colnames(nucleic)[i.base])
-  nucleic <- nucleic[, i.base[o.base], drop=FALSE]
-  return(nucleic)
-}

Modified: pkg/CHNOSZ/inst/CHECKLIST
===================================================================
--- pkg/CHNOSZ/inst/CHECKLIST	2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/inst/CHECKLIST	2023-03-02 10:40:47 UTC (rev 771)
@@ -1,6 +1,6 @@
 ****************************
 Release checklist for CHNOSZ
-    (updated 2022-02-08)
+    (updated 2023-03-02)
 ****************************
 
 - Run examples() and demos() and inspect their output (especially plots)
@@ -8,8 +8,6 @@
 - Run the package tests with:
   suppressMessages(tinytest::test_package("CHNOSZ", at_home = TRUE))
 
-- Check that uniprot.aa() works with current UniProt web pages
-
 - Check reverse dependencies on CRAN: canprot, accucor, ecipex, iemisc, LipidMS as of 2019-08-02
 
 **********************

Modified: pkg/CHNOSZ/inst/NEWS.Rd
===================================================================
--- pkg/CHNOSZ/inst/NEWS.Rd	2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/inst/NEWS.Rd	2023-03-02 10:40:47 UTC (rev 771)
@@ -12,7 +12,7 @@
 % links to vignettes 20220723
 \newcommand{\viglink}{\ifelse{html}{\out{<a href="../CHNOSZ/doc/#1.html"><strong>#1.Rmd</strong></a>}}{\bold{#1.Rmd}}}
 
-\section{Changes in CHNOSZ version 1.9.9-57 (2023-02-15)}{
+\section{Changes in CHNOSZ version 1.9.9-63 (2023-03-02)}{
 
   \subsection{MAJOR USER-VISIBLE CHANGES}{
     \itemize{
@@ -161,14 +161,12 @@
       \code{subcrt()} should be used for all calculations of thermodynamic
       properties.
 
-      \item \code{EOSregress()} and the associated demo and vignette have been
-      removed.
+      \item Remove \code{EOSregress()} and the associated demo and vignette.
 
-      \item Remove parallel calculations in \code{read.fasta()} and
-      \code{count.aa()} (they just made things slower in my tests).
+      \item Remove parallel calculations in \code{read.fasta()} and \code{count.aa()}.
 
-      \item Remove \code{revisit()} and \code{findit()} (functions for
-      \dQuote{computations on chemical activities} in the extended workflow).
+      \item Remove \code{revisit()}, \code{findit()}, \code{uniprot.aa()},
+      \code{nucleic.formula()}, and \code{nucleic.complement()}.
 
     }
   }

Modified: pkg/CHNOSZ/inst/tinytest/test-util.seq.R
===================================================================
--- pkg/CHNOSZ/inst/tinytest/test-util.seq.R	2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/inst/tinytest/test-util.seq.R	2023-03-02 10:40:47 UTC (rev 771)
@@ -8,14 +8,9 @@
 expect_equal(count.aa(myseq, start = 6)[, "A"], 0, info = info)
 expect_equal(count.aa(myseq, start = 5, stop = 6)[, c("A", "G")], c(1, 1), check.attributes = FALSE, info = info)
 
-info <- "Nucleobase sequences can be processed with count.aa(), nucleic.formula() and nucleic.complement()"
+info <- "Nucleobase sequences can be processed with count.aa()"
 expect_message(dna <- count.aa("ABCDEFGHIJ", type = "DNA"), "count.aa: unrecognized letter\\(s\\) in DNA sequence: B D E F H I J", info = info)
 expect_equal(as.numeric(dna), c(1, 1, 1, 0), info = info)
-expect_equal(nucleic.formula(dna), "C14H15N13O2", info = info)
-# Nucleobases can be in any order
-expect_equal(nucleic.formula(dna[, 4:1, drop = FALSE]), "C14H15N13O2", info = info)
-# ACG -> UGC (RNA complement)
-expect_equal(nucleic.formula(nucleic.complement(dna, "RNA")), "C13H14N10O4", info = info)
 
 info <- "count.aa() correctly processes a longer nucleobase sequence"
 seq <- "ATGTCCCGTTTCTTAGTTGCATTGGTTGCCGCACTTTTAGGAGTTGCAATTGAGATGTCCCTTCTCGTTCGCGCTCAGGGGCAGCAAACCTTGCTTTTGGCTGAAGAAAGCAAGCATTTGTCGCAATTGCGTCAACTGACTTTTGAAGGCACCAATGCCGAAGCGTATTGGTCGCCTGACGGGAAATGGTTGGTCTTTCAATCCACACGCCCACCTTACAAGGCTGACCAAATCTTCATCATGAGAGCGGATGGCTCGGGAGTTCGTGTCGTCAGCACGGGCAAAGGTCGTTGCACTTGTGCCTATTTCACGCCAGATGGCAAAGGCGTTATCTTTGCTACGACCCACCTTGCTGGACCAGAACCGCCGCAAGTGCCCAAACTGGACATTCCACGCTATGTTTGGGGCGTGTTCCCAAGTTACGAACTTTACCTGCGGCGTTTGGACACGATGGAACTTATCCGCTTGACCGATAACGAAGGCTACGACGCTGAAGCGACCATTTGCTGGAAGACTGGGCGAATTGTCTTCACAAGTTACCGCAATGGCGACCTTGACCTTTACAGCATGAAATTAGACGGCAGCGATTTGAAGCGATTGACGAAAACCATCGGCTACGAGGGCGGAGCGTTCTACTCGCCCGACGGGAAGCGGATTGTCTTCCGAGCCTATTTGCCAAAGACGCCTGACGAAATTGACGAATACAAGCGGTTGCTCCAGTTAGGCGTCATAAGCCCACCAAAGATGGAGTGGGTCGTCATGGACGCCGACGGTCGCAACATGAAGCAAATC"

Modified: pkg/CHNOSZ/man/add.protein.Rd
===================================================================
--- pkg/CHNOSZ/man/add.protein.Rd	2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/man/add.protein.Rd	2023-03-02 10:40:47 UTC (rev 771)
@@ -73,7 +73,7 @@
 }
 
 \seealso{
-\code{\link{read.fasta}} and \code{\link{uniprot.aa}} for other ways of getting amino acid compositions that can be used with \code{add.protein}.
+\code{\link{read.fasta}} for another way of getting amino acid compositions that can be used with \code{add.protein}.
 
 \code{\link{pinfo}} for protein-level functions (length, chemical formulas, reaction coefficients of basis species).
 }

Modified: pkg/CHNOSZ/man/util.fasta.Rd
===================================================================
--- pkg/CHNOSZ/man/util.fasta.Rd	2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/man/util.fasta.Rd	2023-03-02 10:40:47 UTC (rev 771)
@@ -2,7 +2,6 @@
 \name{util.fasta}
 \alias{util.fasta}
 \alias{read.fasta}
-\alias{uniprot.aa}
 \alias{count.aa}
 \title{Functions for Reading FASTA Files and Downloading from UniProt}
 
@@ -14,7 +13,6 @@
   read.fasta(file, iseq = NULL, ret = "count", lines = NULL, 
     ihead = NULL, start=NULL, stop=NULL, type="protein", id = NULL)
   count.aa(seq, start=NULL, stop=NULL, type="protein")
-  uniprot.aa(protein, start=NULL, stop=NULL)
 }
 
 \arguments{
@@ -28,7 +26,6 @@
   \item{type}{character, sequence type (protein or DNA)}
   \item{id}{character, value to be used for \code{protein} in output table}
   \item{seq}{character, amino acid sequence of a protein}
-  \item{protein}{character, entry name for protein in UniProt}
 }
 
 \details{
@@ -47,10 +44,6 @@
 A warning is generated if any character in \code{seq}, excluding spaces, is not one of the single-letter amino acid or nucleobase abbreviations.
 \code{start} and/or \code{stop} can be provided to count a fragment of the sequence (extracted using \code{\link{substr}}).
 If only one of \code{start} or \code{stop} is present, the other defaults to 1 (\code{start}) or the length of the sequence (\code{stop}).
-
-\code{uniprot.aa} returns a data frame of amino acid composition, in the format of \code{thermo()$protein}, retrieved from the protein sequence if it is available from UniProt (\url{https://www.uniprot.org/}).
-The \code{protein} argument corresponds to the \samp{Entry name} on the UniProt search pages.
-
 }
 
 \value{
@@ -59,7 +52,6 @@
 
 \seealso{
 \code{\link{seq2aa}}, like \code{count.aa}, counts amino acids in a user-input sequence, but returns a data frame in the format of \code{thermo()$protein}.
-\code{\link{nucleic.formula}} for an example of counting nucleobases in a DNA sequence.
 }
 
 \examples{\dontshow{reset()}
@@ -74,23 +66,6 @@
 protein.length(aa)
 
 \dontrun{
-# download amino acid composition of a protein
-# start at position 2 to remove the initiator methionine
-aa <- uniprot.aa("ALAT1_HUMAN", start=2)
-# change the name from "sp|P24298" to "ALAT1" 20201110
-aa$protein <- "ALAT1"
-# add it to thermo()$protein
-ip <- add.protein(aa)
-# now it's possible to calculate some properties
-protein.length(ip)
-protein.formula(ip)
-subcrt("ALAT1_HUMAN", c("cr", "aq"), c(-1, 1))
-# the amino acid composition can be saved for future use
-write.csv(aa, "saved.aa.csv", row.names=FALSE)
-# in another R session, the protein can be loaded without using uniprot.aa()
-aa <- read.csv("saved.aa.csv", as.is=TRUE)
-add.protein(aa)
-
 ## count amino acids in a sequence
 count.aa("GGSGG")
 # warnings are issued for unrecognized characters

Modified: pkg/CHNOSZ/man/util.seq.Rd
===================================================================
--- pkg/CHNOSZ/man/util.seq.Rd	2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/man/util.seq.Rd	2023-03-02 10:40:47 UTC (rev 771)
@@ -2,26 +2,20 @@
 \name{util.seq}
 \alias{util.seq}
 \alias{aminoacids}
-\alias{nucleic.formula}
-\alias{nucleic.complement}
 
 \title{Functions to Work with Sequence Data}
 
 \description{
-  Return one- or three-letter abbreviations of amino acids; count nucleotides in nucleic acid sequences, calculate DNA and RNA complements of nucleic acid sequences.
+  Return names or one- or three-letter abbreviations of amino acids.
 }
 
 \usage{
-  aminoacids(nchar=1, which=NULL)
-  nucleic.formula(nucleic = NULL)
-  nucleic.complement(nucleic = NULL, type="DNA")
+  aminoacids(nchar = 1, which = NULL)
 }
 
 \arguments{
   \item{nchar}{numeric, \eqn{1} to return one-letter, \eqn{3} to return three-letter abbreviations for amino acids}
   \item{which}{character, which amino acids to name}
-  \item{nucleic}{data frame, counts of nucleic-acid bases}
-  \item{type}{character, target type of nucleic acid (DNA or RNA)}
 }
 
 \details{
@@ -29,13 +23,6 @@
 \code{aminoacids} returns the one-letter abbreviations (\code{nchar}=\samp{1}) or the three-letter abbreviations (\code{nchar}=\samp{3}) or the names of the neutral amino acids (\code{nchar}=\samp{""}) or the names of the amino acids with ionized side chains (\code{nchar}=\samp{"Z"}).
 The output includes 20 amino acids in alphabetic order by 1-letter abbreviation (the order used in \code{thermo()$protein}), unless \code{which} is provided, indicating the desired amino acids (either as 1- or 3-letter abbreviations or names of the neutral amino acids).
 
-
-\code{nucleic.formula} returns a string representation of the chemical formula for each nucleic-acid composition contained in \code{nucleic}.
-The names of the bases are indicated by the column names of \code{nucleic}.
-At present, the formula is computed as the sum of the chemical formulas of the bases themselves, with no contribution from polymerization (dehydration) or phosphorylation.
-
-\code{nucleic.complement} calculates the complement of the base composition given in \code{nucleic}. 
-\code{type} specifies the type of nucleic acid of the complement - \samp{DNA} (A, G, C, T) or \samp{RNA} (A, G, C, U).
 }
 
 \seealso{\code{\link{count.aa}} for counting amino acids or nucleic-acid bases in a sequence; \code{\link{protein.formula}} for calculating the chemical formulas of proteins.}
@@ -42,13 +29,7 @@
 
 \examples{\dontshow{reset()}
 ## count nucleobases in a sequence
-bases <- count.aa("ACCGGGTTT", type="DNA")
-# the DNA complement of that sequence
-DNA.comp <- nucleic.complement(bases)
-# the RNA complement of the DNA complement
-RNA.comp <- nucleic.complement(DNA.comp, type="RNA")
-# the formula of the RNA complement (bases only)
-nucleic.formula(RNA.comp)  # C40H42N32O11
+bases <- count.aa("ACCGGGTTT", type = "DNA")
 }
 
 \concept{Protein properties}

Modified: pkg/CHNOSZ/vignettes/anintro.Rmd
===================================================================
--- pkg/CHNOSZ/vignettes/anintro.Rmd	2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/vignettes/anintro.Rmd	2023-03-02 10:40:47 UTC (rev 771)
@@ -1444,67 +1444,10 @@
 ILLISFLIFLIVG
 ", "PRIO_HUMAN")
 ```
-<span style="color:green">`uniprot.aa()`</span> returns the amino acid composition of a single amino acid sequence downloaded from UniProt.
-To get sequences for many proteins, use R's `lapply()`, `do.call()`, and `rbind()`:
-```{r uniprot_aa, eval=FALSE}
-IDs <- c("ALAT1_HUMAN", "P02452")
-aa <- lapply(IDs, uniprot.aa)
-## uniprot.aa: trying http://www.uniprot.org/uniprot/ALAT1_HUMAN ... accession P24298 ...
-## >sp|P24298|ALAT1_HUMAN Alanine aminotransferase 1 OS=Homo sapiens GN=GPT PE=1 SV=3 (length 496)
-## uniprot.aa: trying http://www.uniprot.org/uniprot/P02452 ... accession P02452 ...
-## >sp|P02452|CO1A1_HUMAN Collagen alpha-1(I) chain OS=Homo sapiens GN=COL1A1 PE=1 SV=5 (length 1464)
-aa_UniProt <- do.call(rbind, aa)
-```
 
-```{r uniprot_aa_offline, echo=FALSE}
-aa_ALAT1 <- seq2aa("
-MASSTGDRSQAVRHGLRAKVLTLDGMNPRVRRVEYAVRGPIVQRALELEQELRQGVKKPF
-TEVIRANIGDAQAMGQRPITFLRQVLALCVNPDLLSSPNFPDDAKKRAERILQACGGHSL
-GAYSVSSGIQLIREDVARYIERRDGGIPADPNNVFLSTGASDAIVTVLKLLVAGEGHTRT
-GVLIPIPQYPLYSATLAELGAVQVDYYLDEERAWALDVAELHRALGQARDHCRPRALCVI
-NPGNPTGQVQTRECIEAVIRFAFEERLFLLADEVYQDNVYAAGSQFHSFKKVLMEMGPPY
-AGQQELASFHSTSKGYMGECGFRGGYVEVVNMDAAVQQQMLKLMSVRLCPPVPGQALLDL
-VVSPPAPTDPSFAQFQAEKQAVLAELAAKAKLTEQVFNEAPGISCNPVQGAMYSFPRVQL
-PPRAVERAQELGLAPDMFFCLRLLEETGICVVPGSGFGQREGTYHFRMTILPPLEKLRLL
-LEKLSRFHAKFTLEYS
-", "ALAT1_HUMAN")
-aa_CO1A1 <- seq2aa("
-MFSFVDLRLLLLLAATALLTHGQEEGQVEGQDEDIPPITCVQNGLRYHDRDVWKPEPCRI
-CVCDNGKVLCDDVICDETKNCPGAEVPEGECCPVCPDGSESPTDQETTGVEGPKGDTGPR
-GPRGPAGPPGRDGIPGQPGLPGPPGPPGPPGPPGLGGNFAPQLSYGYDEKSTGGISVPGP
-MGPSGPRGLPGPPGAPGPQGFQGPPGEPGEPGASGPMGPRGPPGPPGKNGDDGEAGKPGR
-PGERGPPGPQGARGLPGTAGLPGMKGHRGFSGLDGAKGDAGPAGPKGEPGSPGENGAPGQ
-MGPRGLPGERGRPGAPGPAGARGNDGATGAAGPPGPTGPAGPPGFPGAVGAKGEAGPQGP
-RGSEGPQGVRGEPGPPGPAGAAGPAGNPGADGQPGAKGANGAPGIAGAPGFPGARGPSGP
-QGPGGPPGPKGNSGEPGAPGSKGDTGAKGEPGPVGVQGPPGPAGEEGKRGARGEPGPTGL
-PGPPGERGGPGSRGFPGADGVAGPKGPAGERGSPGPAGPKGSPGEAGRPGEAGLPGAKGL
-TGSPGSPGPDGKTGPPGPAGQDGRPGPPGPPGARGQAGVMGFPGPKGAAGEPGKAGERGV
-PGPPGAVGPAGKDGEAGAQGPPGPAGPAGERGEQGPAGSPGFQGLPGPAGPPGEAGKPGE
-QGVPGDLGAPGPSGARGERGFPGERGVQGPPGPAGPRGANGAPGNDGAKGDAGAPGAPGS
-QGAPGLQGMPGERGAAGLPGPKGDRGDAGPKGADGSPGKDGVRGLTGPIGPPGPAGAPGD
-KGESGPSGPAGPTGARGAPGDRGEPGPPGPAGFAGPPGADGQPGAKGEPGDAGAKGDAGP
-PGPAGPAGPPGPIGNVGAPGAKGARGSAGPPGATGFPGAAGRVGPPGPSGNAGPPGPPGP
-AGKEGGKGPRGETGPAGRPGEVGPPGPPGPAGEKGSPGADGPAGAPGTPGPQGIAGQRGV
-VGLPGQRGERGFPGLPGPSGEPGKQGPSGASGERGPPGPMGPPGLAGPPGESGREGAPGA
-EGSPGRDGSPGAKGDRGETGPAGPPGAPGAPGAPGPVGPAGKSGDRGETGPAGPTGPVGP
-VGARGPAGPQGPRGDKGETGEQGDRGIKGHRGFSGLQGPPGPPGSPGEQGPSGASGPAGP
-RGPPGSAGAPGKDGLNGLPGPIGPPGPRGRTGDAGPVGPPGPPGPPGPPGPPSAGFDFSF
-LPQPPQEKAHDGGRYYRADDANVVRDRDLEVDTTLKSLSQQIENIRSPEGSRKNPARTCR
-DLKMCHSDWKSGEYWIDPNQGCNLDAIKVFCNMETGETCVYPTQPSVAQKNWYISKNPKD
-KRHVWFGESMTDGFQFEYGGQGSDPADVAIQLTFLRLMSTEASQNITYHCKNSVAYMDQQ
-TGNLKKALLLQGSNEIEIRAEGNSRFTYSVTVDGCTSHTGAWGKTVIEYKTTKTSRLPII
-DVAPLDVGAPDQEFGFDVGPVCFL
-", "CO1A1_HUMAN")
-aa_UniProt <- rbind(aa_ALAT1, aa_CO1A1)
-aa_UniProt$abbrv <- c("ALAT1", "CO1A1")
-```
-```{r aa_UniProt, cache=TRUE}
-aa_UniProt
-```
-
 These amino acid compositions can be processed using functions such as <span style="color:green">`protein.length()`</span> and <span style="color:green">`protein.formula()`</span>:
 ```{r protein_length}
-myaa <- rbind(aa_Ef, aa_PRIO, aa_ALAT1)
+myaa <- rbind(aa_Ef, aa_PRIO)
 protein.length(myaa)
 ```