[CHNOSZ-commits] r771 - in pkg/CHNOSZ: . R inst inst/tinytest man vignettes
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Mar 2 11:40:47 CET 2023
Author: jedick
Date: 2023-03-02 11:40:47 +0100 (Thu, 02 Mar 2023)
New Revision: 771
Modified:
pkg/CHNOSZ/DESCRIPTION
pkg/CHNOSZ/NAMESPACE
pkg/CHNOSZ/R/util.fasta.R
pkg/CHNOSZ/R/util.seq.R
pkg/CHNOSZ/inst/CHECKLIST
pkg/CHNOSZ/inst/NEWS.Rd
pkg/CHNOSZ/inst/tinytest/test-util.seq.R
pkg/CHNOSZ/man/add.protein.Rd
pkg/CHNOSZ/man/util.fasta.Rd
pkg/CHNOSZ/man/util.seq.Rd
pkg/CHNOSZ/vignettes/anintro.Rmd
Log:
Remove uniprot.aa(), nucleic.formula(), and nucleic.complement()
Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================
--- pkg/CHNOSZ/DESCRIPTION 2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/DESCRIPTION 2023-03-02 10:40:47 UTC (rev 771)
@@ -1,6 +1,6 @@
Date: 2023-03-02
Package: CHNOSZ
-Version: 1.9.9-62
+Version: 1.9.9-63
Title: Thermodynamic Calculations and Diagrams for Geochemistry
Authors@R: c(
person("Jeffrey", "Dick", , "j3ffdick at gmail.com", role = c("aut", "cre"),
Modified: pkg/CHNOSZ/NAMESPACE
===================================================================
--- pkg/CHNOSZ/NAMESPACE 2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/NAMESPACE 2023-03-02 10:40:47 UTC (rev 771)
@@ -20,7 +20,7 @@
"mass", "entropy", "GHS", "water",
"i2A",
"dPdTtr", "Ttr",
- "count.aa", "nucleic.complement", "nucleic.formula",
+ "count.aa",
"rho.IAPWS95", "IAPWS95", "water.AW90", "WP02.auxiliary", "water.IAPWS95",
"getrank", "parent", "sciname", "allparents", "getnodes", "getnames",
"protein.OBIGT", "which.pmax",
@@ -41,7 +41,7 @@
"checkEOS", "checkGHS", "check.OBIGT",
"basis.elements", "element.mu", "ibasis",
"water.SUPCRT92",
- "nonideal", "uniprot.aa",
+ "nonideal",
# added 20170301 or later
"GHS_Tr", "calculateDensity", "calculateGibbsOfWater",
"calculateEpsilon", "calculateQ", "water.DEW", "Berman",
Modified: pkg/CHNOSZ/R/util.fasta.R
===================================================================
--- pkg/CHNOSZ/R/util.fasta.R 2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/R/util.fasta.R 2023-03-02 10:40:47 UTC (rev 771)
@@ -105,50 +105,6 @@
} else return(sequences)
}
-uniprot.aa <- function(protein, start=NULL, stop=NULL) {
- # Download protein sequence information from UniProt
- iprotein <- numeric()
- # Construct the initial URL
- proteinURL <- paste("https://www.uniprot.org/uniprot/", protein, sep="")
- message("uniprot.aa: trying ", proteinURL, " ...", appendLF=FALSE)
- # Try loading the URL, hiding any warnings
- oldopt <- options(warn=-1)
- URLstuff <- try(readLines(proteinURL),TRUE)
- options(oldopt)
- if(inherits(URLstuff, "try-error")) {
- message(" ::: FAILED :::")
- return(NA)
- }
- # 20091102: Look for a link to a fasta file
- link <- grep("/uniprot/.*fasta", URLstuff)
- if(length(link) > 0) linkline <- URLstuff[[link[1]]]
- else {
- message(" ::: FAILED :::")
- return(NA)
- }
- # Extract accession number from the link
- linkhead <- strsplit(linkline, ".fasta", fixed=TRUE)[[1]][1]
- accession.number <- tail(strsplit(linkhead, "/uniprot/", fixed=TRUE)[[1]], 1)
- message(" accession ", accession.number, " ...")
- # Now download the fasta file
- fastaURL <- paste("https://www.uniprot.org/uniprot/", accession.number, ".fasta", sep="")
- URLstuff <- readLines(fastaURL)
- # Get the header information / show the user
- header <- URLstuff[[1]]
- header3 <- strsplit(header, "|", fixed=TRUE)[[1]][3]
- headerP_O <- strsplit(header3, " ")[[1]][1]
- header.id <- strsplit(header, headerP_O)[[1]][1]
- header.id <- substr(header.id, 2, nchar(header.id)-1)
- header.organism <- strsplit(headerP_O, "_")[[1]][2]
- message(paste0(header), appendLF=FALSE)
- # 20130206 Use read.fasta with lines, start, stop arguments
- aa <- read.fasta(file="", lines=URLstuff, start=start, stop=stop)
- message(" (length ", sum(aa[1, 6:25]), ")", sep="")
- aa$protein <- header.id
- aa$organism <- header.organism
- return(aa)
-}
-
count.aa <- function(seq, start=NULL, stop=NULL, type="protein") {
# Count amino acids or DNA bases in one or more sequences given as elements of the list seq
if(type=="protein") letts <- aminoacids(1)
Modified: pkg/CHNOSZ/R/util.seq.R
===================================================================
--- pkg/CHNOSZ/R/util.seq.R 2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/R/util.seq.R 2023-03-02 10:40:47 UTC (rev 771)
@@ -31,45 +31,3 @@
else if(nchar=="Z") return(aacharged[iaa])
}
-nucleic.formula <- function(nucleic=NULL) {
- # Compute the formula, e.g.
- # DNA <- count.aa(list("AGCT", "TTTT"), type="DNA") # a dataframe of counts
- # nf <- nucleic.formula(DNA) # a series of formulas
- # FIXME: This only adds the formulas of the nucleobases; dehydration and phosphorylation are not yet accounted for!
- # 20090926 jmd
- letts <- c("A", "C", "G", "T", "U")
- names <- c("adenine", "cytosine", "guanine", "thymine", "uracil")
- # The locations of the letters in the data frame
- i.lett <- match(letts, colnames(nucleic))
- # We'll normally have at least one NA (U or A for DNA or RNA)
- ina <- is.na(i.lett)
- # The species indices of the bases, in the order appearing above
- i.base <- suppressMessages(info(names[!ina]))
- # The chemical formula of bases
- f.base <- get("thermo", CHNOSZ)$OBIGT$formula[i.base]
- # Loop over the base counts
- f.out <- character()
- for(i in 1:nrow(nucleic)) {
- # Use makeup() with multipliers and sum=TRUE 20120119 jmd
- f <- as.chemical.formula(makeup(f.base, multiplier=as.numeric(nucleic[i, i.lett[!ina]]), sum=TRUE))
- f.out <- c(f.out, f)
- }
- return(f.out)
-}
-
-nucleic.complement <- function(nucleic=NULL, type="DNA") {
- # Return the nucleobase complement
- # nucleic.complement(nucleic, "DNA") # DNA complement
- # nucleic.complement(nucleic, "RNA") # RNA complement
- # The reference sequence, and its DNA and RNA complements
- ref <- c("A", "C", "G", "T", "U")
- DNA <- c("T", "G", "C", "A", "A")
- RNA <- c("U", "G", "C", "A", "A")
- iref <- match(colnames(nucleic), ref)
- i.base <- which(!is.na(iref))
- colnames(nucleic)[i.base] <- get(type)[iref[i.base]]
- # Be nice and re-alphabetize the columns
- o.base <- order(colnames(nucleic)[i.base])
- nucleic <- nucleic[, i.base[o.base], drop=FALSE]
- return(nucleic)
-}
Modified: pkg/CHNOSZ/inst/CHECKLIST
===================================================================
--- pkg/CHNOSZ/inst/CHECKLIST 2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/inst/CHECKLIST 2023-03-02 10:40:47 UTC (rev 771)
@@ -1,6 +1,6 @@
****************************
Release checklist for CHNOSZ
- (updated 2022-02-08)
+ (updated 2023-03-02)
****************************
- Run examples() and demos() and inspect their output (especially plots)
@@ -8,8 +8,6 @@
- Run the package tests with:
suppressMessages(tinytest::test_package("CHNOSZ", at_home = TRUE))
-- Check that uniprot.aa() works with current UniProt web pages
-
- Check reverse dependencies on CRAN: canprot, accucor, ecipex, iemisc, LipidMS as of 2019-08-02
**********************
Modified: pkg/CHNOSZ/inst/NEWS.Rd
===================================================================
--- pkg/CHNOSZ/inst/NEWS.Rd 2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/inst/NEWS.Rd 2023-03-02 10:40:47 UTC (rev 771)
@@ -12,7 +12,7 @@
% links to vignettes 20220723
\newcommand{\viglink}{\ifelse{html}{\out{<a href="../CHNOSZ/doc/#1.html"><strong>#1.Rmd</strong></a>}}{\bold{#1.Rmd}}}
-\section{Changes in CHNOSZ version 1.9.9-57 (2023-02-15)}{
+\section{Changes in CHNOSZ version 1.9.9-63 (2023-03-02)}{
\subsection{MAJOR USER-VISIBLE CHANGES}{
\itemize{
@@ -161,14 +161,12 @@
\code{subcrt()} should be used for all calculations of thermodynamic
properties.
- \item \code{EOSregress()} and the associated demo and vignette have been
- removed.
+ \item Remove \code{EOSregress()} and the associated demo and vignette.
- \item Remove parallel calculations in \code{read.fasta()} and
- \code{count.aa()} (they just made things slower in my tests).
+ \item Remove parallel calculations in \code{read.fasta()} and \code{count.aa()}.
- \item Remove \code{revisit()} and \code{findit()} (functions for
- \dQuote{computations on chemical activities} in the extended workflow).
+ \item Remove \code{revisit()}, \code{findit()}, \code{uniprot.aa()},
+ \code{nucleic.formula()}, and \code{nucleic.complement()}.
}
}
Modified: pkg/CHNOSZ/inst/tinytest/test-util.seq.R
===================================================================
--- pkg/CHNOSZ/inst/tinytest/test-util.seq.R 2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/inst/tinytest/test-util.seq.R 2023-03-02 10:40:47 UTC (rev 771)
@@ -8,14 +8,9 @@
expect_equal(count.aa(myseq, start = 6)[, "A"], 0, info = info)
expect_equal(count.aa(myseq, start = 5, stop = 6)[, c("A", "G")], c(1, 1), check.attributes = FALSE, info = info)
-info <- "Nucleobase sequences can be processed with count.aa(), nucleic.formula() and nucleic.complement()"
+info <- "Nucleobase sequences can be processed with count.aa()"
expect_message(dna <- count.aa("ABCDEFGHIJ", type = "DNA"), "count.aa: unrecognized letter\\(s\\) in DNA sequence: B D E F H I J", info = info)
expect_equal(as.numeric(dna), c(1, 1, 1, 0), info = info)
-expect_equal(nucleic.formula(dna), "C14H15N13O2", info = info)
-# Nucleobases can be in any order
-expect_equal(nucleic.formula(dna[, 4:1, drop = FALSE]), "C14H15N13O2", info = info)
-# ACG -> UGC (RNA complement)
-expect_equal(nucleic.formula(nucleic.complement(dna, "RNA")), "C13H14N10O4", info = info)
info <- "count.aa() correctly processes a longer nucleobase sequence"
seq <- "ATGTCCCGTTTCTTAGTTGCATTGGTTGCCGCACTTTTAGGAGTTGCAATTGAGATGTCCCTTCTCGTTCGCGCTCAGGGGCAGCAAACCTTGCTTTTGGCTGAAGAAAGCAAGCATTTGTCGCAATTGCGTCAACTGACTTTTGAAGGCACCAATGCCGAAGCGTATTGGTCGCCTGACGGGAAATGGTTGGTCTTTCAATCCACACGCCCACCTTACAAGGCTGACCAAATCTTCATCATGAGAGCGGATGGCTCGGGAGTTCGTGTCGTCAGCACGGGCAAAGGTCGTTGCACTTGTGCCTATTTCACGCCAGATGGCAAAGGCGTTATCTTTGCTACGACCCACCTTGCTGGACCAGAACCGCCGCAAGTGCCCAAACTGGACATTCCACGCTATGTTTGGGGCGTGTTCCCAAGTTACGAACTTTACCTGCGGCGTTTGGACACGATGGAACTTATCCGCTTGACCGATAACGAAGGCTACGACGCTGAAGCGACCATTTGCTGGAAGACTGGGCGAATTGTCTTCACAAGTTACCGCAATGGCGACCTTGACCTTTACAGCATGAAATTAGACGGCAGCGATTTGAAGCGATTGACGAAAACCATCGGCTACGAGGGCGGAGCGTTCTACTCGCCCGACGGGAAGCGGATTGTCTTCCGAGCCTATTTGCCAAAGACGCCTGACGAAATTGACGAATACAAGCGGTTGCTCCAGTTAGGCGTCATAAGCCCACCAAAGATGGAGTGGGTCGTCATGGACGCCGACGGTCGCAACATGAAGCAAATC"
Modified: pkg/CHNOSZ/man/add.protein.Rd
===================================================================
--- pkg/CHNOSZ/man/add.protein.Rd 2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/man/add.protein.Rd 2023-03-02 10:40:47 UTC (rev 771)
@@ -73,7 +73,7 @@
}
\seealso{
-\code{\link{read.fasta}} and \code{\link{uniprot.aa}} for other ways of getting amino acid compositions that can be used with \code{add.protein}.
+\code{\link{read.fasta}} for another way of getting amino acid compositions that can be used with \code{add.protein}.
\code{\link{pinfo}} for protein-level functions (length, chemical formulas, reaction coefficients of basis species).
}
Modified: pkg/CHNOSZ/man/util.fasta.Rd
===================================================================
--- pkg/CHNOSZ/man/util.fasta.Rd 2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/man/util.fasta.Rd 2023-03-02 10:40:47 UTC (rev 771)
@@ -2,7 +2,6 @@
\name{util.fasta}
\alias{util.fasta}
\alias{read.fasta}
-\alias{uniprot.aa}
\alias{count.aa}
\title{Functions for Reading FASTA Files and Downloading from UniProt}
@@ -14,7 +13,6 @@
read.fasta(file, iseq = NULL, ret = "count", lines = NULL,
ihead = NULL, start=NULL, stop=NULL, type="protein", id = NULL)
count.aa(seq, start=NULL, stop=NULL, type="protein")
- uniprot.aa(protein, start=NULL, stop=NULL)
}
\arguments{
@@ -28,7 +26,6 @@
\item{type}{character, sequence type (protein or DNA)}
\item{id}{character, value to be used for \code{protein} in output table}
\item{seq}{character, amino acid sequence of a protein}
- \item{protein}{character, entry name for protein in UniProt}
}
\details{
@@ -47,10 +44,6 @@
A warning is generated if any character in \code{seq}, excluding spaces, is not one of the single-letter amino acid or nucleobase abbreviations.
\code{start} and/or \code{stop} can be provided to count a fragment of the sequence (extracted using \code{\link{substr}}).
If only one of \code{start} or \code{stop} is present, the other defaults to 1 (\code{start}) or the length of the sequence (\code{stop}).
-
-\code{uniprot.aa} returns a data frame of amino acid composition, in the format of \code{thermo()$protein}, retrieved from the protein sequence if it is available from UniProt (\url{https://www.uniprot.org/}).
-The \code{protein} argument corresponds to the \samp{Entry name} on the UniProt search pages.
-
}
\value{
@@ -59,7 +52,6 @@
\seealso{
\code{\link{seq2aa}}, like \code{count.aa}, counts amino acids in a user-input sequence, but returns a data frame in the format of \code{thermo()$protein}.
-\code{\link{nucleic.formula}} for an example of counting nucleobases in a DNA sequence.
}
\examples{\dontshow{reset()}
@@ -74,23 +66,6 @@
protein.length(aa)
\dontrun{
-# download amino acid composition of a protein
-# start at position 2 to remove the initiator methionine
-aa <- uniprot.aa("ALAT1_HUMAN", start=2)
-# change the name from "sp|P24298" to "ALAT1" 20201110
-aa$protein <- "ALAT1"
-# add it to thermo()$protein
-ip <- add.protein(aa)
-# now it's possible to calculate some properties
-protein.length(ip)
-protein.formula(ip)
-subcrt("ALAT1_HUMAN", c("cr", "aq"), c(-1, 1))
-# the amino acid composition can be saved for future use
-write.csv(aa, "saved.aa.csv", row.names=FALSE)
-# in another R session, the protein can be loaded without using uniprot.aa()
-aa <- read.csv("saved.aa.csv", as.is=TRUE)
-add.protein(aa)
-
## count amino acids in a sequence
count.aa("GGSGG")
# warnings are issued for unrecognized characters
Modified: pkg/CHNOSZ/man/util.seq.Rd
===================================================================
--- pkg/CHNOSZ/man/util.seq.Rd 2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/man/util.seq.Rd 2023-03-02 10:40:47 UTC (rev 771)
@@ -2,26 +2,20 @@
\name{util.seq}
\alias{util.seq}
\alias{aminoacids}
-\alias{nucleic.formula}
-\alias{nucleic.complement}
\title{Functions to Work with Sequence Data}
\description{
- Return one- or three-letter abbreviations of amino acids; count nucleotides in nucleic acid sequences, calculate DNA and RNA complements of nucleic acid sequences.
+ Return names or one- or three-letter abbreviations of amino acids.
}
\usage{
- aminoacids(nchar=1, which=NULL)
- nucleic.formula(nucleic = NULL)
- nucleic.complement(nucleic = NULL, type="DNA")
+ aminoacids(nchar = 1, which = NULL)
}
\arguments{
\item{nchar}{numeric, \eqn{1} to return one-letter, \eqn{3} to return three-letter abbreviations for amino acids}
\item{which}{character, which amino acids to name}
- \item{nucleic}{data frame, counts of nucleic-acid bases}
- \item{type}{character, target type of nucleic acid (DNA or RNA)}
}
\details{
@@ -29,13 +23,6 @@
\code{aminoacids} returns the one-letter abbreviations (\code{nchar}=\samp{1}) or the three-letter abbreviations (\code{nchar}=\samp{3}) or the names of the neutral amino acids (\code{nchar}=\samp{""}) or the names of the amino acids with ionized side chains (\code{nchar}=\samp{"Z"}).
The output includes 20 amino acids in alphabetic order by 1-letter abbreviation (the order used in \code{thermo()$protein}), unless \code{which} is provided, indicating the desired amino acids (either as 1- or 3-letter abbreviations or names of the neutral amino acids).
-
-\code{nucleic.formula} returns a string representation of the chemical formula for each nucleic-acid composition contained in \code{nucleic}.
-The names of the bases are indicated by the column names of \code{nucleic}.
-At present, the formula is computed as the sum of the chemical formulas of the bases themselves, with no contribution from polymerization (dehydration) or phosphorylation.
-
-\code{nucleic.complement} calculates the complement of the base composition given in \code{nucleic}.
-\code{type} specifies the type of nucleic acid of the complement - \samp{DNA} (A, G, C, T) or \samp{RNA} (A, G, C, U).
}
\seealso{\code{\link{count.aa}} for counting amino acids or nucleic-acid bases in a sequence; \code{\link{protein.formula}} for calculating the chemical formulas of proteins.}
@@ -42,13 +29,7 @@
\examples{\dontshow{reset()}
## count nucleobases in a sequence
-bases <- count.aa("ACCGGGTTT", type="DNA")
-# the DNA complement of that sequence
-DNA.comp <- nucleic.complement(bases)
-# the RNA complement of the DNA complement
-RNA.comp <- nucleic.complement(DNA.comp, type="RNA")
-# the formula of the RNA complement (bases only)
-nucleic.formula(RNA.comp) # C40H42N32O11
+bases <- count.aa("ACCGGGTTT", type = "DNA")
}
\concept{Protein properties}
Modified: pkg/CHNOSZ/vignettes/anintro.Rmd
===================================================================
--- pkg/CHNOSZ/vignettes/anintro.Rmd 2023-03-02 10:06:03 UTC (rev 770)
+++ pkg/CHNOSZ/vignettes/anintro.Rmd 2023-03-02 10:40:47 UTC (rev 771)
@@ -1444,67 +1444,10 @@
ILLISFLIFLIVG
", "PRIO_HUMAN")
```
-<span style="color:green">`uniprot.aa()`</span> returns the amino acid composition of a single amino acid sequence downloaded from UniProt.
-To get sequences for many proteins, use R's `lapply()`, `do.call()`, and `rbind()`:
-```{r uniprot_aa, eval=FALSE}
-IDs <- c("ALAT1_HUMAN", "P02452")
-aa <- lapply(IDs, uniprot.aa)
-## uniprot.aa: trying http://www.uniprot.org/uniprot/ALAT1_HUMAN ... accession P24298 ...
-## >sp|P24298|ALAT1_HUMAN Alanine aminotransferase 1 OS=Homo sapiens GN=GPT PE=1 SV=3 (length 496)
-## uniprot.aa: trying http://www.uniprot.org/uniprot/P02452 ... accession P02452 ...
-## >sp|P02452|CO1A1_HUMAN Collagen alpha-1(I) chain OS=Homo sapiens GN=COL1A1 PE=1 SV=5 (length 1464)
-aa_UniProt <- do.call(rbind, aa)
-```
-```{r uniprot_aa_offline, echo=FALSE}
-aa_ALAT1 <- seq2aa("
-MASSTGDRSQAVRHGLRAKVLTLDGMNPRVRRVEYAVRGPIVQRALELEQELRQGVKKPF
-TEVIRANIGDAQAMGQRPITFLRQVLALCVNPDLLSSPNFPDDAKKRAERILQACGGHSL
-GAYSVSSGIQLIREDVARYIERRDGGIPADPNNVFLSTGASDAIVTVLKLLVAGEGHTRT
-GVLIPIPQYPLYSATLAELGAVQVDYYLDEERAWALDVAELHRALGQARDHCRPRALCVI
-NPGNPTGQVQTRECIEAVIRFAFEERLFLLADEVYQDNVYAAGSQFHSFKKVLMEMGPPY
-AGQQELASFHSTSKGYMGECGFRGGYVEVVNMDAAVQQQMLKLMSVRLCPPVPGQALLDL
-VVSPPAPTDPSFAQFQAEKQAVLAELAAKAKLTEQVFNEAPGISCNPVQGAMYSFPRVQL
-PPRAVERAQELGLAPDMFFCLRLLEETGICVVPGSGFGQREGTYHFRMTILPPLEKLRLL
-LEKLSRFHAKFTLEYS
-", "ALAT1_HUMAN")
-aa_CO1A1 <- seq2aa("
-MFSFVDLRLLLLLAATALLTHGQEEGQVEGQDEDIPPITCVQNGLRYHDRDVWKPEPCRI
-CVCDNGKVLCDDVICDETKNCPGAEVPEGECCPVCPDGSESPTDQETTGVEGPKGDTGPR
-GPRGPAGPPGRDGIPGQPGLPGPPGPPGPPGPPGLGGNFAPQLSYGYDEKSTGGISVPGP
-MGPSGPRGLPGPPGAPGPQGFQGPPGEPGEPGASGPMGPRGPPGPPGKNGDDGEAGKPGR
-PGERGPPGPQGARGLPGTAGLPGMKGHRGFSGLDGAKGDAGPAGPKGEPGSPGENGAPGQ
-MGPRGLPGERGRPGAPGPAGARGNDGATGAAGPPGPTGPAGPPGFPGAVGAKGEAGPQGP
-RGSEGPQGVRGEPGPPGPAGAAGPAGNPGADGQPGAKGANGAPGIAGAPGFPGARGPSGP
-QGPGGPPGPKGNSGEPGAPGSKGDTGAKGEPGPVGVQGPPGPAGEEGKRGARGEPGPTGL
-PGPPGERGGPGSRGFPGADGVAGPKGPAGERGSPGPAGPKGSPGEAGRPGEAGLPGAKGL
-TGSPGSPGPDGKTGPPGPAGQDGRPGPPGPPGARGQAGVMGFPGPKGAAGEPGKAGERGV
-PGPPGAVGPAGKDGEAGAQGPPGPAGPAGERGEQGPAGSPGFQGLPGPAGPPGEAGKPGE
-QGVPGDLGAPGPSGARGERGFPGERGVQGPPGPAGPRGANGAPGNDGAKGDAGAPGAPGS
-QGAPGLQGMPGERGAAGLPGPKGDRGDAGPKGADGSPGKDGVRGLTGPIGPPGPAGAPGD
-KGESGPSGPAGPTGARGAPGDRGEPGPPGPAGFAGPPGADGQPGAKGEPGDAGAKGDAGP
-PGPAGPAGPPGPIGNVGAPGAKGARGSAGPPGATGFPGAAGRVGPPGPSGNAGPPGPPGP
-AGKEGGKGPRGETGPAGRPGEVGPPGPPGPAGEKGSPGADGPAGAPGTPGPQGIAGQRGV
-VGLPGQRGERGFPGLPGPSGEPGKQGPSGASGERGPPGPMGPPGLAGPPGESGREGAPGA
-EGSPGRDGSPGAKGDRGETGPAGPPGAPGAPGAPGPVGPAGKSGDRGETGPAGPTGPVGP
-VGARGPAGPQGPRGDKGETGEQGDRGIKGHRGFSGLQGPPGPPGSPGEQGPSGASGPAGP
-RGPPGSAGAPGKDGLNGLPGPIGPPGPRGRTGDAGPVGPPGPPGPPGPPGPPSAGFDFSF
-LPQPPQEKAHDGGRYYRADDANVVRDRDLEVDTTLKSLSQQIENIRSPEGSRKNPARTCR
-DLKMCHSDWKSGEYWIDPNQGCNLDAIKVFCNMETGETCVYPTQPSVAQKNWYISKNPKD
-KRHVWFGESMTDGFQFEYGGQGSDPADVAIQLTFLRLMSTEASQNITYHCKNSVAYMDQQ
-TGNLKKALLLQGSNEIEIRAEGNSRFTYSVTVDGCTSHTGAWGKTVIEYKTTKTSRLPII
-DVAPLDVGAPDQEFGFDVGPVCFL
-", "CO1A1_HUMAN")
-aa_UniProt <- rbind(aa_ALAT1, aa_CO1A1)
-aa_UniProt$abbrv <- c("ALAT1", "CO1A1")
-```
-```{r aa_UniProt, cache=TRUE}
-aa_UniProt
-```
-
These amino acid compositions can be processed using functions such as <span style="color:green">`protein.length()`</span> and <span style="color:green">`protein.formula()`</span>:
```{r protein_length}
-myaa <- rbind(aa_Ef, aa_PRIO, aa_ALAT1)
+myaa <- rbind(aa_Ef, aa_PRIO)
protein.length(myaa)
```
More information about the CHNOSZ-commits mailing list