[CHNOSZ-commits] r174 - in pkg/CHNOSZ: . R data inst inst/extdata/thermo man tests/testthat vignettes
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Feb 24 07:46:29 CET 2017
Author: jedick
Date: 2017-02-24 07:46:29 +0100 (Fri, 24 Feb 2017)
New Revision: 174
Modified:
pkg/CHNOSZ/DESCRIPTION
pkg/CHNOSZ/R/add.protein.R
pkg/CHNOSZ/R/protein.info.R
pkg/CHNOSZ/data/protein.csv
pkg/CHNOSZ/inst/NEWS
pkg/CHNOSZ/inst/extdata/thermo/obigt_check.csv
pkg/CHNOSZ/man/add.protein.Rd
pkg/CHNOSZ/man/protein.Rd
pkg/CHNOSZ/man/protein.info.Rd
pkg/CHNOSZ/man/util.fasta.Rd
pkg/CHNOSZ/tests/testthat/test-add.protein.R
pkg/CHNOSZ/tests/testthat/test-affinity.R
pkg/CHNOSZ/vignettes/anintro.Rmd
pkg/CHNOSZ/vignettes/hotspring.Rnw
pkg/CHNOSZ/vignettes/hotspring.lyx
Log:
remove read.aa()
Modified: pkg/CHNOSZ/DESCRIPTION
===================================================================
--- pkg/CHNOSZ/DESCRIPTION 2017-02-24 02:07:50 UTC (rev 173)
+++ pkg/CHNOSZ/DESCRIPTION 2017-02-24 06:46:29 UTC (rev 174)
@@ -1,6 +1,6 @@
Date: 2017-02-24
Package: CHNOSZ
-Version: 1.0.8-63
+Version: 1.0.8-64
Title: Chemical Thermodynamics and Activity Diagrams
Author: Jeffrey Dick
Maintainer: Jeffrey Dick <j3ffdick at gmail.com>
Modified: pkg/CHNOSZ/R/add.protein.R
===================================================================
--- pkg/CHNOSZ/R/add.protein.R 2017-02-24 02:07:50 UTC (rev 173)
+++ pkg/CHNOSZ/R/add.protein.R 2017-02-24 06:46:29 UTC (rev 174)
@@ -5,7 +5,6 @@
# add.protein - add amino acid counts to thermo$protein (returns iprotein)
# seq2aa - calculate amino acid counts from a sequence
# aasum - combine amino acid counts (sum, average, or weighted sum by abundance)
-# read.aa - read amino acid counts from a file
seq2aa <- function(protein, sequence) {
# remove newlines and whitespace
@@ -55,21 +54,12 @@
return(out)
}
-read.aa <- function(file="protein.csv", ...) {
- # 20090428 added colClasses here
- # 20140128 added as.is=TRUE (in case numeric values are stored in ref or abbrv column)
- aa <- read.csv(file, colClasses=c(rep("character", 2), NA, NA, rep("numeric", 21)), as.is=TRUE, ...)
- if(!identical(colnames(aa), colnames(get("thermo")$protein)))
- stop(paste("format of", file, "is incompatible with thermo$protein"))
- return(aa)
-}
-
add.protein <- function(aa) {
# add a properly constructed data frame of
# amino acid counts to thermo$protein
thermo <- get("thermo")
if(!identical(colnames(aa), colnames(thermo$protein)))
- stop("the value of 'aa' is not a data frame with the same columns as thermo$protein")
+ stop("'aa' does not have the same columns as thermo$protein")
# find any protein IDs that are duplicated
po <- paste(aa$protein, aa$organism, sep="_")
ip <- pinfo(po)
Modified: pkg/CHNOSZ/R/protein.info.R
===================================================================
--- pkg/CHNOSZ/R/protein.info.R 2017-02-24 02:07:50 UTC (rev 173)
+++ pkg/CHNOSZ/R/protein.info.R 2017-02-24 06:46:29 UTC (rev 174)
@@ -8,7 +8,7 @@
# protein.basis: coefficients of basis species in formation reactions of [ionized] proteins [residues]
# protein.equil: step-by-step example of protein equilibrium calculation
-pinfo <- function(protein, organism=NULL, residue=FALSE) {
+pinfo <- function(protein, organism=NULL, residue=FALSE, regexp=FALSE) {
# return the `protein` (possibly per residue) for:
# dataframe `protein`
# return the rownumber(s) of thermo$protein for:
@@ -29,11 +29,19 @@
# compute per-residue counts if requested
if(residue) out[, 5:25] <- out[, 5:25]/rowSums(out[, 6:25])
} else {
- # search for protein or protein_organism in thermo$protein
- t_p_names <- paste(t_p$protein, t_p$organism, sep="_")
- if(is.null(organism)) my_names <- protein
- else my_names <- paste(protein, organism, sep="_")
- iprotein <- match(my_names, t_p_names)
+ # search for protein by regular expression
+ if(regexp) {
+ iprotein <- grepl(protein, t_p$protein)
+ iorganism <- iprotein
+ if(!is.null(organism)) iorganism <- grepl(organism, t_p$organism)
+ iprotein <- which(iprotein & iorganism)
+ } else {
+ # search for protein or protein_organism in thermo$protein
+ t_p_names <- paste(t_p$protein, t_p$organism, sep="_")
+ if(is.null(organism)) my_names <- protein
+ else my_names <- paste(protein, organism, sep="_")
+ iprotein <- match(my_names, t_p_names)
+ }
out <- iprotein
}
out
Modified: pkg/CHNOSZ/data/protein.csv
===================================================================
--- pkg/CHNOSZ/data/protein.csv 2017-02-24 02:07:50 UTC (rev 173)
+++ pkg/CHNOSZ/data/protein.csv 2017-02-24 06:46:29 UTC (rev 174)
@@ -468,3 +468,41 @@
PTC1,HUMAN,UniProt,Q13635,1,116,27,63,78,64,101,42,62,50,153,31,49,102,57,79,105,83,108,21,56
SMO,HUMAN,UniProt,Q99835,1,70,26,28,36,37,57,15,33,29,71,14,25,61,26,47,52,43,53,19,18
GLI3R,HUMAN,UniProt,P10071,1,10,0,6,4,5,5,12,7,0,7,5,3,26,0,7,17,6,3,0,8
+HXA1,HUMAN,UniProt,P49639,1,25,7,9,17,9,26,24,7,14,19,5,16,25,20,14,46,20,14,2,16
+HXA2,HUMAN,UniProt,O43364,1,31,8,16,29,16,22,10,9,20,38,4,18,30,23,15,42,24,13,2,6
+HXA3,HUMAN,UniProt,O43365,1,49,5,8,15,8,44,18,6,19,28,8,17,75,27,15,49,20,9,2,21
+HXA4,HUMAN,UniProt,Q00056,1,37,3,4,14,6,28,15,7,16,21,5,8,52,19,20,24,14,10,3,14
+HXA5,HUMAN,UniProt,P20719,1,30,3,10,14,8,25,12,8,10,11,7,10,20,11,21,41,8,5,3,13
+HXA6,HUMAN,UniProt,P31267,1,16,5,10,12,10,20,4,5,13,16,4,9,15,15,17,28,10,4,3,17
+HXA7,HUMAN,UniProt,P31268,1,33,5,13,20,9,18,4,6,11,14,3,10,12,8,15,18,11,4,3,13
+HXA9,HUMAN,UniProt,P31269,1,33,4,14,15,9,17,12,4,14,21,5,13,26,8,19,17,15,11,4,11
+HXA10,HUMAN,UniProt,P31260,1,43,9,14,22,12,59,5,6,19,32,6,13,51,16,22,43,14,7,3,14
+HXA11,HUMAN,UniProt,P31270,1,32,6,12,21,12,20,6,5,15,18,5,12,25,12,24,39,18,13,2,16
+HXA13,HUMAN,UniProt,P31271,1,93,6,8,17,10,35,11,6,19,19,10,13,37,13,16,29,12,13,5,16
+HXB1,HUMAN,UniProt,P14653,1,29,5,9,19,11,29,4,2,12,16,4,11,39,15,15,37,17,8,2,17
+HXB2,HUMAN,UniProt,P14652,1,39,9,14,29,19,27,4,7,12,35,3,6,53,16,20,33,14,11,3,2
+HXB3,HUMAN,UniProt,P14651,1,42,7,8,15,10,64,15,4,22,28,10,20,60,21,13,46,19,6,2,19
+HXB4,HUMAN,UniProt,P17483,1,22,7,5,13,6,19,7,5,11,13,4,9,46,10,23,21,6,10,3,11
+HXB5,HUMAN,UniProt,P09067,1,30,3,9,15,11,16,7,7,10,16,8,11,19,12,19,46,13,3,3,11
+HXB6,HUMAN,UniProt,P17509,1,18,6,6,20,10,17,4,4,12,14,4,5,19,13,17,23,10,4,3,15
+HXB7,HUMAN,UniProt,P09629,1,26,4,5,18,10,22,3,5,10,11,6,8,10,11,17,20,13,3,3,12
+HXB9,HUMAN,UniProt,P17482,1,21,3,6,22,7,17,7,6,16,20,5,8,24,14,17,26,8,8,4,11
+HXB13,HUMAN,UniProt,Q92826,1,33,7,9,14,8,26,5,6,16,19,4,8,33,13,16,22,14,12,4,15
+HXC4,HUMAN,UniProt,P09017,1,20,5,6,18,4,12,14,10,12,13,5,9,35,16,20,31,12,5,3,14
+HXC5,HUMAN,UniProt,Q00444,1,25,4,6,14,5,12,6,8,14,13,8,12,20,12,17,20,9,3,3,11
+HXC6,HUMAN,UniProt,P09630,1,14,3,8,17,9,18,5,9,11,14,5,15,8,17,20,24,15,7,3,13
+HXC8,HUMAN,UniProt,P31273,1,13,4,7,26,11,19,10,3,16,15,4,13,14,15,15,26,6,9,3,13
+HXC9,HUMAN,UniProt,P31274,1,24,4,15,14,9,15,7,4,16,18,7,8,27,7,20,24,12,11,4,14
+HXC10,HUMAN,UniProt,Q9NYD6,1,25,9,11,32,9,20,5,6,24,26,8,18,30,10,23,42,18,9,3,14
+HXC11,HUMAN,UniProt,O43248,1,28,8,7,25,15,21,8,5,17,19,6,17,28,9,21,35,8,11,2,14
+HXC12,HUMAN,UniProt,P31275,1,18,5,11,17,11,42,3,4,12,32,2,14,30,9,21,26,4,8,3,10
+HXC13,HUMAN,UniProt,P31276,1,28,7,11,16,7,37,11,7,19,23,4,5,36,13,17,37,12,18,5,17
+HXD1,HUMAN,UniProt,Q9GZZ0,1,48,6,10,13,17,33,5,6,17,25,3,8,36,12,14,34,14,14,2,11
+HXD3,HUMAN,UniProt,P31249,1,44,9,9,19,11,42,15,7,22,29,9,18,59,26,15,48,18,11,2,19
+HXD4,HUMAN,UniProt,P09016,1,18,4,8,12,7,27,9,4,15,15,6,7,33,15,15,24,10,10,3,13
+HXD8,HUMAN,UniProt,P13378,1,36,5,8,21,12,24,11,6,15,14,4,11,37,18,18,17,10,7,3,13
+HXD9,HUMAN,UniProt,P28356,1,43,7,7,20,11,46,6,7,17,17,8,9,36,13,21,43,17,10,4,10
+HXD10,HUMAN,UniProt,P28358,1,16,9,8,30,9,12,3,9,23,24,14,20,27,19,20,41,22,20,3,11
+HXD11,HUMAN,UniProt,P31277,1,48,6,11,19,16,50,2,4,16,16,6,10,41,15,21,23,7,10,2,15
+HXD12,HUMAN,UniProt,P35452,1,39,4,7,13,10,24,0,4,16,28,3,11,28,15,19,17,10,9,3,10
+HXD13,HUMAN,UniProt,P35453,1,54,5,11,13,11,32,7,7,18,14,7,11,23,13,19,45,12,20,5,16
Modified: pkg/CHNOSZ/inst/NEWS
===================================================================
--- pkg/CHNOSZ/inst/NEWS 2017-02-24 02:07:50 UTC (rev 173)
+++ pkg/CHNOSZ/inst/NEWS 2017-02-24 06:46:29 UTC (rev 174)
@@ -1,4 +1,4 @@
-CHANGES IN CHNOSZ 1.0.8-63 (2017-02-24)
+CHANGES IN CHNOSZ 1.0.8-64 (2017-02-24)
---------------------------------------
DOCUMENTATION:
@@ -64,9 +64,6 @@
marks and lines; this is used in diagram() to redraw the axes on
filled diagrams.
-- Add `...` argument to read.aa() (additional arguments for
- read.csv()).
-
- seq2aa() removes newlines and whitespace before counting the
letters in the sequence.
@@ -136,7 +133,8 @@
summary table).
- New function pinfo() merges functionality of old iprotein() and
- ip2aa(), which have been removed (along with protein.info()).
+ ip2aa(). Add `regexp` argument to control whether matches are made
+ using a regular expression.
- Rename aa2eos() to protein.obigt().
@@ -145,6 +143,8 @@
- Remove stress() and stress.csv; move data from Tai et al., 2005 (used
in an example in ?read.expr) to TBD+05.csv.
+- Remove read.aa() - replaced by read.csv() with as.is=TRUE.
+
CHANGES IN CHNOSZ 1.0.8 (2016-05-28)
------------------------------------
Modified: pkg/CHNOSZ/inst/extdata/thermo/obigt_check.csv
===================================================================
--- pkg/CHNOSZ/inst/extdata/thermo/obigt_check.csv 2017-02-24 02:07:50 UTC (rev 173)
+++ pkg/CHNOSZ/inst/extdata/thermo/obigt_check.csv 2017-02-24 06:46:29 UTC (rev 174)
@@ -210,34 +210,34 @@
"OBIGT",1793,"Gly-Tyr-Gly","aq",,-190.24,
"OBIGT",1794,"Gly-Val-Gly","aq",,-155.72,
"OBIGT",1795,"[GXGBB]","aq",,-98.93,
-"OBIGT",1833,"methyldiethanolamine","aq",1.61,,
-"OBIGT",1865,"MgAsO4-","aq",1.3,,
-"OBIGT",1868,"MnAsO4-","aq",-1.45,,
-"OBIGT",1964,"antigorite","cr1",,,812
-"OBIGT",1998,"clinochlore,7a","cr1",,,666
-"OBIGT",2017,"daphnite,14a","cr",,,-836
-"OBIGT",2042,"ferrosilite","cr1",,,694
-"OBIGT",2043,"ferrosilite","cr2",,,694
-"OBIGT",2058,"greenalite","cr",,,142507
-"OBIGT",2073,"hydromagnesite","cr",,,-2569
-"OBIGT",2204,"n-octadecane","cr",-2.63,,
-"OBIGT",2205,"n-nonadecane","cr",-13.32,,
-"OBIGT",2206,"n-eicosane","cr",-2.79,,
-"OBIGT",2207,"n-heneicosane","cr",-8.61,,
-"OBIGT",2208,"n-docosane","cr",-2.63,,
-"OBIGT",2209,"n-tricosane","cr",-5.22,,
-"OBIGT",2210,"n-tetracosane","cr",-2.02,,
-"OBIGT",2211,"n-pentacosane","cr",-2.93,,
-"OBIGT",2212,"n-hexacosane","cr",-1.29,,
-"OBIGT",2213,"n-heptacosane","cr",-1.23,,
-"OBIGT",2265,"carbazole","cr",-43.39,,
-"OBIGT",2306,"triphenylene","cr",,,541
-"OBIGT",2619,"deoxyadenosine","cr",,,-2977
-"OBIGT",2626,"acetamide","cr",-67.91,,
-"OBIGT",2671,"jarosite","cr",,,20697
-"OBIGT",2672,"natrojarosite","cr",,,17554
-"OBIGT",2744,"n-nonacontane","liq",,,635
-"OBIGT",2751,"2-methyloctane","liq",10,,
-"OBIGT",3164,"5,6-dithiadecane","liq",2,,
-"OBIGT",3239,"ethylene","gas",-4.59,,
-"OBIGT",3249,"3,5-dimethylphenol","gas",,,628
+"OBIGT",1839,"methyldiethanolamine","aq",1.61,,
+"OBIGT",1871,"MgAsO4-","aq",1.3,,
+"OBIGT",1874,"MnAsO4-","aq",-1.45,,
+"OBIGT",1970,"antigorite","cr1",,,812
+"OBIGT",2004,"clinochlore,7a","cr1",,,666
+"OBIGT",2023,"daphnite,14a","cr",,,-836
+"OBIGT",2048,"ferrosilite","cr1",,,694
+"OBIGT",2049,"ferrosilite","cr2",,,694
+"OBIGT",2064,"greenalite","cr",,,142507
+"OBIGT",2079,"hydromagnesite","cr",,,-2569
+"OBIGT",2210,"n-octadecane","cr",-2.63,,
+"OBIGT",2211,"n-nonadecane","cr",-13.32,,
+"OBIGT",2212,"n-eicosane","cr",-2.79,,
+"OBIGT",2213,"n-heneicosane","cr",-8.61,,
+"OBIGT",2214,"n-docosane","cr",-2.63,,
+"OBIGT",2215,"n-tricosane","cr",-5.22,,
+"OBIGT",2216,"n-tetracosane","cr",-2.02,,
+"OBIGT",2217,"n-pentacosane","cr",-2.93,,
+"OBIGT",2218,"n-hexacosane","cr",-1.29,,
+"OBIGT",2219,"n-heptacosane","cr",-1.23,,
+"OBIGT",2271,"carbazole","cr",-43.39,,
+"OBIGT",2312,"triphenylene","cr",,,541
+"OBIGT",2625,"deoxyadenosine","cr",,,-2977
+"OBIGT",2632,"acetamide","cr",-67.91,,
+"OBIGT",2677,"jarosite","cr",,,20697
+"OBIGT",2678,"natrojarosite","cr",,,17554
+"OBIGT",2750,"n-nonacontane","liq",,,635
+"OBIGT",2757,"2-methyloctane","liq",10,,
+"OBIGT",3170,"5,6-dithiadecane","liq",2,,
+"OBIGT",3245,"ethylene","gas",-4.59,,
+"OBIGT",3255,"3,5-dimethylphenol","gas",,,628
Modified: pkg/CHNOSZ/man/add.protein.Rd
===================================================================
--- pkg/CHNOSZ/man/add.protein.Rd 2017-02-24 02:07:50 UTC (rev 173)
+++ pkg/CHNOSZ/man/add.protein.Rd 2017-02-24 06:46:29 UTC (rev 174)
@@ -1,7 +1,6 @@
\name{add.protein}
\alias{add.protein}
\alias{seq2aa}
-\alias{read.aa}
\alias{aasum}
\title{Amino Acid Compositions of Proteins}
\description{
@@ -11,7 +10,6 @@
\usage{
add.protein(aa)
seq2aa(protein, sequence)
- read.aa(file = "protein.csv", ...)
aasum(aa, abundance = 1, average = FALSE, protein = NULL, organism = NULL)
}
@@ -19,7 +17,6 @@
\item{aa}{data frame, amino acid composition in the format of \code{thermo$protein}}
\item{protein}{character, name of protein; numeric, indices of proteins (rownumbers of \code{\link{thermo}$protein})}
\item{sequence}{character, protein sequence}
- \item{file}{character, path to file with amino acid compositions}
\item{...}{additional arguments passed to \code{\link{read.csv}}}
\item{abundance}{numeric, abundances of proteins}
\item{average}{logical, return the weighted average of amino acid counts?}
@@ -41,8 +38,6 @@
If \code{average} is TRUE the final sum is divided by the number of input compositions.
The name used in the output is taken from the first row of \code{aa} or from \code{protein} and \code{organism} if they are specified.
-\code{read.aa} returns a data frame of amino acid composition based on the contents of the indicated \code{file}, which should be a CSV file with the same column names as \code{thermo$protein}.
-
Given amino acid composition returned by the \code{*aa} functions described above, \code{add.protein} adds them to \code{thermo$protein} for use by other functions in CHNOSZ.
The amino acid compositions of proteins in \code{aa} with the same name as one in \code{thermo$protein} are replaced.
The value returned by this function is the rownumbers of \code{thermo$protein} that are added and/or replaced.
Modified: pkg/CHNOSZ/man/protein.Rd
===================================================================
--- pkg/CHNOSZ/man/protein.Rd 2017-02-24 02:07:50 UTC (rev 173)
+++ pkg/CHNOSZ/man/protein.Rd 2017-02-24 06:46:29 UTC (rev 174)
@@ -18,7 +18,8 @@
## logfO2-pH potential diagram
# with a charged basis, we calculate properties of ionized proteins
basis("CHNOS+")
-aa <- read.aa(system.file("extdata/protein/DS11.csv", package = "CHNOSZ"))
+file <- system.file("extdata/protein/DS11.csv", package = "CHNOSZ")
+aa <- read.csv(file, as.is=TRUE)
aa <- aa[grep("transferase", aa$protein), ]
ip <- add.protein(aa)
a <- affinity(pH=c(0, 14), O2=c(-64, -61), T=75, iprotein=ip)
Modified: pkg/CHNOSZ/man/protein.info.Rd
===================================================================
--- pkg/CHNOSZ/man/protein.info.Rd 2017-02-24 02:07:50 UTC (rev 173)
+++ pkg/CHNOSZ/man/protein.info.Rd 2017-02-24 06:46:29 UTC (rev 174)
@@ -14,7 +14,7 @@
}
\usage{
- pinfo(protein, organism=NULL, residue=FALSE)
+ pinfo(protein, organism=NULL, residue=FALSE, regexp=FALSE)
protein.length(protein, organism = NULL)
protein.formula(protein, organism = NULL, residue = FALSE)
protein.obigt(protein, organism = NULL, state=get("thermo")$opt$state)
@@ -26,6 +26,7 @@
\item{protein}{character, names of proteins; numeric, species index of proteins; data frame; amino acid composition of proteins}
\item{organism}{character, names of organisms}
\item{residue}{logical, return per-residue values (those of the proteins divided by their lengths)?}
+ \item{regexp}{logical, find matches using regular expressions?}
\item{normalize}{logical, return per-residue values (those of the proteins divided by their lengths)?}
\item{state}{character, physical state}
\item{T}{numeric, temperature in \eqn{^{\circ}}{°}C}
@@ -38,6 +39,9 @@
The names can be supplied in the single \code{protein} argument (with an underscore, denoting protein_organism) or as pairs of \code{protein}s and \code{organism}s.
NA is returned for any unmatched proteins, including those for which no \code{organism} is given or that do not have an underscore in \code{protein}.
+Alternatively, if \code{regexp} is TRUE, the \code{protein} argument is used as a pattern (regular expression); rownumbers of all matches of \code{thermo$protein$protein} to this pattern are returned.
+When using \code{regexp}, \code{organism} can optionally be given as a second pattern (also a regular expression) so that only entries whose \code{thermo$protein$organism} matches it are returned.
+
For numeric \code{protein}, \code{pinfo} returns the corresponding row(s) of \code{thermo$protein}.
Set \code{residue} to TRUE to return the per-residue composition (i.e. amino acid composition of the protein divided by total number of residues).
@@ -115,7 +119,7 @@
# get amino acid compositions of microbial proteins
# generated from the RefSeq database
file <- system.file("extdata/refseq/protein_refseq.csv.xz", package="CHNOSZ")
-ip <- add.protein(read.aa(file))
+ip <- add.protein(read.csv(file, as.is=TRUE))
# only use those organisms with a certain
# number of sequenced bases
ip <- ip[as.numeric(thermo$protein$abbrv[ip]) > 50000]
@@ -139,7 +143,42 @@
axis(1, 1:15, terms, las=2)
title(main=paste("Average oxidation state of carbon in proteins",
"by taxID in NCBI RefSeq (after Dick, 2014)", sep="\n"))
+
+\dontshow{opar <- par(no.readonly=TRUE)}
+# using pinfo() with regexp=TRUE:
+# plot ZC and nH2O/residue of HOX proteins
+# basis species: glutamine-glutamic acid-cysteine-O2-H2O
+basis("QEC")
+# device setup
+par(mfrow=c(2, 2))
+# a red-blue scale from 1-13
+col <- ZC.col(1:13)
+# axis labels
+ZClab <- axis.label("ZC")
+nH2Olab <- expression(bar(italic(n))[H[2]*O])
+# loop over HOX gene clusters
+for(cluster in c("A", "B", "C", "D")) {
+ # get protein indices
+ pattern <- paste0("^HX", cluster)
+ ip <- pinfo(pattern, "HUMAN", regexp=TRUE)
+ # calculate ZC and nH2O/residue
+ thisZC <- ZC(protein.formula(ip))
+ thisH2O <- protein.basis(ip)[, "H2O"] / protein.length(ip)
+ # plot lines
+ plot(thisZC, thisH2O, type="l", xlab=ZClab, ylab=nH2Olab)
+ # the number of the HOX gene
+ pname <- pinfo(ip)$protein
+ nHOX <- as.numeric(gsub("[A-Za-z]*", "", pname))
+ # plot colored points
+ points(thisZC, thisH2O, pch=19, col=col[nHOX], cex=3.5)
+ points(thisZC, thisH2O, pch=19, col="white", cex=2.5)
+ # plot the number of the HOX gene
+ text(thisZC, thisH2O, nHOX)
+ # add title
+ title(main=paste0("HOX", cluster))
}
+\dontshow{par(opar)}
+}
\references{
Dick, J. M., LaRowe, D. E. and Helgeson, H. C. (2006) Temperature, pressure, and electrochemical constraints on protein speciation: Group additivity calculation of the standard molal thermodynamic properties of ionized unfolded proteins. \emph{Biogeosciences} \bold{3}, 311--336. \url{http://dx.doi.org/10.5194/bg-3-311-2006}
Modified: pkg/CHNOSZ/man/util.fasta.Rd
===================================================================
--- pkg/CHNOSZ/man/util.fasta.Rd 2017-02-24 02:07:50 UTC (rev 173)
+++ pkg/CHNOSZ/man/util.fasta.Rd 2017-02-24 06:46:29 UTC (rev 174)
@@ -105,7 +105,7 @@
# the amino acid composition can be saved for future use
write.csv(aa, "saved.aa.csv", row.names=FALSE)
# in another R session, the protein can be loaded without using uniprot.aa()
-aa <- read.aa("saved.aa.csv")
+aa <- read.csv("saved.aa.csv", as.is=TRUE)
add.protein(aa)
## count amino acids in a sequence
Modified: pkg/CHNOSZ/tests/testthat/test-add.protein.R
===================================================================
--- pkg/CHNOSZ/tests/testthat/test-add.protein.R 2017-02-24 02:07:50 UTC (rev 173)
+++ pkg/CHNOSZ/tests/testthat/test-add.protein.R 2017-02-24 06:46:29 UTC (rev 174)
@@ -11,12 +11,12 @@
ip <- add.protein(aa)
# this replaces the proteins (with the same ones)
expect_error(ip <- add.protein(aa), "converting factors causes problems replacing protein data")
- # ... should use read.csv(file, stringsAsFactors=FALSE)
+ # ... should use read.csv(file, as.is=TRUE)
})
test_that("errors and messages occur in some circumstances", {
expect_error(seq2aa("LYS_CHICK", "XXX"), "no characters match an amino acid")
- expect_error(add.protein(count.aa("AAA")), "not a data frame with the same columns as thermo\\$protein")
+ expect_error(add.protein(count.aa("AAA")), "does not have the same columns as thermo\\$protein")
expect_message(add.protein(pinfo(pinfo("CYC_BOVIN"))), "replaced 1 existing protein\\(s\\)")
})
Modified: pkg/CHNOSZ/tests/testthat/test-affinity.R
===================================================================
--- pkg/CHNOSZ/tests/testthat/test-affinity.R 2017-02-24 02:07:50 UTC (rev 173)
+++ pkg/CHNOSZ/tests/testthat/test-affinity.R 2017-02-24 06:46:29 UTC (rev 174)
@@ -101,7 +101,8 @@
basis(c("HCO3-", "H2O", "NH3", "HS-", "H2", "H+"),
"aq", c(-3, 0, -4, -7, 999, 999))
sites <- c("N", "S", "R", "Q", "P")
- aa <- read.aa(system.file("extdata/protein/DS11.csv", package="CHNOSZ"))
+ file <- system.file("extdata/protein/DS11.csv", package="CHNOSZ")
+ aa <- read.csv(file, as.is=TRUE)
ip <- add.protein(aa[1:5, ])
# to reproduce, we need to use the "old" parameters for [Met] from Dick et al., 2006
mod.obigt("[Met]", G=-35245, H=-59310)
Modified: pkg/CHNOSZ/vignettes/anintro.Rmd
===================================================================
--- pkg/CHNOSZ/vignettes/anintro.Rmd 2017-02-24 02:07:50 UTC (rev 173)
+++ pkg/CHNOSZ/vignettes/anintro.Rmd 2017-02-24 06:46:29 UTC (rev 174)
@@ -1350,22 +1350,23 @@
In the Rubisco example above, we saw the use of <span style="color:green">`read.fasta()`</span> to read amino acid sequences from a FASTA file.
There are several other methods for inputting amino acid compositions.
+R's `read.csv()` can be used to read amino acid compositions from a CSV file with the same columns that are present in `thermo$protein`.
+Note the use of `as.is = TRUE` to prevent reading character data as factors.
+The `nrows` argument can be added to limit the number of rows that are read:
+```{r read_csv}
+file <- system.file("extdata/protein/DS11.csv", package = "CHNOSZ")
+aa_bison <- read.csv(file, as.is = TRUE, nrows = 5)
+```
<span style="color:green">`more.aa()`</span> retrieves amino acid composition of proteins in *Saccharomyces cerevisiae* and *Escherichia coli* from data files that are included with CHNOSZ:
```{r more_aa}
aa_YML020W <- more.aa("YML020W", "Sce")
aa_ILVE <- more.aa("ILVE", "Eco")
```
-<span style="color:green">`read.aa()`</span> is used to read amino acid compositions from a CSV file with the same columns that are present in `thermo$protein`.
-The `nrows` argument can be added to read that number of rows:
-```{r read_aa}
-aa_bison <- read.aa(system.file("extdata/protein/DS11.csv",
- package = "CHNOSZ"), nrows = 5)
-```
<span style="color:green">`read.fasta()`</span> reads a FASTA file and returns the amino acid compositions of the sequences.
The `iseq` argument can be used to read those sequences from the file:
```{r read_fasta, message=FALSE}
-aa_Ef <- read.fasta(system.file("extdata/fasta/EF-Tu.aln",
- package = "CHNOSZ"), iseq = 1:2)
+file <- system.file("extdata/fasta/EF-Tu.aln", package = "CHNOSZ")
+aa_Ef <- read.fasta(file, iseq = 1:2)
```
<span style="color:green">`seq2aa()`</span> counts the amino acids in a user-supplied sequence and generates a data frame of the amino acid composition:
```{marginfigure}
@@ -1476,7 +1477,8 @@
Then we add the proteins and get their indices using <span style="color:red">`add.protein()`</span>, set the basis, calculate the affinities, and make a potential diagram with temperature and activity of dissolved hydrogen as variables:
```{r bison_transferase, fig.margin=TRUE, fig.width=4, fig.height=4, small.mar=TRUE, dpi=dpi, out.width="100%", echo=FALSE, results="hide", message=FALSE, fig.cap='Potential diagram for metagenomically identified sequences of transferases in Bison Pool hot spring. See also the vignette [<span style="color:blue">*Hot-spring proteins in CHNOSZ*</span>](hotspring.pdf).', cache=TRUE, pngquant=pngquant, timeit=timeit}
-aa <- read.aa(system.file("extdata/protein/DS11.csv", package = "CHNOSZ"))
+file <- system.file("extdata/protein/DS11.csv", package = "CHNOSZ")
+aa <- read.csv(file, as.is = TRUE)
aa <- aa[grep("transferase", aa$protein), ]
ip <- add.protein(aa)
bspecies <- c("HCO3-", "H2O", "NH3", "HS-", "H2", "H+")
@@ -1492,12 +1494,12 @@
lines(T, logaH2, lty = 2, lwd = 2)
points(T, logaH2, pch = 21, bg = "white", cex = 1.5)
```
-```{r bison_transferase, eval=FALSE, echo=1:11}
+```{r bison_transferase, eval=FALSE, echo=1:12}
```
Site numbers 1--5 correspond to a cooling gradient along the outflow channel of the hot spring.
The colors represent the relative `r zc` of the proteins (red is more reduced).
The points indicate the *T* and log*a*<sub>H<sub>2</sub></sub> that optimize a thermodynamic model for relative abundances of phyla that were estimated by taxonomic classification of metagenomic sequences [@DS13]:
-```{r bison_transferase, eval=FALSE, echo=12:15}
+```{r bison_transferase, eval=FALSE, echo=13:16}
```
# Experimental features
Modified: pkg/CHNOSZ/vignettes/hotspring.Rnw
===================================================================
--- pkg/CHNOSZ/vignettes/hotspring.Rnw 2017-02-24 02:07:50 UTC (rev 173)
+++ pkg/CHNOSZ/vignettes/hotspring.Rnw 2017-02-24 06:46:29 UTC (rev 174)
@@ -132,8 +132,8 @@
<<proteins>>=
# read the amino acid compositions
-aa.annot <- read.aa(system.file("extdata/protein/DS11.csv", package="CHNOSZ"))
-aa.phyla <- read.aa(system.file("extdata/protein/DS13.csv", package="CHNOSZ"))
+aa.annot <- read.csv(system.file("extdata/protein/DS11.csv", package="CHNOSZ"), as.is=TRUE)
+aa.phyla <- read.csv(system.file("extdata/protein/DS13.csv", package="CHNOSZ"), as.is=TRUE)
@
Here are the site names for the sampling locations (also referred
Modified: pkg/CHNOSZ/vignettes/hotspring.lyx
===================================================================
--- pkg/CHNOSZ/vignettes/hotspring.lyx 2017-02-24 02:07:50 UTC (rev 173)
+++ pkg/CHNOSZ/vignettes/hotspring.lyx 2017-02-24 06:46:29 UTC (rev 174)
@@ -482,12 +482,14 @@
\begin_layout Plain Layout
-aa.annot <- read.aa(system.file("extdata/protein/DS11.csv", package="CHNOSZ"))
+aa.annot <- read.csv(system.file("extdata/protein/DS11.csv", package="CHNOSZ"),
+ as.is=TRUE)
\end_layout
\begin_layout Plain Layout
-aa.phyla <- read.aa(system.file("extdata/protein/DS13.csv", package="CHNOSZ"))
+aa.phyla <- read.csv(system.file("extdata/protein/DS13.csv", package="CHNOSZ"),
+ as.is=TRUE)
\end_layout
\begin_layout Plain Layout
More information about the CHNOSZ-commits
mailing list