[Seqinr-commits] r1792 - www/src/mainmatter
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Jun 6 18:20:57 CEST 2014
Author: jeanlobry
Date: 2014-06-06 18:20:57 +0200 (Fri, 06 Jun 2014)
New Revision: 1792
Modified:
www/src/mainmatter/getseqacnuc.rnw
www/src/mainmatter/getseqacnuc.tex
Log:
compil 3.0-11
Modified: www/src/mainmatter/getseqacnuc.rnw
===================================================================
--- www/src/mainmatter/getseqacnuc.rnw 2014-06-06 11:51:19 UTC (rev 1791)
+++ www/src/mainmatter/getseqacnuc.rnw 2014-06-06 16:20:57 UTC (rev 1792)
@@ -6,6 +6,7 @@
\author{Charif, D. \and Lobry, J.R.}
\begin{document}
+\SweaveOpts{concordance=TRUE}
\SweaveInput{../config/commonrnw.rnw}
\maketitle
\tableofcontents
Modified: www/src/mainmatter/getseqacnuc.tex
===================================================================
--- www/src/mainmatter/getseqacnuc.tex 2014-06-06 11:51:19 UTC (rev 1791)
+++ www/src/mainmatter/getseqacnuc.tex 2014-06-06 16:20:57 UTC (rev 1792)
@@ -7,6 +7,7 @@
\usepackage{Sweave}
\begin{document}
+\input{getseqacnuc-concordance}
%
% To change the R input/output style:
%
@@ -75,17 +76,15 @@
choosebank()
\end{Sinput}
\begin{Soutput}
- [1] "genbank" "embl" "emblwgs" "swissprot"
- [5] "ensembl" "refseq" "refseqViruses" "nrsub"
- [9] "hobacnucl" "hobacprot" "hovergendna" "hovergen"
-[13] "hogenom5" "hogenom5dna" "hogenom" "hogenomdna"
-[17] "hogennucl" "hogenprot" "hoverclnu" "hoverclpr"
-[21] "homolens3" "homolens3dna" "homolens" "homolensdna"
-[25] "greviews" "polymorphix" "emglib" "HAMAPnucl"
-[29] "HAMAPprot" "taxobacgen" "apis" "anopheles"
-[33] "caenorhabditis" "ciona_savignyi" "danio" "drosophila"
-[37] "felis" "gallus" "human" "mouse"
-[41] "saccharomyces" "tetraodon" "xenopus"
+ [1] "genbank" "embl" "emblwgs" "swissprot"
+ [5] "ensembl" "hogenom" "hogenomdna" "hovergendna"
+ [9] "hovergen" "hogenom5" "hogenom5dna" "hogenom4"
+[13] "hogenom4dna" "homolens" "homolensdna" "hobacnucl"
+[17] "hobacprot" "phever2" "phever2dna" "refseq"
+[21] "greviews" "bacterial" "protozoan" "ensprotists"
+[25] "ensfungi" "ensmetazoa" "ensplants" "ensemblbacteria"
+[29] "mito" "polymorphix" "emglib" "taxobacgen"
+[33] "refseqViruses"
\end{Soutput}
\end{Schunk}
@@ -106,10 +105,10 @@
1 genbank on
2 embl on
3 emblwgs on
- info
-1 GenBank Rel. 174 (15 October 2009) Last Updated: Nov 5, 2009
-2 EMBL Library Release 101 (September 2009) Last Updated: Nov 5, 2009
-3 EMBL Whole Genome Shotgun sequences Release 101 (September 2009)
+ info
+1 GenBank Release 201 (15 April 2014) Last Updated: Jun 2, 2014
+2 EMBL Nucleotide Archive Release 119 (March 2014) Last Updated: Jun 1, 2014
+3 EMBL Whole Genome Shotgun sequences Release 119 (March 2014)
\end{Soutput}
\end{Schunk}
@@ -142,12 +141,9 @@
choosebank(tagbank = "TP", infobank = TRUE)
\end{Sinput}
\begin{Soutput}
- bank status info
-1 emblTP on frozen EMBL release
-2 swissprotTP on frozen SwissProt release
-3 hoverprotTP on frozen Hovergen release - protein sequences
-4 hovernuclTP on frozen Hovergen release - nucleotide sequences
-5 trypano on frozen trypano database
+ bank status info
+1 trypano on frozen trypano database
+2 emblTP on frozen EMBL release
\end{Soutput}
\end{Schunk}
@@ -167,12 +163,12 @@
.. ..- attr(*, "conn_id")=<externalptr>
$ bankname: chr "genbank"
$ banktype: chr "GENBANK"
- $ totseqs : num 1.19e+08
- $ totspecs: num 686783
- $ totkeys : num 15791073
- $ release : chr " GenBank Rel. 174 (15 October 2009) Last Updated: Nov 5, 2009"
+ $ totseqs : num 1.91e+08
+ $ totspecs: num 1242014
+ $ totkeys : num 43531291
+ $ release : chr " GenBank Release 201 (15 April 2014) Last Updated: Jun 2, 2014"
$ status :Class 'AsIs' chr "on"
- $ details : chr [1:4] " **** ACNUC Data Base Content **** " " GenBank Rel. 174 (15 October 2009) Last Updated: Nov 5, 2009" "108,943,317,873 bases; 111,355,981 sequences; 7,239,424 subseqs; 573,990 refers." "Software by M. Gouy, Lab. Biometrie et Biologie Evolutive, Universite Lyon I "
+ $ details : chr [1:4] " **** ACNUC Data Base Content **** " " GenBank Release 201 (15 April 2014) Last Updated: Jun 2, 2014" "160,671,579,040 bases; 172,482,713 sequences; 18,666,226 subseqs; 786,167 refers." "Software by M. Gouy, Lab. Biometrie et Biologie Evolutive, Universite Lyon I "
\end{Soutput}
\begin{Sinput}
closebank()
@@ -182,15 +178,15 @@
The components of \texttt{banknameSocket} mean that in the database
called \texttt{genbank} at the compilation time
of this document there were
-\texttt{118,595,406}
+\texttt{191,148,940}
sequences from
-\texttt{686,783}
+\texttt{1,242,014}
species and a total of
-\texttt{15,791,073}
+\texttt{43,531,291}
keywords. The status of the bank was
\texttt{on},
and the release information was
-\texttt{ GenBank Rel. 174 (15 October 2009) Last Updated: Nov 5, 2009}.
+\texttt{ GenBank Release 201 (15 April 2014) Last Updated: Jun 2, 2014}.
For specialized databases, some relevant information is also given in the
\texttt{details} component, for instance:
@@ -252,24 +248,24 @@
\begin{Sinput}
banks <- c(choosebank(), choosebank(tagbank = "TP"))
nbanks <- length(banks)
- ntaxa <- numeric(nbanks)
- for (i in seq_len(nbanks)) {
- bkopenres <- try(choosebank(banks[i]))
- if (inherits(bkopenres, "try-error")) {
- ntaxa[i] <- NA
- }
- else {
- ntaxa[i] <- as.numeric(banknameSocket$totspecs)
- closebank()
- }
+ ntaxa <- numeric(nbanks) # pre-allocate
+ for(i in seq_len(nbanks)){
+ bkopenres <- try(choosebank(banks[i]))
+ if(inherits(bkopenres, "try-error")){
+ ntaxa[i] <- NA
+ } else {
+ ntaxa[i] <- as.numeric(banknameSocket$totspecs)
+ closebank()
+ }
}
names(ntaxa) <- banks
\end{Sinput}
\end{Schunk}
\begin{Schunk}
\begin{Sinput}
- dotchart(log10(ntaxa[order(ntaxa)]), pch = 19, main = "Number of taxa in available databases",
- xlab = "Log10(number of taxa)")
+ dotchart(log10(ntaxa[order(ntaxa)]), pch = 19,
+ main = "Number of taxa in available databases",
+ xlab = "Log10(number of taxa)")
\end{Sinput}
\end{Schunk}
\includegraphics{../figs/getseqacnuc-plottaxaperbank}
@@ -386,15 +382,15 @@
allcds$nelem
\end{Sinput}
\begin{Soutput}
-[1] 7992598
+[1] 20580107
\end{Soutput}
\end{Schunk}
-There are therefore \texttt{7,992,598} coding
+There are therefore \texttt{20,580,107} coding
sequences in this version of GenBank\footnote{
which is stored in the \texttt{release} component of the object \texttt{banknameSocket}
and current value is today (\today): \texttt{banknameSocket\$release =
- GenBank Rel. 174 (15 October 2009) Last Updated: Nov 5, 2009}.
+ GenBank Release 201 (15 April 2014) Last Updated: Jun 2, 2014}.
}.
It would take a long time to get all the information for the elements
of this list, so we have set the parameter \texttt{virtual} to \texttt{TRUE} and the \texttt{req}
@@ -421,11 +417,11 @@
small$nelem
\end{Sinput}
\begin{Soutput}
-[1] 979
+[1] 3346
\end{Soutput}
\end{Schunk}
-There are then \texttt{979} elements in
+There are then \texttt{3,346} elements in
the list \texttt{small}, so that we can safely repeat the previous query without asking for a
virtual list:
@@ -435,8 +431,8 @@
getName(small$req[1:10])
\end{Sinput}
\begin{Soutput}
- [1] "AY191424" "AY386807" "AY386808" "AY386809" "AY386810" "AY386811"
- [7] "AY386812" "AY386813" "AY386814" "AY386815"
+ [1] "AB919117" "AB919118" "AB919119" "AB919120" "AB919121" "AY191424"
+ [7] "AY386807" "AY386808" "AY386809" "AY386810"
\end{Soutput}
\end{Schunk}
@@ -447,59 +443,59 @@
\item[\textbf{Man.}] How many sequences are available for our species?
\begin{Schunk}
\begin{Sinput}
- query("man", "sp=homo sapiens", virtual = T)
+ query("man","sp=homo sapiens",virtual=T)
man$nelem
\end{Sinput}
\begin{Soutput}
-[1] 13042724
+[1] 20581455
\end{Soutput}
\end{Schunk}
-There are \texttt{13,042,724} sequences from \textit{Homo sapiens}.
+There are \texttt{20,581,455} sequences from \textit{Homo sapiens}.
\item[\textbf{Sex.}] How many sequences are annotated with a keyword starting with sex?
\begin{Schunk}
\begin{Sinput}
- query("sex", "k=sex@", virtual = T)
+ query("sex","k=sex@",virtual=T)
sex$nelem
\end{Sinput}
\begin{Soutput}
-[1] 1465
+[1] 2977
\end{Soutput}
\end{Schunk}
-There are \texttt{1,465} such sequences.
+There are \texttt{2,977} such sequences.
\item[\textbf{tRNA.}] How many complete tRNA sequences are available?
\begin{Schunk}
\begin{Sinput}
- query("trna", "t=trna AND NOT k=partial", virtual = T)
+ query("trna","t=trna AND NOT k=partial",virtual=T)
trna$nelem
\end{Sinput}
\begin{Soutput}
-[1] 413347
+[1] 1260401
\end{Soutput}
\end{Schunk}
-There are \texttt{413,347} complete tRNA sequences.
+There are \texttt{1,260,401} complete tRNA sequences.
\item[\textbf{Nature vs. Science.}] In which journal were more sequences published?
\begin{Schunk}
\begin{Sinput}
- query("nature", "j=nature", virtual = T)
+ query("nature","j=nature",virtual=T)
nature$nelem
\end{Sinput}
\begin{Soutput}
-[1] 1992670
+[1] 2619977
\end{Soutput}
\begin{Sinput}
- query("science", "j=science", virtual = T)
+ query("science","j=science",virtual=T)
science$nelem
\end{Sinput}
\begin{Soutput}
-[1] 1530942
+[1] 2227746
\end{Soutput}
\end{Schunk}
-There are \texttt{1,992,670} sequences published
+There are \texttt{2,619,977} sequences published
in \textit{Nature} and
-\texttt{1,530,942} sequences published in
+\texttt{2,227,746} sequences published in
\textit{Science}, so that the winner is
\textit{Nature}.
@@ -510,47 +506,47 @@
\item[\textbf{Smith.}] How many sequences have Smith (last name) as author?
\begin{Schunk}
\begin{Sinput}
- query("smith", "au=smith", virtual = T)
+ query("smith","au=smith",virtual=T)
smith$nelem
\end{Sinput}
\begin{Soutput}
-[1] 4809459
+[1] 6239128
\end{Soutput}
\end{Schunk}
-There are \texttt{4,809,459} such sequences.
+There are \texttt{6,239,128} such sequences.
\item[\textbf{YK2.}] How many sequences were published after year 2000 (included)?
\begin{Schunk}
\begin{Sinput}
- query("yk2", "y>2000", virtual = T)
+ query("yk2","y>2000",virtual=T)
yk2$nelem
\end{Sinput}
\begin{Soutput}
-[1] 99752843
+[1] 160690121
\end{Soutput}
\end{Schunk}
-There are \texttt{99,752,843} sequences published after year 2000.
+There are \texttt{160,690,121} sequences published after year 2000.
\item[\textbf{Organelle contest.}] Do we have more sequences from chloroplast genomes or from mitochondrion genomes?
\begin{Schunk}
\begin{Sinput}
- query("chloro", "o=chloroplast", virtual = T)
+ query("chloro","o=chloroplast",virtual=T)
chloro$nelem
\end{Sinput}
\begin{Soutput}
-[1] 255832
+[1] 644245
\end{Soutput}
\begin{Sinput}
- query("mito", "o=mitochondrion", virtual = T)
+ query("mito","o=mitochondrion",virtual=T)
mito$nelem
\end{Sinput}
\begin{Soutput}
-[1] 815726
+[1] 2235491
\end{Soutput}
\end{Schunk}
-There are \texttt{255,832} sequences from
+There are \texttt{644,245} sequences from
chloroplast genomes and
-\texttt{815,726} sequences from mitochondrion
+\texttt{2,235,491} sequences from mitochondrion
genomes, so that the winner is
mitochondrion.
@@ -632,8 +628,7 @@
\begin{Schunk}
\begin{Sinput}
- substr(getSequence(completeCatsCDS$req[[1]], as.string = TRUE),
- 1, 50)
+ substr(getSequence(completeCatsCDS$req[[1]], as.string = TRUE), 1, 50)
\end{Sinput}
\begin{Soutput}
[1] "atgaccaacattcgaaaatcacacccccttaccaaaattattaatcactc"
@@ -652,9 +647,9 @@
\begin{Schunk}
\begin{Sinput}
- query("trs", "N=AE003734.PE35")
- annots <- getAnnot(trs$req[[1]])
- cat(annots, sep = "\n")
+ query("trs","N=AE003734.PE35")
+ getAnnot(trs$req[[1]]) -> annots
+ cat(annots, sep="\n")
\end{Sinput}
\begin{Soutput}
FT CDS join(complement(153944..154157),complement(153727..153866),
@@ -749,9 +744,9 @@
aacount <- table(getTrans(transspliced$req[[1]]))
aacount <- aacount[order(aacount)]
names(aacount) <- aaa(names(aacount))
- dotchart(aacount, pch = 19, xlab = "Stop and amino-acid counts",
- main = "There is only one stop codon in AE003734.PE35")
- abline(v = 1, lty = 2)
+ dotchart(aacount, pch = 19, xlab = "Stop and amino-acid counts",
+ main = "There is only one stop codon in AE003734.PE35")
+ abline(v=1, lty = 2)
\end{Sinput}
\end{Schunk}
\includegraphics{../figs/getseqacnuc-transp4}
@@ -855,20 +850,20 @@
This part was compiled under the following \Rlogo{}~environment:
\begin{itemize}\raggedright
- \item R version 2.10.0 (2009-10-26), \verb|i386-apple-darwin8.11.1|
- \item Locale: \verb|fr_FR.UTF-8/fr_FR.UTF-8/fr_FR.UTF-8/C/C/C|
+ \item R version 3.1.0 (2014-04-10), \verb|x86_64-apple-darwin13.1.0|
+ \item Locale: \verb|fr_FR.UTF-8/fr_FR.UTF-8/fr_FR.UTF-8/C/fr_FR.UTF-8/fr_FR.UTF-8|
\item Base packages: base, datasets, graphics, grDevices, grid,
methods, stats, utils
- \item Other packages: ade4~1.4-13, ape~2.4, grImport~0.4-4,
- MASS~7.3-3, quadprog~1.4-11, seqinr~2.0-7, tseries~0.10-21,
- XML~2.6-0, xtable~1.5-5, zoo~1.5-8
- \item Loaded via a namespace (and not attached): gee~4.13-14,
- lattice~0.17-26, nlme~3.1-96, tools~2.10.0
+ \item Other packages: ade4~1.6-2, ape~3.1-2, grImport~0.9-0,
+ MASS~7.3-31, seqinr~3.0-11, tseries~0.10-32, XML~3.98-1.1,
+ xtable~1.7-3
+ \item Loaded via a namespace (and not attached): lattice~0.20-29,
+ nlme~3.1-117, quadprog~1.5-5, tools~3.1.0, zoo~1.7-11
\end{itemize}
There were two compilation steps:
\begin{itemize}
- \item \Rlogo{} compilation time was: Thu Nov 5 12:36:14 2009
+ \item \Rlogo{} compilation time was: Fri Jun 6 18:11:38 2014
\item \LaTeX{} compilation time was: \today
\end{itemize}
More information about the Seqinr-commits
mailing list