[Seqinr-commits] r1792 - www/src/mainmatter

Fri Jun 6 18:20:57 CEST 2014

Author: jeanlobry
Date: 2014-06-06 18:20:57 +0200 (Fri, 06 Jun 2014)
New Revision: 1792

Modified:
   www/src/mainmatter/getseqacnuc.rnw
   www/src/mainmatter/getseqacnuc.tex
Log:
compil 3.0-11

Modified: www/src/mainmatter/getseqacnuc.rnw
===================================================================

--- www/src/mainmatter/getseqacnuc.rnw	2014-06-06 11:51:19 UTC (rev 1791)
+++ www/src/mainmatter/getseqacnuc.rnw	2014-06-06 16:20:57 UTC (rev 1792)
@@ -6,6 +6,7 @@
 \author{Charif, D. \and Lobry, J.R.}
 
 \begin{document}
+\SweaveOpts{concordance=TRUE}
 \SweaveInput{../config/commonrnw.rnw}
 \maketitle
 \tableofcontents

Modified: www/src/mainmatter/getseqacnuc.tex
===================================================================
--- www/src/mainmatter/getseqacnuc.tex	2014-06-06 11:51:19 UTC (rev 1791)
+++ www/src/mainmatter/getseqacnuc.tex	2014-06-06 16:20:57 UTC (rev 1792)
@@ -7,6 +7,7 @@
 
 \usepackage{Sweave}
 \begin{document}
+\input{getseqacnuc-concordance}
 %
 % To change the R input/output style:
 %
@@ -75,17 +76,15 @@
  choosebank()
 \end{Sinput}
 \begin{Soutput}
- [1] "genbank"        "embl"           "emblwgs"        "swissprot"     
- [5] "ensembl"        "refseq"         "refseqViruses"  "nrsub"         
- [9] "hobacnucl"      "hobacprot"      "hovergendna"    "hovergen"      
-[13] "hogenom5"       "hogenom5dna"    "hogenom"        "hogenomdna"    
-[17] "hogennucl"      "hogenprot"      "hoverclnu"      "hoverclpr"     
-[21] "homolens3"      "homolens3dna"   "homolens"       "homolensdna"   
-[25] "greviews"       "polymorphix"    "emglib"         "HAMAPnucl"     
-[29] "HAMAPprot"      "taxobacgen"     "apis"           "anopheles"     
-[33] "caenorhabditis" "ciona_savignyi" "danio"          "drosophila"    
-[37] "felis"          "gallus"         "human"          "mouse"         
-[41] "saccharomyces"  "tetraodon"      "xenopus"       
+ [1] "genbank"         "embl"            "emblwgs"         "swissprot"      
+ [5] "ensembl"         "hogenom"         "hogenomdna"      "hovergendna"    
+ [9] "hovergen"        "hogenom5"        "hogenom5dna"     "hogenom4"       
+[13] "hogenom4dna"     "homolens"        "homolensdna"     "hobacnucl"      
+[17] "hobacprot"       "phever2"         "phever2dna"      "refseq"         
+[21] "greviews"        "bacterial"       "protozoan"       "ensprotists"    
+[25] "ensfungi"        "ensmetazoa"      "ensplants"       "ensemblbacteria"
+[29] "mito"            "polymorphix"     "emglib"          "taxobacgen"     
+[33] "refseqViruses"  
 \end{Soutput}
 \end{Schunk}
  
@@ -106,10 +105,10 @@
 1 genbank     on
 2    embl     on
 3 emblwgs     on
-                                                                  info
-1        GenBank Rel. 174 (15 October 2009) Last Updated: Nov  5, 2009
-2 EMBL Library Release 101 (September 2009) Last Updated: Nov  5, 2009
-3    EMBL Whole Genome Shotgun sequences Release 101  (September 2009)
+                                                                         info
+1              GenBank Release 201 (15 April 2014) Last Updated: Jun  2, 2014
+2 EMBL Nucleotide Archive Release 119 (March 2014) Last Updated: Jun  1, 2014
+3               EMBL Whole Genome Shotgun sequences Release 119  (March 2014)
 \end{Soutput}
 \end{Schunk}
 
@@ -142,12 +141,9 @@
  choosebank(tagbank = "TP", infobank = TRUE)
 \end{Sinput}
 \begin{Soutput}
-         bank status                                           info
-1      emblTP     on                            frozen EMBL release
-2 swissprotTP     on                       frozen SwissProt release
-3 hoverprotTP     on    frozen Hovergen release - protein sequences
-4 hovernuclTP     on frozen Hovergen release - nucleotide sequences
-5     trypano     on                        frozen trypano database
+     bank status                    info
+1 trypano     on frozen trypano database
+2  emblTP     on     frozen EMBL release
 \end{Soutput}
 \end{Schunk}
 
@@ -167,12 +163,12 @@
   .. ..- attr(*, "conn_id")=<externalptr> 
  $ bankname: chr "genbank"
  $ banktype: chr "GENBANK"
- $ totseqs : num 1.19e+08
- $ totspecs: num 686783
- $ totkeys : num 15791073
- $ release : chr "         GenBank Rel. 174 (15 October 2009) Last Updated: Nov  5, 2009"
+ $ totseqs : num 1.91e+08
+ $ totspecs: num 1242014
+ $ totkeys : num 43531291
+ $ release : chr "         GenBank Release 201 (15 April 2014) Last Updated: Jun  2, 2014"
  $ status  :Class 'AsIs'  chr "on"
- $ details : chr [1:4] "             ****     ACNUC Data Base Content      ****                         " "         GenBank Rel. 174 (15 October 2009) Last Updated: Nov  5, 2009" "108,943,317,873 bases; 111,355,981 sequences; 7,239,424 subseqs; 573,990 refers." "Software by M. Gouy, Lab. Biometrie et Biologie Evolutive, Universite Lyon I "
+ $ details : chr [1:4] "             ****     ACNUC Data Base Content      ****                         " "         GenBank Release 201 (15 April 2014) Last Updated: Jun  2, 2014" "160,671,579,040 bases; 172,482,713 sequences; 18,666,226 subseqs; 786,167 refers." "Software by M. Gouy, Lab. Biometrie et Biologie Evolutive, Universite Lyon I "
 \end{Soutput}
 \begin{Sinput}
  closebank()
@@ -182,15 +178,15 @@
 The components of \texttt{banknameSocket} mean that in the database
 called \texttt{genbank} at the compilation time
 of this document there were 
-\texttt{118,595,406}
+\texttt{191,148,940}
 sequences from
-\texttt{686,783}
+\texttt{1,242,014}
 species and a total of
-\texttt{15,791,073}
+\texttt{43,531,291}
 keywords. The status of the bank was
 \texttt{on}, 
 and the release information was
-\texttt{         GenBank Rel. 174 (15 October 2009) Last Updated: Nov  5, 2009}.
+\texttt{         GenBank Release 201 (15 April 2014) Last Updated: Jun  2, 2014}.
 For specialized databases, some relevant information is also given in the
 \texttt{details} component, for instance:
 
@@ -252,24 +248,24 @@
 \begin{Sinput}
  banks <- c(choosebank(), choosebank(tagbank = "TP"))
  nbanks <- length(banks)
- ntaxa <- numeric(nbanks)
- for (i in seq_len(nbanks)) {
-     bkopenres <- try(choosebank(banks[i]))
-     if (inherits(bkopenres, "try-error")) {
-         ntaxa[i] <- NA
-     }
-     else {
-         ntaxa[i] <- as.numeric(banknameSocket$totspecs)
-         closebank()
-     }
+ ntaxa <- numeric(nbanks) # pre-allocate
+ for(i in seq_len(nbanks)){
+   bkopenres <- try(choosebank(banks[i]))
+   if(inherits(bkopenres, "try-error")){
+     ntaxa[i] <- NA
+   } else {
+     ntaxa[i] <- as.numeric(banknameSocket$totspecs)
+     closebank()
+   }
  }
  names(ntaxa) <- banks
 \end{Sinput}
 \end{Schunk}
 \begin{Schunk}
 \begin{Sinput}
- dotchart(log10(ntaxa[order(ntaxa)]), pch = 19, main = "Number of taxa in available databases", 
-     xlab = "Log10(number of taxa)")
+ dotchart(log10(ntaxa[order(ntaxa)]), pch = 19,
+ main = "Number of taxa in available databases",
+ xlab = "Log10(number of taxa)")
 \end{Sinput}
 \end{Schunk}
 \includegraphics{../figs/getseqacnuc-plottaxaperbank}
@@ -386,15 +382,15 @@
  allcds$nelem
 \end{Sinput}
 \begin{Soutput}
-[1] 7992598
+[1] 20580107
 \end{Soutput}
 \end{Schunk}
 
-There are therefore \texttt{7,992,598} coding
+There are therefore \texttt{20,580,107} coding
 sequences in this version of GenBank\footnote{
 which is stored in the \texttt{release} component of the object \texttt{banknameSocket}
 and current value is today (\today): \texttt{banknameSocket\$release = 
-         GenBank Rel. 174 (15 October 2009) Last Updated: Nov  5, 2009}.
+         GenBank Release 201 (15 April 2014) Last Updated: Jun  2, 2014}.
 }. 
 It would take a long time to get all the information for the elements
 of this list, so we have set the parameter \texttt{virtual} to \texttt{TRUE} and the \texttt{req}
@@ -421,11 +417,11 @@
  small$nelem
 \end{Sinput}
 \begin{Soutput}
-[1] 979
+[1] 3346
 \end{Soutput}
 \end{Schunk}
 
-There are then \texttt{979} elements in
+There are then \texttt{3,346} elements in
 the list \texttt{small}, so that we can safely repeat the previous query without asking for a
 virtual list:
 
@@ -435,8 +431,8 @@
  getName(small$req[1:10])
 \end{Sinput}
 \begin{Soutput}
- [1] "AY191424" "AY386807" "AY386808" "AY386809" "AY386810" "AY386811"
- [7] "AY386812" "AY386813" "AY386814" "AY386815"
+ [1] "AB919117" "AB919118" "AB919119" "AB919120" "AB919121" "AY191424"
+ [7] "AY386807" "AY386808" "AY386809" "AY386810"
 \end{Soutput}
 \end{Schunk}
 
@@ -447,59 +443,59 @@
 \item[\textbf{Man.}] How many sequences are available for our species?
 \begin{Schunk}
 \begin{Sinput}
- query("man", "sp=homo sapiens", virtual = T)
+ query("man","sp=homo sapiens",virtual=T)
  man$nelem
 \end{Sinput}
 \begin{Soutput}
-[1] 13042724
+[1] 20581455
 \end{Soutput}
 \end{Schunk}
-There are \texttt{13,042,724} sequences from \textit{Homo sapiens}.
+There are \texttt{20,581,455} sequences from \textit{Homo sapiens}.
 
 \item[\textbf{Sex.}] How many sequences are annotated with a keyword starting with sex?
 \begin{Schunk}
 \begin{Sinput}
- query("sex", "k=sex@", virtual = T)
+ query("sex","k=sex@",virtual=T)
  sex$nelem
 \end{Sinput}
 \begin{Soutput}
-[1] 1465
+[1] 2977
 \end{Soutput}
 \end{Schunk}
-There are \texttt{1,465} such sequences.
+There are \texttt{2,977} such sequences.
 
 \item[\textbf{tRNA.}] How many complete tRNA sequences are available?
 \begin{Schunk}
 \begin{Sinput}
- query("trna", "t=trna AND NOT k=partial", virtual = T)
+ query("trna","t=trna AND NOT k=partial",virtual=T)
  trna$nelem
 \end{Sinput}
 \begin{Soutput}
-[1] 413347
+[1] 1260401
 \end{Soutput}
 \end{Schunk}
-There are \texttt{413,347} complete tRNA sequences.
+There are \texttt{1,260,401} complete tRNA sequences.
 
 \item[\textbf{Nature vs. Science.}] In which journal were the most sequences published?
 \begin{Schunk}
 \begin{Sinput}
- query("nature", "j=nature", virtual = T)
+ query("nature","j=nature",virtual=T)
  nature$nelem
 \end{Sinput}
 \begin{Soutput}
-[1] 1992670
+[1] 2619977
 \end{Soutput}
 \begin{Sinput}
- query("science", "j=science", virtual = T)
+ query("science","j=science",virtual=T)
  science$nelem
 \end{Sinput}
 \begin{Soutput}
-[1] 1530942
+[1] 2227746
 \end{Soutput}
 \end{Schunk}
-There are \texttt{1,992,670} sequences published
+There are \texttt{2,619,977} sequences published
 in \textit{Nature} and
-\texttt{1,530,942} sequences published in
+\texttt{2,227,746} sequences published in
 \textit{Science}, so that the winner is 
 \textit{Nature}.
 
@@ -510,47 +506,47 @@
 \item[\textbf{Smith.}] How many sequences have Smith (last name) as author?
 \begin{Schunk}
 \begin{Sinput}
- query("smith", "au=smith", virtual = T)
+ query("smith","au=smith",virtual=T)
  smith$nelem
 \end{Sinput}
 \begin{Soutput}
-[1] 4809459
+[1] 6239128
 \end{Soutput}
 \end{Schunk}
-There are \texttt{4,809,459} such sequences.
+There are \texttt{6,239,128} such sequences.
 
 \item[\textbf{YK2.}] How many sequences were published after year 2000 (included)?
 \begin{Schunk}
 \begin{Sinput}
- query("yk2", "y>2000", virtual = T)
+ query("yk2","y>2000",virtual=T)
  yk2$nelem
 \end{Sinput}
 \begin{Soutput}
-[1] 99752843
+[1] 160690121
 \end{Soutput}
 \end{Schunk}
-There are \texttt{99,752,843} sequences published after year 2000.
+There are \texttt{160,690,121} sequences published after year 2000.
 
 \item[\textbf{Organelle contest.}] Do we have more sequences from chloroplast genomes or from mitochondrion genomes?
 \begin{Schunk}
 \begin{Sinput}
- query("chloro", "o=chloroplast", virtual = T)
+ query("chloro","o=chloroplast",virtual=T)
  chloro$nelem
 \end{Sinput}
 \begin{Soutput}
-[1] 255832
+[1] 644245
 \end{Soutput}
 \begin{Sinput}
- query("mito", "o=mitochondrion", virtual = T)
+ query("mito","o=mitochondrion",virtual=T)
  mito$nelem
 \end{Sinput}
 \begin{Soutput}
-[1] 815726
+[1] 2235491
 \end{Soutput}
 \end{Schunk}
-There are \texttt{255,832} sequences from
+There are \texttt{644,245} sequences from
 chloroplast genomes and
-\texttt{815,726} sequences from mitochondrion
+\texttt{2,235,491} sequences from mitochondrion
 genomes, so that the winner is 
 mitochondrion.
 
@@ -632,8 +628,7 @@
 
 \begin{Schunk}
 \begin{Sinput}
- substr(getSequence(completeCatsCDS$req[[1]], as.string = TRUE), 
-     1, 50)
+ substr(getSequence(completeCatsCDS$req[[1]], as.string = TRUE), 1, 50)
 \end{Sinput}
 \begin{Soutput}
 [1] "atgaccaacattcgaaaatcacacccccttaccaaaattattaatcactc"
@@ -652,9 +647,9 @@
 
 \begin{Schunk}
 \begin{Sinput}
- query("trs", "N=AE003734.PE35")
- annots <- getAnnot(trs$req[[1]])
- cat(annots, sep = "\n")
+ query("trs","N=AE003734.PE35")
+ getAnnot(trs$req[[1]]) -> annots
+ cat(annots, sep="\n")
 \end{Sinput}
 \begin{Soutput}
 FT   CDS             join(complement(153944..154157),complement(153727..153866),
@@ -749,9 +744,9 @@
  aacount <- table(getTrans(transspliced$req[[1]]))
  aacount <- aacount[order(aacount)]
  names(aacount) <- aaa(names(aacount))
- dotchart(aacount, pch = 19, xlab = "Stop and amino-acid counts", 
-     main = "There is only one stop codon in AE003734.PE35")
- abline(v = 1, lty = 2)
+ dotchart(aacount, pch = 19, xlab = "Stop and amino-acid counts",
+ main = "There is only one stop codon in AE003734.PE35")
+ abline(v=1, lty = 2)
 \end{Sinput}
 \end{Schunk}
 \includegraphics{../figs/getseqacnuc-transp4}
@@ -855,20 +850,20 @@
 This part was compiled under the following \Rlogo{}~environment:
 
 \begin{itemize}\raggedright
-  \item R version 2.10.0 (2009-10-26), \verb|i386-apple-darwin8.11.1|
-  \item Locale: \verb|fr_FR.UTF-8/fr_FR.UTF-8/fr_FR.UTF-8/C/C/C|
+  \item R version 3.1.0 (2014-04-10), \verb|x86_64-apple-darwin13.1.0|
+  \item Locale: \verb|fr_FR.UTF-8/fr_FR.UTF-8/fr_FR.UTF-8/C/fr_FR.UTF-8/fr_FR.UTF-8|
   \item Base packages: base, datasets, graphics, grDevices, grid,
     methods, stats, utils
-  \item Other packages: ade4~1.4-13, ape~2.4, grImport~0.4-4,
-    MASS~7.3-3, quadprog~1.4-11, seqinr~2.0-7, tseries~0.10-21,
-    XML~2.6-0, xtable~1.5-5, zoo~1.5-8
-  \item Loaded via a namespace (and not attached): gee~4.13-14,
-    lattice~0.17-26, nlme~3.1-96, tools~2.10.0
+  \item Other packages: ade4~1.6-2, ape~3.1-2, grImport~0.9-0,
+    MASS~7.3-31, seqinr~3.0-11, tseries~0.10-32, XML~3.98-1.1,
+    xtable~1.7-3
+  \item Loaded via a namespace (and not attached): lattice~0.20-29,
+    nlme~3.1-117, quadprog~1.5-5, tools~3.1.0, zoo~1.7-11
 \end{itemize}
 There were two compilation steps:
 
 \begin{itemize}
-  \item \Rlogo{} compilation time was: Thu Nov  5 12:36:14 2009
+  \item \Rlogo{} compilation time was: Fri Jun  6 18:11:38 2014
   \item \LaTeX{} compilation time was: \today
 \end{itemize}