[Seqinr-commits] r1801 - www/src/mainmatter

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Jun 6 21:27:54 CEST 2014


Author: jeanlobry
Date: 2014-06-06 21:27:53 +0200 (Fri, 06 Jun 2014)
New Revision: 1801

Modified:
   www/src/mainmatter/nonparastats.rnw
   www/src/mainmatter/nonparastats.tex
Log:
Mouse chromosome 1 now as a single contig in Ensembl

Modified: www/src/mainmatter/nonparastats.rnw
===================================================================
--- www/src/mainmatter/nonparastats.rnw	2014-06-06 18:09:12 UTC (rev 1800)
+++ www/src/mainmatter/nonparastats.rnw	2014-06-06 19:27:53 UTC (rev 1801)
@@ -5,6 +5,7 @@
 \author{Palmeira, L. \and Lobry, J.R.}
 
 \begin{document}
+\SweaveOpts{concordance=TRUE}
 \SweaveInput{../config/commonrnw.rnw}
 \maketitle
 \tableofcontents
@@ -51,7 +52,8 @@
 (omega <- which(x))
 @ 
 
-With one exception, the statistics names are the same as in the ANALSEQ software.
+With one exception\footnote{for GC in the ANALSEQ software renamed as CC here}, 
+the statistics names are the same as in the ANALSEQ software.
 
 As a practical application, we want to study the isochore structure in \textit{Mus
 musculus} chromosome 1 using non-overlapping windows of 100 kb. Data were 
@@ -60,19 +62,17 @@
 %
 % Data saved in *.RData format to avoid long computation
 %
-<<getdatafomensemble,fig=F,eval=FALSE>>=
+<<getdatafomensemble,fig=F,eval=F>>=
 choosebank("ensembl")
-n <- 201
-res <- rep(-1,10*n)
-chr1 <- paste("MOUSE1_",1:n, sep = "")
-i <- 1
-for(frag in chr1){
-	myseq <- gfrag(frag, 1, 10^7)
-	for(w in seq(1, nchar(myseq), by = 10^5)){
-	  res[i] <- GC(s2c(substr(myseq, start = w, stop = w + 10^5 - 1)))
-          i <- i + 1
-  }
+n <- 196 # Mus musculus chromosome size in Mb (ceil of)
+myseq <- gfrag("MOUSE_1", 1, n*10^6)
+res <- rep(-1, 10*n)
+
+for(w in seq(1, nchar(myseq), by = 10^5)){
+  res[i] <- GC(s2c(substr(myseq, start = w, stop = w + 10^5 - 1)))
+  i <- i + 1
 }
+
 res <- res[res >= 0]
 res[res == 0] <- NA
 res <- 100*res
@@ -540,7 +540,7 @@
 chromosome from the Genome Reviews database. Let's use, for instance, 
 the following CDS:
 
-<<querycds,fig=F,eval=F>>=
+<<querycds,fig=F,eval=T>>=
 choosebank("greviews")
 query("coli","N=U00096_GR ET T=CDS ET K=2.3.1.79@")
 sequence <- getSequence(coli$req[[1]])

Modified: www/src/mainmatter/nonparastats.tex
===================================================================
--- www/src/mainmatter/nonparastats.tex	2014-06-06 18:09:12 UTC (rev 1800)
+++ www/src/mainmatter/nonparastats.tex	2014-06-06 19:27:53 UTC (rev 1801)
@@ -6,6 +6,7 @@
 
 \usepackage{Sweave}
 \begin{document}
+\input{nonparastats-concordance}
 %
 % To change the R input/output style:
 %
@@ -67,7 +68,7 @@
 
 \begin{Schunk}
 \begin{Sinput}
- (x <- rep(c(T, F), 10))
+ (x <- rep(c(T,F),10))
 \end{Sinput}
 \begin{Soutput}
  [1]  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE
@@ -108,7 +109,8 @@
 \end{Soutput}
 \end{Schunk}
 
-With one exception, the statistics names are the same as in the ANALSEQ software.
+With one exception\footnote{for GC in the ANALSEQ software renamed as CC here}, 
+the statistics names are the same as in the ANALSEQ software.
 
 As a practical application, we want to study the isochore structure in \textit{Mus
 musculus} chromosome 1 using non-overlapping windows of 100 kb. Data were 
@@ -120,21 +122,16 @@
 \begin{Schunk}
 \begin{Sinput}
  choosebank("ensembl")
- n <- 201
- res <- rep(-1, 10 * n)
- chr1 <- paste("MOUSE1_", 1:n, sep = "")
- i <- 1
- for (frag in chr1) {
-     myseq <- gfrag(frag, 1, 10^7)
-     for (w in seq(1, nchar(myseq), by = 10^5)) {
-         res[i] <- GC(s2c(substr(myseq, start = w, stop = w + 
-             10^5 - 1)))
-         i <- i + 1
-     }
+ n <- 196 # Mus musculus chromosome size in Mb (ceil of)
+ myseq <- gfrag("MOUSE_1", 1, n*10^6)
+ res <- rep(-1, 10*n)
+ for(w in seq(1, nchar(myseq), by = 10^5)){
+   res[i] <- GC(s2c(substr(myseq, start = w, stop = w + 10^5 - 1)))
+   i <- i + 1
  }
  res <- res[res >= 0]
  res[res == 0] <- NA
- res <- 100 * res
+ res <- 100*res
  closebank()
  save(res, file = "chr1.RData")
 \end{Sinput}
@@ -147,19 +144,20 @@
  load("chr1.RData")
  n <- length(res)
  xx <- seq_len(n)/10
- plot(xx, res, type = "l", las = 1, ylab = "G+C content [%]", 
-     main = "Isochores in mouse chromosome 1", xaxt = "n", 
-     xlab = "Position on the chromosome [Mb]")
- axis(1, at = seq(0, 200, by = 10))
+ plot(xx, res, type = "l", las = 1,
+ ylab = "G+C content [%]",
+ main = "Isochores in mouse chromosome 1", xaxt = "n",
+ xlab = "Position on the chromosome [Mb]")
+ axis(1, at = seq(0,200,by=10))
  breaks <- c(0, 37.5, 42.5, 47.5, 52.5, 100)
- lev <- cut(res, breaks = breaks, labels = c("darkblue", "blue", 
-     "yellow", "orange", "red"), ordered = T)
- segments(x0 = xx, y0 = min(res, na.rm = TRUE), x1 = xx, y1 = res, 
-     col = as.character(lev), lend = "butt")
- segments(x0 = xx[is.na(res)], y0 = min(res, na.rm = T), x1 = xx[is.na(res)], 
-     y1 = max(res, na.rm = T), col = grey(0.7))
+ cut(res, breaks = breaks, 
+ labels = c("darkblue", "blue", "yellow", "orange", "red"),
+ ordered=T) -> lev
+ segments(x0 = xx, y0 = min(res, na.rm = TRUE), x1=xx, y1=res,col=as.character(lev),lend="butt")
+ segments(x0 = xx[is.na(res)], y0 = min(res,na.rm=T), x1 = xx[is.na(res)], y1=max(res,na.rm=T),
+ col = grey(0.7))
  lines(xx, res)
- abline(h = breaks, lty = 3)
+ abline(h=breaks, lty = 3)
 \end{Sinput}
 \end{Schunk}
 \includegraphics[width=1.3\textwidth]{../figs/nonparastats-bernardiplot}
@@ -174,18 +172,17 @@
  n <- length(yy)
  xx <- seq_len(n)/10
  hline <- median(yy)
- plot(yy ~ xx, type = "n", axes = FALSE, ann = FALSE)
- polygon(c(xx[1], xx, xx[n]), c(min(yy), yy, min(yy)), col = "black", 
-     border = NA)
+ plot (yy ~ xx, type="n", axes=FALSE, ann=FALSE)
+ polygon(c(xx[1], xx, xx[n]), c(min(yy), yy, min(yy)), col = "black", border=NA)
  usr <- par("usr")
- rect(usr[1], usr[3], usr[2], hline, col = "white", border = NA)
+ rect(usr[1], usr[3], usr[2], hline, col="white", border=NA)
  lines(xx, yy)
- abline(h = hline)
+ abline (h=hline)
  box()
  axis(1)
- axis(2, las = 1)
+ axis(2, las = 1) 
  title(xlab = "Position on the chromosome [Mb]", ylab = "G+C content [%]", 
-     main = "Isochores in mouse chromosome 1")
+ main = "Isochores in mouse chromosome 1")
 \end{Sinput}
 \end{Schunk}
 \includegraphics[width=1.3\textwidth]{../figs/nonparastats-medianplot}
@@ -204,7 +201,7 @@
  tail(appli)
 \end{Sinput}
 \begin{Soutput}
-[1]  TRUE  TRUE  TRUE FALSE  TRUE FALSE
+[1]  TRUE  TRUE FALSE FALSE FALSE  TRUE
 \end{Soutput}
 \end{Schunk}
 
@@ -229,12 +226,12 @@
 
 \begin{Schunk}
 \begin{Sinput}
- SR <- function(bool, N = length(bool), M = sum(bool)) {
-     stopifnot(is.logical(bool))
-     SR <- sum(seq_len(N)[bool])
-     E <- M * (N + 1)/2
-     V <- M * (N + 1) * (N - M)/12
-     return(list(SR = SR, stat = (SR - E)/sqrt(V)))
+ SR <- function(bool, N = length(bool), M = sum(bool)){
+ 	stopifnot(is.logical(bool))
+ 	SR <- sum(seq_len(N)[bool])
+ 	E <- M*(N + 1)/2
+ 	V <- M*(N + 1)*(N - M)/12
+ 	return(list(SR = SR, stat = (SR - E)/sqrt(V)))
  }
  SR(s2c("**-***-----------") == "*")
 \end{Sinput}
@@ -264,7 +261,7 @@
 \begin{Sinput}
  SRh <- s2c("---------***--***") == "*"
  x <- seq_len(length(SRh))
- x[!SRh] <- -1 * x[!SRh]
+ x[!SRh] <- -1*x[!SRh]
  wilcox.test(x)$statistic
 \end{Sinput}
 \begin{Soutput}
@@ -280,14 +277,13 @@
 \begin{Sinput}
  m <- sum(SRh)
  n <- length(SRh) - m
- pdf <- dwilcox(x = 0:(n * m), m = m, n = n)
- plot(x = 0:(m * n) + m * (m + 1)/2, y = pdf, xlab = "Possible rank sums", 
-     ylab = "Density", main = paste("---------***--*** : N =", 
-         length(SRh), "M =", sum(SRh)), pch = 19)
- points(SR(SRh)$SR, dwilcox(x = SR(SRh)$SR - m * (m + 1)/2, 
-     m = m, n = n), col = "red", pch = 19)
- arrows(x0 = SR(SRh)$SR, y0 = 0.01, x1 = SR(SRh)$SR, y1 = 0.0015, 
-     length = 0.1)
+ dwilcox(x = 0:(n*m), m = m, n = n) -> pdf
+ plot(x = 0:(m*n) + m*(m+1)/2, y = pdf, xlab = "Possible rank sums",
+ ylab = "Density", main = paste("---------***--*** : N =", length(SRh), "M =", sum(SRh)),
+ pch = 19)
+ points(SR(SRh)$SR, dwilcox(x = SR(SRh)$SR - m*(m+1)/2, m = m, n = n), col = "red",
+ pch = 19)
+ arrows(x0 = SR(SRh)$SR, y0 = 0.01, x1 = SR(SRh)$SR, y1 = 0.0015, length = 0.1)
  text(SR(SRh)$SR, 0.01, "Observed\nvalue", pos = 3)
 \end{Sinput}
 \end{Schunk}
@@ -300,7 +296,7 @@
  SR(appli)$stat
 \end{Sinput}
 \begin{Soutput}
-[1] 10.08087
+[1] 10.73121
 \end{Soutput}
 \end{Schunk}
 
@@ -327,12 +323,12 @@
 
 \begin{Schunk}
 \begin{Sinput}
- VR <- function(bool, N = length(bool), M = sum(bool)) {
-     stopifnot(is.logical(bool))
-     VR <- sum((seq_len(N)[bool] - (N + 1)/2)^2)
-     E <- (M * (N + 1) * (N - 1))/12
-     V <- (M * (N - M) * (N + 1) * (N + 2) * (N - 2))/180
-     return(list(VR = VR, stat = (VR - E)/sqrt(V)))
+ VR <- function(bool, N = length(bool), M = sum(bool)){
+ 	stopifnot(is.logical(bool))
+ 	VR <- sum ((seq_len(N)[bool] - (N + 1)/2)^2)
+ 	E <- (M*(N + 1)*(N - 1))/12
+ 	V <- (M*(N - M)*(N + 1)*(N + 2)*(N - 2))/180
+ 	return(list(VR = VR, stat = (VR - E)/sqrt(V)))
  }
  VR(s2c("------****-------") == "*")
 \end{Sinput}
@@ -341,7 +337,7 @@
 [1] 6
 
 $stat
-[1] -2.337860
+[1] -2.33786
 \end{Soutput}
 \begin{Sinput}
  VR(s2c("***----------****") == "*")
@@ -361,10 +357,9 @@
 \begin{Schunk}
 \begin{Sinput}
  VRh <- s2c("***----------****") == "*"
- simVR <- replicate(5000, VR(sample(VRh))$VR)
- hist(simVR, col = grey(0.7), main = paste("***----------**** : N =", 
-     length(VRh), "M =", sum(VRh)), xlab = "Possible rank variances", 
-     proba = TRUE)
+ replicate(5000, VR(sample(VRh))$VR) -> simVR
+ hist(simVR, col = grey(0.7), main = paste("***----------**** : N =",
+ length(VRh), "M =", sum(VRh)), xlab = "Possible rank variances", proba = TRUE)
  lines(density(simVR), lwd = 2)
  arrows(VR(VRh)$VR, 0.004, VR(VRh)$VR, 0, le = 0.1)
 \end{Sinput}
@@ -378,7 +373,7 @@
  VR(appli)$stat
 \end{Sinput}
 \begin{Soutput}
-[1] 4.618334
+[1] 4.43128
 \end{Soutput}
 \end{Schunk}
 
@@ -428,17 +423,16 @@
 
 \begin{Schunk}
 \begin{Sinput}
- CC <- function(bool, N = length(bool), M = sum(bool)) {
-     stopifnot(is.logical(bool))
-     C <- median(seq_len(N)[bool])
-     GC <- sum(abs(seq_len(N)[bool] - C))
-     E <- ((N + 1) * floor(M/2) * floor((M + 1)/2))/(M + 1)
-     if (M%%2 == 1) 
-         V <- ((M - 1) * (M + 3) * (N + 1) * (N - M))/(48 * 
-             (M + 2))
-     else V <- (M * (N + 1) * (N - M) * (M^2 + 2 * M + 4))/(48 * 
-         (M + 1)^2)
-     return(list(GC = GC, stat = (GC - E)/sqrt(V)))
+ CC <- function(bool, N = length(bool), M = sum(bool)){
+ 	stopifnot(is.logical(bool))
+ 	C <- median(seq_len(N)[bool])
+ 	GC <- sum(abs(seq_len(N)[bool] - C))
+ 	E <- ((N + 1)*floor(M/2)*floor((M + 1)/2))/(M + 1)
+ 	if(M %% 2 == 1)
+ 	  V <- ((M - 1)*(M + 3)*(N + 1)*(N - M))/(48*(M + 2))
+ 	else
+ 	  V <- (M*(N + 1)*(N - M)*(M^2 + 2*M + 4))/(48*(M + 1)^2)
+ 	return(list(GC = GC, stat = (GC - E)/sqrt(V)))
  }
  CC(s2c("---*****---------") == "*")
 \end{Sinput}
@@ -468,7 +462,7 @@
  CC(appli)$stat
 \end{Sinput}
 \begin{Soutput}
-[1] 3.748402
+[1] 3.25226
 \end{Soutput}
 \end{Schunk}
 
@@ -493,13 +487,13 @@
 
 \begin{Schunk}
 \begin{Sinput}
- NS <- function(bool, N = length(bool), M = sum(bool)) {
-     stopifnot(is.logical(bool))
-     NS <- length(rle(bool)$lengths)
-     DMNmM <- 2 * M * (N - M)
-     E <- DMNmM/N + 1
-     V <- (DMNmM * (DMNmM - N))/(N * N * (N - 1))
-     return(list(NS = NS, stat = (NS - E)/sqrt(V)))
+ NS <- function(bool, N = length(bool), M = sum(bool)){
+ 	stopifnot(is.logical(bool))
+ 	NS <- length(rle(bool)$lengths)
+ 	DMNmM <- 2*M*(N - M)
+ 	E <- DMNmM/N + 1
+ 	V <- (DMNmM*(DMNmM - N))/(N*N*(N - 1))
+ 	return(list(NS = NS, stat = (NS - E)/sqrt(V)))
  }
  NS(s2c("--***---***--***-") == "*")
 \end{Sinput}
@@ -528,7 +522,7 @@
 \begin{Schunk}
 \begin{Sinput}
  library(tseries)
- NSh <- s2c("-*-*-*-*-*-*-*-*-") == "*"
+ s2c("-*-*-*-*-*-*-*-*-") == "*" -> NSh
  tseries::runs.test(as.factor(NSh))$statistic
 \end{Sinput}
 \begin{Soutput}
@@ -544,7 +538,7 @@
  NS(appli)$stat
 \end{Sinput}
 \begin{Soutput}
-[1] -33.75721
+[1] -34.25156
 \end{Soutput}
 \end{Schunk}
 
@@ -580,24 +574,22 @@
 
 \begin{Schunk}
 \begin{Sinput}
- GM <- function(bool, N = length(bool), M = sum(bool)) {
-     stopifnot(is.logical(bool))
-     XGM <- (N - M)/(M + 1)
-     LS0 <- GM <- 0
-     for (i in seq_len(N)) {
-         if (bool[i]) {
-             GM <- GM + (LS0 - XGM)^2
-             LS0 <- 0
-         }
-         else {
-             LS0 <- LS0 + 1
-         }
-     }
-     GM <- (GM + (LS0 - XGM)^2)/M
-     E <- ((N + 1) * (N - M))/((M + 1) * (M + 2))
-     V <- (4 * (N - M - 1) * (N + 1) * (N + 2) * (N - M))/(M * 
-         (M + 2)^2 * (M + 3) * (M + 4))
-     return(list(GM = GM, stat = (GM - E)/sqrt(V)))
+ GM <-function(bool, N = length(bool), M = sum(bool)){
+ 	stopifnot(is.logical(bool))
+ 	XGM <- (N - M)/(M + 1)
+ 	LS0 <- GM <- 0
+ 	for(i in seq_len(N)){
+ 		if(bool[i]){
+ 			GM <- GM + (LS0 - XGM)^2
+ 			LS0 <- 0
+ 		} else {
+ 			LS0 <- LS0 + 1
+ 		}
+ 	}
+ 	GM <- (GM + (LS0 - XGM)^2)/M
+ 	E <- ((N + 1)*(N - M))/((M +1)*(M + 2))
+   V <- (4*(N - M - 1)*(N + 1)*(N + 2)*(N - M))/(M*(M + 2)^2*(M + 3)*(M + 4))
+ 	return(list(GM = GM, stat = (GM - E)/sqrt(V)))
  }
  GM(s2c("-*-*-*-*-*-*-*-*-") == "*")
 \end{Sinput}
@@ -627,7 +619,7 @@
  GM(appli)$stat
 \end{Sinput}
 \begin{Soutput}
-[1] 301.4908
+[1] 353.0584
 \end{Soutput}
 \end{Schunk}
 
@@ -675,21 +667,20 @@
  n <- 500
  di <- 4
  lseq <- 6000
- rhoseq <- replicate(n, rho(sample(s2c("acgt"), size = lseq, 
-     replace = TRUE)))
- x <- seq(min(rhoseq[di, ]), max(rhoseq[di, ]), length.out = 1000)
- y <- dnorm(x, mean = mean(rhoseq[di, ]), sd = sd(rhoseq[di, 
-     ]))
- histo <- hist(rhoseq[di, ], plot = FALSE)
- plot(histo, freq = FALSE, xlab = expression(paste(rho, " statistic")), 
-     main = paste("Distribution for dinucleotide", toupper(labels(rhoseq)[[1]][di]), 
-         "on", n, "random sequences"), las = 1, col = grey(0.8), 
-     border = grey(0.5), ylim = c(0, max(c(y, histo$density))))
+ rhoseq <- replicate(n, rho(sample(s2c('acgt'), size = lseq, replace = TRUE)))
+ x <- seq(min(rhoseq[di,]), max(rhoseq[di,]), length.out = 1000)
+ y <- dnorm(x, mean = mean(rhoseq[di,]), sd = sd(rhoseq[di,]))
+ histo <- hist(rhoseq[di,], plot = FALSE)
+ plot(histo, freq = FALSE, xlab = expression(paste(rho, " statistic")),
+   main = paste("Distribution for dinucleotide", 
+   toupper(labels(rhoseq)[[1]][di]), "on", n, "random sequences"),
+   las = 1, col = grey(0.8), border = grey(0.5),
+   ylim = c(0, max(c(y, histo$density))))
  lines(x, y, lty = 1, col = "red")
  abline(v = 1, lty = 3, col = "blue", lwd = 2)
- legend("topleft", inset = 0.01, legend = c("normal fit", expression(paste(rho, 
-     " = 1"))), lty = c(1, 3), col = c("red", "blue"), lwd = c(1, 
-     2))
+ legend("topleft", inset = 0.01, legend = c("normal fit", 
+   expression(paste(rho, " = 1"))), lty = c(1, 3),
+   col = c("red", "blue"), lwd = c(1, 2))
 \end{Sinput}
 \end{Schunk}
 
@@ -755,7 +746,7 @@
 \begin{Schunk}
 \begin{Sinput}
  choosebank("greviews")
- query("coli", "N=U00096_GR ET T=CDS ET K=2.3.1.79@")
+ query("coli","N=U00096_GR ET T=CDS ET K=2.3.1.79@")
  sequence <- getSequence(coli$req[[1]])
  annot <- getAnnot(coli$req[[1]])
  closebank()
@@ -766,39 +757,40 @@
 \begin{Schunk}
 \begin{Sinput}
  load("annot.RData")
- cat(annot, sep = "\n")
+ cat(annot, sep="\n")
 \end{Sinput}
 \begin{Soutput}
 FT   CDS             complement(478591..479142)
 FT                   /codon_start=1
-FT                   /evidence="1: Evidence at protein level 
-FT                   {UniProtKB/Swiss-Prot:P77791}"
-FT                   /gene_id="IGI00122745"
-FT                   /gene_name="maa {UniProtKB/Swiss-Prot:P77791}"
+FT                   /evidence="1: Evidence at protein level "
+FT                   /gene_id="IGI28831209"
+FT                   /gene_name="maa"
 FT                   /gene_synonym="ECK0453"
-FT                   /gene_synonym="ylaD {UniProtKB/Swiss-Prot:P77791}"
-FT                   /locus_tag="b0459 {UniProtKB/Swiss-Prot:P77791}"
-FT                   /product="Maltose O-acetyltransferase 
-FT                   {UniProtKB/Swiss-Prot:P77791}"
-FT                   /EC_number="2.3.1.79 {UniProtKB/Swiss-Prot:P77791}"
-FT                   /function="maltose O-acetyltransferase activity 
-FT                   {GO:0008925}"
-FT                   /protein_id="AAC73561.1 {EMBL:U00096}"
-FT                   /db_xref="EMBL:AAB40214.1 {UniProtKB/Swiss-Prot:P77791}"
-FT                   /db_xref="EMBL:CAA11147.1 {UniProtKB/Swiss-Prot:P77791}"
-FT                   /db_xref="EcoGene:EG14239 {UniProtKB/Swiss-Prot:P77791}"
-FT                   /db_xref="GO:0008925 {GOA:P77791}"
-FT                   /db_xref="HOGENOM:HBG282173 {HogenProt:P77791}"
-FT                   /db_xref="HOGENOM:P77791 {UniProtKB/Swiss-Prot:P77791}"
-FT                   /db_xref="InterPro:IPR001451 {UniProtKB/Swiss-Prot:P77791}"
-FT                   /db_xref="PDB:1OCX {UniProtKB/Swiss-Prot:P77791}"
-FT                   /db_xref="UniParc:UPI000002EA96 {EMBL:AAC73561}"
-FT                   /db_xref="UniProtKB/Swiss-Prot:P77791 {EMBL:U00096}"
+FT                   /gene_synonym="ylaD"
+FT                   /locus_tag="b0459"
+FT                   /product="Maltose O-acetyltransferase "
+FT                   /EC_number="2.3.1.79"
+FT                   /function="maltose O-acetyltransferase activity "
+FT                   /protein_id="AAC73561.1"
+FT                   /db_xref="EMBL:AAB40214.1"
+FT                   /db_xref="EMBL:CAA11147.1"
+FT                   /db_xref="EcoGene:EG14239"
+FT                   /db_xref="GO:0008925"
+FT                   /db_xref="HOGENOM:HBG282173"
+FT                   /db_xref="HOGENOM:HBG699746"
+FT                   /db_xref="InterPro:IPR001451"
+FT                   /db_xref="InterPro:IPR011004"
+FT                   /db_xref="InterPro:IPR018357"
+FT                   /db_xref="PDB:1OCX"
+FT                   /db_xref="UniParc:UPI000002EA96"
+FT                   /db_xref="UniProtKB/Swiss-Prot:P77791"
 FT                   /transl_table=11
 FT                   /translation="MSTEKEKMIAGELYRSADETLSRDRLRARQLIHRYNHSLAEEHTL
 FT                   RQQILADLFGQVTEAYIEPTFRCDYGYNIFLGNNFFANFDCVMLDVCPIRIGDNCMLAP
 FT                   GVHIYTATHPIDPVARNSGAELGKPVTIGNNVWIGGRAVINPGVTIGDNVVVASGAVVT
 FT                   KDVPDNVVVGGNPARIIKKL"
+FT                   /%(C+G)="CG<50%"
+FT                   /note="C+G content in third codon positions = 47.8 % "
 \end{Soutput}
 \end{Schunk}
 
@@ -828,25 +820,26 @@
 \begin{Sinput}
  load("sequence.RData")
  rhocoli <- rho(sequence)
- zcolibase <- zscore(sequence, model = "base", exact = TRUE)
- zcolicodon <- zscore(sequence, model = "codon", exact = TRUE)
- par(mfrow = c(3, 1), lend = "butt", oma = c(0, 0, 2, 0), mar = c(3, 
-     4, 0, 2))
+ zcolibase <- zscore(sequence, model = 'base', exact = TRUE)
+ zcolicodon <- zscore(sequence,model = 'codon', exact = TRUE)
+ par(mfrow = c(3, 1), lend = "butt", oma = c(0,0,2,0), mar = c(3,4,0,2))
  col <- c("green", "blue", "orange", "red")
- plot(rhocoli - 1, ylim = c(-0.5, 0.5), las = 1, ylab = expression(rho), 
-     lwd = 10, xaxt = "n", col = col)
+ plot(rhocoli - 1, ylim = c(-0.5,0.5), las = 1, 
+   ylab = expression(rho), lwd = 10, xaxt = "n",
+   col = col)
  axis(1, at = 1:16, labels = toupper(words(2)))
  abline(h = 0)
- plot(zcolibase, ylim = c(-2.5, 2.5), las = 1, ylab = "zscore with base model", 
-     lwd = 10, xaxt = "n", col = col)
+ plot(zcolibase, ylim = c(-2.5,2.5), las = 1, 
+   ylab = "zscore with base model", lwd = 10, xaxt = "n",
+   col = col)
  axis(1, at = 1:16, labels = toupper(words(2)))
  abline(h = 0)
- plot(zcolicodon, ylim = c(-2.5, 2.5), las = 1, ylab = "zscore with codon model", 
-     lwd = 10, xaxt = "n", col = col)
+ plot(zcolicodon, ylim = c(-2.5,2.5), las = 1, 
+   ylab = "zscore with codon model", lwd = 10, xaxt = "n",
+   col = col)
  axis(1, at = 1:16, labels = toupper(words(2)))
  abline(h = 0)
- mtext("Comparison of the three statistics", outer = TRUE, 
-     cex = 1.5)
+ mtext("Comparison of the three statistics", outer = TRUE, cex = 1.5)
 \end{Sinput}
 \end{Schunk}
 
@@ -940,42 +933,42 @@
 
 \begin{Schunk}
 \begin{Sinput}
- worstcase <- function(gc) {
-     c <- gc
-     t <- (1 - gc)
-     (0.59 * t * t + 0.34 * t * c + 0.07 * c * c)/2
+ worstcase <- function(gc){
+   c <- gc
+   t <- (1-gc)
+   (0.59*t*t + 0.34*t*c + 0.07*c*c)/2
  }
- randomcase <- function(gc) {
-     c <- gc/2
-     t <- (1 - gc)/2
-     0.59 * t * t + 0.34 * t * c + 0.07 * c * c
+ randomcase <- function(gc){
+   c <- gc/2
+   t <- (1-gc)/2
+   0.59*t*t + 0.34*t*c + 0.07*c*c
  }
- bestcase <- function(gc) {
-     c <- (gc)/2
-     t <- (1 - gc)/2
-     if ((c + t) <= 0.5) {
-         0
-     }
-     else {
-         c <- (c + t - 0.5)/2
-         t <- (c + t - 0.5)/2
-         0.59 * t * t + 0.34 * t * c + 0.07 * c * c
-     }
+ bestcase <- function(gc){
+   c <- (gc)/2
+   t <- (1-gc)/2
+   if ((c + t) <= 0.5){
+     0
+   } else {
+   c <- (c + t - 0.5)/2
+   t <- (c + t - 0.5)/2
+   0.59*t*t + 0.34*t*c + 0.07*c*c
+   }
  }
  xval <- seq(from = 0, to = 100, length = 100)
- yrand <- sapply(xval/100, randomcase)
- yworst <- sapply(xval/100, worstcase)
- ybest <- sapply(xval/100, bestcase)
- plot(xval, 100 * yworst, las = 1, type = "l", lwd = 2, lty = 1, 
-     xlab = "G+C content [%]", ylab = "Phototargets weighted density [%]", 
-     main = "Estimated as in Escherichia coli chromosome", 
-     ylim = c(0, max(100 * yworst)))
- points(xval, 100 * yrand, type = "l", lwd = 2, lty = 2)
- points(xval, 100 * ybest, type = "l", lwd = 2, lty = 3)
- abline(v = c(25, 75), lty = 2)
- arrows(25, 25, 75, 25, code = 1, le = 0.1)
- arrows(25, 25, 75, 25, code = 2, le = 0.1)
- text(50, 25, "Biological range", pos = 3)
+ sapply(xval/100, randomcase) -> yrand
+ sapply(xval/100, worstcase) -> yworst
+ sapply(xval/100, bestcase) -> ybest
+ plot(xval, 100*yworst, las = 1, type = "l", lwd = 2, lty = 1 ,
+ xlab = "G+C content [%]",
+ ylab = "Phototargets weighted density [%]",
+ main = "Estimated as in Escherichia coli chromosome",
+ ylim = c(0, max(100*yworst)))
+ points(xval,100*yrand,type='l',lwd=2,lty=2)
+ points(xval,100*ybest,type='l',lwd=2,lty=3)
+ abline(v=c(25,75),lty=2)
+ arrows(25, 25, 75, 25, code = 1,le = 0.1)
+ arrows(25, 25, 75, 25,code = 2,le = 0.1)
+ text(50,25,"Biological range",pos=3)
 \end{Sinput}
 \end{Schunk}
 
@@ -1010,42 +1003,42 @@
 
 \begin{Schunk}
 \begin{Sinput}
- worstcase <- function(gc) {
-     c <- gc
-     t <- (1 - gc)
-     (0.19 * t * t + 0.55 * t * c + 0.26 * c * c)/2
+ worstcase <- function(gc){
+   c <- gc
+   t <- (1-gc)
+   (0.19*t*t + 0.55*t*c + 0.26*c*c)/2
  }
- randomcase <- function(gc) {
-     c <- gc/2
-     t <- (1 - gc)/2
-     0.19 * t * t + 0.55 * t * c + 0.26 * c * c
+ randomcase <- function(gc){
+   c <- gc/2
+   t <- (1-gc)/2
+   0.19*t*t + 0.55*t*c + 0.26*c*c
  }
- bestcase <- function(gc) {
-     c <- (gc)/2
-     t <- (1 - gc)/2
-     if ((c + t) <= 0.5) {
-         0
-     }
-     else {
-         c <- (c + t - 0.5)/2
-         t <- (c + t - 0.5)/2
-         0.19 * t * t + 0.55 * t * c + 0.26 * c * c
-     }
+ bestcase <- function(gc){
+   c <- (gc)/2
+   t <- (1-gc)/2
+   if ((c + t) <= 0.5){
+     0
+   } else {
+   c <- (c + t - 0.5)/2
+   t <- (c + t - 0.5)/2
+   0.19*t*t + 0.55*t*c + 0.26*c*c
+   }
  }
  xval <- seq(from = 0, to = 100, length = 100)
- yrand <- sapply(xval/100, randomcase)
- yworst <- sapply(xval/100, worstcase)
- ybest <- sapply(xval/100, bestcase)
- plot(xval, 100 * yworst, las = 1, type = "l", lwd = 2, lty = 1, 
-     xlab = "G+C content [%]", ylab = "Phototargets weighted density [%]", 
-     main = "Estimated as in Micrococcus lysodeikticus chromosome", 
-     ylim = c(0, max(100 * yworst)))
- points(xval, 100 * yrand, type = "l", lwd = 2, lty = 2)
- points(xval, 100 * ybest, type = "l", lwd = 2, lty = 3)
- abline(v = c(25, 75), lty = 2)
- arrows(25, 25, 75, 25, code = 1, le = 0.1)
- arrows(25, 25, 75, 25, code = 2, le = 0.1)
- text(50, 25, "Biological range", pos = 3)
+ sapply(xval/100, randomcase) -> yrand
+ sapply(xval/100, worstcase) -> yworst
+ sapply(xval/100, bestcase) -> ybest
+ plot(xval, 100*yworst, las = 1, type = "l", lwd = 2, lty = 1 ,
+ xlab = "G+C content [%]",
+ ylab = "Phototargets weighted density [%]",
+ main = "Estimated as in Micrococcus lysodeikticus chromosome",
+ ylim = c(0, max(100*yworst)))
+ points(xval,100*yrand,type='l',lwd=2,lty=2)
+ points(xval,100*ybest,type='l',lwd=2,lty=3)
+ abline(v=c(25,75),lty=2)
+ arrows(25, 25, 75, 25, code = 1,le = 0.1)
+ arrows(25, 25, 75, 25,code = 2,le = 0.1)
+ text(50,25,"Biological range",pos=3)
 \end{Sinput}
 \end{Schunk}
 
@@ -1093,28 +1086,26 @@
 \begin{Schunk}
 \begin{Sinput}
  data(dinucl)
- par(mfrow = c(2, 2), mar = c(4, 4, 0.5, 0.5) + 0.1)
- myplot <- function(x) {
-     plot(dinucl$intergenic[, x], dinucl$coding[, x], xlab = "intergenic", 
-         ylab = "coding", las = 1, ylim = c(-6, 4), xlim = c(-3, 
-             3), cex = 0)
-     rect(-10, -10, -1.96, 10, col = "yellow", border = "yellow")
-     rect(1.96, -10, 10, 10, col = "yellow", border = "yellow")
-     rect(-10, -10, 10, -1.96, col = "yellow", border = "yellow")
-     rect(-10, 1.96, 10, 10, col = "yellow", border = "yellow")
-     abline(v = 0, lty = 3)
-     abline(h = 0, lty = 3)
-     abline(h = -1.96, lty = 2)
-     abline(h = +1.96, lty = 2)
-     abline(v = -1.96, lty = 2)
-     abline(v = +1.96, lty = 2)
-     points(dinucl$intergenic[, x], dinucl$coding[, x], pch = 21, 
-         col = rgb(0.1, 0.1, 0.1, 0.5), bg = rgb(0.5, 0.5, 
-             0.5, 0.5))
-     legend("bottomright", inset = 0.02, legend = paste(substr(x, 
-         1, 1), "p", substr(x, 2, 2), " bias", sep = ""), cex = 1.25, 
-         bg = "white")
-     box()
+ par(mfrow = c(2, 2), mar = c(4,4,0.5,0.5)+0.1)
+ myplot <- function(x){
+   plot(dinucl$intergenic[, x], dinucl$coding[, x],
+   xlab = "intergenic", ylab = "coding", 
+   las = 1, ylim = c(-6, 4), 
+   xlim = c(-3, 3), cex = 0)
+   rect(-10,-10,-1.96,10,col="yellow", border = "yellow")
+   rect(1.96,-10,10,10,col="yellow", border = "yellow")
+   rect(-10,-10,10,-1.96,col="yellow", border = "yellow")
+   rect(-10,1.96,10,10,col="yellow", border = "yellow")
+   abline(v=0,lty=3)
+   abline(h=0,lty=3)
+   abline(h=-1.96,lty=2)
+   abline(h=+1.96,lty=2)
+   abline(v=-1.96,lty=2)
+   abline(v=+1.96,lty=2)
+   points(dinucl$intergenic[, x], dinucl$coding[, x], pch = 21,
+   col = rgb(.1,.1,.1,.5), bg = rgb(.5,.5,.5,.5))
+   legend("bottomright", inset = 0.02, legend = paste(substr(x,1,1), "p", substr(x,2,2), " bias", sep = ""), cex = 1.25, bg = "white")
+   box()
  }
  myplot("CT")
  myplot("TC")
@@ -1170,20 +1161,20 @@
 \begin{Schunk}
 \begin{Sinput}
  data(prochlo)
- oneplot <- function(x) {
-     plot(density(prochlo$BX548174[, x]), ylim = c(0, 0.4), 
-         xlim = c(-4, 4), lty = 3, main = paste(substr(x, 1, 
-             1), "p", substr(x, 2, 2), " bias", sep = ""), 
-         xlab = "", ylab = "", las = 1, type = "n")
-     rect(-10, -1, -1.96, 10, col = "yellow", border = "yellow")
-     rect(1.96, -1, 10, 10, col = "yellow", border = "yellow")
-     lines(density(prochlo$BX548174[, x]), lty = 3)
-     lines(density(prochlo$AE017126[, x]), lty = 2)
-     lines(density(prochlo$BX548175[, x]), lty = 1)
-     abline(v = c(-1.96, 1.96), lty = 5)
-     box()
+ oneplot <- function(x){
+   plot(density(prochlo$BX548174[, x]),
+     ylim = c(0,0.4), xlim = c(-4,4), lty=3,
+     main = paste(substr(x,1,1), "p", substr(x,2,2), " bias", sep = ""),
+     xlab="",ylab="",las=1, type = "n")
+   rect(-10,-1,-1.96,10, col = "yellow", border = "yellow")
+   rect(1.96,-1,10,10, col = "yellow", border = "yellow")
+   lines(density(prochlo$BX548174[, x]),lty=3)
+   lines(density(prochlo$AE017126[, x]),lty=2)
+   lines(density(prochlo$BX548175[, x]),lty=1)
+   abline(v=c(-1.96,1.96),lty=5)
+   box()
  }
- par(mfrow = c(2, 2), mar = c(2, 3, 2, 0.5) + 0.1)
+ par(mfrow=c(2,2),mar=c(2,3,2,0.5) + 0.1)
  oneplot("CT")
  oneplot("TC")
  oneplot("CC")
@@ -1225,47 +1216,47 @@
 %
 \begin{Schunk}
 \begin{Sinput}
- data(prochlo)
- par(oma = c(0, 0, 3, 0), mfrow = c(1, 2), mar = c(5, 4, 0, 
-     0), cex = 1.5)
- example(waterabs, ask = FALSE)
- abline(v = 260, lwd = 2, col = "red")
- par(mar = c(5, 0, 0, 2))
- plot(seq(-5, 3, by = 1), seq(0, 150, length = 9), col = "white", 
+ data(prochlo)  
+   par(oma = c(0, 0, 3, 0), mfrow = c(1, 2), mar = c(5, 4, 0, 0), cex = 1.5)
+   example(waterabs, ask = FALSE) #left figure
+   abline(v=260, lwd = 2, col = "red")
+   par(mar = c(5, 0, 0, 2))
+   plot(seq(-5, 3, by = 1), seq(0, 150, length = 9), col = "white", 
      ann = FALSE, axes = FALSE, xaxs = "i", yaxs = "i")
- axis(1, at = c(-1.96, 0, 1.96), labels = c(-1.96, 0, 1.96))
- lines(rep(-1.96, 2), c(0, 150), lty = 2)
- lines(rep(1.96, 2), c(0, 150), lty = 2)
- title(xlab = "zscore distribution", cex = 1.5, adj = 0.65)
- selcol <- c(6, 8, 14, 16)
- z5 <- prochlo$BX548174[, selcol]
- z120 <- prochlo$AE017126[, selcol]
- z135 <- prochlo$BX548175[, selcol]
- todo <- function(who, xx, col = "black", bottom, loupe) {
-     dst <- density(who[, xx])
-     sel <- which(dst$x >= -3)
-     lines(dst$x[sel], dst$y[sel] * loupe + (bottom), col = col)
- }
- todo2 <- function(who, bottom, loupe) {
+   axis(1, at = c(-1.96, 0, 1.96), labels = c(-1.96, 0, 1.96))
+   lines(rep(-1.96, 2),c(0, 150),lty=2)
+   lines(rep(1.96, 2), c(0, 150),lty=2)
+   title(xlab = "zscore distribution", cex = 1.5, adj = 0.65)
+   selcol <- c(6, 8, 14, 16)
+   z5 <- prochlo$BX548174[, selcol]
+   z120 <- prochlo$AE017126[, selcol]
+   z135 <- prochlo$BX548175[, selcol]
+   todo <- function(who, xx, col = "black", bottom, loupe){
+   	dst <- density(who[, xx])
+   	sel <- which(dst$x >= -3)
+     	lines(dst$x[sel], dst$y[sel]*loupe + (bottom), col = col)
+   }
+   todo2 <- function(who, bottom, loupe){
      todo(who, "CC", "blue", bottom, loupe)
      todo(who, "CT", "red", bottom, loupe)
      todo(who, "TC", "green", bottom, loupe)
      todo(who, "TT", "black", bottom, loupe)
- }
- todo3 <- function(bottom, who, leg, loupe = 90) {
-     lines(c(-5, -3), c(150 - leg, bottom + 20))
-     rect(-3, bottom, 3, bottom + 40)
-     text(-2.6, bottom + 38, paste(leg, "m"))
+   }
+   todo3 <- function(bottom, who, leg, loupe = 90){
+     lines(c(-5,-3), c(150 - leg, bottom + 20))
+     rect(-3,bottom,3,bottom+40)
+     text(-2.6,bottom+38, paste(leg, "m"))
      todo2(who, bottom, loupe)
- }
- todo3(bottom = 110, who = z5, leg = 5)
- todo3(bottom = 50, who = z120, leg = 120)
- todo3(bottom = 5, who = z135, leg = 135)
- legend(-4.5, 110, c("CpC", "CpT", "TpC", "TpT"), lty = 1, 
-     pt.cex = cex, col = c("blue", "red", "green", "black"))
- mtext(expression(paste("Dinucleotide composition for three ", 
-     italic("Prochlorococcus marinus"), " ecotypes")), outer = TRUE, 
-     cex = 2, line = 1)
+   }
+        todo3(bottom = 110, who = z5, leg = 5)
+        todo3(bottom = 50, who = z120, leg = 120)
+        todo3(bottom = 5, who = z135, leg = 135)
+        legend(-4.5,110,c('CpC','CpT','TpC','TpT'),lty=1,pt.cex=cex,
+          col=c('blue','red','green','black'))
+        mtext(expression(paste("Dinucleotide composition for three ", 
+          italic("Prochlorococcus marinus")," ecotypes")), outer = TRUE, cex = 2,
+  line = 1)
+ 
 \end{Sinput}
 \end{Schunk}
 
@@ -1305,20 +1296,20 @@
 This part was compiled under the following \Rlogo{}~environment:
 
 \begin{itemize}\raggedright
-  \item R version 2.10.0 (2009-10-26), \verb|i386-apple-darwin8.11.1|
-  \item Locale: \verb|fr_FR.UTF-8/fr_FR.UTF-8/fr_FR.UTF-8/C/C/C|
+  \item R version 3.1.0 (2014-04-10), \verb|x86_64-apple-darwin13.1.0|
+  \item Locale: \verb|fr_FR.UTF-8/fr_FR.UTF-8/fr_FR.UTF-8/C/fr_FR.UTF-8/fr_FR.UTF-8|
   \item Base packages: base, datasets, graphics, grDevices, grid,
     methods, stats, utils
-  \item Other packages: ade4~1.4-13, ape~2.4, grImport~0.4-4,
-    MASS~7.3-3, quadprog~1.4-11, seqinr~2.0-7, tseries~0.10-21,
-    XML~2.6-0, xtable~1.5-5, zoo~1.5-8
-  \item Loaded via a namespace (and not attached): gee~4.13-14,
-    lattice~0.17-26, nlme~3.1-96, tools~2.10.0
+  \item Other packages: ade4~1.6-2, ape~3.1-2, grImport~0.9-0,
+    MASS~7.3-31, seqinr~3.0-11, tseries~0.10-32, XML~3.98-1.1,
+    xtable~1.7-3
+  \item Loaded via a namespace (and not attached): lattice~0.20-29,
+    nlme~3.1-117, quadprog~1.5-5, tools~3.1.0, zoo~1.7-11
 \end{itemize}
 There were two compilation steps:
 
 \begin{itemize}
-  \item \Rlogo{} compilation time was: Thu Nov  5 14:48:01 2009
+  \item \Rlogo{} compilation time was: Fri Jun  6 21:25:11 2014
   \item \LaTeX{} compilation time was: \today
 \end{itemize}
 



More information about the Seqinr-commits mailing list