[Seqinr-commits] r1761 - in pkg: . R inst/sequences man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Sep 20 14:59:55 CEST 2011


Author: simonpenel
Date: 2011-09-20 14:59:55 +0200 (Tue, 20 Sep 2011)
New Revision: 1761

Added:
   pkg/R/draw.recstat.R
   pkg/R/recstat.R
   pkg/R/test.co.recstat.R
   pkg/R/test.li.recstat.R
   pkg/inst/sequences/ECOUNC.fsa
   pkg/man/draw.recstat.Rd
   pkg/man/recstat.Rd
   pkg/man/test.co.recstat.Rd
   pkg/man/test.li.recstat.Rd
Modified:
   pkg/DESCRIPTION
Log:
Addition of the "recstat" function (Prediction of coding DNA sequences through the use of a  Correspondence Analysis). Guy Perriere is now part of the seqinr project.


Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION	2011-05-30 14:37:05 UTC (rev 1760)
+++ pkg/DESCRIPTION	2011-09-20 12:59:55 UTC (rev 1761)
@@ -1,9 +1,9 @@
 Package: seqinr
-Version: 3.0-3
-Date: 2011-04-05
+Version: 3.0-4
+Date: 2011-09-20
 Title: Biological Sequences Retrieval and Analysis 
-Author: Delphine Charif and Jean R. Lobry and Anamaria Necsulea and Leonor Palmeira and Simon Penel
-Maintainer: Simon Penel <penel at biomserv.univ-lyon1.fr>
+Author: Delphine Charif and Jean R. Lobry and Anamaria Necsulea and Leonor Palmeira and Simon Penel and Guy Perriere
+Maintainer: Simon Penel <simon.penel at univ-lyon1.fr>
 Depends: R (>= 2.10.0)
 Suggests: ade4, segmented
 Description: Exploratory data analysis and data visualization

Added: pkg/R/draw.recstat.R
===================================================================
--- pkg/R/draw.recstat.R	                        (rev 0)
+++ pkg/R/draw.recstat.R	2011-09-20 12:59:55 UTC (rev 1761)
@@ -0,0 +1,185 @@
+##
+# This function draws two graphics, one of the CA of a DNA sequence, and one of
+# start/stop codons positions in the three reading frames. This for the direct or 
+# the reverse strand.
+##
+#v.18.08.2011
+draw.recstat <- function(rec, fac = 1, direct = TRUE, xlim = c(1, seqsize), col = c("red", "blue", "purple"))
+{
+    if (fac < 0 | 4 < fac)
+    { # test if factor is between 1 and 4
+        print("Factor number is not in 1:4.")
+        return()
+    }
+    seq <- rec[[1]] # recovery of elements of list rec
+    sizewin <- rec[[2]]
+    shift <- rec[[3]]
+    seqsize <- rec[[4]]
+    seqname <- rec[[5]]
+    vstopd <- rec[[8]]
+    vstopr <- rec[[9]]
+    vinitd <- rec[[10]]
+    vinitr <- rec[[11]]
+    recd <- rec[[14]]
+    recr <- rec[[15]]
+    if (xlim[1] < 1 | xlim[1] > seqsize)
+    {
+        xlim <- c(1, xlim[2])
+    }
+    if (seqsize < xlim[2] | 1 > xlim[2])
+    {
+        xlim <- c(xlim[1], seqsize)
+    }
+    par(mfrow = c(2, 1), mar = c(0, 4, 4, 2) + 0.1) # division of the window for a closer between plots
+    par(xaxs = "i")
+    seqisize <- floor((dim(recd$li)[1])/3) # number of window by reading frame, we take the integer part
+    if ((dim(recd$li)[1])%%3 == 1) # adaptation of number of window between each reading frame
+    {
+        seqisize1 <- seqisize + 1 # for fr1
+        seqisize2 <- seqisize # for fr2
+    }
+    if ((dim(recd$li)[1])%%3 == 2)
+    {
+        seqisize1 <- seqisize + 1
+        seqisize2 <- seqisize + 1
+    }
+    if ((dim(recd$li)[1])%%3 == 0)
+    {
+        seqisize1 <- seqisize
+        seqisize2 <- seqisize
+    }
+    ##
+    ##direct strand##
+    ##
+    if (direct)
+    { 
+        plot((sizewin/2) + (0:(seqisize1 - 1))*shift, recd$li[1:seqisize1, fac], type = "l", lty = 1,
+            col = col[1], xlim = xlim, ylim = c(min(recd$li[, fac]), max(recd$li[, fac])),
+            main = "Direct strand", xlab = "", ylab = "Factor scores", bty = 'l') # reading frame 1
+        lines((sizewin/2) + (0:(seqisize2 - 1))*shift + 1, recd$li[(seqisize1 + 1):(seqisize1 + seqisize2), fac],
+            lty = 2, col = col[2], ylab = "2") # reading frame 2
+        lines((sizewin/2) + (0:(seqisize - 1))*shift + 2, recd$li[(seqisize1 + seqisize2 + 1):(dim(recd$li)[1]), fac],
+            lty = 3, col = col[3], ylab = "3") # reading frame 3
+        legend("topleft", legend = c(paste("Sequence name:", seqname), paste("Sequence length:", seqsize, "bp")),
+            inset = c(-0.15, -0.2), bty = "n", xpd = TRUE)
+        vstopdindphase <- numeric()
+        if (length(vstopd) > 0)
+        { # test if vector is not empty because problem with modulo
+            vstopdindphase <- sapply(1:length(vstopd), function(x)
+            { # index vector of reading frame of vector vstopd
+                if (vstopd[x]%%3 == 1)
+                {
+                    vstopdindphase <- c(vstopdindphase, 2.5)
+                }
+                else
+                {
+                    if (vstopd[x]%%3 == 2)
+                    {
+                        vstopdindphase <- c(vstopdindphase, 1.5)
+                    }
+                    else
+                    {
+                        vstopdindphase <- c(vstopdindphase, 0.5)
+                    }
+                }
+            })
+        }
+        vinitdindphase <- numeric()
+        if (length(vinitd) > 0)
+        { # test if vector is not empty because problem with modulo
+            vinitdindphase <- sapply(1:length(vinitd), function(x)
+            { # index vector of reading frame of vector vinitd
+                if (vinitd[x]%%3 == 1)
+                {
+                    vinitdindphase <- c(vinitdindphase, 3)
+                }
+                else
+                {
+                    if (vinitd[x]%%3 == 2)
+                    {
+                        vinitdindphase <- c(vinitdindphase, 2)
+                    }
+                    else
+                    {
+                        vinitdindphase <- c(vinitdindphase, 1)
+                    }
+                }
+            })
+        }
+        par(mar = c(5, 4, 3, 2) + 0.1)
+        plot(vstopd, vstopdindphase, pch = 25, cex = 0.7, xlim = xlim, ylim = c(0.25, 3), axes = TRUE,
+            ann = TRUE, tcl = -0.5, bty = 'l', yaxt = 'n', xlab = "Start/Stop positions (bp)",
+            ylab = '', xpd = FALSE) # stop codons positions
+        points(vinitd, vinitdindphase, pch = 24, bg = "slategray", cex = 0.7, col = 'slategray') # start codons positions
+        abline(h = c(3.1, 2.4, 2.1, 1.4,  1.1, 0.4), col = c(col[1], col[1], col[2], col[2], col[3], col[3]),
+            lty = c(1, 1, 2, 2, 3, 3))
+        text(x = (xlim[1]-(xlim[2]-xlim[1])*0.75/6), pos = 4, y = c(2.75, 1.75, 0.75), labels = paste("Ph. ", c(0, 1, 2)), xpd = TRUE)
+    }
+    ##
+    ##reverse strand##
+    ##
+    if (!direct)
+    {    
+        plot((sizewin/2) + (0:(seqisize1 - 1))*shift, recr$li[1:seqisize1, fac], type = "l", lty = 1,
+            col = col[1], xlim = xlim, ylim = c(min(recr$li[, fac]), max(recr$li[, fac])),
+            main = "Reverse strand", xlab = "", ylab = "Factor scores", bty = 'l') # reading frame 1
+        lines((sizewin/2) + (0:(seqisize2-1))*shift + 1, recr$li[(seqisize1 + 1):(seqisize1 + seqisize2), fac],
+            lty = 2, col = col[2], ylab="2") # reading frame 2
+        lines((sizewin/2) + (0:(seqisize - 1))*shift + 2, recr$li[(seqisize1 + seqisize2 + 1):(dim(recr$li)[1]), fac],
+            lty = 3, col = col[3], ylab = "3") # reading frame 3
+        legend("topleft", legend = c(paste("Sequence name:", seqname), paste("Sequence length:", seqsize, "bp")),
+            inset = c(-0.15, -0.2), bty = "n", xpd = TRUE)
+        vstoprindphase <- numeric()
+        if (length(vstopr) > 0)
+        { # test if vector is not empty because problem with modulo
+            vstoprindphase <- sapply(1:length(vstopr), function(x)
+            { # index vector of reading frame of vector vstopr
+                if (vstopr[x]%%3 == 1)
+                {
+                    vstoprindphase <- c(vstoprindphase, 2.5)
+                }
+                else
+                {
+                    if (vstopr[x]%%3 == 2)
+                    {
+                        vstoprindphase <- c(vstoprindphase, 1.5)
+                    }
+                    else
+                    {
+                        vstoprindphase <- c(vstoprindphase, 0.5)
+                    }
+                }
+            })
+        }
+        vinitrindphase <- numeric()
+        if (length(vinitr) > 0)
+        { # test if vector is not empty because problem with modulo
+            vinitrindphase <- sapply(1:length(vinitr), function(x)
+            { # index vector of reading frame of vector vinitr
+                if (vinitr[x]%%3 == 1)
+                {
+                    vinitrindphase <- c(vinitrindphase, 3)
+                }
+                else
+                {
+                    if (vinitr[x]%%3 == 2)
+                    {
+                        vinitrindphase <- c(vinitrindphase, 2)
+                    }
+                    else
+                    {
+                        vinitrindphase <- c(vinitrindphase, 1)
+                    }
+                }
+            })
+        }
+        par(mar = c(5, 4, 3, 2) + 0.1)
+        plot(vstopr, vstoprindphase, pch = 25, cex = 0.7, xlim = xlim, ylim = c(0.25, 3), axes = TRUE,
+            ann = TRUE, tcl = -0.5, bty = 'l', yaxt = 'n', xlab = "Start/Stop positions (bp)",
+            ylab = '', xpd = FALSE) # stop codons positions
+        points(vinitr, vinitrindphase, pch = 24, bg = "slategray", cex = 0.7, col = 'slategray') # start codons positions
+        abline(h = c(3.1, 2.4, 2.1, 1.4,  1.1, 0.4), col = c(col[1], col[1], col[2], col[2], col[3], col[3]),
+            lty = c(1, 1, 2, 2, 3, 3))
+        text(x = (xlim[1]-(xlim[2]-xlim[1])*0.75/6), pos = 4, y = c(2.75, 1.75, 0.75), labels = paste("Ph. ", c(0, 1, 2)), xpd = TRUE)
+    }
+}
\ No newline at end of file


Property changes on: pkg/R/draw.recstat.R
___________________________________________________________________
Added: svn:executable
   + *

Added: pkg/R/recstat.R
===================================================================
--- pkg/R/recstat.R	                        (rev 0)
+++ pkg/R/recstat.R	2011-09-20 12:59:55 UTC (rev 1761)
@@ -0,0 +1,65 @@
+##
+# This function counts the number of each triplet in a sliding window,
+# create a contingency table with triplet composition and then computes
+# a correspondence analysis on this table
+##
+#v.18.08.2011
+recstat <- function(seq, sizewin = 90, shift = 30, seqname = "no name")
+{
+    if (is.character(seq) == FALSE)
+    { # class character if only one sequence, class list if more
+        print("This file has more than one sequence or class is not character.")
+        return()
+    }
+    if ((shift%%3) != 0)
+    { # test if shift give the same reading frame
+        print("The windows are not in the same reading frame, please change shift value to a multiple of 3.")
+        return()
+    }
+    if (sizewin%%3 != 0) 
+    { 
+        print("The length of the window is not a multiple of 3, please change sizewin value.")
+        return()
+    }
+    seqsize <- length(seq) # give the number of 1-mer in the sequence
+    v1 <- seq(from = 1, to = seqsize, by = shift) # start vector of window in reading frame 1
+    v1 <- v1[1:(which((v1 + sizewin) > seqsize)[1])] # suppression if more than one incomplete window
+    v2 <- seq(from = 2, to = seqsize, by = shift)
+    v2 <- v2[1:(which((v2 + sizewin)>seqsize)[1])]
+    v3 <- seq(from = 3, to = seqsize, by = shift)
+    v3 <- v3[1:(which((v3 + sizewin) > seqsize)[1])]
+    vdep <- c(v1, v2, v3) # start vector in the 3 reading frames of direct/reverse strand
+    vind <- c(rep(1, length(v1)), rep(2, length(v2)), rep(3, length(v3))) # index vector of reading frame for each window
+    ##
+    ##direct strand##
+    ##
+    cseq <- c2s(seq)
+    vstopd <- c(words.pos("taa", cseq), words.pos("tag", cseq), words.pos("tga", cseq)) # vector of stop codons positions in direct strand
+    vinitd <- c(words.pos("atg", cseq)) # vector of start codons positions in direct strand
+    resd <- lapply(1:length(vdep), function(x)
+    { # calculation on 3 reading frames of direct strand
+        seq_tmp <- seq[(vdep[x]):(vdep[x] + sizewin - 1)] # temporary window
+        count(seq_tmp, wordsize = 3, start = 0, by = 3) # counting of triplets
+    })
+    ##
+    ##reverse strand##
+    ##
+    seq_reverse <- rev(comp(seq, ambiguous = TRUE)) # creation of reverse strand
+    cseq_reverse <- c2s(seq_reverse)
+    vstopr <- c(words.pos("taa", cseq_reverse), words.pos("tag", cseq_reverse), words.pos("tga", cseq_reverse)) # vector of stop codons positions in reverse strand
+    vinitr <- c(words.pos("atg", cseq_reverse)) # vector of init codons positions in reverse strand    
+    resr <- lapply(1:length(vdep), function(x)
+    { # calculation on 3 reading frames of reverse strand
+        seq_tmp <- seq_reverse[(vdep[x]):(vdep[x] + sizewin - 1)] # temporary window
+        count(seq_tmp, wordsize = 3, start = 0, by = 3) # counting of triplets
+    })
+    resd <- matrix(unlist(resd), byrow = TRUE, ncol = 64) # conversion vector to contingency table
+    resr <- matrix(unlist(resr), byrow = TRUE, ncol = 64)
+    ##
+    ##CA##
+    ##
+    resd.coa <- dudi.coa(resd, scannf = FALSE, nf = 4) # CA on direct strand
+    resr.coa <- dudi.coa(resr, scannf = FALSE, nf = 4) # CA on reverse strand    
+    rec <- list(seq, sizewin, shift, seqsize, seqname, vdep, vind, vstopd, vstopr, vinitd, vinitr, resd, resr, resd.coa, resr.coa)    
+    return(rec)
+}


Property changes on: pkg/R/recstat.R
___________________________________________________________________
Added: svn:executable
   + *

Added: pkg/R/test.co.recstat.R
===================================================================
--- pkg/R/test.co.recstat.R	                        (rev 0)
+++ pkg/R/test.co.recstat.R	2011-09-20 12:59:55 UTC (rev 1761)
@@ -0,0 +1,205 @@
+##
+# This function tests if a region located between two stop codons could be a putative CDS
+#
+# Data used are the factor scores of the CA computed on the windows by recstat function
+##
+#v.18.08.2011
+test.co.recstat <- function(rec, fac = 1, length.min = 150, stop.max = 0.2, win.lim = 0.8, direct = TRUE, level = 0.01) 
+{
+    if (fac < 0 | 4 < fac) 
+	{ # test if factor is between 1 and 4
+        print("Factor number is not in 1:4.")
+        return()
+    }
+    seq <- rec[[1]] # recovery of elements of list n
+    sizewin <- rec[[2]]
+    shift <- rec[[3]]
+    seqsize <- rec[[4]]
+    vstopd <- rec[[8]]
+    vstopr <- rec[[9]]
+    resd <- rec[[12]]
+    resr <- rec[[13]]
+    recd <- rec[[14]]
+    recr <- rec[[15]]
+	if (seqsize < length.min)
+	{
+		print("Seqence length is shorter than minimum distance between two Stop codons.")
+		return()
+	}
+    table.recstat <- function(vstop) 
+	{
+        tabCDS <- numeric() # initialization
+        j <- 0
+        for (i in 2:length(vstop)) 
+		{ # for each stop codons positions vector
+            if ((vstop[i] - vstop[i - 1]) > length.min) 
+			{ # test if space between codons is above the threshold
+                # in each case gets the p-values between each stop codon and range it in vector seg
+                seg <- pvalvec[which((vstop[i - 1] - pvalvec[, 2])/(sizewin + 2) <= stop.max &
+                    (vstop[i] - pvalvec[, 2])/(sizewin + 2) >= (1 - stop.max)), 1]
+                # create a table with calculation on those vectors seg then go to next space inter-codon, each row correspond to a space inter-stop codon
+                k <- 0
+                for (l in 1:length(seg)) 
+				{
+                	if (seg[l] < level) 
+					{
+                		k <- k + 1
+                	}     
+                	if (k/length(seg) > win.lim) 
+					{
+                    	result <- 1
+                	} 
+					else 
+					{
+                    	result <- 0
+                	}
+                }
+                tabCDS <- c(tabCDS, vstop[i - 1]+3, vstop[i]+2, result)
+                j <- j + 1
+            }
+        }
+        tabCDS <- matrix(tabCDS, nrow = j, ncol = 3, byrow = TRUE) # conversion list to table
+        return(tabCDS)
+    }
+    ##
+    ##direct strand##
+    ##
+    if (direct) 
+	{ 
+    	vstopdindphase <- numeric()
+        if (length(vstopd) > 0) 
+		{ # test if vector is not empty because problem with modulo
+            vstopdindphase <- sapply(1:length(vstopd), function(x)
+            { # index vector of reading frame of vector vstopd
+                if (vstopd[x]%%3 == 1) 
+				{
+                    vstopdindphase <- c(vstopdindphase, 1)
+                } 
+				else 
+				{
+                    if (vstopd[x]%%3 == 2) 
+					{
+                        vstopdindphase <- c(vstopdindphase, 2)
+                    } 
+					else 
+					{
+                        vstopdindphase <- c(vstopdindphase, 3)
+                    }
+                }
+            })
+        }
+        vstop1 <- vstopd[vstopdindphase == 1] # vector with only stop codons in reading frame 1
+        vstop2 <- vstopd[vstopdindphase == 2] # vector with only stop codons in reading frame 2
+        vstop3 <- vstopd[vstopdindphase == 3] # vector with only stop codons in reading frame 3
+        vstop1 <- c(vstop1, 1-3, seqsize-(seqsize%%3)-2) # add start and end positions, "-3" and "-2" because of table.recstat()
+        vstop2 <- c(vstop2, 2-3, seqsize-((seqsize-1)%%3)-2)
+        vstop3 <- c(vstop3, 3-3, seqsize-((seqsize-2)%%3)-2)
+        vstop1 <- sort(unique(vstop1)) # sort of the vector
+        vstop2 <- sort(unique(vstop2))
+        vstop3 <- sort(unique(vstop3))
+    	tab <- numeric()
+    	for (i in 1:dim(resd)[1]) 
+		{
+    		if (sum(resd[i, ]) == sizewin/3) 
+			{
+    			for (j in 1:64) 
+				{
+    				tab <- c(tab, rep(recd$co[j, 1], resd[i, j]))
+    			}
+    		} 
+    	}
+    	tab <- matrix(unlist(tab), byrow = TRUE, ncol = sizewin/3)
+    	seqisize <- floor((dim(tab)[1])/3)
+    	phase <- c(rep(1, sizewin/3), rep(2, sizewin/3), rep(3, sizewin/3))
+    	phase <- as.factor(phase)
+    	pvalvec <- numeric()
+    	for (i in 1:seqisize) 
+		{
+    		v1 <- tab[i,]
+    		v2 <- tab[seqisize + i,]
+    		v3 <- tab[2*seqisize + i,]
+    		v <- c(v1, v2, v3)
+    		x <- kruskal.test(v ~ phase)$p.value
+    		pvalvec <- c(pvalvec, x, (i - 1)*shift + 1)
+    	}
+    	pvalvec <- matrix(unlist(pvalvec), byrow = TRUE, ncol = 2)
+#    	plot((sizewin/2) + (0:(seqisize - 1))*shift + 1, pvalvec[,1], type = "l")
+    	tab1 <- table.recstat(vstop1)
+        colnames(tab1) <- c("Start", "End", "CDS") # definition of table headings
+        tab2 <- table.recstat(vstop2)
+        colnames(tab2) <- c("Start", "End", "CDS")
+        tab3 <- table.recstat(vstop3)
+        colnames(tab3) <- c("Start", "End", "CDS")
+        return(list(tab1, tab2, tab3))
+	}
+	##
+    ##reverse strand##
+    ##
+    if (!direct) 
+	{ 
+    	vstoprindphase <- numeric()
+        if (length(vstopr) > 0) 
+		{ 
+            vstoprindphase <- sapply(1:length(vstopr), function(x)
+            { 
+                if (vstopr[x]%%3 == 1) 
+				{
+                    vstoprindphase <- c(vstoprindphase, 1)
+                } 
+				else 
+				{
+                    if (vstopr[x]%%3 == 2) 
+					{
+                        vstoprindphase <- c(vstoprindphase, 2)
+                    } 
+					else 
+					{
+                        vstoprindphase <- c(vstoprindphase, 3)
+                    }
+                }
+            })
+        }
+        vstop1 <- vstopr[vstoprindphase == 1] 
+        vstop2 <- vstopr[vstoprindphase == 2] 
+        vstop3 <- vstopr[vstoprindphase == 3] 
+        vstop1 <- c(vstop1, 1-3, seqsize-(seqsize%%3)-2) # add start and end positions
+        vstop2 <- c(vstop2, 2-3, seqsize-((seqsize-1)%%3)-2)
+        vstop3 <- c(vstop3, 3-3, seqsize-((seqsize-2)%%3)-2)
+        vstop1 <- sort(unique(vstop1)) 
+        vstop2 <- sort(unique(vstop2))
+        vstop3 <- sort(unique(vstop3))
+        tab <- numeric()
+    	for (i in 1:dim(resr)[1]) 
+		{
+    		if (sum(resr[i, ]) == sizewin/3) 
+			{
+    			for (j in 1:64) 
+				{
+    				tab <- c(tab, rep(recr$co[j, 1], resr[i, j]))
+    			}
+    		} 
+    	}
+    	tab <- matrix(unlist(tab), byrow = TRUE, ncol = sizewin/3)
+    	seqisize <- floor((dim(tab)[1])/3)
+    	phase <- c(rep(1, sizewin/3), rep(2, sizewin/3), rep(3, sizewin/3))
+    	phase <- as.factor(phase)
+    	pvalvec <- numeric()
+    	for (i in 1:seqisize) 
+		{
+    		v1 <- tab[i,]
+    		v2 <- tab[seqisize + i,]
+    		v3 <- tab[2*seqisize + i,]
+    		v <- c(v1, v2, v3)
+    		x <- kruskal.test(v ~ phase)$p.value
+    		pvalvec <- c(pvalvec, x, (i - 1)*shift + 1)
+    	}
+    	pvalvec <- matrix(unlist(pvalvec), byrow = TRUE, ncol = 2)
+		tab1 <- table.recstat(vstop1)
+        colnames(tab1) <- c("Start", "End", "CDS") # definition of table headings
+        tab2 <- table.recstat(vstop2)
+        colnames(tab2) <- c("Start", "End", "CDS")
+        tab3 <- table.recstat(vstop3)
+        colnames(tab3) <- c("Start", "End", "CDS")
+        return(list(tab1, tab2, tab3))
+    }
+}
\ No newline at end of file


Property changes on: pkg/R/test.co.recstat.R
___________________________________________________________________
Added: svn:executable
   + *

Added: pkg/R/test.li.recstat.R
===================================================================
--- pkg/R/test.li.recstat.R	                        (rev 0)
+++ pkg/R/test.li.recstat.R	2011-09-20 12:59:55 UTC (rev 1761)
@@ -0,0 +1,180 @@
+##
+# This function tests if a region located between two stop codons could be a putative CDS
+#
+# Data used are the factor scores of the CA computed on the windows by recstat function
+##
+#v.18.08.2011
+test.li.recstat <- function(rec, fac = 1, length.min = 150, stop.max = 0.2, direct = TRUE, level = 0.05)
+{
+    if (fac < 0 | 4 < fac)
+    { # test if factor is between 1 and 4
+        print("Factor number is not in 1:4.")
+        return()
+    }
+    seq <- rec[[1]] # recovery of elements of list n
+    sizewin <- rec[[2]]
+    shift <- rec[[3]]
+    seqsize <- rec[[4]]
+    vdep <- rec[[6]]
+    vind <- rec[[7]]
+    vstopd <- rec[[8]]
+    vstopr <- rec[[9]]
+    recd <- rec[[14]]
+    recr <- rec[[15]]
+    if (seqsize < length.min)
+    {
+        print("Seqence length is shorter than minimum distance between two Stop codons.")
+        return()
+    }
+    table.recstat <- function(vstop, rec, frame)
+    {
+        tabCDS <- numeric() # initialization
+        j <- 0
+        for (i in 2:length(vstop))
+        { # for each stop codons positions vector
+            if ((vstop[i] - vstop[i - 1]) > length.min)
+            { # test if space between codons is above the threshold
+                # in each case gets the values between each stop codon for the 3 reading frames and range it in 3 vector seg
+                seg1 <- rec$li[which((vstop[i - 1] - vdep[1:seqisize1])/sizewin <= stop.max &
+                    (vstop[i] - vdep[1:seqisize1])/sizewin >= (1 - stop.max)), fac]
+                seg2 <- rec$li[(which((vstop[i - 1] - vdep[(seqisize1 + 1):(seqisize1 + seqisize2)])/sizewin <= stop.max
+                    & (vstop[i] - vdep[(seqisize1 + 1):(seqisize1 + seqisize2)])/sizewin >= (1 - stop.max)) + seqisize1), fac]
+                seg3 <- rec$li[(which((vstop[i - 1] - vdep[(seqisize1 + seqisize2 + 1):(length(vdep))])/sizewin <= stop.max
+                    & (vstop[i] - vdep[(seqisize1 + seqisize2 + 1):(length(vdep))])/sizewin >= (1 - stop.max)) + seqisize1 + seqisize2), fac]
+                # create a table with calculation on those vectors seg then go to next space
+                # inter-codon, each row correspond to a space inter-stop codon
+                if (frame == 1)
+                {
+                    test1 <- t.test(seg1, seg2)$p.value
+                    test2 <- t.test(seg1, seg3)$p.value
+                }
+                if (frame == 2)
+                {
+                    test1 <- t.test(seg2, seg1)$p.value
+                    test2 <- t.test(seg2, seg3)$p.value
+                }
+                if (frame == 3)
+                {
+                    test1 <- t.test(seg3, seg1)$p.value
+                    test2 <- t.test(seg3, seg2)$p.value
+                }
+                if (test1 < level & test2 < level)
+                {
+                    result <- 1
+                }
+                else
+                {
+                    result <- 0
+                }
+                tabCDS <- c(tabCDS, vstop[i-1]+3, vstop[i]+2, mean(seg1), mean(seg2), mean(seg3), test1, test2, result)
+                j <- j + 1
+            }
+        }
+        tabCDS <- matrix(tabCDS, nrow = j, ncol = 8, byrow = TRUE) # conversion list to table
+        return(tabCDS)
+    }
+    seqisize <- floor((dim(recd$li)[1])/3) # number of window by reading frame, we take the integer part
+    if ((dim(recd$li)[1])%%3 == 1) # adaptation of number of window between each reading frame
+    {
+        seqisize1 <- seqisize + 1 # for fr1
+        seqisize2 <- seqisize # for fr2
+    }
+    if ((dim(recd$li)[1])%%3 == 2)
+    {
+        seqisize1 <- seqisize + 1
+        seqisize2 <- seqisize + 1
+    }
+    if ((dim(recd$li)[1])%%3 == 0)
+    {
+        seqisize1 <- seqisize
+        seqisize2 <- seqisize
+    }
+    ##
+    ##direct strand##
+    ##
+    if (direct)
+    { 
+        vstopdindphase <- numeric()
+        if (length(vstopd) > 0)
+        { # test if vector is not empty because problem with modulo
+            vstopdindphase <- sapply(1:length(vstopd), function(x) { # index vector of reading frame of vector vstopd
+                if (vstopd[x]%%3 == 1)
+                {
+                    vstopdindphase <- c(vstopdindphase, 1)
+                }
+                else
+                {
+                    if (vstopd[x]%%3 == 2)
+                    {
+                        vstopdindphase <- c(vstopdindphase, 2)
+                    }
+                    else
+                    {
+                        vstopdindphase <- c(vstopdindphase, 3)
+                    }
+                }
+            })
+        }
+        vstop1 <- vstopd[vstopdindphase == 1] # vector with only stop codons in reading frame 1
+        vstop2 <- vstopd[vstopdindphase == 2] # vector with only stop codons in reading frame 2
+        vstop3 <- vstopd[vstopdindphase == 3] # vector with only stop codons in reading frame 3
+        vstop1 <- c(vstop1, 1-3, seqsize-(seqsize%%3)-2) # add start and end positions, "-3" and "-2" because of table.recstat()
+        vstop2 <- c(vstop2, 2-3, seqsize-((seqsize-1)%%3)-2)
+        vstop3 <- c(vstop3, 3-3, seqsize-((seqsize-2)%%3)-2)
+        vstop1 <- sort(unique(vstop1)) # sort of the vector
+        vstop2 <- sort(unique(vstop2))
+        vstop3 <- sort(unique(vstop3))
+        tab1 <- table.recstat(vstop1, recd, 1)
+        colnames(tab1) <- c("Start", "End", "Mean 1", "Mean 2", "Mean 3", "t(1,2)", "t(1,3)", "CDS")
+        tab2 <- table.recstat(vstop2, recd, 2)
+        colnames(tab2) <- c("Start", "End", "Mean 1", "Mean 2", "Mean 3", "t(2,1)", "t(2,3)", "CDS")
+        tab3 <- table.recstat(vstop3, recd, 3)
+        colnames(tab3) <- c("Start", "End", "Mean 1", "Mean 2", "Mean 3", "t(3,1)", "t(3,2)", "CDS")
+        return(list(tab1, tab2, tab3))
+    }
+    ##
+    ##reverse strand##
+    ##
+    if (!direct)
+    { 
+        vstoprindphase <- numeric()
+        if (length(vstopr) > 0)
+        { 
+            vstoprindphase <- sapply(1:length(vstopr), function(x)
+            { 
+                if (vstopr[x]%%3 == 1)
+                {
+                    vstoprindphase <- c(vstoprindphase, 1)
+                }
+                else
+                {
+                    if (vstopr[x]%%3 == 2)
+                    {
+                        vstoprindphase <- c(vstoprindphase, 2)
+                    }
+                    else
+                    {
+                        vstoprindphase <- c(vstoprindphase, 3)
+                    }
+                }
+            })
+        }
+        vstop1 <- vstopr[vstoprindphase == 1] 
+        vstop2 <- vstopr[vstoprindphase == 2] 
+        vstop3 <- vstopr[vstoprindphase == 3] 
+        vstop1 <- c(vstop1, 1-3, seqsize-(seqsize%%3)-2) # add start and end positions
+        vstop2 <- c(vstop2, 2-3, seqsize-((seqsize-1)%%3)-2)
+        vstop3 <- c(vstop3, 3-3, seqsize-((seqsize-2)%%3)-2)
+        vstop1 <- sort(unique(vstop1)) 
+        vstop2 <- sort(unique(vstop2))
+        vstop3 <- sort(unique(vstop3))
+        tab1 <- table.recstat(vstop1, recr, 1)
+        colnames(tab1) <- c("Start", "End", "Mean 1", "Mean 2", "Mean 3", "t(1,2)", "t(1,3)", "CDS")
+        tab2 <- table.recstat(vstop2, recr, 2)
+        colnames(tab2) <- c("Start", "End", "Mean 1", "Mean 2", "Mean 3", "t(2,1)", "t(2,3)", "CDS")
+        tab3 <- table.recstat(vstop3, recr, 3)
+        colnames(tab3) <- c("Start", "End", "Mean 1", "Mean 2", "Mean 3", "t(3,1)", "t(3,2)", "CDS")
+        return(list(tab1, tab2, tab3))
+    }
+}
+


Property changes on: pkg/R/test.li.recstat.R
___________________________________________________________________
Added: svn:executable
   + *

Added: pkg/inst/sequences/ECOUNC.fsa
===================================================================
--- pkg/inst/sequences/ECOUNC.fsa	                        (rev 0)
+++ pkg/inst/sequences/ECOUNC.fsa	2011-09-20 12:59:55 UTC (rev 1761)
@@ -0,0 +1,133 @@
+>ECOUNC
+aaagcaaataaaatttaatttttatcaaaaaaatcataaaaaattgaccggttagactgt
+taacaacaaccaggttttctactgatataactggttacatttaacgccacgttcactctt
+ttgcatcaacaagataacgtggctttttttggtaagcagaaaataagtcattagtgaaaa
+tatcagtctgctaaaaatcggcgctaagaaccatcattggctgttaaaacattattaaaa
+atgtcaatgggtggtttttgttgtgtaaatgtcatttattaaaacagtatctgtttttag
+actgaaatatcataaacttgcaaaggcatcatttgccaagtaaataaatatgctgtgcgc
+gaacatgcgcaatatgtgatctgaagcacgctttatcaccagtgtttacgcgttatttac
+agtttttcatgatcgaacagggttagcagaaaagtcgcaattgtatgcactggaaaaata
+tttaaacatttattcaccttttggctacttattgtttgaaatcacgggggcgcaccgtat
+aatttgaccgctttttgatgcttgactctaagccttaaagaaagttttatacgacacgcg
+gcatacctcgaagggagcaggagtgaaaaacgtgatgtctgtgtcgctcgtgagtcgaaa
+cgttgctcggaagcttctgctcgttcagttactggtggtgatagcaagtggattgctgtt
+cagcctcaaagaccccttctggggcgtctctgcaataagcgggggcctggcagtctttct
+gcctaacgttttgtttatgatatttgcctggcgtcaccaggcgcatacaccagcgaaagg
+ccgggtggcctggacattcgcatttggcgaagctttcaaagttctggcgatgttggtgtt
+actggtggtggcgttggcggttttaaaggcggtattcttgccgctgatcgttacgtgggt
+tttggtgctggtggttcagatactggcaccggctgtaattaacaacaaagggtaaaaggc
+atcatggcttcagaaaatatgacgccgcaggattacataggacaccacctgaataacctt
+cagctggacctgcgtacattctcgctggtggatccacaaaaccccccagccaccttctgg
+acaatcaatattgactccatgttcttctcggtggtgctgggtctgttgttcctggtttta
+ttccgtagcgtagccaaaaaggcgaccagcggtgtgccaggtaagtttcagaccgcgatt
+gagctggtgatcggctttgttaatggtagcgtgaaagacatgtaccatggcaaaagcaag
+ctgattgctccgctggccctgacgatcttcgtctgggtattcctgatgaacctgatggat
+ttactgcctatcgacctgctgccgtacattgctgaacatgtactgggtctgcctgcactg
+cgtgtggttccgtctgcggacgtgaacgtaacgctgtctatggcactgggcgtatttatc
+ctgattctgttctacagcatcaaaatgaaaggcatcggcggcttcacgaaagagttgacg
+ctgcagccgttcaatcactgggcgttcattcctgtcaacttaatccttgaaggggtaagc
+ctgctgtccaaaccagtttcactcggtttgcgactgttcggtaacatgtatgccggtgag
+ctgattttcattctgattgctggtctgttgccgtggtggtcacagtggatcctgaatgtg
+ccgtgggccattttccacatcctgatcattacgctgcaagccttcatcttcatggttctg
+acgatcgtctatctgtcgatggcgtctgaagaacattaatttaccaacactactacgttt
+taactgaaacaaactggagactgtcatggaaaacctgaatatggatctgctgtacatggc
+tgccgctgtgatgatgggtctggcggcaatcggtgctgcgatcggtatcggcatcctcgg
+gggtaaattcctggaaggcgcagcgcgtcaacctgatctgattcctctgctgcgtactca
+gttctttatcgttatgggtctggtggatgctatcccgatgatcgctgtaggtctgggtct
+gtacgtgatgttcgctgtcgcgtagtaagcgttgcttttatttaaagagcaatatcagaa
+cgttaactaaatagaggcattgtgctgtgaatcttaacgcaacaatcctcggccaggcca
+tcgcgtttgtcctgttcgttctgttctgcatgaagtacgtatggccgccattaatggcag
+ccatcgaaaaacgtcaaaaagaaattgctgacggccttgcttccgcagaacgagcacata
+aggaccttgaccttgcaaaggccagcgcgaccgaccagctgaaaaaagcgaaagcggaag
+cccaggtaatcatcgagcaggcgaacaaacgccgctcgcagattctggacgaagcgaaag
+ctgaggcagaacaggaacgtactaaaatcgtggcccaggcgcaggcggaaattgaagccg
+agcgtaaacgtgcccgtgaagagctgcgtaagcaagttgctatcctggctgttgctggcg
+ccgagaagatcatcgaacgttccgtggatgaagctgctaacagcgacatcgtggataaac
+ttgtcgctgaactgtaaggagggaggggctgatgtctgaatttattacggtagctcgccc
+ctacgccaaagcagcttttgactttgccgtcgaacaccaaagtgtagaacgctggcagga
+catgctggcgtttgccgccgaggtaaccaaaaacgaacaaatggcagagcttctctctgg
+cgcgcttgcgccagaaacgctcgccgagtcgtttatcgcagtttgtggtgagcaactgga
+cgaaaacggtcagaacctgattcgggttatggctgaaaatggtcgtcttaacgcgctccc
+ggatgttctggagcagtttattcacctgcgtgccgtgagtgaggctaccgctgaggtaga
+cgtcatttccgctgccgcactgagtgaacaacagctcgcgaaaatttctgctgcgatgga
+aaaacgtctgtcacgcaaagttaagctgaattgcaaaatcgataagtctgtaatggcagg
+cgttatcatccgagcgggtgatatggtcattgatggcagcgtacgcggtcgtcttgagcg
+ccttgcagacgtcttgcagtcttaaggggactggagcatgcaactgaattccaccgaaat
+cagcgaactgatcaagcagcgcattgctcagttcaatgttgtgagtgaagctcacaacga
+aggtactattgtttctgtaagtgacggtgttatccgcattcacggcctggccgattgtat
+gcagggtgaaatgatctccctgccgggtaaccgttacgctatcgcactgaacctcgagcg
+cgactctgtaggtgcggttgttatgggtccgtacgctgaccttgccgaaggcatgaaagt
+taagtgtactggccgtatcctggaagttccggttggccgtggcctgctgggccgtgtggt
+taacactctgggtgcaccaatcgacggtaaaggtccgctggatcacgacggcttctctgc
+tgtagaagcaatcgctccgggcgttatcgaacgtcagtccgtagatcagccggtacagac
+cggttataaagccgttgactccatgatcccaatcggtcgtggtcagcgtgaattgatcat
+cggtgaccgtcagacaggtaaaaccgcactggctatcgatgccatcatcaaccagcgcga
+ttccggtatcaaatgtatctatgtcgctatcggccagaaagcgtccaccatttctaacgt
+ggtacgtaaactggaagagcacggcgcactggctaacaccatcgttgtggtagcaaccgc
+gtctgaatccgctgcactgcaatacctggcacgtatgccggttgcgctaatgggcgaata
+cttccgtgaccgcggtgaagatgcgctgatcatttacgatgacctgtctaaacaggctgt
+tgcttaccgtcagatctccctgctgctccgtcgtccgccaggacgtgaagcattcccggg
+cgacgttttctacctccactctcgtctgctggagcgtgctgcacgtgttaacgccgaata
+cgttgaagccttcaccaaaggtgaagtgaaagggaaaaccggttctctgaccgcactgcc
+gattatcgaaactcaggcgggtgacgtttctgcgttcgttccgaccaacgtaatctccat
+taccgatggtcagatcttcctggaaaccaacctgttcaacgccggtattcgtcctgcggt
+taacccgggtatttccgtatcccgtgttggtggtgcagcacagaccaagatcatgaaaaa
+actgtccggtggtatccgtaccgctctggcacagtatcgtgaactggcagcgttctctca
+gtttgcatccgaccttgacgatgcaacacgtaaccagcttgaccacggtcagaaagtgac
+cgaactgctgaaacagaaacagtatgcgccgatgtccgttgcgcagcagtctctggttct
+gttcgcagcagaacgtggttacctggcggatgttgaactgtcgaaaattggcagcttcga
+agccgctctgctggcttacgtcgaccgtgatcacgctccgttgatgcaagagatcaacca
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/seqinr -r 1761


More information about the Seqinr-commits mailing list