[adegenet-commits] r667 - in pkg: R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Sep 16 13:29:28 CEST 2010


Author: jombart
Date: 2010-09-16 13:29:28 +0200 (Thu, 16 Sep 2010)
New Revision: 667

Modified:
   pkg/R/sequences.R
   pkg/man/sequences.Rd
Log:
Added a procedure to import proteic sequences (alignments from seqinr).


Modified: pkg/R/sequences.R
===================================================================
--- pkg/R/sequences.R	2010-09-08 17:16:37 UTC (rev 666)
+++ pkg/R/sequences.R	2010-09-16 11:29:28 UTC (rev 667)
@@ -60,6 +60,7 @@
     }
 
     toKeep <- apply(x, 2, isPoly)
+    if(sum(toKeep)==0) stop("No polymorphic site detected")
     x <- x[,toKeep]
 
     ## build output
@@ -75,6 +76,97 @@
 
 
 
+################
+# alignment2genind
+################
+alignment2genind <- function(x, pop=NULL, exp.char=c("a","t","g","c"), na.char="-", polyThres=1/100){
+
+    ## misc checks
+    if(!inherits(x,"alignment")) stop("x is not a alignment object")
+    if(!require(seqinr)) stop("The package seqinr is required.")
+
+
+    ## convert alignment to matrix of characters
+    mat <- sapply(x$seq, s2c, USE.NAMES=FALSE)
+    if(nrow(mat)!=x$nb){
+        mat <- t(mat)
+    }
+
+    rownames(mat) <- x$nam
+
+    if(is.null(colnames(x))) {
+        colnames(mat) <- 1:ncol(mat)
+    }
+
+    ## replace NAs
+    if(is.null(na.char)){
+        if(is.null(exp.char)) stop("both exp.char and na.char are NULL")
+        temp <- paste(exp.char, collapse="", sep="")
+        if(any(exp.char=="-")) {
+            temp <- paste("-",temp, sep="") # string '-' must begin the regexp
+        }
+        temp <- paste("[^", temp, "]", sep="") # anything but the expected is NA
+        mat <- gsub(temp,NA,mat)
+    } else {
+        temp <- paste(na.char, collapse="", sep="")
+        if(any(na.char=="-")) {
+            temp <- paste("-",temp, sep="") # string '-' must start the regexp
+        }
+        temp <- paste("[", temp, "]", sep="")
+        mat <- gsub(temp,NA,mat)
+    }
+
+    ## keep only columns with polymorphism (i.e., SNPs)
+    isPoly <- function(vec){
+        N <- sum(!is.na(vec)) # N: number of sequences
+        temp <- table(vec)/N
+        if(sum(temp > polyThres) >= 2) return(TRUE)
+        return(FALSE)
+    }
+
+    toKeep <- apply(mat, 2, isPoly)
+    if(sum(toKeep)==0) stop("No polymorphic site detected")
+
+    mat <- mat[,toKeep, drop=FALSE]
+
+    ## build output
+    res <- df2genind(mat, pop=pop, ploidy=1, ncode=1, type="codom")
+    res$call <- match.call()
+
+    if(!is.null(x$com)){
+        res at other$com <- x$com
+    }
+
+    return(res)
+} # end alignment2genind
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 ## ###############
 ## ## transiProb
 ## ###############

Modified: pkg/man/sequences.Rd
===================================================================
--- pkg/man/sequences.Rd	2010-09-08 17:16:37 UTC (rev 666)
+++ pkg/man/sequences.Rd	2010-09-16 11:29:28 UTC (rev 667)
@@ -1,6 +1,7 @@
 \encoding{UTF-8}
 \name{SequencesToGenind}
 \alias{DNAbin2genind}
+\alias{alignment2genind}
 \title{ Importing data from an alignement of sequences to a genind object}
 \description{
   These functions take an alignement of sequences and translate SNPs
@@ -9,11 +10,14 @@
 
   Currently, accepted sequence formats are:\cr
   - DNAbin (ape package): function DNAbin2genind\cr
-  - alignement (seqinr package): to come...
+  - alignment (seqinr package): function alignment2genind\cr
 }
 \usage{
 DNAbin2genind(x, pop=NULL, exp.char=c("a","t","g","c"), na.char=NULL,
                          polyThres=1/100)
+
+alignment2genind(x, pop=NULL, exp.char=c("a","t","g","c"), na.char="-", polyThres=1/100)
+
 }
 \arguments{
  \item{x}{an object containing aligned sequences.}
@@ -30,7 +34,7 @@
 
 \seealso{\code{\link{import2genind}}, \code{\link{read.genetix}},
   \code{\link{read.fstat}}, \code{\link{read.structure}},
-  \code{\link{read.genepop}}, \code{\link[ape]{DNAbin}}.
+  \code{\link{read.genepop}}, \code{\link[ape]{DNAbin}}, \code{\link[seqinr]{as.alignment}}.
 }
 \author{Thibaut Jombart \email{t.jombart at imperial.ac.uk} }
 \examples{
@@ -40,5 +44,25 @@
 x
 genind2df(x)
 }
+
+if(require(seqinr)){
+mase.res   <- read.alignment(file = system.file("sequences/test.mase",package = "seqinr"), format = "mase")
+mase.res
+x <- alignment2genind(mase.res)
+x
+locNames(x) # list of polymorphic sites
+genind2df(x)
+
+## look at Euclidean distances
+D <- dist(truenames(x))
+D
+
+if(require(ade4)){
+## summarise with a PCoA
+pco1 <- dudi.pco(D, scannf=FALSE,nf=2)
+scatter(pco1, posi="bottomright")
+title("Principal Coordinate Analysis\n-based on proteic distances-")
 }
+}
+}
 \keyword{manip}



More information about the adegenet-commits mailing list