[adegenet-commits] r667 - in pkg: R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Sep 16 13:29:28 CEST 2010
Author: jombart
Date: 2010-09-16 13:29:28 +0200 (Thu, 16 Sep 2010)
New Revision: 667
Modified:
pkg/R/sequences.R
pkg/man/sequences.Rd
Log:
Added a procedure to import proteic sequences (alignments from seqinr).
Modified: pkg/R/sequences.R
===================================================================
--- pkg/R/sequences.R 2010-09-08 17:16:37 UTC (rev 666)
+++ pkg/R/sequences.R 2010-09-16 11:29:28 UTC (rev 667)
@@ -60,6 +60,7 @@
}
toKeep <- apply(x, 2, isPoly)
+ if(sum(toKeep)==0) stop("No polymorphic site detected")
x <- x[,toKeep]
## build output
@@ -75,6 +76,97 @@
+################
+# alignment2genind
+################
+alignment2genind <- function(x, pop=NULL, exp.char=c("a","t","g","c"), na.char="-", polyThres=1/100){
+
+ ## misc checks
+ if(!inherits(x,"alignment")) stop("x is not a alignment object")
+ if(!require(seqinr)) stop("The package seqinr is required.")
+
+
+ ## convert alignment to matrix of characters
+ mat <- sapply(x$seq, s2c, USE.NAMES=FALSE)
+ if(nrow(mat)!=x$nb){
+ mat <- t(mat)
+ }
+
+ rownames(mat) <- x$nam
+
+ if(is.null(colnames(x))) {
+ colnames(mat) <- 1:ncol(mat)
+ }
+
+ ## replace NAs
+ if(is.null(na.char)){
+ if(is.null(exp.char)) stop("both exp.char and na.char are NULL")
+ temp <- paste(exp.char, collapse="", sep="")
+ if(any(exp.char=="-")) {
+ temp <- paste("-",temp, sep="") # string '-' must begin the regexp
+ }
+ temp <- paste("[^", temp, "]", sep="") # anything but the expected is NA
+ mat <- gsub(temp,NA,mat)
+ } else {
+ temp <- paste(na.char, collapse="", sep="")
+ if(any(na.char=="-")) {
+ temp <- paste("-",temp, sep="") # string '-' must start the regexp
+ }
+ temp <- paste("[", temp, "]", sep="")
+ mat <- gsub(temp,NA,mat)
+ }
+
+ ## keep only columns with polymorphism (i.e., SNPs)
+ isPoly <- function(vec){
+ N <- sum(!is.na(vec)) # N: number of sequences
+ temp <- table(vec)/N
+ if(sum(temp > polyThres) >= 2) return(TRUE)
+ return(FALSE)
+ }
+
+ toKeep <- apply(mat, 2, isPoly)
+ if(sum(toKeep)==0) stop("No polymorphic site detected")
+
+ mat <- mat[,toKeep, drop=FALSE]
+
+ ## build output
+ res <- df2genind(mat, pop=pop, ploidy=1, ncode=1, type="codom")
+ res$call <- match.call()
+
+ if(!is.null(x$com)){
+ res at other$com <- x$com
+ }
+
+ return(res)
+} # end alignment2genind
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
## ###############
## ## transiProb
## ###############
Modified: pkg/man/sequences.Rd
===================================================================
--- pkg/man/sequences.Rd 2010-09-08 17:16:37 UTC (rev 666)
+++ pkg/man/sequences.Rd 2010-09-16 11:29:28 UTC (rev 667)
@@ -1,6 +1,7 @@
\encoding{UTF-8}
\name{SequencesToGenind}
\alias{DNAbin2genind}
+\alias{alignment2genind}
\title{ Importing data from an alignement of sequences to a genind object}
\description{
These functions take an alignement of sequences and translate SNPs
@@ -9,11 +10,14 @@
Currently, accepted sequence formats are:\cr
- DNAbin (ape package): function DNAbin2genind\cr
- - alignement (seqinr package): to come...
+ - alignment (seqinr package): function alignment2genind\cr
}
\usage{
DNAbin2genind(x, pop=NULL, exp.char=c("a","t","g","c"), na.char=NULL,
polyThres=1/100)
+
+alignment2genind(x, pop=NULL, exp.char=c("a","t","g","c"), na.char="-", polyThres=1/100)
+
}
\arguments{
\item{x}{an object containing aligned sequences.}
@@ -30,7 +34,7 @@
\seealso{\code{\link{import2genind}}, \code{\link{read.genetix}},
\code{\link{read.fstat}}, \code{\link{read.structure}},
- \code{\link{read.genepop}}, \code{\link[ape]{DNAbin}}.
+ \code{\link{read.genepop}}, \code{\link[ape]{DNAbin}}, \code{\link[seqinr]{as.alignment}}.
}
\author{Thibaut Jombart \email{t.jombart at imperial.ac.uk} }
\examples{
@@ -40,5 +44,25 @@
x
genind2df(x)
}
+
+if(require(seqinr)){
+mase.res <- read.alignment(file = system.file("sequences/test.mase",package = "seqinr"), format = "mase")
+mase.res
+x <- alignment2genind(mase.res)
+x
+locNames(x) # list of polymorphic sites
+genind2df(x)
+
+## look at Euclidean distances
+D <- dist(truenames(x))
+D
+
+if(require(ade4)){
+## summarise with a PCoA
+pco1 <- dudi.pco(D, scannf=FALSE,nf=2)
+scatter(pco1, posi="bottomright")
+title("Principal Coordinate Analysis\n-based on proteic distances-")
}
+}
+}
\keyword{manip}
More information about the adegenet-commits
mailing list