[adegenet-commits] r244 - in pkg: . R man

Sun Jan 25 20:01:20 CET 2009

Author: jombart
Date: 2009-01-25 20:01:20 +0100 (Sun, 25 Jan 2009)
New Revision: 244

Added:
   pkg/R/sequences.R
   pkg/man/sequences.Rd
Modified:
   pkg/DESCRIPTION
   pkg/R/import.R
   pkg/man/import.Rd
Log:
Added DNAbin2genind converter. Package passes the check. Converter seems to work well.


Modified: pkg/DESCRIPTION
===================================================================

--- pkg/DESCRIPTION	2009-01-25 17:39:42 UTC (rev 243)
+++ pkg/DESCRIPTION	2009-01-25 19:01:20 UTC (rev 244)
@@ -4,9 +4,9 @@
 Title: adegenet: a R package for the multivariate analysis of genetic markers.
 Author: Thibaut Jombart <jombart at biomserv.univ-lyon1.fr>, with contributions from Peter Solymos
 Maintainer: Thibaut Jombart <jombart at biomserv.univ-lyon1.fr>
-Suggests: ade4, genetics, hierfstat, spdep, tripack
+Suggests: ade4, genetics, hierfstat, spdep, tripack, ape
 Depends: methods
 Description: Classes and functions for genetic data analysis within the multivariate framework.
 License: GPL (>=2)
 LazyLoad: yes
-Collate: classes.R auxil.R handling.R genind2genpop.R propTyped.R basicMethods.R old2new.R makefreq.R chooseCN.R dist.genpop.R export.R setAs.R gstat.randtest.R HWE.R import.R monmonier.R coords.monmonier.R spca.R spca.rtests.R zzz.R hybridize.R fstat.R propShared.R scale.R colorplot.R loadingplot.R
+Collate: classes.R auxil.R handling.R genind2genpop.R propTyped.R basicMethods.R old2new.R makefreq.R chooseCN.R dist.genpop.R export.R setAs.R gstat.randtest.R HWE.R import.R monmonier.R coords.monmonier.R spca.R spca.rtests.R zzz.R hybridize.R fstat.R propShared.R scale.R colorplot.R loadingplot.R sequences.R

Modified: pkg/R/import.R
===================================================================
--- pkg/R/import.R	2009-01-25 17:39:42 UTC (rev 243)
+++ pkg/R/import.R	2009-01-25 19:01:20 UTC (rev 244)
@@ -92,8 +92,14 @@
 
     ## find or check the number of coding characters, 'ncode'
     if(is.null(sep)){
-        if(!is.null(ncode)) {if(ncode <  max(nchar(X)) ) stop("some character strings exceed the provided ncode.")}
-        if(is.null(ncode)) { ncode <- max(nchar(X)) }
+        if(!is.null(ncode)) {
+            temp <- nchar(X[!is.na(X)])
+            if(ncode <  max(temp) ) stop("some character strings exceed the provided ncode.")
+        }
+        if(is.null(ncode)) {
+            temp <- nchar(X[!is.na(X)])
+            ncode <- max(temp)
+        }
         if((ncode %% ploidy)>0) stop(paste(ploidy,"alleles cannot be coded by a total of",
                                            ncode,"characters", sep=" "))
     }

Added: pkg/R/sequences.R
===================================================================
--- pkg/R/sequences.R	                        (rev 0)
+++ pkg/R/sequences.R	2009-01-25 19:01:20 UTC (rev 244)
@@ -0,0 +1,53 @@
+######################################
+##
+## The code below implements import
+## from alignment data.
+##
+######################################
+
+
+
+################
+# DNAbin2genind
+################
+## Convert an aligned DNAbin object (ape) into a haploid genind of its SNPs.
+##  x: a DNAbin object (matrix, or list of sequences of equal length)
+##  pop: optional population factor, passed as is to df2genind
+##  na.char: characters of the alignment to be treated as missing data
+## Returns the genind built by df2genind from the polymorphic sites only.
+DNAbin2genind <- function(x, pop=NULL, na.char=c("n","-","?")){
+    ## misc checks
+    if(!inherits(x,"DNAbin")) stop("x is not a DNAbin object")
+    if(!require(ape)) stop("The package ape is required.")
+
+    ## DNA bin to matrix of characters
+    x <- as.character(x) # should output a matrix
+    if(is.list(x)) { # if this is a list
+        temp <- unique(sapply(x,length)) # check lengths of sequences
+        if(length(temp)>1) stop("Sequences have different length - please use alignements only.")
+        else{ # if sequences have same length, build the matrix
+            temp <- names(x)
+            x <- t(as.data.frame(x))
+            rownames(x) <- temp
+        }
+    }
+    if(is.null(colnames(x))) {
+        colnames(x) <- seq_len(ncol(x)) # sites are named by their position
+    }
+
+    ## replace NAs first, so that missing data never count as an allele
+    x[x %in% na.char] <- NA
+
+    ## keep only columns with polymorphism (i.e., SNPs)
+    f1 <- function(vec){
+        return(length(unique(vec[!is.na(vec)])) > 1)
+    }
+    toKeep <- apply(x, 2, f1)
+    if(!any(toKeep)) stop("No polymorphic site detected.")
+    x <- x[, toKeep, drop=FALSE] # drop=FALSE: stay a matrix with a single SNP
+
+    ## build output
+    res <- df2genind(x, pop=pop, ploidy=1, ncode=1)
+    res$call <- match.call()
+    return(res)
+} # end DNAbin2genind

Modified: pkg/man/import.Rd
===================================================================
--- pkg/man/import.Rd	2009-01-25 17:39:42 UTC (rev 243)
+++ pkg/man/import.Rd	2009-01-25 19:01:20 UTC (rev 244)
@@ -4,9 +4,10 @@
 \alias{import2genind}
 \title{ Importing data from several softwares to a genind object}
 \description{
-  Their are two ways to import genotyping data to a \linkS4class{genind} object: from
-  a data.frame with a given format (see \code{\link{df2genind}}, or from a file with a
-  recognized extension.\cr
+  There are several ways to import genotype data to a
+  \linkS4class{genind} object: i) from a data.frame with a given format
+  (see \code{\link{df2genind}}), ii) from a file with a recognized
+  extension, or iii) from an alignment of sequences (see \code{\link{DNAbin2genind}}).\cr
   
   The function \code{import2genind} detects the extension of
   the file given in argument and seeks for an appropriate import

Added: pkg/man/sequences.Rd
===================================================================
--- pkg/man/sequences.Rd	                        (rev 0)
+++ pkg/man/sequences.Rd	2009-01-25 19:01:20 UTC (rev 244)
@@ -0,0 +1,39 @@
+\encoding{UTF-8}
+\name{SequencesToGenind}
+\alias{DNAbin2genind}
+\title{ Importing data from an alignment of sequences to a genind object}
+\description{
+  These functions take an alignment of sequences and translate SNPs
+  into a \linkS4class{genind} object. Note that only polymorphic loci
+  are retained.\cr
+
+  Currently, accepted sequence formats are:\cr
+  - DNAbin (ape package): function DNAbin2genind\cr
+  - alignment (seqinr package): to come...
+}
+\usage{
+DNAbin2genind(x, pop=NULL, na.char=c("n","-","?"))
+}
+\arguments{
+ \item{x}{an object containing aligned sequences.}
+ \item{pop}{an optional factor giving the population to which each sequence belongs.}
+ \item{na.char}{a character vector providing values that should be
+ considered as NA.}
+}
+
+\value{an object of the class \linkS4class{genind}}
+
+\seealso{\code{\link{import2genind}}, \code{\link{read.genetix}},
+  \code{\link{read.fstat}}, \code{\link{read.structure}},
+  \code{\link{read.genepop}}, \code{\link[ape]{DNAbin}}.
+}
+\author{Thibaut Jombart \email{jombart at biomserv.univ-lyon1.fr} }
+\examples{
+if(require(ape)){
+data(woodmouse)
+x <- DNAbin2genind(woodmouse)
+x
+genind2df(x)
+}
+}
+\keyword{manip}