[adegenet-commits] r701 - in pkg: . R man

Thu Oct 28 18:00:35 CEST 2010

Author: jombart
Date: 2010-10-28 18:00:35 +0200 (Thu, 28 Oct 2010)
New Revision: 701

Added:
   pkg/man/ascore.Rd
Modified:
   pkg/ChangeLog
   pkg/DESCRIPTION
   pkg/R/dapc.R
   pkg/R/haploGen.R
   pkg/man/adegenet.package.Rd
   pkg/man/dapc.Rd
Log:
New release (1.2-7) fixing ape's bug in as.list.DNAbin.



Modified: pkg/ChangeLog
===================================================================

--- pkg/ChangeLog	2010-10-28 09:42:10 UTC (rev 700)
+++ pkg/ChangeLog	2010-10-28 16:00:35 UTC (rev 701)
@@ -1,3 +1,24 @@
+		CHANGES IN ADEGENET VERSION 1.2-7
+
+
+NEW FEATURES
+
+	o dapc and find.clusters are now stable versions for the published methods.
+
+	o a.score and optim.a.score are released in their beta versions.
+
+	o scatter.dapc is now adapted to the representation of 1-dimensional DAPC results.
+	
+
+BUG FIXES
+
+	o fixed a major issue in the ape package causing haploGen to
+	fail. This is a temporary replacement, waiting for ape to implement
+	the changes (currently, adegenet replaces ape's as.list.DNAbin
+	function with a corrected version).
+
+
+
 			CHANGES IN ADEGENET VERSION 1.2-6
 
 

Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION	2010-10-28 09:42:10 UTC (rev 700)
+++ pkg/DESCRIPTION	2010-10-28 16:00:35 UTC (rev 701)
@@ -1,6 +1,6 @@
 Package: adegenet
 Version: 1.2-7
-Date: 2010/09/22
+Date: 2010/10/28
 Title: adegenet: a R package for the multivariate analysis of genetic markers.
 Author:  Thibaut Jombart <t.jombart at imperial.ac.uk>
   with contributions of: Peter Solymos, Francois Balloux
@@ -12,4 +12,4 @@
 Description: Classes and functions for genetic data analysis within the multivariate framework.
 Collate: classes.R basicMethods.R handling.R auxil.R setAs.R find.clust.R hybridize.R scale.R fstat.R import.R seqTrack.R chooseCN.R genind2genpop.R loadingplot.R sequences.R gstat.randtest.R makefreq.R  colorplot.R  monmonier.R spca.R coords.monmonier.R haploGen.R old2new.R spca.rtests.R dapc.R haploPop.R PCtest.R dist.genpop.R Hs.R propShared.R export.R HWE.R propTyped.R  zzz.R
 License: GPL (>=2)
-LazyLoad: yes
\ No newline at end of file
+LazyLoad: yes

Modified: pkg/R/dapc.R
===================================================================
--- pkg/R/dapc.R	2010-10-28 09:42:10 UTC (rev 700)
+++ pkg/R/dapc.R	2010-10-28 16:00:35 UTC (rev 701)
@@ -484,20 +484,20 @@
 
 
 
-############
-## crossval
-############
-crossval <- function (x, ...) UseMethod("crossval")
+## ############
+## ## crossval
+## ############
+## crossval <- function (x, ...) UseMethod("crossval")
 
-crossval.dapc <- function(){
+## crossval.dapc <- function(){
 
-}
+## }
 
 
 
-###############
-## randtest.dapc
-###############
-##randtest.dapc <- function(x, nperm = 999, ...){
+## ###############
+## ## randtest.dapc
+## ###############
+## ##randtest.dapc <- function(x, nperm = 999, ...){
 
-##} # end randtest.dapc
+## ##} # end randtest.dapc

Modified: pkg/R/haploGen.R
===================================================================
--- pkg/R/haploGen.R	2010-10-28 09:42:10 UTC (rev 700)
+++ pkg/R/haploGen.R	2010-10-28 16:00:35 UTC (rev 701)
@@ -19,6 +19,33 @@
     if(!require(ape)) stop("The ape package is required.")
 
 
+    ## HACK TO FIX APE'S BUG ##
+    env <- environment(as.list.DNAbin)
+    as.list.DNAbin.new <- function (x, ...){
+        if (is.list(x))
+            return(x)
+        if (is.null(dim(x)))
+            obj <- list(x)
+        else {
+            n <- nrow(x)
+            obj <- vector("list", n)
+            for (i in 1:n) obj[[i]] <- x[i, ]
+            names(obj) <- rownames(x)
+        }
+        class(obj) <- "DNAbin"
+        obj
+    }
+
+    as.list.DNAbin <- as.list.DNAbin.new
+    unlockBinding("as.list.DNAbin", env)
+    assignInNamespace("as.list.DNAbin", as.list.DNAbin.new, ns="ape", envir=env)
+    assign("as.list.DNAbin", as.list.DNAbin.new, envir=env)
+    lockBinding("as.list.DNAbin", env)
+
+    ## END HACK ##
+
+
+
     ## HANDLE ARGUMENTS ##
     if(is.numeric(mu)){
         mu.val <- mu
@@ -47,14 +74,15 @@
     ## AUXILIARY FUNCTIONS ##
     ## generate sequence from scratch
     seq.gen <- function(){
-        res <- list(sample(NUCL, size=seq.length, replace=TRUE))
+        ##res <- list(sample(NUCL, size=seq.length, replace=TRUE)) # DNAbin are no longer lists by default
+        res <- sample(NUCL, size=seq.length, replace=TRUE)
         class(res) <- "DNAbin"
         return(res)
     }
 
     ## create substitutions for defined SNPs
     substi <- function(snp){
-        res <- sapply(snp, function(e) sample(setdiff(NUCL,e),1))
+        res <- sapply(1:length(snp), function(e) sample(setdiff(NUCL,e),1)) # ! sapply does not work on DNAbin vectors directly
         class(res) <- "DNAbin"
         return(res)
     }

Modified: pkg/man/adegenet.package.Rd
===================================================================
--- pkg/man/adegenet.package.Rd	2010-10-28 09:42:10 UTC (rev 700)
+++ pkg/man/adegenet.package.Rd	2010-10-28 16:00:35 UTC (rev 701)
@@ -115,9 +115,9 @@
    - \code{\link{Hs}}: computes the average expected heterozygosity by
    population in a \linkS4class{genpop}. Classically used as a measure
    of genetic diversity.\cr
-   - \code{\link{find.clusters}} and \code{\link{dapc}}: implements the
+   - \code{\link{find.clusters}} and \code{\link{dapc}}: implement the
   Discriminant Analysis of Principal Components (DAPC, Jombart et al.,
-   submitted).\cr
+   2010).\cr
    - \code{\link{seqTrack}}: implements the SeqTrack algorithm for
    reconstructing transmission trees of pathogens (Jombart et al.,
    2010) .\cr
@@ -166,7 +166,7 @@
     Package: \tab adegenet\cr
     Type: \tab Package\cr
     Version: \tab 1.2-7\cr
-    Date: \tab 2010-??-?? \cr
+    Date: \tab 2010-10-28 \cr
     License: \tab GPL (>=2)
   }  
 }

Added: pkg/man/ascore.Rd
===================================================================
--- pkg/man/ascore.Rd	                        (rev 0)
+++ pkg/man/ascore.Rd	2010-10-28 16:00:35 UTC (rev 701)
@@ -0,0 +1,96 @@
+\encoding{UTF-8}
+\name{a-score}
+\alias{a.score}
+\alias{optim.a.score}
+\title{Compute and optimize a-score for Discriminant Analysis of Principal Components (DAPC)}
+\description{
+  These functions are under development. Please email the author before
+  using them for published results.
+}
+\usage{
+a.score(x, n.sim=10, \ldots)
+
+optim.a.score(x, n.pca=1:ncol(x$tab), smart=TRUE, n=10, plot=TRUE, n.sim=10, n.da=length(levels(x$grp)), \ldots)
+}
+\arguments{
+\item{x}{a \code{dapc} object.}
+\item{n.pca}{a vector of \code{integers} indicating the number of axes retained in the
+  Principal Component Analysis (PCA) steps of DAPC. \code{n.sim} DAPC
+  will be run for each value in \code{n.pca}, unless the smart approach
+  is used (see details).}
+\item{smart}{a \code{logical} indicating whether a smart, less
+  computer-intensive approach should be used (TRUE, default) or not
+  (FALSE). See details section.}
+\item{n}{an \code{integer} indicating the number of values spanning the
+  range of \code{n.pca} to be used in the smart approach.}
+\item{plot}{a \code{logical} indicating whether the results should be
+  displayed graphically (TRUE, default) or not (FALSE).}
+\item{n.sim}{an \code{integer} indicating the number of simulations to
+  be performed for each number of retained PC.}
+\item{n.da}{an \code{integer} indicating the number of axes retained in the
+  Discriminant Analysis step.}
+\item{\ldots}{further arguments passed to other methods; currently unused.}
+}
+\details{
+  The Discriminant Analysis of Principal Components seeks a reduced
+  space inside which observations are best discriminated into
+  pre-defined groups. One way to assess the quality of the
+  discrimination is looking at re-assignment of individuals to their
+  prior group, successful re-assignment being a sign of strong discrimination.
+
+  However, when the original space is very large, ad hoc solutions can
+  be found, which discriminate very well the sampled individuals but
+  would perform poorly on new samples. In such a case, DAPC
+  re-assignment would be high even for randomly chosen clusters.
+  The a-score measures this bias. It is computed as (Pt-Pr), where Pt is
+  the reassignment probability using the true cluster, and Pr is the
+  reassignment probability for randomly permuted clusters. An a-score
+  close to one is a sign that the DAPC solution is both strongly
+  discriminating and stable, while low values (toward 0 or lower)
+  indicate either weak discrimination or instability of the results.
+
+  The a-score can serve as a criterion for choosing the optimal number of
+  PCs in the PCA step of DAPC, i.e. the number of PC maximizing the
+  a-score. Two procedures are implemented in \code{optim.a.score}. The
+  smart procedure selects evenly distributed numbers of PCs in a
+  pre-defined range, computes the a-score for each, and then interpolates
+  the results using splines, predicting an approximate optimal number of
+  PCs. The other procedure (when \code{smart} is FALSE) performs the
+  computations for all numbers of PCs requested by the user. The 'optimal'
+  number is then the one giving the highest mean a-score (computed over
+  the groups).
+}
+\value{
+  === a.score ===\cr
+ \code{a.score} returns a list with the following components:\cr
+  \item{tab}{a matrix of a-scores with groups in columns and simulations in row.}
+  \item{pop.score}{a vector giving the mean a-score for each population.}
+  \item{mean}{the overall mean a-score.}\cr
+  
+  === optim.a.score ===\cr
+  \code{optim.a.score} returns a list with the following components:\cr
+   \item{pop.score}{a list giving the mean a-score of the populations
+   for each number of retained PC (each element of the list corresponds
+   to a number of retained PCs).}
+  \item{mean}{a vector giving the overall mean a-score for each number
+    of retained PCs.}
+  \item{pred}{(only when \code{smart} is TRUE) the predictions of the
+    spline, given in x and y coordinates.}
+ \item{best}{the optimal number of PCs to be retained.}
+}
+\references{
+  Jombart T, Devillard S and Balloux F  (2010) Discriminant analysis of
+  principal components: a new method for the analysis of genetically
+  structured populations. BMC Genetics 11:94. doi:10.1186/1471-2156-11-94
+}
+\seealso{
+  - \code{\link{find.clusters}}: to identify clusters without prior.
+
+  - \code{\link{dapc}}: the Discriminant Analysis of Principal
+    Components (DAPC)
+}
+\author{ Thibaut Jombart \email{t.jombart at imperial.ac.uk} }
+\examples{
+
+}
+\keyword{multivariate}
\ No newline at end of file

Modified: pkg/man/dapc.Rd
===================================================================
--- pkg/man/dapc.Rd	2010-10-28 09:42:10 UTC (rev 700)
+++ pkg/man/dapc.Rd	2010-10-28 16:00:35 UTC (rev 701)
@@ -4,6 +4,7 @@
 \alias{dapc.data.frame}
 \alias{dapc.matrix}
 \alias{dapc.genind}
+\alias{dapc.dudi}
 \alias{print.dapc}
 \alias{summary.dapc}
 \alias{scatter.dapc}
@@ -37,7 +38,7 @@
 \usage{
 \method{dapc}{data.frame}(x, grp, n.pca=NULL, n.da=NULL, center=TRUE,
      scale=FALSE,var.contrib=FALSE, pca.select=c("nbEig","percVar"),
-    perc.pca=NULL, \ldots)
+    perc.pca=NULL, \ldots, dudi=NULL)
 
 \method{dapc}{matrix}(x, \ldots)
 
@@ -45,6 +46,8 @@
      scale.method=c("sigma", "binom"), truenames=TRUE, all.contrib=FALSE,
      pca.select=c("nbEig","percVar"), perc.pca=NULL, \ldots)
 
+\method{dapc}{dudi}(x, grp, \ldots)
+
 \method{print}{dapc}(x, \dots)
 
 \method{summary}{dapc}(object, \dots)
@@ -112,6 +115,9 @@
 \item{pch}{a \code{numeric} indicating the type of point to be used to indicate
   the prior group of individuals (see \code{\link{points}} documentation for
   more details).}
+\item{dudi}{optionally, a multivariate analysis with the class
+  \code{dudi} (from the ade4 package). If provided, prior PCA will be
+  ignored, and this object will be used as a prior step for variable orthogonalisation.}
 }
 \details{
   The Discriminant Analysis of Principal Components (DAPC) is designed
@@ -167,8 +173,7 @@
 \references{
   Jombart T, Devillard S and Balloux F  (2010) Discriminant analysis of
   principal components: a new method for the analysis of genetically
-  structured populations. BMC Genetics
-  11:94. doi:10.1186/1471-2156-11-94
+  structured populations. BMC Genetics 11:94. doi:10.1186/1471-2156-11-94
 }
 \seealso{
   - \code{\link{find.clusters}}: to identify clusters without prior.