[adegenet-commits] r840 - in pkg: R man

Fri Mar 4 14:31:14 CET 2011

Author: jombart
Date: 2011-03-04 14:31:14 +0100 (Fri, 04 Mar 2011)
New Revision: 840

Modified:
   pkg/R/dapc.R
   pkg/R/glFunctions.R
   pkg/R/loadingplot.R
   pkg/man/dapc.Rd
Log:
DAPC for genlight is OK.
Documented too.



Modified: pkg/R/dapc.R
===================================================================

--- pkg/R/dapc.R	2011-03-04 12:53:58 UTC (rev 839)
+++ pkg/R/dapc.R	2011-03-04 13:31:14 UTC (rev 840)
@@ -190,12 +190,13 @@
 dapc.genlight <- function(x, pop=NULL, n.pca=NULL, n.da=NULL,
                           scale=FALSE,  var.contrib=TRUE,
                           pca.select=c("nbEig","percVar"), perc.pca=NULL, glPca=NULL, ...){
-    ## FIRST CHECKS
+    ## FIRST CHECKS ##
     if(!require(ade4, quiet=TRUE)) stop("ade4 library is required.")
     if(!require(MASS, quiet=TRUE)) stop("MASS library is required.")
-
     if(!inherits(x, "genlight")) stop("x must be a genlight object.")
 
+    pca.select <- match.arg(pca.select)
+
     if(is.null(pop)) {
         pop.fac <- pop(x)
     } else {
@@ -209,7 +210,7 @@
     ## PERFORM PCA ##
     REDUCEDIM <- is.null(glPca)
 
-    if(REDUCEDIM & is.null(n.pca)){ # if no glPca provided
+    if(REDUCEDIM){ # if no glPca provided
         maxRank <- min(c(nInd(x), nLoc(x)))
         pcaX <- glPca(x, center = TRUE, scale = scale, nf=maxRank, loadings=FALSE, returnDotProd = TRUE, ...)
     }
@@ -221,17 +222,31 @@
         }
         pcaX <- glPca
     }
-    cumVar <- 100 * cumsum(pcaX$eig)/sum(pcaX$eig)
 
+    if(is.null(n.pca)){
+        cumVar <- 100 * cumsum(pcaX$eig)/sum(pcaX$eig)
+    }
+
+
     ## select the number of retained PC for PCA
     if(is.null(n.pca) & pca.select=="nbEig"){
-        plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA")
+        if(!REDUCEDIM){
+            myCol <- rep(c("black", "lightgrey"), c(ncol(pcaX$scores),length(pcaX$eig)))
+        } else {
+            myCol <- "black"
+        }
+        plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
         cat("Choose the number PCs to retain (>=1): ")
         n.pca <- as.integer(readLines(n = 1))
     }
 
     if(is.null(perc.pca) & pca.select=="percVar"){
-        plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA")
+        if(!REDUCEDIM){
+            myCol <- rep(c("black", "lightgrey"), c(ncol(pcaX$scores),length(pcaX$eig)))
+        } else {
+            myCol <- "black"
+        }
+        plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
         cat("Choose the percentage of variance to retain (0-100): ")
         nperc.pca <- as.numeric(readLines(n = 1))
     }
@@ -243,14 +258,21 @@
         if(n.pca<1) n.pca <- 1
     }
 
+    if(!REDUCEDIM){
+        if(n.pca > ncol(pcaX$scores)) {
+            n.pca <- ncol(pcaX$scores)
+        }
+    }
 
+
     ## recompute PCA with loadings if needed
     if(REDUCEDIM){
-        pcaX <- glPca(x, center = center, scale = scale, nf=n.pca, loadings=var.contrib, matDotProd = pca1$dotProd)
+        pcaX <- glPca(x, center = TRUE, scale = scale, nf=n.pca, loadings=var.contrib, matDotProd = pcaX$dotProd)
     }
 
 
     ## keep relevant PCs - stored in XU
+    N <- nInd(x)
     X.rank <- sum(pcaX$eig > 1e-14)
     n.pca <- min(X.rank, n.pca)
     if(n.pca >= N) stop("number of retained PCs of PCA is greater than N")
@@ -264,14 +286,14 @@
 
 
     ## PERFORM DA ##
-    ldaX <- lda(XU, grp, tol=1e-30) # tol=1e-30 is a kludge, but a safe (?) one to avoid fancy rescaling by lda.default
+    ldaX <- lda(XU, pop.fac, tol=1e-30) # tol=1e-30 is a kludge, but a safe (?) one to avoid fancy rescaling by lda.default
     if(is.null(n.da)){
-        barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(grp))) )
+        barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(pop.fac))) )
         cat("Choose the number discriminant functions to retain (>=1): ")
         n.da <- as.integer(readLines(n = 1))
     }
 
-    n.da <- min(n.da, length(levels(grp))-1, n.pca) # can't be more than K-1 disc. func., or more than n.pca
+    n.da <- min(n.da, length(levels(pop.fac))-1, n.pca) # can't be more than K-1 disc. func., or more than n.pca
     n.da <- round(n.da)
     predX <- predict(ldaX, dimen=n.da)
 
@@ -281,12 +303,12 @@
     res$n.pca <- n.pca
     res$n.da <- n.da
     res$tab <- XU
-    res$grp <- grp
+    res$grp <- pop.fac
     res$var <- XU.lambda
     res$eig <- ldaX$svd^2
     res$loadings <- ldaX$scaling[, 1:n.da, drop=FALSE]
     res$ind.coord <-predX$x
-    res$grp.coord <- apply(res$ind.coord, 2, tapply, grp, mean)
+    res$grp.coord <- apply(res$ind.coord, 2, tapply, pop.fac, mean)
     res$prior <- ldaX$prior
     res$posterior <- predX$posterior
     res$assign <- predX$class

Modified: pkg/R/glFunctions.R
===================================================================
--- pkg/R/glFunctions.R	2011-03-04 12:53:58 UTC (rev 839)
+++ pkg/R/glFunctions.R	2011-03-04 13:31:14 UTC (rev 840)
@@ -354,6 +354,7 @@
     ## rescale PCs
     res <- list()
     res$eig <- eigRes$values
+    nf <- min(nf, sum(res$eig>1e-10))
     ##res$matprod <- allProd # for debugging
 
     ## use: li = XQU = V\Lambda^(1/2)

Modified: pkg/R/loadingplot.R
===================================================================
--- pkg/R/loadingplot.R	2011-03-04 12:53:58 UTC (rev 839)
+++ pkg/R/loadingplot.R	2011-03-04 13:31:14 UTC (rev 840)
@@ -11,12 +11,12 @@
     if(is.data.frame(x) | is.matrix(x)){
         if(is.null(lab)) {lab <- rownames(x)}
         x <- x[,axis]
-        names(x) <- temp
     } else {
         if(is.null(lab)) {lab <- names(x)}
     }
-    lab <- rep(lab, length=length(x))
 
+    names(x) <- lab <- rep(lab, length=length(x))
+
     if(!is.numeric(x)) stop("x is not numeric")
     if(any(is.na(x))) stop("NA entries in x")
     if(any(x<0)) {

Modified: pkg/man/dapc.Rd
===================================================================
--- pkg/man/dapc.Rd	2011-03-04 12:53:58 UTC (rev 839)
+++ pkg/man/dapc.Rd	2011-03-04 13:31:14 UTC (rev 840)
@@ -5,6 +5,7 @@
 \alias{dapc.matrix}
 \alias{dapc.genind}
 \alias{dapc.dudi}
+\alias{dapc.genlight}
 \alias{print.dapc}
 \alias{summary.dapc}
 \alias{scatter.dapc}
@@ -12,28 +13,35 @@
 \title{Discriminant Analysis of Principal Components (DAPC)}
 \description{
   These functions implement the Discriminant Analysis of Principal Components
-  (DAPC). See 'details' section for a succint description of the method. DAPC
-  implementation calls upon \code{\link[ade4]{dudi.pca}} from the \code{ade4} package and
-  \code{\link[MASS]{lda}} from the \code{MASS} package.
+  (DAPC). See 'details' section for a succint description of the method. 
 
- \code{dapc} performs the DAPC on a \code{data.frame}, a \code{matrix}, or a
- \code{\linkS4class{genind}} object, and returns an object with class
- \code{dapc}. If data are stored in a \code{data.frame} or a \code{matrix},
- these have to be quantitative data (i.e., \code{numeric} or \code{integers}),
- as opposed to \code{characters} or \code{factors}.
+ \code{dapc} is a generic function performing the DAPC on the following
+ types of objects:
+ - \code{data.frame} (only numeric data)
+ - \code{matrix} (only numeric data)
+ - \code{\linkS4class{genind}} objects (genetic markers)
+ - \code{\linkS4class{genlight}} objects (genome-wide SNPs)
 
- 
- Other functions are:
-  
+ These methods all return an object with class \code{dapc}.
+
+ Functions that can be applied to these objects are (the ".dapc" can be
+ ommitted):
+
   - \code{print.dapc}: prints the content of a \code{dapc} object.
-  
+
   - \code{summary.dapc}: extracts useful information from a  \code{dapc} object.
-  
+
   - \code{scatter.dapc}: produces scatterplots of principal components (or
     'discriminant functions'), with a screeplot of eigenvalues as inset.
-  
+
   - \code{assignplot}: plot showing the probabilities of assignment of
-    individuals to the different clusters.
+  individuals to the different clusters.
+
+
+  DAPC
+  implementation calls upon \code{\link[ade4]{dudi.pca}} from the
+  \code{ade4} package and \code{\link[MASS]{lda}} from the \code{MASS}
+  package.
 }
 \usage{
 \method{dapc}{data.frame}(x, grp, n.pca=NULL, n.da=NULL, center=TRUE,
@@ -46,6 +54,10 @@
      scale.method=c("sigma", "binom"), truenames=TRUE, var.contrib=TRUE,
      pca.select=c("nbEig","percVar"), perc.pca=NULL, \ldots)
 
+\method{dapc}{genlight}(x, pop = NULL, n.pca = NULL, n.da = NULL, scale = FALSE, 
+    var.contrib = TRUE, pca.select = c("nbEig", "percVar"), perc.pca = NULL, 
+    glPca = NULL, \ldots)
+
 \method{dapc}{dudi}(x, grp, \ldots)
 
 \method{print}{dapc}(x, \dots)
@@ -94,7 +106,12 @@
     retained axes of PCA.}
   \item{\ldots}{further arguments to be passed to other functions. For
     \code{dapc.matrix}, arguments are to match those of
-    \code{dapc.data.frame}; for \code{scatter}, arguments passed to \code{points}.}
+    \code{dapc.data.frame}; for \code{scatter}, arguments passed to
+    \code{points}; for \code{dapc.genlight}, arguments passed to
+    \code{\link{glPca}}.}
+  \item{glPca}{an optional \code{\link{glPca}} object; if provided,
+    dimension reduction is not performed (saving computational time) but
+    taken directly from this object.}
   \item{object}{a \code{dapc} object.}
   \item{scale.method}{a \code{character} specifying the scaling method to be used
     for allele frequencies, which must match "sigma" (usual estimate of standard
@@ -219,6 +236,28 @@
 scatter(dapc1, cell=0, pch=18:23, cstar=0)
 
 ## only ellipse, custom labels
-scatter(dapc1, cell=2, pch="", cstar=0, posi="top", lab=paste("year\n",2001:2006), axesel=FALSE, col=terrain.colors(10))
+scatter(dapc1, cell=2, pch="", cstar=0, posi="top",
+lab=paste("year\n",2001:2006), axesel=FALSE, col=terrain.colors(10))
+
+
+
+## example using genlight objects ##
+## simulate data
+x <- glSim(50,4e3-50, 50, ploidy=2)
+x
+plot(x)
+
+## perform DAPC
+dapc1 <- dapc(x, n.pca=10, n.da=1)
+dapc1
+
+## plot results
+scatter(dapc1)
+
+## SNP contributions
+loadingplot(dapc1$var.contr)
+loadingplot(tail(dapc1$var.contr, 100), main="Loading plot - last 100 SNPs")
+
+
 }
 \keyword{multivariate}
\ No newline at end of file