[adegenet-commits] r840 - in pkg: R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Mar 4 14:31:14 CET 2011
Author: jombart
Date: 2011-03-04 14:31:14 +0100 (Fri, 04 Mar 2011)
New Revision: 840
Modified:
pkg/R/dapc.R
pkg/R/glFunctions.R
pkg/R/loadingplot.R
pkg/man/dapc.Rd
Log:
DAPC for genlight is OK.
Documented too.
Modified: pkg/R/dapc.R
===================================================================
--- pkg/R/dapc.R 2011-03-04 12:53:58 UTC (rev 839)
+++ pkg/R/dapc.R 2011-03-04 13:31:14 UTC (rev 840)
@@ -190,12 +190,13 @@
dapc.genlight <- function(x, pop=NULL, n.pca=NULL, n.da=NULL,
scale=FALSE, var.contrib=TRUE,
pca.select=c("nbEig","percVar"), perc.pca=NULL, glPca=NULL, ...){
- ## FIRST CHECKS
+ ## FIRST CHECKS ##
if(!require(ade4, quiet=TRUE)) stop("ade4 library is required.")
if(!require(MASS, quiet=TRUE)) stop("MASS library is required.")
-
if(!inherits(x, "genlight")) stop("x must be a genlight object.")
+ pca.select <- match.arg(pca.select)
+
if(is.null(pop)) {
pop.fac <- pop(x)
} else {
@@ -209,7 +210,7 @@
## PERFORM PCA ##
REDUCEDIM <- is.null(glPca)
- if(REDUCEDIM & is.null(n.pca)){ # if no glPca provided
+ if(REDUCEDIM){ # if no glPca provided
maxRank <- min(c(nInd(x), nLoc(x)))
pcaX <- glPca(x, center = TRUE, scale = scale, nf=maxRank, loadings=FALSE, returnDotProd = TRUE, ...)
}
@@ -221,17 +222,31 @@
}
pcaX <- glPca
}
- cumVar <- 100 * cumsum(pcaX$eig)/sum(pcaX$eig)
+ if(is.null(n.pca)){
+ cumVar <- 100 * cumsum(pcaX$eig)/sum(pcaX$eig)
+ }
+
+
## select the number of retained PC for PCA
if(is.null(n.pca) & pca.select=="nbEig"){
- plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA")
+ if(!REDUCEDIM){
+ myCol <- rep(c("black", "lightgrey"), c(ncol(pcaX$scores),length(pcaX$eig)))
+ } else {
+ myCol <- "black"
+ }
+ plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
cat("Choose the number PCs to retain (>=1): ")
n.pca <- as.integer(readLines(n = 1))
}
if(is.null(perc.pca) & pca.select=="percVar"){
- plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA")
+ if(!REDUCEDIM){
+ myCol <- rep(c("black", "lightgrey"), c(ncol(pcaX$scores),length(pcaX$eig)))
+ } else {
+ myCol <- "black"
+ }
+ plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
cat("Choose the percentage of variance to retain (0-100): ")
nperc.pca <- as.numeric(readLines(n = 1))
}
@@ -243,14 +258,21 @@
if(n.pca<1) n.pca <- 1
}
+ if(!REDUCEDIM){
+ if(n.pca > ncol(pcaX$scores)) {
+ n.pca <- ncol(pcaX$scores)
+ }
+ }
+
## recompute PCA with loadings if needed
if(REDUCEDIM){
- pcaX <- glPca(x, center = center, scale = scale, nf=n.pca, loadings=var.contrib, matDotProd = pca1$dotProd)
+ pcaX <- glPca(x, center = TRUE, scale = scale, nf=n.pca, loadings=var.contrib, matDotProd = pcaX$dotProd)
}
## keep relevant PCs - stored in XU
+ N <- nInd(x)
X.rank <- sum(pcaX$eig > 1e-14)
n.pca <- min(X.rank, n.pca)
if(n.pca >= N) stop("number of retained PCs of PCA is greater than N")
@@ -264,14 +286,14 @@
## PERFORM DA ##
- ldaX <- lda(XU, grp, tol=1e-30) # tol=1e-30 is a kludge, but a safe (?) one to avoid fancy rescaling by lda.default
+ ldaX <- lda(XU, pop.fac, tol=1e-30) # tol=1e-30 is a kludge, but a safe (?) one to avoid fancy rescaling by lda.default
if(is.null(n.da)){
- barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(grp))) )
+ barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(pop.fac))) )
cat("Choose the number discriminant functions to retain (>=1): ")
n.da <- as.integer(readLines(n = 1))
}
- n.da <- min(n.da, length(levels(grp))-1, n.pca) # can't be more than K-1 disc. func., or more than n.pca
+ n.da <- min(n.da, length(levels(pop.fac))-1, n.pca) # can't be more than K-1 disc. func., or more than n.pca
n.da <- round(n.da)
predX <- predict(ldaX, dimen=n.da)
@@ -281,12 +303,12 @@
res$n.pca <- n.pca
res$n.da <- n.da
res$tab <- XU
- res$grp <- grp
+ res$grp <- pop.fac
res$var <- XU.lambda
res$eig <- ldaX$svd^2
res$loadings <- ldaX$scaling[, 1:n.da, drop=FALSE]
res$ind.coord <-predX$x
- res$grp.coord <- apply(res$ind.coord, 2, tapply, grp, mean)
+ res$grp.coord <- apply(res$ind.coord, 2, tapply, pop.fac, mean)
res$prior <- ldaX$prior
res$posterior <- predX$posterior
res$assign <- predX$class
Modified: pkg/R/glFunctions.R
===================================================================
--- pkg/R/glFunctions.R 2011-03-04 12:53:58 UTC (rev 839)
+++ pkg/R/glFunctions.R 2011-03-04 13:31:14 UTC (rev 840)
@@ -354,6 +354,7 @@
## rescale PCs
res <- list()
res$eig <- eigRes$values
+ nf <- min(nf, sum(res$eig>1e-10))
##res$matprod <- allProd # for debugging
## use: li = XQU = V\Lambda^(1/2)
Modified: pkg/R/loadingplot.R
===================================================================
--- pkg/R/loadingplot.R 2011-03-04 12:53:58 UTC (rev 839)
+++ pkg/R/loadingplot.R 2011-03-04 13:31:14 UTC (rev 840)
@@ -11,12 +11,12 @@
if(is.data.frame(x) | is.matrix(x)){
if(is.null(lab)) {lab <- rownames(x)}
x <- x[,axis]
- names(x) <- temp
} else {
if(is.null(lab)) {lab <- names(x)}
}
- lab <- rep(lab, length=length(x))
+ names(x) <- lab <- rep(lab, length=length(x))
+
if(!is.numeric(x)) stop("x is not numeric")
if(any(is.na(x))) stop("NA entries in x")
if(any(x<0)) {
Modified: pkg/man/dapc.Rd
===================================================================
--- pkg/man/dapc.Rd 2011-03-04 12:53:58 UTC (rev 839)
+++ pkg/man/dapc.Rd 2011-03-04 13:31:14 UTC (rev 840)
@@ -5,6 +5,7 @@
\alias{dapc.matrix}
\alias{dapc.genind}
\alias{dapc.dudi}
+\alias{dapc.genlight}
\alias{print.dapc}
\alias{summary.dapc}
\alias{scatter.dapc}
@@ -12,28 +13,35 @@
\title{Discriminant Analysis of Principal Components (DAPC)}
\description{
These functions implement the Discriminant Analysis of Principal Components
- (DAPC). See 'details' section for a succint description of the method. DAPC
- implementation calls upon \code{\link[ade4]{dudi.pca}} from the \code{ade4} package and
- \code{\link[MASS]{lda}} from the \code{MASS} package.
+ (DAPC). See 'details' section for a succint description of the method.
- \code{dapc} performs the DAPC on a \code{data.frame}, a \code{matrix}, or a
- \code{\linkS4class{genind}} object, and returns an object with class
- \code{dapc}. If data are stored in a \code{data.frame} or a \code{matrix},
- these have to be quantitative data (i.e., \code{numeric} or \code{integers}),
- as opposed to \code{characters} or \code{factors}.
+ \code{dapc} is a generic function performing the DAPC on the following
+ types of objects:
+ - \code{data.frame} (only numeric data)
+ - \code{matrix} (only numeric data)
+ - \code{\linkS4class{genind}} objects (genetic markers)
+ - \code{\linkS4class{genlight}} objects (genome-wide SNPs)
-
- Other functions are:
-
+ These methods all return an object with class \code{dapc}.
+
+ Functions that can be applied to these objects are (the ".dapc" can be
+ ommitted):
+
- \code{print.dapc}: prints the content of a \code{dapc} object.
-
+
- \code{summary.dapc}: extracts useful information from a \code{dapc} object.
-
+
- \code{scatter.dapc}: produces scatterplots of principal components (or
'discriminant functions'), with a screeplot of eigenvalues as inset.
-
+
- \code{assignplot}: plot showing the probabilities of assignment of
- individuals to the different clusters.
+ individuals to the different clusters.
+
+
+ DAPC
+ implementation calls upon \code{\link[ade4]{dudi.pca}} from the
+ \code{ade4} package and \code{\link[MASS]{lda}} from the \code{MASS}
+ package.
}
\usage{
\method{dapc}{data.frame}(x, grp, n.pca=NULL, n.da=NULL, center=TRUE,
@@ -46,6 +54,10 @@
scale.method=c("sigma", "binom"), truenames=TRUE, var.contrib=TRUE,
pca.select=c("nbEig","percVar"), perc.pca=NULL, \ldots)
+\method{dapc}{genlight}(x, pop = NULL, n.pca = NULL, n.da = NULL, scale = FALSE,
+ var.contrib = TRUE, pca.select = c("nbEig", "percVar"), perc.pca = NULL,
+ glPca = NULL, \ldots)
+
\method{dapc}{dudi}(x, grp, \ldots)
\method{print}{dapc}(x, \dots)
@@ -94,7 +106,12 @@
retained axes of PCA.}
\item{\ldots}{further arguments to be passed to other functions. For
\code{dapc.matrix}, arguments are to match those of
- \code{dapc.data.frame}; for \code{scatter}, arguments passed to \code{points}.}
+ \code{dapc.data.frame}; for \code{scatter}, arguments passed to
+ \code{points}; for \code{dapc.genlight}, arguments passed to
+ \code{\link{glPca}}.}
+ \item{glPca}{an optional \code{\link{glPca}} object; if provided,
+ dimension reduction is not performed (saving computational time) but
+ taken directly from this object.}
\item{object}{a \code{dapc} object.}
\item{scale.method}{a \code{character} specifying the scaling method to be used
for allele frequencies, which must match "sigma" (usual estimate of standard
@@ -219,6 +236,28 @@
scatter(dapc1, cell=0, pch=18:23, cstar=0)
## only ellipse, custom labels
-scatter(dapc1, cell=2, pch="", cstar=0, posi="top", lab=paste("year\n",2001:2006), axesel=FALSE, col=terrain.colors(10))
+scatter(dapc1, cell=2, pch="", cstar=0, posi="top",
+lab=paste("year\n",2001:2006), axesel=FALSE, col=terrain.colors(10))
+
+
+
+## example using genlight objects ##
+## simulate data
+x <- glSim(50,4e3-50, 50, ploidy=2)
+x
+plot(x)
+
+## perform DAPC
+dapc1 <- dapc(x, n.pca=10, n.da=1)
+dapc1
+
+## plot results
+scatter(dapc1)
+
+## SNP contributions
+loadingplot(dapc1$var.contr)
+loadingplot(tail(dapc1$var.contr, 100), main="Loading plot - last 100 SNPs")
+
+
}
\keyword{multivariate}
\ No newline at end of file
More information about the adegenet-commits
mailing list