[adegenet-commits] r839 - in pkg: . R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Mar 4 13:53:58 CET 2011
Author: jombart
Date: 2011-03-04 13:53:58 +0100 (Fri, 04 Mar 2011)
New Revision: 839
Modified:
pkg/DESCRIPTION
pkg/R/dapc.R
Log:
First version of dapc for genlight. Need to debug.
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2011-03-04 12:35:04 UTC (rev 838)
+++ pkg/DESCRIPTION 2011-03-04 12:53:58 UTC (rev 839)
@@ -7,8 +7,8 @@
and contributed datasets from: Katayoun Moazami-Goudarzi, Denis Laloe,
Dominique Pontier, Daniel Maillard, Francois Balloux
Maintainer: Thibaut Jombart <t.jombart at imperial.ac.uk>
-Suggests: ade4, genetics, hierfstat, spdep, tripack, ape, pegas, graph, RBGL, seqinr, multicore
-Depends: methods, MASS
+Suggests: genetics, hierfstat, spdep, tripack, ape, pegas, graph, RBGL, seqinr, multicore
+Depends: methods, MASS, ade4
Description: Classes and functions for genetic data analysis within the multivariate framework.
Collate: classes.R basicMethods.R handling.R auxil.R setAs.R SNPbin.R glHandle.R glFunctions.R find.clust.R hybridize.R scale.R fstat.R import.R seqTrack.R chooseCN.R genind2genpop.R loadingplot.R sequences.R gstat.randtest.R makefreq.R colorplot.R monmonier.R spca.R coords.monmonier.R haploGen.R old2new.R spca.rtests.R dapc.R haploPop.R PCtest.R dist.genpop.R Hs.R propShared.R export.R HWE.R propTyped.R inbreeding.R glPlot.R zzz.R
License: GPL (>=2)
Modified: pkg/R/dapc.R
===================================================================
--- pkg/R/dapc.R 2011-03-04 12:35:04 UTC (rev 838)
+++ pkg/R/dapc.R 2011-03-04 12:53:58 UTC (rev 839)
@@ -69,7 +69,7 @@
names(XU) <- paste("PCA-pc", 1:ncol(XU), sep=".")
- ## PERFORM DA ##
+ ## PERFORM DA ##
ldaX <- lda(XU, grp, tol=1e-30) # tol=1e-30 is a kludge, but a safe (?) one to avoid fancy rescaling by lda.default
if(is.null(n.da)){
barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(grp))) )
@@ -207,12 +207,18 @@
## PERFORM PCA ##
- maxRank <- min(c(nInd(x), nLoc(x)))
+ REDUCEDIM <- is.null(glPca)
- if(REDUCEDIM){ # if no dudi provided
- maxRank <- min(dim(x))
- pcaX <- glPca(x, center = center, scale = scale, nf=maxRank, loadings=FALSE, ...)
- } else { # else use the provided dudi
+ if(REDUCEDIM & is.null(n.pca)){ # if no glPca provided
+ maxRank <- min(c(nInd(x), nLoc(x)))
+ pcaX <- glPca(x, center = TRUE, scale = scale, nf=maxRank, loadings=FALSE, returnDotProd = TRUE, ...)
+ }
+
+ if(!REDUCEDIM){ # else use the provided glPca object
+ if(is.null(glPca$loadings) & var.contrib) {
+ warning("Contribution of variables requested but glPca object provided without loadings.")
+ var.contrib <- FALSE
+ }
pcaX <- glPca
}
cumVar <- 100 * cumsum(pcaX$eig)/sum(pcaX$eig)
@@ -238,19 +244,67 @@
}
- ## recompute PCA with loadings
- if(REDUCEDIM){ # if no dudi provided
- maxRank <- min(dim(x))
- pcaX <- glPca(x, center = center, scale = scale, nf=n.pca, loadings=TRUE, ...)
+ ## recompute PCA with loadings if needed
+ if(REDUCEDIM){
+ pcaX <- glPca(x, center = center, scale = scale, nf=n.pca, loadings=var.contrib, matDotProd = pca1$dotProd)
}
+ ## keep relevant PCs - stored in XU
+ X.rank <- sum(pcaX$eig > 1e-14)
+ n.pca <- min(X.rank, n.pca)
+ if(n.pca >= N) stop("number of retained PCs of PCA is greater than N")
+ if(n.pca > N/3) warning("number of retained PCs of PCA may be too large (> N /3)\n results may be unstable ")
+ U <- pcaX$loadings[, 1:n.pca, drop=FALSE] # principal axes
+ XU <- pcaX$scores[, 1:n.pca, drop=FALSE] # principal components
+ XU.lambda <- sum(pcaX$eig[1:n.pca])/sum(pcaX$eig) # sum of retained eigenvalues
+ names(U) <- paste("PCA-pa", 1:ncol(U), sep=".")
+ names(XU) <- paste("PCA-pc", 1:ncol(XU), sep=".")
+
+ ## PERFORM DA ##
+ ldaX <- lda(XU, grp, tol=1e-30) # tol=1e-30 is a kludge, but a safe (?) one to avoid fancy rescaling by lda.default
+ if(is.null(n.da)){
+ barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(grp))) )
+ cat("Choose the number discriminant functions to retain (>=1): ")
+ n.da <- as.integer(readLines(n = 1))
+ }
+
+ n.da <- min(n.da, length(levels(grp))-1, n.pca) # can't be more than K-1 disc. func., or more than n.pca
+ n.da <- round(n.da)
+ predX <- predict(ldaX, dimen=n.da)
+
+
+ ## BUILD RESULT
+ res <- list()
+ res$n.pca <- n.pca
+ res$n.da <- n.da
+ res$tab <- XU
+ res$grp <- grp
+ res$var <- XU.lambda
+ res$eig <- ldaX$svd^2
+ res$loadings <- ldaX$scaling[, 1:n.da, drop=FALSE]
+ res$ind.coord <-predX$x
+ res$grp.coord <- apply(res$ind.coord, 2, tapply, grp, mean)
+ res$prior <- ldaX$prior
+ res$posterior <- predX$posterior
+ res$assign <- predX$class
res$call <- match.call()
+ ## optional: get loadings of alleles
+ if(var.contrib){
+ res$var.contr <- as.matrix(U) %*% as.matrix(ldaX$scaling[,1:n.da,drop=FALSE])
+ f1 <- function(x){
+ temp <- sum(x*x)
+ if(temp < 1e-12) return(rep(0, length(x)))
+ return(x*x / temp)
+ }
+ res$var.contr <- apply(res$var.contr, 2, f1)
+ }
+
+ class(res) <- "dapc"
return(res)
-
} # end dapc.genlight
More information about the adegenet-commits
mailing list