[adegenet-commits] r839 - in pkg: . R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Mar 4 13:53:58 CET 2011


Author: jombart
Date: 2011-03-04 13:53:58 +0100 (Fri, 04 Mar 2011)
New Revision: 839

Modified:
   pkg/DESCRIPTION
   pkg/R/dapc.R
Log:
First version of dapc for genlight. Need to debug.


Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION	2011-03-04 12:35:04 UTC (rev 838)
+++ pkg/DESCRIPTION	2011-03-04 12:53:58 UTC (rev 839)
@@ -7,8 +7,8 @@
   and contributed datasets from: Katayoun Moazami-Goudarzi, Denis Laloe,
   Dominique Pontier, Daniel Maillard, Francois Balloux
 Maintainer: Thibaut Jombart <t.jombart at imperial.ac.uk>
-Suggests: ade4, genetics, hierfstat, spdep, tripack, ape, pegas, graph, RBGL, seqinr, multicore
-Depends: methods, MASS
+Suggests: genetics, hierfstat, spdep, tripack, ape, pegas, graph, RBGL, seqinr, multicore
+Depends: methods, MASS, ade4
 Description: Classes and functions for genetic data analysis within the multivariate framework.
 Collate: classes.R basicMethods.R handling.R auxil.R setAs.R SNPbin.R glHandle.R glFunctions.R find.clust.R hybridize.R scale.R fstat.R import.R seqTrack.R chooseCN.R genind2genpop.R loadingplot.R sequences.R gstat.randtest.R makefreq.R colorplot.R monmonier.R spca.R coords.monmonier.R haploGen.R old2new.R spca.rtests.R dapc.R haploPop.R PCtest.R dist.genpop.R Hs.R propShared.R export.R HWE.R propTyped.R inbreeding.R glPlot.R zzz.R
 License: GPL (>=2)

Modified: pkg/R/dapc.R
===================================================================
--- pkg/R/dapc.R	2011-03-04 12:35:04 UTC (rev 838)
+++ pkg/R/dapc.R	2011-03-04 12:53:58 UTC (rev 839)
@@ -69,7 +69,7 @@
     names(XU) <- paste("PCA-pc", 1:ncol(XU), sep=".")
 
 
-     ## PERFORM DA ##
+    ## PERFORM DA ##
     ldaX <- lda(XU, grp, tol=1e-30) # tol=1e-30 is a kludge, but a safe (?) one to avoid fancy rescaling by lda.default
     if(is.null(n.da)){
         barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(grp))) )
@@ -207,12 +207,18 @@
 
 
     ## PERFORM PCA ##
-    maxRank <- min(c(nInd(x), nLoc(x)))
+    REDUCEDIM <- is.null(glPca)
 
-    if(REDUCEDIM){ # if no dudi provided
-        maxRank <- min(dim(x))
-        pcaX <- glPca(x, center = center, scale = scale, nf=maxRank, loadings=FALSE, ...)
-    } else { # else use the provided dudi
+    if(REDUCEDIM & is.null(n.pca)){ # if no glPca provided
+        maxRank <- min(c(nInd(x), nLoc(x)))
+        pcaX <- glPca(x, center = TRUE, scale = scale, nf=maxRank, loadings=FALSE, returnDotProd = TRUE, ...)
+    }
+
+    if(!REDUCEDIM){ # else use the provided glPca object
+        if(is.null(glPca$loadings) & var.contrib) {
+            warning("Contribution of variables requested but glPca object provided without loadings.")
+            var.contrib <- FALSE
+        }
         pcaX <- glPca
     }
     cumVar <- 100 * cumsum(pcaX$eig)/sum(pcaX$eig)
@@ -238,19 +244,67 @@
     }
 
 
-    ## recompute PCA with loadings
-    if(REDUCEDIM){ # if no dudi provided
-        maxRank <- min(dim(x))
-        pcaX <- glPca(x, center = center, scale = scale, nf=n.pca, loadings=TRUE, ...)
+    ## recompute PCA with loadings if needed
+    if(REDUCEDIM){
+        pcaX <- glPca(x, center = center, scale = scale, nf=n.pca, loadings=var.contrib, matDotProd = pca1$dotProd)
     }
 
 
+    ## keep relevant PCs - stored in XU
+    X.rank <- sum(pcaX$eig > 1e-14)
+    n.pca <- min(X.rank, n.pca)
+    if(n.pca >= N) stop("number of retained PCs of PCA is greater than N")
+    if(n.pca > N/3) warning("number of retained PCs of PCA may be too large (> N /3)\n results may be unstable ")
 
+    U <- pcaX$loadings[, 1:n.pca, drop=FALSE] # principal axes
+    XU <- pcaX$scores[, 1:n.pca, drop=FALSE] # principal components
+    XU.lambda <- sum(pcaX$eig[1:n.pca])/sum(pcaX$eig) # sum of retained eigenvalues
+    names(U) <- paste("PCA-pa", 1:ncol(U), sep=".")
+    names(XU) <- paste("PCA-pc", 1:ncol(XU), sep=".")
 
+
+    ## PERFORM DA ##
+    ldaX <- lda(XU, grp, tol=1e-30) # tol=1e-30 is a kludge, but a safe (?) one to avoid fancy rescaling by lda.default
+    if(is.null(n.da)){
+        barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(grp))) )
+        cat("Choose the number discriminant functions to retain (>=1): ")
+        n.da <- as.integer(readLines(n = 1))
+    }
+
+    n.da <- min(n.da, length(levels(grp))-1, n.pca) # can't be more than K-1 disc. func., or more than n.pca
+    n.da <- round(n.da)
+    predX <- predict(ldaX, dimen=n.da)
+
+
+    ## BUILD RESULT
+    res <- list()
+    res$n.pca <- n.pca
+    res$n.da <- n.da
+    res$tab <- XU
+    res$grp <- grp
+    res$var <- XU.lambda
+    res$eig <- ldaX$svd^2
+    res$loadings <- ldaX$scaling[, 1:n.da, drop=FALSE]
+    res$ind.coord <-predX$x
+    res$grp.coord <- apply(res$ind.coord, 2, tapply, grp, mean)
+    res$prior <- ldaX$prior
+    res$posterior <- predX$posterior
+    res$assign <- predX$class
     res$call <- match.call()
 
+    ## optional: get loadings of alleles
+    if(var.contrib){
+        res$var.contr <- as.matrix(U) %*% as.matrix(ldaX$scaling[,1:n.da,drop=FALSE])
+        f1 <- function(x){
+            temp <- sum(x*x)
+            if(temp < 1e-12) return(rep(0, length(x)))
+            return(x*x / temp)
+        }
+        res$var.contr <- apply(res$var.contr, 2, f1)
+    }
+
+    class(res) <- "dapc"
     return(res)
-
 } # end dapc.genlight
 
 



More information about the adegenet-commits mailing list