[adegenet-commits] r499 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Nov 30 19:01:12 CET 2009
Author: jombart
Date: 2009-11-30 19:01:12 +0100 (Mon, 30 Nov 2009)
New Revision: 499
Added:
pkg/R/dapc.R
Modified:
pkg/R/import.R
Log:
Added a working function for dapc. Remaining: summary, plot, scatter.
Added: pkg/R/dapc.R
===================================================================
--- pkg/R/dapc.R (rev 0)
+++ pkg/R/dapc.R 2009-11-30 18:01:12 UTC (rev 499)
@@ -0,0 +1,360 @@
+########
+## dapc
+########
+dapc <- function(x, pop=NULL, n.pca=NULL, n.da=NULL, scale=TRUE,
+ scale.method=c("sigma", "binom"), truenames=TRUE, all.contrib=FALSE){
+
+ ## FIRST CHECKS
+ if(!inherits(x,"genind")) stop("x must be a genind or genpop xect.")
+ invisible(validObject(x))
+ if(is.null(pop)) {
+ pop <- pop(x)
+ }
+ if(is.null(pop(x))) stop("x does not include pre-defined populations, and `pop' is not provided")
+
+ if(!require(ade4, quiet=TRUE)) stop("ade4 library is required.")
+ if(!require(MASS, quiet=TRUE)) stop("MASS library is required.")
+
+
+ ## SOME GENERAL VARIABLES
+ N <- nrow(x at tab)
+
+ ## PERFORM PCA ##
+ maxRank <- min(dim(x at tab))
+
+ X <- scaleGen(x, center = TRUE, scale = scale, method = scale.method,
+ missing = "mean", truenames = truenames)
+
+ pcaX <- dudi.pca(X, center = FALSE, scale = FALSE, scannf = FALSE, nf=maxRank)
+ ## select the number of retained PC for PCA
+ if(is.null(n.pca)){
+ cumVar <- 100 * cumsum(pcaX$eig)/sum(pcaX$eig)
+ plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative genetic variance (%)", main="Genetic variance explained by PCA")
+ cat("Choose the number PCs to retain (>=1): ")
+ n.pca <- as.integer(readLines(n = 1))
+ }
+
+ ## keep relevant PCs - stored in XU
+ X.rank <- length(pcaX$eig)
+ n.pca <- min(X.rank, n.pca)
+ if(n.pca >= N) stop("number of retained PCA is greater than N")
+ if(n.pca > N/3) warning("number of retained PCA may be too large (> N /3)")
+
+ U <- pcaX$c1[, 1:n.pca, drop=FALSE] # principal axes
+ XU <- pcaX$li[, 1:n.pca, drop=FALSE] # principal components
+ XU.lambda <- sum(pcaX$eig[1:n.pca])/sum(pcaX$eig) # sum of retained eigenvalues
+ names(U) <- paste("PCA-pa", 1:ncol(U), sep=".")
+ names(XU) <- paste("PCA-pc", 1:ncol(XU), sep=".")
+
+
+ ## PERFORM DA ##
+ ldaX <- lda(XU, pop)
+ if(is.null(n.da)){
+ barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(pop))) )
+ cat("Choose the number LDs to retain (>=1): ")
+ n.da <- as.integer(readLines(n = 1))
+ }
+
+ predX <- predict(ldaX, dimen=n.da)
+
+
+ ## BUILD RESULT
+ res <- list()
+ res$tab <- XU
+ res$fac <- pop
+ res$var.gen <- XU.lambda
+ res$eig <- ldaX$svd^2
+ res$LD <- ldaX$scaling
+ res$pop.coord <- ldaX$means
+ res$ind.coord <-predX$x
+ res$prior <- ldaX$prior
+ res$posterior <- predX$posterior
+ res$assign <- predX$class
+
+ ## optional: get loadings of alleles
+ if(all.contrib){
+ res$all.contr <- as.matrix(U) %*% as.matrix(ldaX$scaling)
+ res$all.contr <- t(apply(all.contr, 1, function(e) e*e / sum(e*e)))
+ }
+
+ res$call <- match.call()
+ class(res) <- "dapc"
+ return(res)
+} # end dapc
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+######################
+# Function print.dapc
+######################
+print.dapc <- function(x, ...){
+ cat("\t########################################\n")
+ cat("\t# spatial Principal Component Analysis #\n")
+ cat("\t########################################\n")
+ cat("class: ")
+ cat(class(x))
+ cat("\n$call: ")
+ print(x$call)
+ cat("\n$nfposi:", x$nfposi, "axis-components saved")
+ cat("\n$nfnega:", x$nfnega, "axis-components saved")
+
+ cat("\nPositive eigenvalues: ")
+ l0 <- sum(x$eig >= 0)
+ cat(signif(x$eig, 4)[1:(min(5, l0))])
+ if (l0 > 5)
+ cat(" ...\n")
+ else cat("\n")
+ cat("Negative eigenvalues: ")
+ l0 <- sum(x$eig <= 0)
+ cat(sort(signif(x$eig, 4))[1:(min(5, l0))])
+ if (l0 > 5)
+ cat(" ...\n")
+ else cat("\n")
+ cat('\n')
+ sumry <- array("", c(1, 4), list(1, c("vector", "length",
+ "mode", "content")))
+ sumry[1, ] <- c('$eig', length(x$eig), mode(x$eig), 'eigenvalues')
+ class(sumry) <- "table"
+ print(sumry)
+ cat("\n")
+ sumry <- array("", c(4, 4), list(1:4, c("data.frame", "nrow", "ncol", "content")))
+ sumry[1, ] <- c("$c1", nrow(x$c1), ncol(x$c1), "principal axes: scaled vectors of alleles loadings")
+ sumry[2, ] <- c("$li", nrow(x$li), ncol(x$li), "principal components: coordinates of entities ('scores')")
+ sumry[3, ] <- c("$ls", nrow(x$ls), ncol(x$ls), 'lag vector of principal components')
+ sumry[4, ] <- c("$as", nrow(x$as), ncol(x$as), 'pca axes onto dapc axes')
+
+ class(sumry) <- "table"
+ print(sumry)
+
+ cat("\n$xy: matrix of spatial coordinates")
+ cat("\n$lw: a list of spatial weights (class 'listw')")
+
+ cat("\n\nother elements: ")
+ if (length(names(x)) > 10)
+ cat(names(x)[11:(length(names(x)))], "\n")
+ else cat("NULL\n")
+}
+
+
+
+
+
+########################
+# Function summary.dapc
+########################
+summary.dapc <- function (object, ..., printres=TRUE) {
+ if (!inherits(object, "dapc"))stop("to be used with 'dapc' object")
+ if(!require(ade4,quietly=TRUE)) stop("the library ade4 is required; please install this package")
+ if(!require(spdep,quietly=TRUE)) stop("the library spdep is required; please install this package")
+
+ #util <- function(n) { ## no longer used
+ # x <- "1"
+ # for (i in 2:n) x[i] <- paste(x[i - 1], i, sep = "+")
+ # return(x)
+ #}
+ norm.w <- function(X, w) {
+ f2 <- function(v) sum(v * v * w)/sum(w)
+ norm <- apply(X, 2, f2)
+ return(norm)
+ }
+
+ resfin <- list()
+
+ if(printres) {
+ cat("\nSpatial principal component analysis\n")
+ cat("\nCall: ")
+ print(object$call)
+ }
+
+ appel <- as.list(object$call)
+ ## compute original pca
+ # prepare data
+ obj <- eval(appel$obj)
+ if(is.null(appel$truenames)) truenames <- FALSE
+
+ f1 <- function(vec){
+ m <- mean(vec,na.rm=TRUE)
+ vec[is.na(vec)] <- m
+ return(vec)
+ }
+
+ if(is.genind(obj)) { X <- obj at tab }
+ if(is.genpop(obj)) { X <- makefreq(obj, quiet=TRUE)$tab }
+
+ X <- apply(X,2,f1)
+
+ if(truenames){
+ rownames(X) <- rownames(truenames(obj))
+ colnames(X) <- colnames(truenames(obj))
+ }
+
+ nfposi <- object$nfposi
+ nfnega <- object$nfnega
+
+ dudi <- dudi.pca(X, center=TRUE, scale=FALSE, scannf=FALSE, nf=nfposi+nfnega)
+ ## end of pca
+
+ lw <- object$lw
+
+ # I0, Imin, Imax
+ n <- nrow(X)
+ I0 <- -1/(n-1)
+ L <- listw2mat(lw)
+ eigL <- eigen(0.5*(L+t(L)))$values
+ Imin <- min(eigL)
+ Imax <- max(eigL)
+ Ival <- data.frame(I0=I0,Imin=Imin,Imax=Imax)
+ row.names(Ival) <- ""
+ if(printres) {
+ cat("\nConnection network statistics:\n")
+ print(Ival)
+ }
+
+ Istat <- c(I0,Imin,Imax)
+ names(Istat) <- c("I0","Imin","Imax")
+ resfin$Istat <- Istat
+
+
+ # les scores de l'analyse de base
+ nf <- dudi$nf
+ eig <- dudi$eig[1:nf]
+ cum <- cumsum(dudi$eig)[1:nf]
+ ratio <- cum/sum(dudi$eig)
+ w <- apply(dudi$l1,2,lag.listw,x=lw)
+ moran <- apply(w*as.matrix(dudi$l1)*dudi$lw,2,sum)
+ res <- data.frame(var=eig,cum=cum,ratio=ratio, moran=moran)
+ row.names(res) <- paste("Axis",1:nf)
+ if(printres) {
+ cat("\nScores from the centred PCA\n")
+ print(res)
+ }
+
+ resfin$pca <- res
+
+
+ # les scores de l'analyse spatiale
+ # on recalcule l'objet en gardant tous les axes
+ eig <- object$eig
+ nfposimax <- sum(eig > 0)
+ nfnegamax <- sum(eig < 0)
+
+ ms <- multispati(dudi=dudi, listw=lw, scannf=FALSE,
+ nfposi=nfposimax, nfnega=nfnegamax)
+
+ ndim <- dudi$rank
+ nf <- nfposi + nfnega
+ agarder <- c(1:nfposi,if (nfnega>0) (ndim-nfnega+1):ndim)
+ varspa <- norm.w(ms$li,dudi$lw)
+ moran <- apply(as.matrix(ms$li)*as.matrix(ms$ls)*dudi$lw,2,sum)
+ res <- data.frame(eig=eig,var=varspa,moran=moran/varspa)
+ row.names(res) <- paste("Axis",1:length(eig))
+
+ if(printres) {
+ cat("\ndapc eigenvalues decomposition:\n")
+ print(res[agarder,])
+ }
+
+ resfin$dapc <- res
+
+ return(invisible(resfin))
+}
+
+
+
+#####################
+# Function plot.dapc
+#####################
+plot.dapc <- function (x, axis = 1, useLag=FALSE, ...){
+ if (!inherits(x, "dapc")) stop("Use only with 'dapc' objects.")
+
+ if(!require(ade4)) stop("ade4 package is required.")
+ if(!require(spdep)) stop("spdep package is required.")
+ if(axis>ncol(x$li)) stop("wrong axis required.")
+
+ opar <- par(no.readonly = TRUE)
+ on.exit(par(opar))
+ par(mar = rep(.1,4), mfrow=c(3,2))
+
+ n <- nrow(x$li)
+ xy <- x$xy
+
+ ## handle useLag argument
+ if(useLag){
+ z <- x$ls[,axis]
+ } else {
+ z <- x$li[,axis]
+ } # end if useLag
+ nfposi <- x$nfposi
+ nfnega <- x$nfnega
+ ## handle neig parameter - hide cn if nore than 100 links
+ nLinks <- sum(card(x$lw$neighbours))
+ if(nLinks < 500) {
+ neig <- nb2neig(x$lw$neighbours)
+ } else {
+ neig <- NULL
+ }
+
+ sub <- paste("Score",axis)
+ csub <- 2
+
+ # 1
+ if(n<30) clab <- 1 else clab <- 0
+ s.label(xy, clab=clab, include.ori=FALSE, addaxes=FALSE, neig=neig,
+ cneig=1, sub="Connection network", csub=2)
+
+ # 2
+ s.image(xy,z, include.ori=FALSE, grid=TRUE, kgrid=10, cgrid=1,
+ sub=sub, csub=csub, possub="bottomleft")
+ box()
+
+ # 3
+ if(n<30) {neig <- nb2neig(x$lw$neighbours)} else {neig <- NULL}
+ s.value(xy,z, include.ori=FALSE, addaxes=FALSE, clegend=0, csize=.6,
+ neig=neig, sub=sub, csub=csub, possub="bottomleft")
+
+ # 4
+ s.value(xy,z, include.ori=FALSE, addaxes=FALSE, clegend=0, csize=.6,
+ method="greylevel", neig=neig, sub=sub, csub=csub, possub="bottomleft")
+
+ # 5
+ omar <- par("mar")
+ par(mar = c(0.8, 2.8, 0.8, 0.8))
+ m <- length(x$eig)
+ col.w <- rep("white", m) # elles sont toutes blanches
+ col.w[1:nfposi] <- "grey"
+ if (nfnega>0) {col.w[m:(m-nfnega+1)] <- "grey"}
+ j <- axis
+ if (j>nfposi) {j <- j-nfposi +m -nfnega}
+ col.w[j] <- "black"
+ barplot(x$eig, col = col.w)
+ scatterutil.sub(cha ="Eigenvalues", csub = 2.5, possub = "topright")
+ par(mar=rep(.1,4))
+ box()
+ par(mar=omar)
+
+ # 6
+ par(mar=c(4,4,2,1))
+ screeplot(x,main="Eigenvalues decomposition")
+ par(mar=rep(.1,4))
+ box()
+ return(invisible(match.call()))
+}
+
Modified: pkg/R/import.R
===================================================================
--- pkg/R/import.R 2009-11-30 11:50:35 UTC (rev 498)
+++ pkg/R/import.R 2009-11-30 18:01:12 UTC (rev 499)
@@ -244,7 +244,8 @@
loc.rep <- rep(names(nall),nall)
col.lab <- paste(loc.rep,unlist(loc.all,use.names=FALSE),sep=".")
- mat <- as.matrix(cbind.data.frame(temp))
+ ## mat <- as.matrix(cbind.data.frame(temp)) # ! does not work for huge numbers of alleles
+ mat <- matrix(unlist(temp), nrow=nrow(temp[[1]]))
mat <- mat/ploidy
colnames(mat) <- col.lab
rownames(mat) <- ind.names
More information about the adegenet-commits
mailing list