[adegenet-commits] r499 - pkg/R

Mon Nov 30 19:01:12 CET 2009

Author: jombart
Date: 2009-11-30 19:01:12 +0100 (Mon, 30 Nov 2009)
New Revision: 499

Added:
   pkg/R/dapc.R
Modified:
   pkg/R/import.R
Log:
Added a working function for dapc. Remaining: summary, plot, scatter.


Added: pkg/R/dapc.R
===================================================================

--- pkg/R/dapc.R	                        (rev 0)
+++ pkg/R/dapc.R	2009-11-30 18:01:12 UTC (rev 499)
@@ -0,0 +1,360 @@
+########
+## dapc
+########
+dapc <- function(x, pop=NULL, n.pca=NULL, n.da=NULL, scale=TRUE,
+                 scale.method=c("sigma", "binom"), truenames=TRUE, all.contrib=FALSE){
+
+    ## FIRST CHECKS
+    if(!inherits(x,"genind")) stop("x must be a genind or genpop xect.")
+    invisible(validObject(x))
+    if(is.null(pop)) {
+        pop <- pop(x)
+    }
+    if(is.null(pop(x))) stop("x does not include pre-defined populations, and `pop' is not provided")
+
+    if(!require(ade4, quiet=TRUE)) stop("ade4 library is required.")
+    if(!require(MASS, quiet=TRUE)) stop("MASS library is required.")
+
+
+    ## SOME GENERAL VARIABLES
+    N <- nrow(x at tab)
+
+    ## PERFORM PCA ##
+    maxRank <- min(dim(x at tab))
+
+    X <- scaleGen(x, center = TRUE, scale = scale, method = scale.method,
+                  missing = "mean", truenames = truenames)
+
+    pcaX <- dudi.pca(X, center = FALSE, scale = FALSE, scannf = FALSE, nf=maxRank)
+    ## select the number of retained PC for PCA
+    if(is.null(n.pca)){
+        cumVar <- 100 * cumsum(pcaX$eig)/sum(pcaX$eig)
+        plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative genetic variance (%)", main="Genetic variance explained by PCA")
+        cat("Choose the number PCs to retain (>=1): ")
+        n.pca <- as.integer(readLines(n = 1))
+    }
+
+    ## keep relevant PCs - stored in XU
+    X.rank <- length(pcaX$eig)
+    n.pca <- min(X.rank, n.pca)
+    if(n.pca >= N) stop("number of retained PCA is greater than N")
+    if(n.pca > N/3) warning("number of retained PCA may be too large (> N /3)")
+
+    U <- pcaX$c1[, 1:n.pca, drop=FALSE] # principal axes
+    XU <- pcaX$li[, 1:n.pca, drop=FALSE] # principal components
+    XU.lambda <- sum(pcaX$eig[1:n.pca])/sum(pcaX$eig) # sum of retained eigenvalues
+    names(U) <- paste("PCA-pa", 1:ncol(U), sep=".")
+    names(XU) <- paste("PCA-pc", 1:ncol(XU), sep=".")
+
+
+    ## PERFORM DA ##
+    ldaX <- lda(XU, pop)
+    if(is.null(n.da)){
+        barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(pop))) )
+        cat("Choose the number LDs to retain (>=1): ")
+        n.da <- as.integer(readLines(n = 1))
+    }
+
+    predX <- predict(ldaX, dimen=n.da)
+
+
+    ## BUILD RESULT
+    res <- list()
+    res$tab <- XU
+    res$fac <- pop
+    res$var.gen <- XU.lambda
+    res$eig <- ldaX$svd^2
+    res$LD <- ldaX$scaling
+    res$pop.coord <- ldaX$means
+    res$ind.coord <-predX$x
+    res$prior <- ldaX$prior
+    res$posterior <- predX$posterior
+    res$assign <- predX$class
+
+    ## optional: get loadings of alleles
+    if(all.contrib){
+        res$all.contr <- as.matrix(U) %*% as.matrix(ldaX$scaling)
+        res$all.contr <- t(apply(all.contr, 1, function(e) e*e / sum(e*e)))
+    }
+
+    res$call <- match.call()
+    class(res) <- "dapc"
+    return(res)
+} # end dapc
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+######################
+# Function print.dapc
+######################
+print.dapc <- function(x, ...){
+  cat("\t########################################\n")
+  cat("\t# spatial Principal Component Analysis #\n")
+  cat("\t########################################\n")
+  cat("class: ")
+  cat(class(x))
+  cat("\n$call: ")
+  print(x$call)
+  cat("\n$nfposi:", x$nfposi, "axis-components saved")
+  cat("\n$nfnega:", x$nfnega, "axis-components saved")
+
+  cat("\nPositive eigenvalues: ")
+  l0 <- sum(x$eig >= 0)
+  cat(signif(x$eig, 4)[1:(min(5, l0))])
+  if (l0 > 5)
+    cat(" ...\n")
+  else cat("\n")
+  cat("Negative eigenvalues: ")
+  l0 <- sum(x$eig <= 0)
+  cat(sort(signif(x$eig, 4))[1:(min(5, l0))])
+  if (l0 > 5)
+    cat(" ...\n")
+  else cat("\n")
+  cat('\n')
+  sumry <- array("", c(1, 4), list(1, c("vector", "length",
+                                        "mode", "content")))
+  sumry[1, ] <- c('$eig', length(x$eig), mode(x$eig), 'eigenvalues')
+  class(sumry) <- "table"
+  print(sumry)
+  cat("\n")
+  sumry <- array("", c(4, 4), list(1:4, c("data.frame", "nrow", "ncol", "content")))
+  sumry[1, ] <- c("$c1", nrow(x$c1), ncol(x$c1), "principal axes: scaled vectors of alleles loadings")
+  sumry[2, ] <- c("$li", nrow(x$li), ncol(x$li), "principal components: coordinates of entities ('scores')")
+  sumry[3, ] <- c("$ls", nrow(x$ls), ncol(x$ls), 'lag vector of principal components')
+  sumry[4, ] <- c("$as", nrow(x$as), ncol(x$as), 'pca axes onto dapc axes')
+
+  class(sumry) <- "table"
+  print(sumry)
+
+  cat("\n$xy: matrix of spatial coordinates")
+  cat("\n$lw: a list of spatial weights (class 'listw')")
+
+  cat("\n\nother elements: ")
+  if (length(names(x)) > 10)
+    cat(names(x)[11:(length(names(x)))], "\n")
+  else cat("NULL\n")
+}
+
+
+
+
+
+########################
+# Function summary.dapc
+########################
+summary.dapc <- function (object, ..., printres=TRUE) {
+  if (!inherits(object, "dapc"))stop("to be used with 'dapc' object")
+  if(!require(ade4,quietly=TRUE)) stop("the library ade4 is required; please install this package")
+  if(!require(spdep,quietly=TRUE)) stop("the library spdep is required; please install this package")
+
+  #util <- function(n) { ## no longer used
+  #  x <- "1"
+  #  for (i in 2:n) x[i] <- paste(x[i - 1], i, sep = "+")
+  #  return(x)
+  #}
+  norm.w <- function(X, w) {
+    f2 <- function(v) sum(v * v * w)/sum(w)
+    norm <- apply(X, 2, f2)
+    return(norm)
+  }
+
+  resfin <- list()
+
+  if(printres) {
+    cat("\nSpatial principal component analysis\n")
+    cat("\nCall: ")
+    print(object$call)
+  }
+
+  appel <- as.list(object$call)
+  ## compute original pca
+  # prepare data
+  obj <- eval(appel$obj)
+  if(is.null(appel$truenames)) truenames <- FALSE
+
+  f1 <- function(vec){
+    m <- mean(vec,na.rm=TRUE)
+    vec[is.na(vec)] <- m
+    return(vec)
+  }
+
+  if(is.genind(obj)) { X <- obj at tab }
+  if(is.genpop(obj)) { X <- makefreq(obj, quiet=TRUE)$tab }
+
+  X <- apply(X,2,f1)
+
+  if(truenames){
+    rownames(X) <- rownames(truenames(obj))
+    colnames(X) <- colnames(truenames(obj))
+  }
+
+  nfposi <- object$nfposi
+  nfnega <- object$nfnega
+
+  dudi <- dudi.pca(X, center=TRUE, scale=FALSE, scannf=FALSE, nf=nfposi+nfnega)
+  ## end of pca
+
+  lw <- object$lw
+
+  # I0, Imin, Imax
+  n <- nrow(X)
+  I0 <- -1/(n-1)
+  L <- listw2mat(lw)
+  eigL <- eigen(0.5*(L+t(L)))$values
+  Imin <- min(eigL)
+  Imax <- max(eigL)
+  Ival <- data.frame(I0=I0,Imin=Imin,Imax=Imax)
+  row.names(Ival) <- ""
+  if(printres) {
+    cat("\nConnection network statistics:\n")
+    print(Ival)
+  }
+
+  Istat <- c(I0,Imin,Imax)
+  names(Istat) <- c("I0","Imin","Imax")
+  resfin$Istat <- Istat
+
+
+  # les scores de l'analyse de base
+  nf <- dudi$nf
+  eig <- dudi$eig[1:nf]
+  cum <- cumsum(dudi$eig)[1:nf]
+  ratio <- cum/sum(dudi$eig)
+  w <- apply(dudi$l1,2,lag.listw,x=lw)
+  moran <- apply(w*as.matrix(dudi$l1)*dudi$lw,2,sum)
+  res <- data.frame(var=eig,cum=cum,ratio=ratio, moran=moran)
+  row.names(res) <- paste("Axis",1:nf)
+  if(printres) {
+    cat("\nScores from the centred PCA\n")
+    print(res)
+  }
+
+  resfin$pca <- res
+
+
+  # les scores de l'analyse spatiale
+  # on recalcule l'objet en gardant tous les axes
+  eig <- object$eig
+  nfposimax <- sum(eig > 0)
+  nfnegamax <- sum(eig < 0)
+
+  ms <- multispati(dudi=dudi, listw=lw, scannf=FALSE,
+                     nfposi=nfposimax, nfnega=nfnegamax)
+
+  ndim <- dudi$rank
+  nf <- nfposi + nfnega
+  agarder <- c(1:nfposi,if (nfnega>0) (ndim-nfnega+1):ndim)
+  varspa <- norm.w(ms$li,dudi$lw)
+  moran <- apply(as.matrix(ms$li)*as.matrix(ms$ls)*dudi$lw,2,sum)
+  res <- data.frame(eig=eig,var=varspa,moran=moran/varspa)
+  row.names(res) <- paste("Axis",1:length(eig))
+
+  if(printres) {
+    cat("\ndapc eigenvalues decomposition:\n")
+    print(res[agarder,])
+  }
+
+  resfin$dapc <- res
+
+  return(invisible(resfin))
+}
+
+
+
+#####################
+# Function plot.dapc
+#####################
+plot.dapc <- function (x, axis = 1, useLag=FALSE, ...){
+    if (!inherits(x, "dapc")) stop("Use only with 'dapc' objects.")
+
+    if(!require(ade4)) stop("ade4 package is required.")
+    if(!require(spdep)) stop("spdep package is required.")
+    if(axis>ncol(x$li)) stop("wrong axis required.")
+
+    opar <- par(no.readonly = TRUE)
+    on.exit(par(opar))
+    par(mar = rep(.1,4), mfrow=c(3,2))
+
+    n <- nrow(x$li)
+    xy <- x$xy
+
+    ## handle useLag argument
+    if(useLag){
+        z <- x$ls[,axis]
+    } else {
+        z <- x$li[,axis]
+    } # end if useLag
+    nfposi <- x$nfposi
+    nfnega <- x$nfnega
+    ## handle neig parameter - hide cn if nore than 100 links
+    nLinks <- sum(card(x$lw$neighbours))
+    if(nLinks < 500) {
+        neig <- nb2neig(x$lw$neighbours)
+    } else {
+        neig <- NULL
+    }
+
+    sub <- paste("Score",axis)
+    csub <- 2
+
+    # 1
+    if(n<30) clab <- 1 else clab <- 0
+    s.label(xy, clab=clab, include.ori=FALSE, addaxes=FALSE, neig=neig,
+            cneig=1, sub="Connection network", csub=2)
+
+    # 2
+    s.image(xy,z, include.ori=FALSE, grid=TRUE, kgrid=10, cgrid=1,
+            sub=sub, csub=csub, possub="bottomleft")
+    box()
+
+    # 3
+    if(n<30) {neig <- nb2neig(x$lw$neighbours)} else {neig <- NULL}
+    s.value(xy,z, include.ori=FALSE, addaxes=FALSE, clegend=0, csize=.6,
+            neig=neig, sub=sub, csub=csub, possub="bottomleft")
+
+    # 4
+    s.value(xy,z, include.ori=FALSE, addaxes=FALSE, clegend=0, csize=.6,
+            method="greylevel", neig=neig, sub=sub, csub=csub, possub="bottomleft")
+
+    # 5
+    omar <- par("mar")
+    par(mar = c(0.8, 2.8, 0.8, 0.8))
+    m <- length(x$eig)
+    col.w <- rep("white", m) # elles sont toutes blanches
+    col.w[1:nfposi] <- "grey"
+    if (nfnega>0) {col.w[m:(m-nfnega+1)] <- "grey"}
+    j <- axis
+    if (j>nfposi) {j <- j-nfposi +m -nfnega}
+    col.w[j] <- "black"
+    barplot(x$eig, col = col.w)
+    scatterutil.sub(cha ="Eigenvalues", csub = 2.5, possub = "topright")
+    par(mar=rep(.1,4))
+    box()
+    par(mar=omar)
+
+    # 6
+    par(mar=c(4,4,2,1))
+    screeplot(x,main="Eigenvalues decomposition")
+    par(mar=rep(.1,4))
+    box()
+    return(invisible(match.call()))
+}
+

Modified: pkg/R/import.R
===================================================================
--- pkg/R/import.R	2009-11-30 11:50:35 UTC (rev 498)
+++ pkg/R/import.R	2009-11-30 18:01:12 UTC (rev 499)
@@ -244,7 +244,8 @@
     loc.rep <- rep(names(nall),nall)
     col.lab <- paste(loc.rep,unlist(loc.all,use.names=FALSE),sep=".")
 
-    mat <- as.matrix(cbind.data.frame(temp))
+    ## mat <- as.matrix(cbind.data.frame(temp)) # ! does not work for huge numbers of alleles
+    mat <- matrix(unlist(temp), nrow=nrow(temp[[1]]))
     mat <- mat/ploidy
     colnames(mat) <- col.lab
     rownames(mat) <- ind.names