[adegenet-commits] r507 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Dec 4 13:13:07 CET 2009
Author: jombart
Date: 2009-12-04 13:13:07 +0100 (Fri, 04 Dec 2009)
New Revision: 507
Modified:
pkg/R/dapc.R
Log:
Generalised all functions. Have to test the stuff.
Modified: pkg/R/dapc.R
===================================================================
--- pkg/R/dapc.R 2009-12-04 11:42:06 UTC (rev 506)
+++ pkg/R/dapc.R 2009-12-04 12:13:07 UTC (rev 507)
@@ -3,7 +3,6 @@
#############
find.clusters <- function (x, ...) UseMethod("find.clusters")
-
######################
## find.clusters.data.frame
######################
@@ -16,8 +15,8 @@
if(!require(stats)) stop("package stats is required")
stat <- match.arg(stat)
-
+
## SOME GENERAL VARIABLES ##
N <- nrow(x)
min.n.clust <- 2
@@ -27,7 +26,6 @@
maxRank <- min(dim(x))
pcaX <- dudi.pca(x, center = center, scale = scale, scannf = FALSE, nf=maxRank)
- X <- scalewt(x, center=center, scale=scale)
## select the number of retained PC for PCA
if(is.null(n.pca)){
@@ -150,42 +148,47 @@
+###################
+## find.clusters.matrix
+###################
+find.clusters.matrix <- function(x, ...){
+ return(find.clusters(as.data.frame(x), ...))
+}
+
+
+
+
+
########
## dapc
########
-dapc <- function(x, pop=NULL, n.pca=NULL, n.da=NULL,
- scale=TRUE, scale.method=c("sigma", "binom"), truenames=TRUE, all.contrib=FALSE){
+dapc <- function (x, ...) UseMethod("dapc")
+#################
+## dapc.data.frame
+#################
+dapc.data.frame <- function(x, fac, n.pca=NULL, n.da=NULL,
+ center=TRUE, scale=TRUE, var.contrib=FALSE){
+
## FIRST CHECKS
- if(!is.genind(x)) stop("x must be a genind object.")
-
- if(is.null(pop)) {
- pop.fac <- pop(x)
- } else {
- pop.fac <- pop
- }
-
- if(is.null(pop.fac)) stop("x does not include pre-defined populations, and `pop' is not provided")
-
if(!require(ade4, quiet=TRUE)) stop("ade4 library is required.")
if(!require(MASS, quiet=TRUE)) stop("MASS library is required.")
+ fac <- as.factor(fac)
## SOME GENERAL VARIABLES
- N <- nrow(x at tab)
+ N <- nrow(x)
## PERFORM PCA ##
- maxRank <- min(dim(x at tab))
+ maxRank <- min(dim(x))
- X <- scaleGen(x, center = TRUE, scale = scale, method = scale.method,
- missing = "mean", truenames = truenames)
+ pcaX <- dudi.pca(x, center = center, scale = scale, scannf = FALSE, nf=maxRank)
- pcaX <- dudi.pca(X, center = FALSE, scale = FALSE, scannf = FALSE, nf=maxRank)
## select the number of retained PC for PCA
if(is.null(n.pca)){
cumVar <- 100 * cumsum(pcaX$eig)/sum(pcaX$eig)
- plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative genetic variance (%)", main="Genetic variance explained by PCA")
+ plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA")
cat("Choose the number PCs to retain (>=1): ")
n.pca <- as.integer(readLines(n = 1))
}
@@ -204,9 +207,9 @@
## PERFORM DA ##
- ldaX <- lda(XU, pop.fac)
+ ldaX <- lda(XU, fac)
if(is.null(n.da)){
- barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(pop.fac))) )
+ barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(fac))) )
cat("Choose the number discriminant functions to retain (>=1): ")
n.da <- as.integer(readLines(n = 1))
}
@@ -219,77 +222,119 @@
res$n.pca <- n.pca
res$n.da <- n.da
res$tab <- XU
- res$pop <- pop.fac
- res$var.gen <- XU.lambda
+ res$fac <- fac
+ res$var <- XU.lambda
res$eig <- ldaX$svd^2
res$disc.func <- ldaX$scaling[, 1:n.da, drop=FALSE]
res$ind.coord <-predX$x
- res$pop.coord <- apply(res$ind.coord, 2, tapply, pop.fac, mean)
+ res$grp.coord <- apply(res$ind.coord, 2, tapply, fac, mean)
res$prior <- ldaX$prior
res$posterior <- predX$posterior
res$assign <- predX$class
res$call <- match.call()
## optional: get loadings of alleles
- if(all.contrib){
- res$all.contr <- as.matrix(U) %*% as.matrix(ldaX$scaling)
- res$all.contr <- t(apply(res$all.contr, 1, function(e) e*e / sum(e*e)))
+ if(var.contrib){
+ res$var.contr <- as.matrix(U) %*% as.matrix(ldaX$scaling)
+ res$var.contr <- t(apply(res$var.contr, 1, function(e) e*e / sum(e*e)))
}
class(res) <- "dapc"
return(res)
-} # end dapc
+} # end dapc.data.frame
+#############
+## dapc.genind
+#############
+dapc.genind <- function(x, pop=NULL, n.pca=NULL, n.da=NULL,
+ scale=TRUE, scale.method=c("sigma", "binom"), truenames=TRUE, all.contrib=FALSE){
+
+ ## FIRST CHECKS
+ if(!is.genind(x)) stop("x must be a genind object.")
+
+ if(is.null(pop)) {
+ pop.fac <- pop(x)
+ } else {
+ pop.fac <- pop
+ }
+
+ if(is.null(pop.fac)) stop("x does not include pre-defined populations, and `pop' is not provided")
+ if(!require(ade4, quiet=TRUE)) stop("ade4 library is required.")
+ if(!require(MASS, quiet=TRUE)) stop("MASS library is required.")
+
+
+ ## SOME GENERAL VARIABLES
+ N <- nrow(x at tab)
+
+ ## PERFORM PCA ##
+ maxRank <- min(dim(x at tab))
+
+ X <- scaleGen(x, center = TRUE, scale = scale, method = scale.method,
+ missing = "mean", truenames = truenames)
+
+ ## CALL DATA.FRAME METHOD ##
+ res <- dapc(X, fac=pop.fac, n.pca=n.pca, n.da=n.da,
+ center=center, scale=scale, var.contrib=all.contrib)
+
+ res$call <- match.call()
+
+ return(res)
+} # end dapc.genind
+
+
+
+
+
+
######################
# Function print.dapc
######################
print.dapc <- function(x, ...){
- cat("\t#########################################\n")
- cat("\t# Discriminant Analysis of Principal Components #\n")
- cat("\t#########################################\n")
- cat("class: ")
- cat(class(x))
- cat("\n$call: ")
- print(x$call)
- cat("\n$n.pca:", x$n.pca, "first PCs of PCA used")
- cat("\n$n.da:", x$n.da, "discriminant functions saved")
- cat("\n$var.gen (proportion of conserved genetic variance):", round(x$var.gen,3))
+ cat("\t#########################################\n")
+ cat("\t# Discriminant Analysis of Principal Components #\n")
+ cat("\t#########################################\n")
+ cat("class: ")
+ cat(class(x))
+ cat("\n$call: ")
+ print(x$call)
+ cat("\n$n.pca:", x$n.pca, "first PCs of PCA used")
+ cat("\n$n.da:", x$n.da, "discriminant functions saved")
+ cat("\n$varn (proportion of conserved variance):", round(x$var.gen,3))
+ cat("\n\n$eig (eigenvalues): ")
+ l0 <- sum(x$eig >= 0)
+ cat(signif(x$eig, 4)[1:(min(5, l0))])
+ if (l0 > 5)
+ cat(" ...\n\n")
- cat("\n\n$eig (eigenvalues): ")
- l0 <- sum(x$eig >= 0)
- cat(signif(x$eig, 4)[1:(min(5, l0))])
- if (l0 > 5)
- cat(" ...\n\n")
+ ## vectors
+ sumry <- array("", c(4, 3), list(1:4, c("vector", "length", "content")))
+ sumry[1, ] <- c('$eig', length(x$eig), 'eigenvalues')
+ sumry[2, ] <- c('$grp', length(x$grp), 'prior group assignment')
+ sumry[3, ] <- c('$prior', length(x$prior), 'prior group probabilities')
+ sumry[4, ] <- c('$assign', length(x$assign), 'posterior group assignment')
+ class(sumry) <- "table"
+ print(sumry)
- ## vectors
- sumry <- array("", c(4, 3), list(1:4, c("vector", "length", "content")))
- sumry[1, ] <- c('$eig', length(x$eig), 'eigenvalues')
- sumry[2, ] <- c('$pop', length(x$pop), 'prior group assignment')
- sumry[3, ] <- c('$prior', length(x$prior), 'prior group probabilities')
- sumry[4, ] <- c('$assign', length(x$assign), 'posterior group assignment')
- class(sumry) <- "table"
- print(sumry)
+ ## data.frames
+ cat("\n")
+ sumry <- array("", c(5, 4), list(1:5, c("data.frame", "nrow", "ncol", "content")))
+ sumry[1, ] <- c("$tab", nrow(x$tab), ncol(x$tab), "retained PCs of PCA")
+ sumry[2, ] <- c("$disc.func", nrow(x$disc.func), ncol(x$disc.func), "discriminant functions")
+ sumry[3, ] <- c("$ind.coord", nrow(x$ind.coord), ncol(x$ind.coord), "coordinates of individuals")
+ sumry[4, ] <- c("$grp.coord", nrow(x$grp.coord), ncol(x$grp.coord), "coordinates of groups")
+ sumry[5, ] <- c("$posterior", nrow(x$posterior), ncol(x$posterior), "posterior membership probabilities")
+ class(sumry) <- "table"
+ print(sumry)
- ## data.frames
- cat("\n")
- sumry <- array("", c(5, 4), list(1:5, c("data.frame", "nrow", "ncol", "content")))
- sumry[1, ] <- c("$tab", nrow(x$tab), ncol(x$tab), "retained PCs of PCA")
- sumry[2, ] <- c("$disc.func", nrow(x$disc.func), ncol(x$disc.func), "discriminant functions")
- sumry[3, ] <- c("$ind.coord", nrow(x$ind.coord), ncol(x$ind.coord), "coordinates of individuals")
- sumry[4, ] <- c("$pop.coord", nrow(x$pop.coord), ncol(x$pop.coord), "coordinates of populations")
- sumry[5, ] <- c("$posterior", nrow(x$posterior), ncol(x$posterior), "posterior membership probabilities")
- class(sumry) <- "table"
- print(sumry)
-
- cat("\nother elements: ")
- if (length(names(x)) > 13)
- cat(names(x)[14:(length(names(x)))], "\n")
- else cat("NULL\n")
+ cat("\nother elements: ")
+ if (length(names(x)) > 13)
+ cat(names(x)[14:(length(names(x)))], "\n")
+ else cat("NULL\n")
}
@@ -308,15 +353,15 @@
## number of dimensions
res$n.dim <- ncol(x$disc.func)
- res$n.pop <- length(levels(x$pop))
+ res$n.pop <- length(levels(x$grp))
## assignment success
- temp <- as.character(x$pop)==as.character(x$assign)
+ temp <- as.character(x$grp)==as.character(x$assign)
res$assign.prop <- mean(temp)
- res$assign.per.pop <- tapply(temp, x$pop, mean)
+ res$assign.per.pop <- tapply(temp, x$grp, mean)
## group sizes
- res$prior.grp.size <- table(x$pop)
+ res$prior.grp.size <- table(x$grp)
res$post.grp.size <- table(x$assign)
return(res)
@@ -330,11 +375,11 @@
##############
## scatter.dapc
##############
-scatter.dapc <- function(x, xax=1, yax=2, col=rainbow(length(levels(x$pop))), posi="bottomleft", bg="grey", ratio=0.3, csub=1.2, ...){
+scatter.dapc <- function(x, xax=1, yax=2, col=rainbow(length(levels(x$grp))), posi="bottomleft", bg="grey", ratio=0.3, csub=1.2, ...){
if(!require(ade4, quiet=TRUE)) stop("ade4 library is required.")
axes <- c(xax,yax)
par(bg=bg)
- s.class(x$ind.coord[,axes], fac=x$pop, col=col, ...)
+ s.class(x$ind.coord[,axes], fac=x$grp, col=col, ...)
add.scatter.eig(x$eig, ncol(x$disc.func), axes[1], axes[2], posi=posi, ratio=ratio, csub=csub)
return(invisible())
} # end scatter.dapc
@@ -353,12 +398,12 @@
if(!is.null(only.pop)){
only.pop <- as.character(only.pop)
- ori.pop <- as.character(x$pop)
- x$pop <- x$pop[only.pop==ori.pop]
+ ori.pop <- as.character(x$grp)
+ x$grp <- x$grp[only.pop==ori.pop]
x$assign <- x$assign[only.pop==ori.pop]
x$posterior <- x$posterior[only.pop==ori.pop, , drop=FALSE]
} else if(!is.null(subset)){
- x$pop <- x$pop[subset]
+ x$grp <- x$grp[subset]
x$assign <- x$assign[subset]
x$posterior <- x$posterior[subset, , drop=FALSE]
}
@@ -382,7 +427,7 @@
box()
newPop <- colnames(x$posterior)
- x.real.coord <- rev(match(x$pop, newPop))
+ x.real.coord <- rev(match(x$grp, newPop))
y.real.coord <- seq(.5, by=1, le=n.ind)
points(x.real.coord, y.real.coord, col="deepskyblue2", pch=pch)
More information about the adegenet-commits
mailing list