[adegenet-commits] r503 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Dec 1 20:11:00 CET 2009
Author: jombart
Date: 2009-12-01 20:10:59 +0100 (Tue, 01 Dec 2009)
New Revision: 503
Modified:
pkg/R/dapc.R
Log:
find.clusters works! Allows for AIC, BIC, and WSS criteria.
Modified: pkg/R/dapc.R
===================================================================
--- pkg/R/dapc.R 2009-12-01 17:16:31 UTC (rev 502)
+++ pkg/R/dapc.R 2009-12-01 19:10:59 UTC (rev 503)
@@ -1,8 +1,8 @@
#############
## find.clusters
#############
-find.clusters <- function(x, n.pca=NULL, choose.n.grp=TRUE,
- min.n.clust=2, max.n.clust=round(nrow(x at tab)/10), n.iter=1e6, n.start=1e3,
+find.clusters <- function(x, n.pca=NULL, n.clust=NULL, choose.n.grp=TRUE, stat=c("BIC", "AIC", "WSS"),
+ max.n.clust=round(nrow(x at tab)/10), n.iter=1e6, n.start=1e3,
scale=TRUE, scale.method=c("sigma", "binom"), truenames=TRUE){
## CHECKS ##
@@ -10,10 +10,13 @@
if(!require(MASS, quiet=TRUE)) stop("MASS library is required.")
if(!require(stats)) stop("package stats is required")
if(!is.genind(x)) stop("x must be a genind object.")
+ stat <- match.arg(stat)
+ if(choose.n.grp & is.null(n.clust)) stop("choose.n.grp is FALSE, but n.clust is not provided")
## SOME GENERAL VARIABLES ##
N <- nrow(x at tab)
+ min.n.clust <- 2
## PERFORM PCA ##
maxRank <- min(dim(x at tab))
@@ -41,38 +44,60 @@
## PERFORM K-MEANS
nbClust <- min.n.clust:max.n.clust
- best <- NULL
- stat <- numeric(0)
+ WSS <- numeric(0)
for(i in 1:length(nbClust)){
if(is.null(prior)) {ini <- nbClust[i]} else {ini <- prior}
temp <- kmeans(XU, centers=ini, iter.max=min(n.iter, 100), nstart=min(n.start, 1e3))
- stat[i] <- mean(temp$withinss)
- if(which.min(stat)==i) {best <- temp}
+ WSS[i] <- sum(temp$withinss)
}
- ## CHOOSE NUMBER OF GROUPS
- if(choose.n.grp){
+ ## DETERMINE THE NUMBER OF GROUPS
##TSS <- sum(pcaX$eig) * N
##betweenVar <- (1 - ((stat/(N-nbClust-1))/(TSS/(N-1)) )) *100
- WSS.ori <- sum(apply(XU, 2, function(v) sum((v-mean(v))^2) ))
- reducWSS <- -diff(c(WSS.ori, stat))
- reducWSS <- reducWSS/max(reducWSS)
- plot(reducWSS, xlab="Number of clusters", ylab="Reduction in within SS", main="Reduction of within SS \nwith number of clusters", type="b", col="blue")
+ ##WSS.ori <- sum(apply(XU, 2, function(v) sum((v-mean(v))^2) ))
+ ##reducWSS <- -diff(c(WSS.ori, stat))
+ ##reducWSS <- reducWSS/max(reducWSS)
+
+ if(stat=="AIC"){
+ WSS.ori <- sum(apply(XU, 2, function(v) sum((v-mean(v))^2) ))
+ k <- nbClust
+ myStat <- N*log(c(WSS.ori,WSS)/N) + 2*c(1,nbClust)
+ myLab <- "AIC"
+ myTitle <- "Value of AIC \nversus number of clusters"
+
+ }
+ if(stat=="BIC"){
+ WSS.ori <- sum(apply(XU, 2, function(v) sum((v-mean(v))^2) ))
+ k <- nbClust
+ myStat <- N*log(c(WSS.ori,WSS)/N) + log(N) *c(1,nbClust)
+ myLab <- "BIC"
+ myTitle <- "Value of BIC \nversus number of clusters"
+ }
+ if(stat=="WSS"){
+ WSS.ori <- sum(apply(XU, 2, function(v) sum((v-mean(v))^2) ))
+ myStat <- c(WSS.ori, stat)
+ ## reducWSS <- -diff(c(WSS.ori, stat))
+ ## myStat <- reducWSS/max(reducWSS)
+ myLab <- "Within sum of squares"
+ myTitle <- "Value of within SS\nversus number of clusters"
+ }
+
+ if(choose.n.grp){
+ plot(c(1,nbClust), myStat, xlab="Number of clusters", ylab=myLab, main=myTitle, type="b", col="blue")
abline(h=0, lty=2, col="red")
cat("Choose the number of clusters (>=2: ")
n.clust <- as.integer(readLines(n = 1))
- } else {
- n.clust <- length(best$size)
}
+ ## get final groups
best <- kmeans(XU, centers=n.clust, iter.max=n.iter, nstart=n.start)
## MAKE RESULT ##
- names(stat) <- paste("K",nbClust, sep="=")
- res <- list(stat=stat, grp=factor(best$cluster), size=best$size)
+ names(myStat) <- paste("K",c(1,nbClust), sep="=")
+ res <- list(stat=myStat, grp=factor(best$cluster), size=best$size)
return(res)
} # end find.clusters
@@ -270,6 +295,18 @@
+
+############
+## assignplot
+############
+assignplot <- function(x, pop=NULL){
+
+} # end assignplot
+
+
+
+
+
###############
## randtest.dapc
###############
More information about the adegenet-commits
mailing list