[adegenet-commits] r503 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Dec 1 20:11:00 CET 2009


Author: jombart
Date: 2009-12-01 20:10:59 +0100 (Tue, 01 Dec 2009)
New Revision: 503

Modified:
   pkg/R/dapc.R
Log:
find.clusters works! Allows for AIC, BIC, and WSS criteria.


Modified: pkg/R/dapc.R
===================================================================
--- pkg/R/dapc.R	2009-12-01 17:16:31 UTC (rev 502)
+++ pkg/R/dapc.R	2009-12-01 19:10:59 UTC (rev 503)
@@ -1,8 +1,8 @@
 #############
 ## find.clusters
 #############
-find.clusters <- function(x, n.pca=NULL, choose.n.grp=TRUE,
-                          min.n.clust=2, max.n.clust=round(nrow(x at tab)/10), n.iter=1e6, n.start=1e3,
+find.clusters <- function(x, n.pca=NULL, n.clust=NULL, choose.n.grp=TRUE, stat=c("BIC", "AIC", "WSS"),
+                          max.n.clust=round(nrow(x at tab)/10), n.iter=1e6, n.start=1e3,
                           scale=TRUE, scale.method=c("sigma", "binom"), truenames=TRUE){
 
     ## CHECKS ##
@@ -10,10 +10,13 @@
     if(!require(MASS, quiet=TRUE)) stop("MASS library is required.")
     if(!require(stats)) stop("package stats is required")
     if(!is.genind(x)) stop("x must be a genind object.")
+    stat <- match.arg(stat)
+    if(choose.n.grp & is.null(n.clust)) stop("choose.n.grp is FALSE, but n.clust is not provided")
 
 
     ## SOME GENERAL VARIABLES ##
     N <- nrow(x at tab)
+    min.n.clust <- 2
 
     ## PERFORM PCA ##
     maxRank <- min(dim(x at tab))
@@ -41,38 +44,60 @@
 
     ## PERFORM K-MEANS
     nbClust <- min.n.clust:max.n.clust
-    best <- NULL
-    stat <- numeric(0)
+    WSS <- numeric(0)
 
     for(i in 1:length(nbClust)){
         if(is.null(prior)) {ini <- nbClust[i]} else {ini <- prior}
         temp <- kmeans(XU, centers=ini, iter.max=min(n.iter, 100), nstart=min(n.start, 1e3))
-        stat[i] <- mean(temp$withinss)
-        if(which.min(stat)==i) {best <- temp}
+        WSS[i] <- sum(temp$withinss)
     }
 
 
-    ## CHOOSE NUMBER OF GROUPS
-    if(choose.n.grp){
+    ## DETERMINE THE NUMBER OF GROUPS
         ##TSS <- sum(pcaX$eig) * N
         ##betweenVar <- (1 - ((stat/(N-nbClust-1))/(TSS/(N-1)) )) *100
-        WSS.ori <- sum(apply(XU, 2, function(v) sum((v-mean(v))^2) ))
-        reducWSS <- -diff(c(WSS.ori, stat))
-        reducWSS <- reducWSS/max(reducWSS)
-        plot(reducWSS, xlab="Number of clusters", ylab="Reduction in within SS", main="Reduction of within SS \nwith number of clusters", type="b", col="blue")
+        ##WSS.ori <- sum(apply(XU, 2, function(v) sum((v-mean(v))^2) ))
+        ##reducWSS <- -diff(c(WSS.ori, stat))
+        ##reducWSS <- reducWSS/max(reducWSS)
+
+        if(stat=="AIC"){
+            WSS.ori <- sum(apply(XU, 2, function(v) sum((v-mean(v))^2) ))
+            k <- nbClust
+            myStat <- N*log(c(WSS.ori,WSS)/N) + 2*c(1,nbClust)
+            myLab <- "AIC"
+            myTitle <- "Value of AIC \nversus number of clusters"
+
+        }
+        if(stat=="BIC"){
+            WSS.ori <- sum(apply(XU, 2, function(v) sum((v-mean(v))^2) ))
+            k <- nbClust
+            myStat <- N*log(c(WSS.ori,WSS)/N) + log(N) *c(1,nbClust)
+            myLab <- "BIC"
+            myTitle <- "Value of BIC \nversus number of clusters"
+        }
+        if(stat=="WSS"){
+            WSS.ori <- sum(apply(XU, 2, function(v) sum((v-mean(v))^2) ))
+            myStat <- c(WSS.ori, stat)
+            ##            reducWSS <- -diff(c(WSS.ori, stat))
+            ##            myStat <- reducWSS/max(reducWSS)
+            myLab <- "Within sum of squares"
+            myTitle <- "Value of within SS\nversus number of clusters"
+        }
+
+    if(choose.n.grp){
+        plot(c(1,nbClust), myStat, xlab="Number of clusters", ylab=myLab, main=myTitle, type="b", col="blue")
         abline(h=0, lty=2, col="red")
         cat("Choose the number of clusters (>=2: ")
         n.clust <- as.integer(readLines(n = 1))
-    } else {
-        n.clust <- length(best$size)
     }
 
+    ## get final groups
     best <-  kmeans(XU, centers=n.clust, iter.max=n.iter, nstart=n.start)
 
 
     ## MAKE RESULT ##
-    names(stat) <- paste("K",nbClust, sep="=")
-    res <- list(stat=stat, grp=factor(best$cluster), size=best$size)
+    names(myStat) <- paste("K",c(1,nbClust), sep="=")
+    res <- list(stat=myStat, grp=factor(best$cluster), size=best$size)
 
     return(res)
 } # end find.clusters
@@ -270,6 +295,18 @@
 
 
 
+
+############
+## assignplot
+############
+assignplot <- function(x, pop=NULL){
+
+} # end assignplot
+
+
+
+
+
 ###############
 ## randtest.dapc
 ###############



More information about the adegenet-commits mailing list