[adegenet-commits] r549 - in pkg: R data man

Sat Feb 6 15:24:42 CET 2010

Author: jombart
Date: 2010-02-06 15:24:42 +0100 (Sat, 06 Feb 2010)
New Revision: 549

Modified:
   pkg/R/dapc.R
   pkg/data/eHGDP.RData
   pkg/man/eHGDP.Rd
Log:
eHGDP almost done.


Modified: pkg/R/dapc.R
===================================================================

--- pkg/R/dapc.R	2010-02-05 15:10:18 UTC (rev 548)
+++ pkg/R/dapc.R	2010-02-06 14:24:42 UTC (rev 549)
@@ -44,7 +44,7 @@
 
     ## get n.pca from the % of variance to conserve
     if(!is.null(perc.pca)){
-        n.pca <- min(which(cumVar > perc.pca))
+        n.pca <- min(which(cumVar >= perc.pca))
         if(n.pca<1) n.pca <- 1
     }
 

Modified: pkg/data/eHGDP.RData
===================================================================
(Binary files differ)

Modified: pkg/man/eHGDP.Rd
===================================================================
--- pkg/man/eHGDP.Rd	2010-02-05 15:10:18 UTC (rev 548)
+++ pkg/man/eHGDP.Rd	2010-02-06 14:24:42 UTC (rev 549)
@@ -12,30 +12,88 @@
 }
 \usage{data(eHGDP)}
 \format{
-    \code{eHGDP} is a genind object with 3 supplementary components:
+    \code{eHGDP} is a genind object with a data frame named \code{popInfo} as supplementary
+    component (\code{eHGDP@other$popInfo}), which contains the following
+    variables:
     \describe{
-        \item{coun}{a factor giving the country of each individual (AF:
-	  Afric; FR: France).}
-        \item{breed}{a factor giving the breed of each individual.}
-        \item{spe}{is a factor giving the species of each individual
-	  (BT: Bos taurus; BI: Bos indicus).}
+        \item{Population}{a character vector indicating populations.}
+        \item{Region}{a character vector indicating the geographic region of each population.}
+        \item{Label}{a character vector indicating the correspondence
+	  with population labels used in the genind object (i.e., as
+	  output by \code{pop(eHGDP)}).}
+	\item{Latitude,Longitude}{geographic coordinates of the
+	  populations, indicated as north and east degrees.}
     }
 }
 \source{
-Data prepared by Katayoun Moazami-Goudarzi and Denis Lalo\"e (INRA,
-Jouy-en-Josas, France)
+Data prepared by Francois Balloux.
 }
 \references{
-  Lalo\"e D., Jombart T., Dufour A.-B. and Moazami-Goudarzi K. (2007)
-  Consensus genetic structuring and typological value of markers using
-  Multiple Co-Inertia Analysis. \emph{Genetics Selection Evolution}.
-  \bold{39}: 545--567.
+  Jombart, T., Devillard, S. and Balloux, F.
+Discriminant analysis of principal components: a new method for the analysis of
+genetically structured populations. Submitted to \emph{PLoS genetics}.
 }
 \examples{
+\dontrun{
+## LOAD DATA
 data(eHGDP)
 eHGDP
 
 
+## PERFORM DAPC - USE POPULATIONS AS CLUSTERS
+## to reproduce exactly analyses from the paper, use "n.pca=1000"
+dapc1 <- dapc(eHGDP, all.contrib=TRUE, scale=FALSE, n.pca=200, n.da=80) # takes 2 minutes
+dapc1
 
+## SCREEPLOT OF EIGENVALUES
+barplot(dapc1$eig, main="eHGDP - DAPC eigenvalues",
+col=c("red","green","blue", rep("grey", 1000)))
+
+## SCATTERPLOTS
+## ! note ! colors may be inverted with respect to the
+## original paper (signs of principal components are arbitrary)
+## axes 1-2
+s.label(dapc1$grp.coord[,1:2], clab=0, sub="Axes 1-2")
+par(xpd=T)
+colorplot(dapc1$grp.coord[,1:2], dapc1$grp.coord, cex=3, add=TRUE)
+add.scatter.eig(dapc1$eig,10,1,2, posi="bottomright", ratio=.3, csub=1.25)
+
+## axes 2-3
+s.label(dapc1$grp.coord[,2:3], clab=0, sub="Axes 2-3")
+par(xpd=T)
+colorplot(dapc1$grp.coord[,2:3], dapc1$grp.coord, cex=3, add=TRUE)
+add.scatter.eig(dapc1$eig,10,1,2, posi="bottomright", ratio=.3, csub=1.25)
+
+
+## MAP DAPC RESULTS
+if(require(maps)){
+
+xy <- cbind(eHGDP$other$popInfo$Longitude, eHGDP$other$popInfo$Latitude)
+
+par(mar=rep(.1,4))
+map(fill=TRUE, col="lightgrey")
+colorplot(xy, dapc1$grp.coord, cex=3, add=TRUE, trans=FALSE)
 }
+
+
+
+## LOOK FOR LARGER CLUSTERS
+## to reproduce results of the reference paper, use :
+## grp <- find.clusters(hgdp, max.n=50, n.pca=200, scale=FALSE)
+## and then
+## plot(grp$Kstat, type="b", col="blue")
+
+grp <- find.clusters(hgdp, max.n=30, n.pca=200, scale=FALSE, n.clust=4) # takes about 2 minutes
+names(grp)
+
+
+## PERFORM DAPC - USE INFERRED CLUSTERS AS GROUPS
+## to reproduce exactly analyses from the paper, use "n.pca=1000"
+dapc1 <- dapc(eHGDP, grp=grp, all.contrib=TRUE, scale=FALSE, n.pca=200, n.da=80) # takes around 2 minutes
+dapc1
+
+
+
+}
+}
 \keyword{datasets}