[adegenet-commits] r549 - in pkg: R data man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sat Feb 6 15:24:42 CET 2010
Author: jombart
Date: 2010-02-06 15:24:42 +0100 (Sat, 06 Feb 2010)
New Revision: 549
Modified:
pkg/R/dapc.R
pkg/data/eHGDP.RData
pkg/man/eHGDP.Rd
Log:
eHGDP almost done.
Modified: pkg/R/dapc.R
===================================================================
--- pkg/R/dapc.R 2010-02-05 15:10:18 UTC (rev 548)
+++ pkg/R/dapc.R 2010-02-06 14:24:42 UTC (rev 549)
@@ -44,7 +44,7 @@
## get n.pca from the % of variance to conserve
if(!is.null(perc.pca)){
- n.pca <- min(which(cumVar > perc.pca))
+ n.pca <- min(which(cumVar >= perc.pca))
if(n.pca<1) n.pca <- 1
}
Modified: pkg/data/eHGDP.RData
===================================================================
(Binary files differ)
Modified: pkg/man/eHGDP.Rd
===================================================================
--- pkg/man/eHGDP.Rd 2010-02-05 15:10:18 UTC (rev 548)
+++ pkg/man/eHGDP.Rd 2010-02-06 14:24:42 UTC (rev 549)
@@ -12,30 +12,88 @@
}
\usage{data(eHGDP)}
\format{
- \code{eHGDP} is a genind object with 3 supplementary components:
+ \code{eHGDP} is a genind object with a data frame named \code{popInfo} as supplementary
+ component (\code{eHGDP at other$popInfo)}, which contains the following
+ variables:
\describe{
- \item{coun}{a factor giving the country of each individual (AF:
- Afric; FR: France).}
- \item{breed}{a factor giving the breed of each individual.}
- \item{spe}{is a factor giving the species of each individual
- (BT: Bos taurus; BI: Bos indicus).}
+ \item{Population}{a character vector indicating populations.}
+ \item{Region}{a character vector indicating the geographic region of each population.}
+ \item{Label}{a character vector indicating the correspondance
+ with population labels used in the genind object (i.e., as
+ output by \code{pop(eHGDP)}).}
+ \item{Latitude,Longitude}{geographic coordinates of the
+ populations, indicated as north and east degrees.}
}
}
\source{
-Data prepared by Katayoun Moazami-Goudarzi and Denis Lalo\"e (INRA,
-Jouy-en-Josas, France)
+Data prepared by Francois Balloux.
}
\references{
- Lalo\"e D., Jombart T., Dufour A.-B. and Moazami-Goudarzi K. (2007)
- Consensus genetic structuring and typological value of markers using
- Multiple Co-Inertia Analysis. \emph{Genetics Selection Evolution}.
- \bold{39}: 545--567.
+ Jombart, T., Devillard, S. and Balloux, F.
+Discriminant analysis of principal components: a new method for the analysis of
+genetically structured populations. Submitted to \emph{PLoS genetics}.
}
\examples{
+\dontrun{
+## LOAD DATA
data(eHGDP)
eHGDP
+## PERFORM DAPC - USE POPULATIONS AS CLUSTERS
+## to reproduce exactly analyses from the paper, use "n.pca=1000"
+dapc1 <- dapc(eHGDP, all.contrib=TRUE, scale=FALSE, n.pca=200, n.da=80) # takes 2 minutes
+dapc1
+## SCREEPLOT OF EIGENVALUES
+barplot(dapc1$eig, main="eHGDP - DAPC eigenvalues",
+col=c("red","green","blue", rep("grey", 1000)))
+
+## SCATTERPLOTS
+## ! note ! colors may be inverted with respect to the
+## original paper (signs of principal components are arbitrary)
+## axes 1-2
+s.label(dapc1$grp.coord[,1:2], clab=0, sub="Axes 1-2")
+par(xpd=T)
+colorplot(dapc1$grp.coord[,1:2], dapc1$grp.coord, cex=3, add=TRUE)
+add.scatter.eig(dapc1$eig,10,1,2, posi="bottomright", ratio=.3, csub=1.25)
+
+## axes 2-3
+s.label(dapc1$grp.coord[,2:3], clab=0, sub="Axes 2-3")
+par(xpd=T)
+colorplot(dapc1$grp.coord[,2:3], dapc1$grp.coord, cex=3, add=TRUE)
+add.scatter.eig(dapc1$eig,10,1,2, posi="bottomright", ratio=.3, csub=1.25)
+
+
+## MAP DAPC RESULTS
+if(require(maps)){
+
+xy <- cbind(eHGDP$other$popInfo$Longitude, eHGDP$other$popInfo$Latitude)
+
+par(mar=rep(.1,4))
+map(fill=TRUE, col="lightgrey")
+colorplot(xy, dapc1$grp.coord, cex=3, add=TRUE, trans=FALSE)
}
+
+
+
+## LOOK FOR LARGER CLUSTERS
+## to reproduce results of the reference paper, use :
+## grp <- find.clusters(hgdp, max.n=50, n.pca=200, scale=FALSE)
+## and then
+## plot(grp$Kstat, type="b", col="blue")
+
+grp <- find.clusters(hgdp, max.n=30, n.pca=200, scale=FALSE, n.clust=4) # takes about 2 minutes
+names(grp)
+
+
+## PERFORM DAPC - USE POPULATIONS AS CLUSTERS
+## to reproduce exactly analyses from the paper, use "n.pca=1000"
+dapc1 <- dapc(eHGDP, grp=grp, all.contrib=TRUE, scale=FALSE, n.pca=200, n.da=80) # takes around 2 minutes
+dapc1
+
+
+
+}
+}
\keyword{datasets}
More information about the adegenet-commits
mailing list