[adegenet-commits] r550 - pkg/man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Feb 8 13:06:47 CET 2010
Author: jombart
Date: 2010-02-08 13:06:46 +0100 (Mon, 08 Feb 2010)
New Revision: 550
Modified:
pkg/man/eHGDP.Rd
Log:
Done eHGDP doc.
Modified: pkg/man/eHGDP.Rd
===================================================================
--- pkg/man/eHGDP.Rd 2010-02-06 14:24:42 UTC (rev 549)
+++ pkg/man/eHGDP.Rd 2010-02-08 12:06:46 UTC (rev 550)
@@ -2,13 +2,13 @@
\name{eHGDP}
\alias{eHGDP}
\docType{data}
-\title{Microsatellites genotypes of 15 cattle breeds}
+\title{Extended HGDP-CEPH dataset}
\description{
-This data set gives the genotypes of 704 cattle individuals for 30
-microsatellites recommended by the FAO. The individuals are divided into
-two countries (Afric, France), two species (Bos taurus, Bos indicus) and
-15 breeds. Individuals were chosen in order to avoid pseudoreplication
-according to their exact genealogy.
+This dataset consists of 1350 individuals from native human populations
+distributed worldwide typed at 678 microsatellite loci. The original
+HGDP-CEPH panel [1-3] has been extended by several native American
+populations [4]. This dataset was used to illustrate the Discriminant
+Analysis of Principal Components (DAPC, [5]).
}
\usage{data(eHGDP)}
\format{
@@ -26,10 +26,22 @@
}
}
\source{
-Data prepared by Francois Balloux.
+ Original panel by Human Genome Diversity Project (HGDP) and Centre
+ d'Etude du Polymorphisme Humain (CEPH). See reference [4] for Native
+ American populations.
+
+ This copy of the dataset was prepared by Francois Balloux (f.balloux at imperial.ac.uk).
}
\references{
- Jombart, T., Devillard, S. and Balloux, F.
+[1] Rosenberg NA, Pritchard JK, Weber JL, Cann HM, Kidd KK, et al. (2002) Genetic structure of human populations. \emph{Science} 298: 2381-2385.
+
+[2] Ramachandran S, Deshpande O, Roseman CC, Rosenberg NA, Feldman MW, et al. (2005) Support from the relationship of genetic and geographic distance in human populations for a serial founder effect originating in Africa. \emph{Proc Natl Acad Sci U S A} 102: 15942-15947.
+
+[3] Cann HM, de Toma C, Cazes L, Legrand MF, Morel V, et al. (2002) A human genome diversity cell line panel. \emph{Science} 296: 261-262.
+
+[4] Wang S, Lewis CM, Jakobsson M, Ramachandran S, Ray N, et al. (2007) Genetic Variation and Population Structure in Native Americans. \emph{PLoS Genetics} 3: e185.
+
+[5] Jombart, T., Devillard, S. and Balloux, F.
Discriminant analysis of principal components: a new method for the analysis of
genetically structured populations. Submitted to \emph{PLoS Genetics}.
}
@@ -45,13 +57,18 @@
dapc1 <- dapc(eHGDP, all.contrib=TRUE, scale=FALSE, n.pca=200, n.da=80) # takes 2 minutes
dapc1
+## (see ?dapc for details about the output)
+
+
+
## SCREEPLOT OF EIGENVALUES
-barplot(dapc1$eig, main="eHGDP - DAPC eigenvalues",
-col=c("red","green","blue", rep("grey", 1000)))
+barplot(dapc1$eig, main="eHGDP - DAPC eigenvalues", col=c("red","green","blue", rep("grey", 1000)))
+
+
## SCATTERPLOTS
-## ! note ! colors may be inverted with respect to the
-## original paper (signs of principal components are arbitrary)
+## (!) Note: colors may be inverted with respect to the
+## original paper (as signs of principal components are arbitrary)
## axes 1-2
s.label(dapc1$grp.coord[,1:2], clab=0, sub="Axes 1-2")
par(xpd=T)
@@ -65,35 +82,54 @@
add.scatter.eig(dapc1$eig,10,1,2, posi="bottomright", ratio=.3, csub=1.25)
-## MAP DAPC RESULTS
+
+## MAP DAPC1 RESULTS
if(require(maps)){
xy <- cbind(eHGDP$other$popInfo$Longitude, eHGDP$other$popInfo$Latitude)
par(mar=rep(.1,4))
map(fill=TRUE, col="lightgrey")
-colorplot(xy, dapc1$grp.coord, cex=3, add=TRUE, trans=FALSE)
+colorplot(xy, -dapc1$grp.coord, cex=3, add=TRUE, trans=FALSE)
}
-## LOOK FOR LARGER CLUSTERS
+## LOOK FOR OTHER CLUSTERS
## to reproduce results of the reference paper, use :
## grp <- find.clusters(eHGDP, max.n=50, n.pca=200, scale=FALSE)
## and then
## plot(grp$Kstat, type="b", col="blue")
-grp <- find.clusters(hgdp, max.n=30, n.pca=200, scale=FALSE, n.clust=4) # takes about 2 minutes
+grp <- find.clusters(eHGDP, max.n=30, n.pca=200, scale=FALSE, n.clust=4) # takes about 2 minutes
names(grp)
+## (see ?find.clusters for details about the output)
+
+
## PERFORM DAPC - USE POPULATIONS AS CLUSTERS
## to reproduce exactly analyses from the paper, use "n.pca=1000"
-dapc1 <- dapc(eHGDP, grp=grp, all.contrib=TRUE, scale=FALSE, n.pca=200, n.da=80) # takes around 2 minutes
-dapc1
+dapc2 <- dapc(eHGDP, pop=grp$grp, all.contrib=TRUE, scale=FALSE, n.pca=200, n.da=80) # takes around 2 minutes
+dapc2
+## PRODUCE SCATTERPLOT
+scatter(dapc2) # axes 1-2
+scatter(dapc2,2,3) # axes 2-3
+
+## MAP DAPC2 RESULTS
+if(require(maps)){
+xy <- cbind(eHGDP$other$popInfo$Longitude, eHGDP$other$popInfo$Latitude)
+
+myCoords <- apply(dapc2$ind.coord, 2, tapply, pop(eHGDP), mean)
+
+par(mar=rep(.1,4))
+map(fill=TRUE, col="lightgrey")
+colorplot(xy, myCoords, cex=3, add=TRUE, trans=FALSE)
}
+
}
+}
\keyword{datasets}
More information about the adegenet-commits
mailing list