[adegenet-commits] r556 - pkg/man

Mon Feb 8 16:34:02 CET 2010

Author: jombart
Date: 2010-02-08 16:34:02 +0100 (Mon, 08 Feb 2010)
New Revision: 556

Modified:
   pkg/man/H3N2.Rd
Log:
Moving forward.


Modified: pkg/man/H3N2.Rd
===================================================================

--- pkg/man/H3N2.Rd	2010-02-08 15:30:20 UTC (rev 555)
+++ pkg/man/H3N2.Rd	2010-02-08 15:34:02 UTC (rev 556)
@@ -1,105 +1,120 @@
 \encoding{UTF-8}
-\name{microbov}
-\alias{microbov}
+\name{H3N2}
+\alias{H3N2}
 \docType{data}
-\title{Microsatellites genotypes of 15 cattle breeds}
+\title{}
 \description{
-This data set gives the genotypes of 704 cattle individuals for 30
-microsatellites recommended by the FAO. The individuals are divided into
-two countries (Afric, France), two species (Bos taurus, Bos indicus) and
-15 breeds. Individuals were chosen in order to avoid pseudoreplication
-according to their exact genealogy.
+This dataset consists of  strains of
+distributed worldwide typed at ... SNPs.
 }
-\usage{data(microbov)}
+\usage{data(H3N2)}
 \format{
-    \code{microbov} is a genind object with 3 supplementary components:
+    \code{H3N2} is a genind object with a data frame named \code{...} as supplementary
+    component (\code{H3N2 at other$popInfo)}, which contains the following
+    variables:
     \describe{
-        \item{coun}{a factor giving the country of each individual (AF:
-	  Afric; FR: France).}
-        \item{breed}{a factor giving the breed of each individual.}
-        \item{spe}{is a factor giving the species of each individual
-	  (BT: Bos taurus; BI: Bos indicus).}
+        \item{Population}{a character vector indicating populations.}
+        \item{Region}{a character vector indicating the geographic region of each population.}
+        \item{Label}{a character vector indicating the correspondance
+	  with population labels used in the genind object (i.e., as
+	  output by \code{pop(H3N2)}).}
+	\item{Latitude,Longitude}{geographic coordinates of the
+	  populations, indicated as north and east degrees.}
     }
 }
 \source{
-Data prepared by Katayoun Moazami-Goudarzi and Denis Lalo\"e (INRA,
-Jouy-en-Josas, France)
+  This dataset was prepared by Thibaut Jombart (t.jombart at imperia.ac.uk).
 }
 \references{
-  Lalo\"e D., Jombart T., Dufour A.-B. and Moazami-Goudarzi K. (2007)
-  Consensus genetic structuring and typological value of markers using
-  Multiple Co-Inertia Analysis. \emph{Genetics Selection Evolution}.
-  \bold{39}: 545--567.
+Jombart, T., Devillard, S. and Balloux, F.
+Discriminant analysis of principal components: a new method for the analysis of
+genetically structured populations. Submitted to \emph{PLoS genetics}.
 }
 \examples{
-data(microbov)
-microbov
-summary(microbov)
+\dontrun{
+## LOAD DATA
+data(H3N2)
+H3N2
 
-# make Y, a genpop object
-Y <- genind2genpop(microbov)
 
-# make allelic frequency table
-temp <- makefreq(Y,missing="mean")
-X <- temp$tab
-nsamp <- temp$nobs
+## PERFORM DAPC - USE POPULATIONS AS CLUSTERS
+## to reproduce exactly analyses from the paper, use "n.pca=1000"
+dapc1 <- dapc(H3N2, all.contrib=TRUE, scale=FALSE, n.pca=200, n.da=80) # takes 2 minutes
+dapc1
 
-# perform 1 PCA per marker 
+## (see ?dapc for details about the output)
 
-if(require(ade4)){
-kX <- ktab.data.frame(data.frame(X),Y at loc.nall)
 
-kpca <- list()
-for(i in 1:30) {kpca[[i]] <- dudi.pca(kX[[i]],scannf=FALSE,nf=2,center=TRUE,scale=FALSE)}
-}
 
-sel <- sample(1:30,4)
-col = rep('red',15)
-col[c(2,10)] = 'darkred'
-col[c(4,12,14)] = 'deepskyblue4'
-col[c(8,15)] = 'darkblue'
+## SCREEPLOT OF EIGENVALUES
+barplot(dapc1$eig, main="H3N2 - DAPC eigenvalues", col=c("red","green","blue", rep("grey", 1000)))
 
-# display %PCA
-par(mfrow=c(2,2))
-for(i in sel) {
-s.multinom(kpca[[i]]$c1,kX[[i]],n.sample=nsamp[,i],coulrow=col,sub=Y at loc.names[i])
-add.scatter.eig(kpca[[i]]$eig,3,xax=1,yax=2,posi="top")
-}
 
-# perform a Multiple Coinertia Analysis
-kXcent <- kX
-for(i in 1:30) kXcent[[i]] <- as.data.frame(scalewt(kX[[i]],center=TRUE,scale=FALSE))
-mcoa1 <- mcoa(kXcent,scannf=FALSE,nf=3, option="uniform")
 
-# coordinated %PCA
-mcoa.axes <- split(mcoa1$axis,Y at loc.fac)
-mcoa.coord <- split(mcoa1$Tli,mcoa1$TL[,1])
-var.coord <- lapply(mcoa.coord,function(e) apply(e,2,var))
+## SCATTERPLOTS
+## (!) Note: colors may be inverted with respect to the
+## original paper (as signs of principal components are arbitrary)
+## axes 1-2
+s.label(dapc1$grp.coord[,1:2], clab=0, sub="Axes 1-2")
+par(xpd=T)
+colorplot(dapc1$grp.coord[,1:2], dapc1$grp.coord, cex=3, add=TRUE)
+add.scatter.eig(dapc1$eig,10,1,2, posi="bottomright", ratio=.3, csub=1.25)
 
-par(mfrow=c(2,2))
-for(i in sel) {
-s.multinom(mcoa.axes[[i]][,1:2],kX[[i]],n.sample=nsamp[,i],coulrow=col,sub=Y at loc.names[i])
-add.scatter.eig(var.coord[[i]],2,xax=1,yax=2,posi="top")
+## axes 2-3
+s.label(dapc1$grp.coord[,2:3], clab=0, sub="Axes 2-3")
+par(xpd=T)
+colorplot(dapc1$grp.coord[,2:3], dapc1$grp.coord, cex=3, add=TRUE)
+add.scatter.eig(dapc1$eig,10,1,2, posi="bottomright", ratio=.3, csub=1.25)
+
+
+
+## MAP DAPC1 RESULTS
+if(require(maps)){
+
+xy <- cbind(H3N2$other$popInfo$Longitude, H3N2$other$popInfo$Latitude)
+
+par(mar=rep(.1,4))
+map(fill=TRUE, col="lightgrey")
+colorplot(xy, -dapc1$grp.coord, cex=3, add=TRUE, trans=FALSE)
 }
 
-# reference typology
-par(mfrow=c(1,1))
-s.label(mcoa1$SynVar,lab=microbov at pop.names,sub="Reference typology",csub=1.5)
-add.scatter.eig(mcoa1$pseudoeig,nf=3,xax=1,yax=2,posi="top")
 
-# typologial values
-tv <- mcoa1$cov2
-tv <- apply(tv,2,function(c) c/sum(c))*100
-rownames(tv) <- Y at loc.names
-tv <- tv[order(Y at loc.names),]
 
-par(mfrow=c(3,1),mar=c(5,3,3,4),las=3)
-for(i in 1:3){
-barplot(round(tv[,i],3),ylim=c(0,12),yaxt="n",main=paste("Typological value -
-structure",i))
-axis(side=2,at=seq(0,12,by=2),labels=paste(seq(0,12,by=2),"\%"),cex=3)
-abline(h=seq(0,12,by=2),col="grey",lty=2)
+## LOOK FOR OTHER CLUSTERS
+## to reproduce results of the reference paper, use :
+## grp <- find.clusters(hgdp, max.n=50, n.pca=200, scale=FALSE)
+## and then
+## plot(grp$Kstat, type="b", col="blue")
+
+grp <- find.clusters(H3N2, max.n=30, n.pca=200, scale=FALSE, n.clust=4) # takes about 2 minutes
+names(grp)
+
+## (see ?find.clusters for details about the output)
+
+
+
+## PERFORM DAPC - USE POPULATIONS AS CLUSTERS
+## to reproduce exactly analyses from the paper, use "n.pca=1000"
+dapc2 <- dapc(H3N2, pop=grp$grp, all.contrib=TRUE, scale=FALSE, n.pca=200, n.da=80) # takes around 2 minutes
+dapc2
+
+
+## PRODUCE SCATTERPLOT
+scatter(dapc2) # axes 1-2
+scatter(dapc2,2,3) # axes 2-3
+
+
+## MAP DAPC2 RESULTS
+if(require(maps)){
+xy <- cbind(H3N2$other$popInfo$Longitude, H3N2$other$popInfo$Latitude)
+
+myCoords <- apply(dapc2$ind.coord, 2, tapply, pop(H3N2), mean)
+
+par(mar=rep(.1,4))
+map(fill=TRUE, col="lightgrey")
+colorplot(xy, myCoords, cex=3, add=TRUE, trans=FALSE)
 }
 
 }
+}
 \keyword{datasets}