[adegenet-commits] r909 - in pkg: R inst/doc inst/doc/Robjects inst/doc/figs man

Tue Jun 14 21:08:52 CEST 2011

Author: jombart
Date: 2011-06-14 21:08:49 +0200 (Tue, 14 Jun 2011)
New Revision: 909

Added:
   pkg/inst/doc/Robjects/dapcGL.RData
   pkg/inst/doc/Robjects/pca1.RData
   pkg/inst/doc/Robjects/x.RData
   pkg/inst/doc/figs/devilsign.png
   pkg/inst/doc/figs/glsim1.png
   pkg/inst/doc/figs/glsim2.png
   pkg/inst/doc/figs/loadingplotGL.png
   pkg/inst/doc/figs/pca1.pdf
Modified:
   pkg/R/colorplot.R
   pkg/R/glPlot.R
   pkg/inst/doc/adegenet-genomics.Rnw
   pkg/inst/doc/adegenet-genomics.pdf
   pkg/inst/doc/adegenet-genomics.tex
   pkg/man/colorplot.Rd
   pkg/man/glPca.Rd
Log:
Finished off first complete draft of genomics vignette.


Modified: pkg/R/colorplot.R
===================================================================

--- pkg/R/colorplot.R	2011-06-14 09:17:33 UTC (rev 908)
+++ pkg/R/colorplot.R	2011-06-14 19:08:49 UTC (rev 909)
@@ -71,5 +71,6 @@
         do.call(points,listArgs)
     }
 
-    return(invisible(match.call()))
+    ##return(invisible(match.call()))
+    return(invisible(col))
 } # end colorplot.default

Modified: pkg/R/glPlot.R
===================================================================
--- pkg/R/glPlot.R	2011-06-14 09:17:33 UTC (rev 908)
+++ pkg/R/glPlot.R	2011-06-14 19:08:49 UTC (rev 909)
@@ -9,22 +9,21 @@
     X <- X[,ncol(X):1]
     ylabpos <- pretty(1:nInd(x),5)
     if(is.null(col)) {
-        myCol <- colorRampPalette(c("royalblue3", "firebrick1"))(max(X)+1)
+        myCol <- colorRampPalette(c("royalblue3", "firebrick1"))(max(X,na.rm=TRUE)+1)
     } else {
         myCol <- col
     }
 
     ## draw the plot ##
     ## main plot
-    image(x=1:nLoc(x), y=1:nInd(x), z=X, xlab="SNP index", ylab="Individual index", yaxt="n",
-          col=myCol, ...)
+    image(x=1:nLoc(x), y=1:nInd(x), z=X, xlab="SNP index", ylab="Individual index", yaxt="n", col=myCol, ...)
 
     ## add y axis
     axis(side=2, at=nInd(x)-ylabpos+1, lab=ylabpos)
 
     ## add legend
     if(legend){
-        legend("bottomleft", fill=myCol, legend=0:max(X), horiz=TRUE, bg=bg, title="Number of 2nd allele")
+        legend(posi, fill=myCol, legend=0:max(X,na.rm=TRUE), horiz=TRUE, bg=bg, title="Number of 2nd allele")
     }
 
     return(invisible())

Added: pkg/inst/doc/Robjects/dapcGL.RData
===================================================================
(Binary files differ)


Property changes on: pkg/inst/doc/Robjects/dapcGL.RData
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: pkg/inst/doc/Robjects/pca1.RData
===================================================================
(Binary files differ)


Property changes on: pkg/inst/doc/Robjects/pca1.RData
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: pkg/inst/doc/Robjects/x.RData
===================================================================
(Binary files differ)


Property changes on: pkg/inst/doc/Robjects/x.RData
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Modified: pkg/inst/doc/adegenet-genomics.Rnw
===================================================================
--- pkg/inst/doc/adegenet-genomics.Rnw	2011-06-14 09:17:33 UTC (rev 908)
+++ pkg/inst/doc/adegenet-genomics.Rnw	2011-06-14 19:08:49 UTC (rev 909)
@@ -45,8 +45,8 @@
   representation using allele frequencies.
   This vignette introduces these classes and illustrates how these objects can be handled and
   analyzed in R.
-  It also introduces more advanced features of an API in C language which may be useful to develop
-  new method based on these objects.
+  %% It also introduces more advanced features of an API in C language which may be useful to develop
+  %% new method based on these objects.
 \end{abstract}
 
 \newpage
@@ -554,55 +554,61 @@
 The file is first located using \texttt{system.file}, and then processed using \texttt{fasta2genlight}:
 <<>>=
 myPath <- system.file("files/usflu.fasta",package="adegenet")
-obj <- fasta2genlight(myPath, chunk=10)
-obj
+flu <- fasta2genlight(myPath, chunk=10)
+flu
 @
 
-\noindent \texttt{obj} is a \texttt{genlight} object containing SNPs of 80 isolates of seasonal
+\noindent \texttt{flu} is a \texttt{genlight} object containing SNPs of 80 isolates of seasonal
 influenza (H3N2) sampled within the US over the last two decades; sequences correspond to the
 hemagglutinin (HA) segment.
-Besides genotypes, \texttt{obj} contains the positions of the SNPs and the alleles at each retained loci.
+Besides genotypes, \texttt{flu} contains the positions of the SNPs and the alleles at each retained loci.
 Names of the loci are constructed as the combination of both:
 <<>>=
-head(position(obj), 20)
-head(alleles(obj), 20)
-head(locNames(obj), 20)
+head(position(flu), 20)
+head(alleles(flu), 20)
+head(locNames(flu), 20)
 @
 
 \noindent It is usually informative to assess the position of the polymorphic sites within the
 genome; this is very easily done in R, using \texttt{density} with an appropriate bandewidth:
 <<fig=TRUE>>=
-temp <- density(position(obj), bw=10)
+temp <- density(position(flu), bw=10)
 plot(temp, type="n", xlab="Position in the alignment", main="Location of the SNPs", xlim=c(0,1701))
 polygon(c(temp$x,rev(temp$x)), c(temp$y, rep(0,length(temp$x))), col=transp("blue",.3))
-points(position(obj), rep(0, nLoc(obj)), pch="|", col="blue")
+points(position(flu), rep(0, nLoc(flu)), pch="|", col="blue")
 @
 
-\noindent Note that retaining only biallelic sites may cause minor loss of information, as sites
+\noindent In this case, SNPs are distributed fairly homogeneously across the HA segment, with
+a few possible hotspots of polymorphism within positions 400--700.
+\\
+
+
+Note that retaining only biallelic sites may cause minor loss of information, as sites
 with more than 2 alleles are discarded from the data.
 It is however possible to ask \texttt{fasta2genlight} to keep track of the number of alleles for
 each site of the original alignment, by specifying:
 <<>>=
-obj <- fasta2genlight(myPath, chunk=10,saveNbAlleles=TRUE, quiet=TRUE)
-obj
+flu <- fasta2genlight(myPath, chunk=10,saveNbAlleles=TRUE, quiet=TRUE)
+flu
 @
 
-\noindent The output object \texttt{obj} now contains the number of alleles of each position, stored
+\noindent The output object \texttt{flu} now contains the number of alleles of each position, stored
 in the \texttt{other} slot:
 <<>>=
-head(other(obj)$nb.all.per.loc, 20)
-100*mean(unlist(other(obj))>1)
+head(other(flu)$nb.all.per.loc, 20)
+100*mean(unlist(other(flu))>1)
 @
-About \Sexpr{round(100*mean(unlist(other(obj))>1))}\% of the sites are polymorphic, which is fairly high.
+About \Sexpr{round(100*mean(unlist(other(flu))>1))}\% of the sites are polymorphic, which is fairly high.
 This is not entirely surprising, given that the HA segment of influenza is known for its high
 mutation rate.
 What is the nature of this polymorphism?
 <<fig=TRUE>>=
-temp <- table(unlist(other(obj)))
+temp <- table(unlist(other(flu)))
 barplot(temp, main="Distribution of the number \nof alleles per loci",
-        xlab="Number of alleles", ylab="Number of sites")
+        xlab="Number of alleles", ylab="Number of sites", col=heat.colors(4))
 @
-Most polymorphic loci are biallelic, but a few loci with 3 or 4 alleles were lost.
+
+\noindent Most polymorphic loci are biallelic, but a few loci with 3 or 4 alleles were lost.
 We can estimate the loss of information very simply:
 <<>>=
 temp <- temp[-1]
@@ -614,17 +620,11 @@
 This is probably a fairly exceptional situation due to the high mutation rate of the HA segment.
 
 
-%% % % % % % % % % % % % % %
-%% \subsubsection{Conversions within R}
-%% % % % % % % % % % % % % %
 
 
 
 
 
-
-
-
 %%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%
 \section{Data analysis using \texttt{genlight} objects}
@@ -633,15 +633,67 @@
 
 In the following, we illustrate some methods for the analysis of \texttt{genlight} objects, ranging
 from simple tools for diagnosing allele frequencies or missing data to recently developed multivariate approaches.
-Troughout these examples, we use \texttt{glSim} to simulate \texttt{genlight} objects.
+Some examples below are illustrated using toy datasets generated using the function \texttt{glSim}.
 This simple simulation tool allows for simulating large SNPs data with possibly contrasted
-structures between two groups of individuals. See \texttt{?glSim} for more details on this tool.
+structures between two groups of individuals or patterns of linkage disequilibrium (LD).
+See \texttt{?glSim} for more details on this tool.
 
 
-
 %%%%%%%%%%%%%%%%
-\subsection{Simple operations}
+\subsection{Basic analyses}
 %%%%%%%%%%%%%%%%
+
+
+% % % % % % % % % %
+\subsubsection{Plotting \texttt{genlight} objects}
+% % % % % % % % % %
+Basic features of the data may also be inferred by simply looking at the data.
+\texttt{genlight} objects can be plotted using \texttt{glPlot}, or simply \texttt{plot} (both names
+actually correspond to the same function).
+This function displays the data as images, representing numbers of second alleles using colours.
+For instance, we can have a feel for the amount and location of missing data in the influenza
+dataset (see previous section) fairly easily:
+<<>>=
+glPlot(flu, posi="topleft")
+@
+The white stretches in the first 30 SNPs observed around individual 70 indicate missing data.
+There are only a few missing data, and they only concern a couple of individuals.
+\\
+
+In some simple cases, some biological structures might also be apparent in such a plot.
+For instance, we can generate data with independent SNPs distributions for two groups:
+<<eval=FALSE>>=
+x <- glSim(100, 0, 100, ploidy=2)
+@
+<<eval=FALSE>>=
+plot(x)
+@
+\begin{center}
+  \includegraphics{figs/glsim1}
+\end{center}
+
+Strong patterns of LD between contiguous sites are also very easy to spot:
+<<eval=FALSE>>=
+x <- glSim(100, 100, ploidy=2, LD=TRUE, block.size=10)
+@
+<<eval=FALSE>>=
+plot(x)
+@
+\begin{center}
+  \includegraphics{figs/glsim2}
+\end{center}
+
+Of course, this is merely a very preliminary approach to the data.
+More detailed analysis can be achieved using both standard and \textit{ad hoc} procedures as
+detailed below.
+
+
+
+% % % % % % % % % %
+\subsubsection{\texttt{genlight}-optimized routines}
+% % % % % % % % % %
+
+
 Some simple operations such as computing allele frequencies or diagnosing missing values can be
 problematic when the data matrix cannot be represented in memory.
 \textit{adegenet} implements a few basic procedures which perform such basic tasks on
@@ -663,42 +715,311 @@
 \item \texttt{glDotProd}: computes the dot products between all pairs of individuals, with
   possible centring and scaling.
 \end{itemize}
-For instance, one can easily map missing data across loci as we have done for SNP positions in the
+For instance, one can easily derive the distribution of allele frequencies using:
+<<fig=TRUE>>=
+myFreq <- glMean(flu)
+hist(myFreq, proba=TRUE, col="gold", xlab="Allele frequencies", main="Distribution of (second) allele frequencies")
+temp <- density(myFreq)
+lines(temp$x, temp$y*1.8,lwd=3)
+@
+
+\noindent In biallelic loci, one allele is always entirely redundant with the other, so it is generally
+sufficient to analyse a single allele per locus.
+However, the distribution of allele frequencies may be more interpretable by restoring its native symmetry:
+<<fig=TRUE>>=
+myFreq <- glMean(flu)
+myFreq <- c(myFreq, 1-myFreq)
+hist(myFreq, proba=TRUE, col="darkseagreen3", xlab="Allele frequencies", main="Distribution of allele frequencies", nclass=20)
+temp <- density(myFreq, bw=.05)
+lines(temp$x, temp$y*2,lwd=3)
+@
+
+\noindent While a large number of loci are nearly fixed (frequencies close to 0 or 1), there is an
+appreciable number of alleles with intermediate frequencies, which are therefore likely to contain
+interesting biological signal.
+More generally and perhaps more importantly, this figure may also cast light on a well-known social
+phenomenon occurring mainly in young people attending noisy kinds of conferences:
+\begin{center}
+  \includegraphics{figs/devilsign}
+\end{center}
+
+\noindent We can indeed wonder whether the gesture usually referred to as the '\textit{devil sign}'
+is not actually a reference to the usual shape of SNPs frequency distributions.  It is still
+unclear, however, how many geneticists do attend metal gigs, although recent observations suggest
+they would be more frequent in grindcore events than in classical heavy metal shows.
+\\
+
+Besides these considerations, we can also map missing data across loci as we have done for SNP positions in the
 US influenza dataset (see previous section) using \texttt{glNA} and \texttt{density}:
 <<>>=
-head(glNA(obj),20)
-temp <- density(glNA(obj), bw=10)
+head(glNA(flu),20)
+temp <- density(glNA(flu), bw=10)
 plot(temp, type="n", xlab="Position in the alignment", main="Location of the missing values (NAs)", xlim=c(0,1701))
 polygon(c(temp$x,rev(temp$x)), c(temp$y, rep(0,length(temp$x))), col=transp("blue",.3))
-points(glNA(obj), rep(0, nLoc(obj)), pch="|", col="blue")
+points(glNA(flu), rep(0, nLoc(flu)), pch="|", col="blue")
 @
 
 \noindent Here, the few missing values are all located at the beginning at the alignment, probably
 reflecting heterogeneity in DNA amplification during the sequencing process.
-In actually large datasets, such simple investigation can give crucial insights about the quality of
-the data and the existence of possible biases.
+In larger datasets, such simple investigation can give crucial insights about the quality of
+the data and the existence of possible sequencing biases.
+\\
 
 
 
 
+% % % % % % % % % %
+\subsubsection{Analysing data per block}
+% % % % % % % % % %
 
+Some operations such as computations of distances between individuals can also be useful,
+and have yet to be implemented for \texttt{genlight} objects.
+These operations are easy to carry out by converting data to alleles counts (using
+\texttt{as.matrix}), but this conversion itself can be problematic because of memory limitations.
+One easy workaround consists in parallelizing computations across blocks of loci.
+\texttt{seploc} is first used to create a list of smaller \texttt{genlight} objects, each of which
+can individually be converted to absolute allele frequencies using \texttt{as.matrix}.
+Then, computations are carried out on the list of objects, without ever having to convert the entire
+dataset, and results are finally reunited.
+\\
+
+Let us illustrate this procedure using 40 simulated individuals with 100,000 SNPs each:
+<<>>=
+x <- glSim(40, 1e5, LD=FALSE)
+x
+@
+\texttt{seploc} is used to create a list of smaller objects (here, 10 blocks of 10,000 SNPs):
+<<>>=
+x <- seploc(x, n.block=10)
+class(x)
+names(x)
+x[1:2]
+@
+\texttt{dist} is used within a \texttt{lapply} loop to compute pairwise distances between
+individuals for each block:
+<<>>=
+lD <- lapply(x, function(e) dist(as.matrix(e)))
+class(lD)
+names(lD)
+class(lD[[1]])
+@
+\texttt{lD} is a list of distance matrices (\texttt{dist} objects) between pairs of individuals.
+The general distance matrix is obtained by summing these:
+<<>>=
+D <- Reduce("+", lD)
+@
+And we could now carry on further analyses, such as a neighbor-joining tree using the \textit{ape} package:
+<<fig=TRUE>>=
+library(ape)
+plot(nj(D), type="fan")
+title("A simple NJ tree of simulated genlight data")
+@
+
+
+
+
+% % % % % % % % % %
+\subsubsection{What is the unit of observation?}
+% % % % % % % % % %
+Whenever ploidy varies across individuals, an issue arises as to what is defined as the \textit{unit of observation}.
+Technically speaking, the unit of observation is the entity on which the observation is made.
+When working with allelic data, it is not always clear what the unit of observation is.
+The unit of observation may be:
+\begin{itemize}
+  \item \textit{individuals}: in this case each individual is represented by a vector of allele frequencies
+  \item \textit{alleles}: in this case we consider that each individual represents a sample of
+    alleles, with a sample size equalling the ploidy for each locus
+\end{itemize}
+
+This distinction is most of the time overlooked when analysing genetic data.
+As a matter of fact, it does not matter when all individuals have the same ploidy.
+For instance, if we take the following data:
+<<>>=
+x <- new("genlight", list(a=c(0,0,1,1), b=c(1,1,0,0), c=c(1,1,1,1)))
+locNames(x) <- 1:4
+x
+as.matrix(x)
+@
+and assume that all individuals are haploid, then computing e.g. the allele frequencies is
+straightforward (they all equal $2/3$):
+<<>>=
+glMean(x)
+@
+Let us now consider a slightly different case:
+<<>>=
+x <- new("genlight", list(a=c(0,0,2,2), b=c(1,1,0,0), c=c(1,1,1,1)))
+locNames(x) <- 1:4
+x
+as.matrix(x)
+ploidy(x)
+@
+What are the allele frequencies in this case?
+Well, it depends on what we mean by '\textit{allele frequency}'.
+\\
+
+
+Is it the frequency of the alleles in the population?
+In this case, the unit of observation is the allele.
+We have a total of 4 samples for each locus (since '\texttt{a}' is diploid, it
+actually represents two samples) and the frequencies are $1/2$, $1/2$, $3/4$, $3/4$.
+Note, however, that this assumes that alleles are randomly associated within individuals (pangamy).
+
+Or is it the frequency of the alleles within the individuals?
+In this case, the unit of observation is the individual, and the vector of allele frequencies represents the 'average individual'.
+We first need to convert each individual vector into relative frequencies (i.e., divide by their respective ploidy), and then compute the
+average frequency across individuals, which ends up with $2/3$ for each locus:
+<<>>=
+M <- as.matrix(x)/ ploidy(x)
+apply(M,2,mean)
+@
+
+The procedures designed for \texttt{genlight} objects seen above (\texttt{glMean}, \texttt{glNA},
+etc.) allow for this distinction to be made.
+The option \texttt{alleleAsUnit} is a logical indicating whether the observation unit is the allele
+(\texttt{TRUE}, default) or the individual (\texttt{FALSE}).
+For instance:
+<<>>=
+as.matrix(x)
+glMean(x, alleleAsUnit=TRUE)
+glMean(x, alleleAsUnit=FALSE)
+@
+
+
+
+
+
 %%%%%%%%%%%%%%%%
 \subsection{Principal Component Analysis (PCA)}
 %%%%%%%%%%%%%%%%
+Principal Component Analysis (PCA) is implemented for \texttt{genlight} objects by the function \texttt{glPca}.
+This function can accommodate any level of ploidy in the data (including varying ploidy across
+individuals). More importantly, it performs computations without ever processing more than a
+couple of genomes at a time, thereby minimizing memory requirements.
+It also uses compiled C code and possibly multicore resources if available to speed up computations.
+We illustrate the method on the previously introduced influenza dataset (object \texttt{flu}):
+<<eval=FALSE>>=
+flu
+pca1 <- glPca(flu)
+@
+<<echo=FALSE>>=
+load("Robjects/pca1.RData")
+@
+\begin{center}
+  \includegraphics{figs/pca1}
+\end{center}
+When \texttt{nf} (number of retained factors) is not specified, the function displays the barplot of
+eigenvalues of the analysis and asks the user for a number of retained principal components.
+\texttt{glPca} returns a list with the class \texttt{glPca} containing the eigenvalues, principal
+components and loadings of the analysis:
+<<>>=
+pca1
+@
 
+In addition to usual graphics, \texttt{glPca} objects can be displayed using \texttt{scatter} (produces
+a scatterplot of the principal components (PCs)) and \texttt{loadingplot} (plots the allele contributions,
+i.e. squared loadings).
+The scatterplot is obtained by:
+<<fig=TRUE>>=
+scatter(pca1, posi="bottomright")
+title("PCA of the US influenza data\n axes 1-2")
+@
+The first PC suggests the existence of two clades in the data, while the second one shows groups of
+closely related isolates arranged along a cline of genetic differentiation.
+This structure is confirmed by a simple neighbour-joining (NJ) tree:
+<<>>=
+library(ape)
+tre <- nj(dist(as.matrix(flu)))
+tre
+plot(tre, typ="fan", cex=0.7)
+title("NJ tree of the US influenza data")
+@
 
+The correspondence between both analyses can be better assessed using colors based on PCs; this is
+achieved by \texttt{colorplot}:
+<<fig=TRUE>>=
+myCol <- colorplot(pca1$scores,pca1$scores, transp=TRUE, cex=4)
+abline(h=0,v=0, col="grey")
+add.scatter.eig(pca1$eig[1:40],2,1,2, posi="topright", inset=.05, ratio=.3)
+@
 
+<<fig=TRUE>>=
+plot(tre, typ="fan", show.tip=FALSE)
+tiplabels(pch=20, col=myCol, cex=4)
+title("NJ tree of the US influenza data")
+@
 
+\noindent As expected, both approaches give congruent results, but both are complementary: NJ is
+better at showing bunches of related isolates, but the cline of genetic differentiation is much
+clearer in PCA.
+
+
+
+
 %%%%%%%%%%%%%%%%
 \subsection{Discriminant Analysis of Principal Components (DAPC)}
 %%%%%%%%%%%%%%%%
+Discriminant analysis of Principal Components (DAPC) is implemented for \texttt{genlight} objects by
+an appropriate method for the \texttt{find.clusters} and \texttt{dapc} generics.
+To put it simply, you can run \texttt{find.clusters} and \texttt{dapc} on \texttt{genlight} objects and the appropriate
+functions will be used.
+As in \texttt{glPca}, these methods never require more than a couple of genomes to be translated into
+allele frequencies at a time, thereby minimizing RAM requirements.
+\\
 
+Below, we illustrate DAPC on a \texttt{genlight} object including only 50 structured SNPs out of 100,000
+non-structured SNPs:
+<<eval=FALSE>>=
+x <- glSim(100, 1e5, 50)
+@
+<<echo=FALSE>>=
+load("Robjects/x.RData")
+load("Robjects/dapcGL.RData")
+@
+<<>>=
+x
+@
+<<eval=FALSE>>=
+dapc1 <- dapc(x, n.pca=20, n.da=1)
+@
+<<>>=
+dapc1
+@
 
+For the last 50 structured SNPs (located at the end of the alignment), the two groups of individuals have different (random) distributions of
+allele frequencies, while they share the same distributions in other loci.
+DAPC can still make some decent discrimination:
+<<fig=TRUE>>=
+scatter(dapc1,scree.da=FALSE, bg="white", posi.pca="topright", legen=TRUE, txt.leg=paste("group", 1:2), col=c("red","blue"))
+@
 
+\noindent While the composition plot confirms that groups are not entirely disentangled...
+<<fig=TRUE>>=
+compoplot(dapc1, col=c("red","blue"),lab="", txt.leg=paste("group", 1:2), ncol=2)
+@
 
+\noindent ... the loading plot identifies pretty well the most discriminating alleles:
+<<eval=FALSE>>=
+loadingplot(dapc1$var.contr, thres=4e-4)
+@
 
+\begin{center}
+  \includegraphics{figs/loadingplotGL}
+\end{center}
 
+\noindent And we can zoom in to the contributions of the last 100 SNPs to make sure that the tail indeed
+corresponds to the 50 last structured loci:
+<<fig=TRUE>>=
+loadingplot(tail(dapc1$var.contr[,1],100))
+@
 
+\noindent Here, we indeed identified the structured region of the genome fairly well.
+
+
+
+
+
+
+
 \begin{thebibliography}{9}
 
 \bibitem{tjart05}

Modified: pkg/inst/doc/adegenet-genomics.pdf
===================================================================
--- pkg/inst/doc/adegenet-genomics.pdf	2011-06-14 09:17:33 UTC (rev 908)
+++ pkg/inst/doc/adegenet-genomics.pdf	2011-06-14 19:08:49 UTC (rev 909)
@@ -76,205 +76,266 @@
 << /S /GoTo /D (subsection.4.1) >>
 endobj
 52 0 obj
-(Simple operations)
+(Basic analyses)
 endobj
 53 0 obj
-<< /S /GoTo /D (subsection.4.2) >>
+<< /S /GoTo /D (subsubsection.4.1.1) >>
 endobj
 56 0 obj
-(Principal Component Analysis \(PCA\))
+(Plotting genlight objects)
 endobj
 57 0 obj
-<< /S /GoTo /D (subsection.4.3) >>
+<< /S /GoTo /D (subsubsection.4.1.2) >>
 endobj
 60 0 obj
-(Discriminant Analysis of Principal Components \(DAPC\))
+(genlight-optimized routines)
 endobj
 61 0 obj
-<< /S /GoTo /D [62 0 R  /Fit ] >>
+<< /S /GoTo /D (subsubsection.4.1.3) >>
 endobj
-66 0 obj <<
-/Length 1080      
+64 0 obj
+(Analysing data per block)
+endobj
+65 0 obj
+<< /S /GoTo /D (subsubsection.4.1.4) >>
+endobj
+68 0 obj
+(What is the unit of observation?)
+endobj
+69 0 obj
+<< /S /GoTo /D (subsection.4.2) >>
+endobj
+72 0 obj
+(Principal Component Analysis \(PCA\))
+endobj
+73 0 obj
+<< /S /GoTo /D (subsection.4.3) >>
+endobj
+76 0 obj
+(Discriminant Analysis of Principal Components \(DAPC\))
+endobj
+77 0 obj
+<< /S /GoTo /D [78 0 R  /Fit ] >>
+endobj
+82 0 obj <<
+/Length 934       
 /Filter /FlateDecode
 >>
 stream
-xÚ…VMoÛ8½çWè(#’¢D»Å&h€]YßÒT[þhm+µäÙ_¿óEIvÜí)ÎgÞ{9‹VQÝßd¿yþ1»¹½³y¤Ke´Ë£Ù2ÒÖªÊû¨ÐNÙÊD³EôÜ':®alßà§ƒ±Á7«$µY¯˜ãºÝá,…qÚÀÏFƒ&:þ¦?²ùõ5/Žo#þ«äëìª¦	Ÿ)_”/¥R/šÄá™û¦ó³üSmœ*!ñÔhUiÏ^¢++Éeìg²Hƒ‰s†üL©œ.áé•¯4»ÍÖX*ŒoR?¦ÛCæÆÅí.1~Ø9ÐŽ¼ªLq¬²eÎqŽR!Âa
-JÇ~à8†2Ò±ÖèýŽ¢Û»<ÇEÁôäÊ—¥u J!ì`‚HJÀLæ!“k±ªI¬Tk£¼«`âT ºÇ4‰M»æ’ÍÒ'6a<âÒÑNà47y<¯©ZÚû) mæ‰©â,!Úù– ä!"í®ƒÈ¶!…•À/bC·ž2£i-;5Ú¿uc~Çs‘")©¶ÊkÏ(¯¸Ô®—÷R¹HR—g#A ã%PßT!œfi4lú"®sÄÄyZÈ€¤æÎ),@ £ ñlÐ´júŒ'[©Òë`ÿ,Ôâ‹òŠ“§û›è4tF;å—-)æš œäþÄ-–=ÒpÂ*‰†=þïDsq¢†#o_(WúsÀ‘Ë¢Eï€c8¯'¶R§=œøBçvÜQp“„…\¶,*4£\qZÕýønªG4?¡;Ò:\F>yn¬8É¦Y°aó%Ó…Ü¨
-	goT`v^ÙEK¦H[Ôóê:N˜¥QL˜.*¥¡gÓã5ãÄá¯èC+cýTO¬ç÷ñ¡OúAx¬O6ÞJø•¤|U†Ð }¼?pQ§Ð!åSáXi½çùøy«ƒ`q$ø”ZžÙøŠ¤Â aÂvw!&Ò`™š"ëøã_8·îÒ†ðÑsý…]‚ØpkäþRoƒ÷„\ô®µm³eŠñåòÐP©VÒ
-m#·ÙøA‘´_èôqè©Å\“×f¸‡–úæB:Òœ¥ ¾¿Ö‰è¤¬Ž®•×¬˜{ŠNM—
-Å0ô!éƒ9ÄY'X2x™~yVK:M]GÏy?ž6éÀåÐûq±¾hÂ[¾Sü¥´SëG§áÉ¿rwqs³ççƒÓÏòñöf`¯kÅú·ø‚Ñ®
-–hôÚ}à;$°l‚”ŽÜÓØOzW!Yâ'üñó¯™OM¥ãOüØNðZIøz%"„ý“P³	WÒTY¼«qñÆü¸ Î‘¯åQîF¥Ã¥Ä)µÆWôn¶-_[H‚=±É.ˆw-nˆŸœ$Ò`ð]¸XdTÑUÕO50;uöÅ¹½óQ-ÊüiÍUjI-ü³)|þüM=þœÝüyü“Å
+xÚ…VÉnÛ0½û+t”€ˆWQÇhhQ¤¾¥9(–¼4^[®‘~}g!-ÙqÛ-Rœ¾™÷8r‘Ì’"¹ÿy~®oµId)”´&O©µ¨¼Oœ´BW*7Écz³ÎdZÃX¾ÃÏÆßÌ²\E:kaŽëÍ
+g9ŒÃ~-šÈô;L¿~cóu5/öoügÙÓø UC@ÊÂ»ð”ºi3‹g®Û.˜ŸàÏ¥²¢à¹’¢’ž½$D:€+ØO‰kù©RXYÂÓ_IvÏ1UÏ!„ÛreÓûÍ*Sþ¸³¥œxQ)q´Ð¥á8÷û!–C9‚c®8Ž"D2•½?Pt}k†tŽé1Â—€R[(Šì @$¥Û"’IDr)V5ˆ•K©„·L¬p±Tw“Ø„±jÏÙ4 ŸØ„ñ
+—–v"§F™tRS¶´÷Š¶˜dªJ_XB´óœAÉcDÚG‘-#„Y(ºu„Œ¦uØ©Ñþ}×ãÛŸŠIÉ¥^jx@yÅ©îºpâ:äGD6YnMAe¤ÀxÔ·[‘A+Y-›¾×	
+â%³žV32 ©ÙS
+´4ž
+šÖCMŸð¤+Qzíµøb‹¼âäán”<‚†Nhã'8ÝBa.©€ìüØ¡Å´C˜U¢e¨ÎN”p$×Û;aKZpär…Õ"†WÀ1œ×[¹•N|¥swÜQp“„…\nXThFXqZõÙúñÝPh~@w6¤u¼Œ|òÜ6¬¸€¦mØ°ýQHnÔ‚‰³wJ°8Íì¬åZWÃJë ¨%âÚí0KÃ
+˜v•Ð3Óý5ãÄá/èC
+¥ýPO¬çñ¡Oú£ðXŸl¼ágòEBƒô.z_qR‡Ø!Ã
+¦Ä1ÓzÍó-ñ¶VÛP‹=•ÿX¥
+O]Ñ¿"©p‘°?a»;ij™+g@Æ2½ù‚s}¬{hCøŠèÙÇþÂ.Ql¸Õs®·£÷€\ô®CÕ–í’)Æ—ÓmK©Æ²²pnhFý%Àþ‹N‡ŽZÌ%y-Ž÷a»¡¾Ù„Ž4a)AQ?ÞmƒèBZ;ºVÁ¸fÀÜStjº”(†¡IÍ!Î<SÀ’ÂËô×³6¤ÓÜºô'zNºþ´IO.½ó³&¼ä8¬‚?—68öNÇ?&¿ÃÝÅÍÅšŸTüa“º¾õIªVŽ»± ááŠ2þ_z|þ Ò':
 endstream
 endobj
-62 0 obj <<
+78 0 obj <<
 /Type /Page
-/Contents 66 0 R
-/Resources 65 0 R
+/Contents 82 0 R
+/Resources 81 0 R
 /MediaBox [0 0 612 792]
-/Parent 77 0 R
-/Annots [ 63 0 R 64 0 R ]
+/Parent 93 0 R
+/Annots [ 79 0 R 80 0 R ]
 >> endobj
-63 0 obj <<
+79 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[0 1 0]
 /Rect [346.812 477.77 353.404 485.537]
 /A << /S /GoTo /D (cite.tjart05) >>
 >> endobj
-64 0 obj <<
+80 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[0 1 0]
 /Rect [444.422 477.77 451.014 485.537]
 /A << /S /GoTo /D (cite.np145) >>
 >> endobj
-67 0 obj <<
-/D [62 0 R /XYZ 132.768 705.06 null]
+83 0 obj <<
+/D [78 0 R /XYZ 132.768 705.06 null]
 >> endobj
-68 0 obj <<
-/D [62 0 R /XYZ 133.768 667.198 null]
+84 0 obj <<
+/D [78 0 R /XYZ 133.768 667.198 null]
 >> endobj
-65 0 obj <<
-/Font << /F34 69 0 R /F39 70 0 R /F20 71 0 R /F44 72 0 R /F49 73 0 R /F54 74 0 R /F56 75 0 R /F8 76 0 R >>
+81 0 obj <<
+/Font << /F34 85 0 R /F39 86 0 R /F20 87 0 R /F44 88 0 R /F49 89 0 R /F54 90 0 R /F56 91 0 R /F8 92 0 R >>
 /ProcSet [ /PDF /Text ]
 >> endobj
-97 0 obj <<
-/Length 959       
+117 0 obj <<
+/Length 1159      
 /Filter /FlateDecode
 >>
 stream
-xÚÕ˜Ms›<Çïùá€"^€ÞR·é¤ít<OüœÚ(!®;&öÒi¿}w%Á€Ác7Múr BìJ?íµ„Ë€¯Î¸ÿ}¾8;¿42ŠIe XÜBJ–˜40:a u°¸	Þ‡³M$Â»HŠ°)»V}\¼>hˆ»eàÿ½"‹i±Ì€!ƒ<ˆA°L$Î”ˆâtx…6 lvhmÅ’‡7ØºÇ«hVÎoç¼ÁðByÀgç%u^Ày™¡¥u^ãßº¬ÑM
-!¿¥¦´ÍOQl ü‚²ð+ê;•:I:=¿ì¯fJ±ÀihÏ
-ôúùe"{ƒp®
-ÍØa×8ê^s;!®Z CR0	²}éY+%Ãº!„Ä1_–¸.‰ë¢õ!ÝÚÙár]ºžeI·›Š6q´Ø,¡Ñ,;ö£9„!îgoÂˆxÛ;±Û±Vœ‰‡mz°á(ì%íµ³ö°©ï3^Íº(u:ô*‚Ä2ÙnÈøÖßøKëüÀH¥Ÿˆópä˜º	õê{ºŽ“Œ€¡º¥ÓÝ"ÓäNf6'Ú$ñu“NŽ÷$Î•ëïœ Îx–þÄÖ
-¦¨4ÓIÖ¾Ý“{YD]ëc	î`z’Üc¡9K¤Æ t‚þ_÷‹[Ÿ…ÝõÚFÏÈ½òÑ6øQ£åô!Sq•S3&Métì^ßûœX—Ýå6;vP©ë³Ïo79Ýå£4žè¿
-Þñ‘û(¢ü!,¥géTiYrÅæ.‚4üJ‹ârW{º›»qlfÿnlN’„c$ý©1³Õ¸P&\`èó–¬§ºCƒÙLï‚v;>a at 1‘uYêÖæ†ÊmÕ(AÀß)û_'†i%‘OúªÚFX"œ‰$pÓkB¸ÛT.%Ìß^½{³?³ô÷œäb$åVÏDäå7Ÿm!RtX<[UÎmì±äö ^Âª´i ™ªG@ÃÓÆÑƒÐìW"2a©N‡_ j¢±UH¾þîõ¸òŸtÄôŽê‰2Dh[Ûÿ¶2Dã%ðYvb)¢Œõ0ˆÕ/E®iŽÕ^%ºq‚*wí¹p ÷ÃŸÌ)OûáäSÔôP¶É|×ê¬ðm¾v¢šm*›â±à@ŽÚ+Ë>½¸Ë	þ÷6Új×ýkNß~³l‰Q¦é)Ü,'ñ“)JíW+’YÑ«Z¡‘P‡e,JìvßSÒ~‰oíøŽ¸?z¸‡`_PßÅ|6EWþøÚéü*U 0©”™ÚEô¿\œý C,°\
+xÚÕ˜ËrÛ6†÷~
+.ÉaÜAfÓq”8ã¶“Ñ4êt‘tAÓ´¬V5"ióô= iR¤*[µw!	¢ \¾óŸ@ƒy@ƒgÔ¾_j0I„Ô<˜ÝLbthe*˜ÝŸÃI±p	ÖEÛª¢ßg?ž1ˆÛyà¿|À“ %©æ¤AÌI™qC±(N¹
+¯`Ö[ŒbAÃhÝÃ+¯n2¼Ygð9æš¦<{ælgIÜ,ÜÍ2‘–YïUQÁ4	qð[l
+Û¼ŽbÍÃ? Uä~GÝI…2fï¤ç—Ý}ÃTÊ f°å¹q3üûù¥ÎœÃZ%c»}‚^á5µbá¢ÐŸA0"¸hþô&Š¥aU#Bä˜ÍØ—€}áþ€nåÆaá|Y¸'ó¿–+4â`³©á!AÓôÐ‡¢ôº¸E0ÍàÍÓkÇJ‚8‡;°ùAØs´µ³ô°ñÙ¼ê!tÉÀäã¡¯"n¬qìßø/¾ãÜN¾ÇBªâÜï9¤®Cu€úŽ_Ç&%šó¾wçwïL97C°ÒF_¶štîxÎ¹p†–cœš&O0]o‰ReÒæßw/ò¨
+`]Â€“G¹{Ì%Fˆ¾…sø˜Q.Â_«îfÁôYž[«WV=ƒé5ÏfàgUËã»Œé*9äÍ´4…ócÇðÓ½‰UQ[+7Ñ±…Šî||»Éð[6ãF½6x‡{î¢d€’ÑcX
+ÏÒy¥dÉåå:âIø#êr[yºåz¨Íôÿ«ÍQ’üIŸ5Fp6>Î¤g ½aÜRê©6iéh7ÃÃ%ai¥nmlX9S
+nÿŸ@ÇF%Å>ÒÜ“¾Zm"¨ÎH¸éênË•	ÓŸ¯>þ´»²ä´G¹è£\¹ñg$òþ/m!’·X<[UNö†X2›ˆçˆpUØ0PÕ#\ñ—ÕÑQhv+aH¢’þ	@ŽT"¶
+É–{\ø#¦˜Nª)C˜²µýÉÊ	‹Ãoô‘¥ˆÔv†ž^d·y›Ù
+æÞøë×m9Œ—¡T¾–˜ò±ŸÓ'Å~ÝåÙýS„XöÊ’1õJyôñCš°V; %A8ö <j9£¿Ÿ‘Žƒ[÷âjL¾ûŽtpN°s¤kÏÑq¹éXp…oß
+{Z±QÓÞPÜwSN1€ž0}Jõb~DšDï^P<Pn’ÊE'6ü[’uÉ¸Øºˆr½t9PIæ˜Lþ\.ŒíñT‘a˜zbþ}@%=ªßî\•f	,mí¥çü˜^Ö«1Û³¹)(¯1	(°¯‘R®4wå7¼ý°»j“¨ÓžÝ,%sPVcu²ìá¦ÛÆyrßØ€Ä,‚I¹²51êÆîº)Eì¯»R¬Üã/TQ¼,›\@‹
+}1ù.']K+=FSÝãÙ%‘o»Áhí®O€P‹eXÅ´²ö±O|cû·Ä}ÝÁÝûŸ]L'ct…¦G$\Ñ¿ÐkfJÉpr¤áÍÝm·óûÙÙ?´ÈÅ
 endstream
 endobj
-96 0 obj <<
+116 0 obj <<
 /Type /Page
-/Contents 97 0 R
-/Resources 95 0 R
+/Contents 117 0 R
+/Resources 115 0 R
 /MediaBox [0 0 612 792]
-/Parent 77 0 R
-/Annots [ 80 0 R 81 0 R 82 0 R 83 0 R 84 0 R 85 0 R 86 0 R 87 0 R 88 0 R 89 0 R 90 0 R 91 0 R 92 0 R 93 0 R 94 0 R ]
+/Parent 93 0 R
+/Annots [ 96 0 R 97 0 R 98 0 R 99 0 R 100 0 R 101 0 R 102 0 R 103 0 R 104 0 R 105 0 R 106 0 R 107 0 R 108 0 R 109 0 R 110 0 R 111 0 R 112 0 R 113 0 R 114 0 R ]
 >> endobj
-80 0 obj <<
+96 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[1 0 0]
 /Rect [132.772 634.321 212.801 643.188]
 /A << /S /GoTo /D (section.1) >>
 >> endobj
-81 0 obj <<
+97 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[1 0 0]
 /Rect [132.772 610.471 237.867 621.27]
 /A << /S /GoTo /D (section.2) >>
 >> endobj
-82 0 obj <<
+98 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[1 0 0]
 /Rect [147.716 598.516 321.094 609.355]
 /A << /S /GoTo /D (subsection.2.1) >>
 >> endobj
-83 0 obj <<
+99 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[1 0 0]
 /Rect [147.716 586.281 342.813 597.4]
 /A << /S /GoTo /D (subsection.2.2) >>
 >> endobj
-84 0 obj <<
+100 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[1 0 0]
 /Rect [132.772 564.364 335.948 575.442]
 /A << /S /GoTo /D (section.3) >>
 >> endobj
-85 0 obj <<
+101 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[1 0 0]
 /Rect [147.716 552.687 239.511 563.337]
 /A << /S /GoTo /D (subsection.3.1) >>
 >> endobj
-86 0 obj <<
+102 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[1 0 0]
 /Rect [147.716 540.732 258.37 551.572]
 /A << /S /GoTo /D (subsection.3.2) >>
 >> endobj
-87 0 obj <<
+103 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[1 0 0]
 /Rect [147.716 530.71 246.455 539.427]
 /A << /S /GoTo /D (subsection.3.3) >>
 >> endobj
-88 0 obj <<
+104 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[1 0 0]
 /Rect [170.63 516.543 278.226 527.661]
 /A << /S /GoTo /D (subsubsection.3.3.1) >>
 >> endobj
-89 0 obj <<
+105 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[1 0 0]
 /Rect [170.63 504.867 329.543 515.706]
 /A << /S /GoTo /D (subsubsection.3.3.2) >>
 >> endobj
-90 0 obj <<
+106 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[1 0 0]
 /Rect [170.63 492.912 351.48 503.751]
 /A << /S /GoTo /D (subsubsection.3.3.3) >>
 >> endobj
-91 0 obj <<
+107 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[1 0 0]
 /Rect [132.772 470.715 331.794 481.793]
 /A << /S /GoTo /D (section.4) >>
 >> endobj
-92 0 obj <<
+108 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[1 0 0]
-/Rect [147.716 459.039 250.38 469.878]
+/Rect [147.716 459.039 234.859 469.878]
 /A << /S /GoTo /D (subsection.4.1) >>
 >> endobj
-93 0 obj <<
+109 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[1 0 0]
-/Rect [147.716 446.526 338.09 458.386]
+/Rect [170.63 446.805 319.281 457.923]
+/A << /S /GoTo /D (subsubsection.4.1.1) >>
+>> endobj
+110 0 obj <<
+/Type /Annot
+/Subtype /Link
+/Border[0 0 0]/H/I/C[1 0 0]
+/Rect [170.63 434.849 330.38 445.968]
+/A << /S /GoTo /D (subsubsection.4.1.2) >>
+>> endobj
+111 0 obj <<
+/Type /Annot
+/Subtype /Link
+/Border[0 0 0]/H/I/C[1 0 0]
+/Rect [170.63 423.173 314.061 434.012]
+/A << /S /GoTo /D (subsubsection.4.1.3) >>
+>> endobj
+112 0 obj <<
+/Type /Annot
+/Subtype /Link
+/Border[0 0 0]/H/I/C[1 0 0]
+/Rect [170.63 413.151 346.18 422.057]
+/A << /S /GoTo /D (subsubsection.4.1.4) >>
+>> endobj
+113 0 obj <<
+/Type /Annot
+/Subtype /Link
+/Border[0 0 0]/H/I/C[1 0 0]
+/Rect [147.716 398.705 338.09 410.565]
 /A << /S /GoTo /D (subsection.4.2) >>
 >> endobj
-94 0 obj <<
+114 0 obj <<
 /Type /Annot
 /Subtype /Link
 /Border[0 0 0]/H/I/C[1 0 0]
-/Rect [147.716 434.57 420.061 446.431]
+/Rect [147.716 386.75 420.061 398.61]
 /A << /S /GoTo /D (subsection.4.3) >>
 >> endobj
-98 0 obj <<
-/D [96 0 R /XYZ 132.768 705.06 null]
+118 0 obj <<
+/D [116 0 R /XYZ 132.768 705.06 null]
 >> endobj
-100 0 obj <<
-/D [96 0 R /XYZ 133.768 647.382 null]
+120 0 obj <<
+/D [116 0 R /XYZ 133.768 647.382 null]
 >> endobj
-95 0 obj <<
-/Font << /F63 99 0 R /F68 101 0 R /F8 76 0 R /F73 102 0 R >>
+115 0 obj <<
+/Font << /F63 119 0 R /F68 121 0 R /F8 92 0 R /F73 122 0 R >>
 /ProcSet [ /PDF /Text ]
 >> endobj
-105 0 obj <<
+125 0 obj <<
 /Length 2656      
 /Filter /FlateDecode
 >>
@@ -308,30 +369,30 @@
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/adegenet -r 909