[adegenet-commits] r912 - in pkg: R inst/doc man

Wed Jun 15 17:05:05 CEST 2011

Author: jombart
Date: 2011-06-15 17:05:04 +0200 (Wed, 15 Jun 2011)
New Revision: 912

Added:
   pkg/man/inbreeding.Rd
   pkg/man/inbreedingBalloux.old.Rd
Removed:
   pkg/man/inbreeding.Rd
   pkg/man/inbreeding.ml.Rd
Modified:
   pkg/R/inbreeding.R
   pkg/inst/doc/adegenet-basics.Rnw
Log:
A few changes on inbreeding.ml, which now is the only inbreeding function.


Modified: pkg/R/inbreeding.R
===================================================================

--- pkg/R/inbreeding.R	2011-06-15 10:30:43 UTC (rev 911)
+++ pkg/R/inbreeding.R	2011-06-15 15:05:04 UTC (rev 912)
@@ -1,82 +1,82 @@
-#############
-## inbreeding
-#############
-inbreeding <- function(x, pop=NULL, truenames=TRUE, res.type=c("mean","byloc"), plot=TRUE, ...){
-    ## CHECKS ##
-    if(!is.genind(x)) stop("x is not a valid genind object")
-    checkType(x)
-    res.type <- match.arg(res.type)
+## #############
+## ## inbreeding
+## #############
+## inbreeding <- function(x, pop=NULL, truenames=TRUE, res.type=c("mean","byloc"), plot=TRUE, ...){
+##     ## CHECKS ##
+##     if(!is.genind(x)) stop("x is not a valid genind object")
+##     checkType(x)
+##     res.type <- match.arg(res.type)
 
-    if(x$ploidy != 2) stop("this inbreeding coefficient is designed for diploid genotypes only")
+##     if(x$ploidy != 2) stop("this inbreeding coefficient is designed for diploid genotypes only")
 
-    if(!is.null(pop)) pop(x) <- pop
-    if(is.null(x at pop) && is.null(pop)) {
-        pop(x) <- factor(rep(1, nrow(x at tab)))
-    }
+##     if(!is.null(pop)) pop(x) <- pop
+##     if(is.null(x at pop) && is.null(pop)) {
+##         pop(x) <- factor(rep(1, nrow(x at tab)))
+##     }
 
 
-    ## COMPUTATIONS ##
+##     ## COMPUTATIONS ##
 
-    ## get allele frequencies and \sum p_i^2 by pop and loc ##
-    tabfreq2 <- (makefreq(x = genind2genpop(x, quiet = TRUE), quiet=TRUE, truenames=truenames)$tab) ^2
-    sumpi2 <- t(apply(tabfreq2, 1, tapply, x$loc.fac, sum))
+##     ## get allele frequencies and \sum p_i^2 by pop and loc ##
+##     tabfreq2 <- (makefreq(x = genind2genpop(x, quiet = TRUE), quiet=TRUE, truenames=truenames)$tab) ^2
+##     sumpi2 <- t(apply(tabfreq2, 1, tapply, x$loc.fac, sum))
 
-    ## function to check a 1-locus genotype for homozigosity
-    ## returns 1 if homoz, 0 otherwise
-    ## !!! NOTE : reverse the values returned by f1 to obtain a strange thing !!!
-    f1 <- function(gen){
-        if(any(is.na(gen))) return(NA)
-        if(any(round(gen, 10)==1)) return(1)
-        return(0)
-    }
+##     ## function to check a 1-locus genotype for homozigosity
+##     ## returns 1 if homoz, 0 otherwise
+##     ## !!! NOTE : reverse the values returned by f1 to obtain a strange thing !!!
+##     f1 <- function(gen){
+##         if(any(is.na(gen))) return(NA)
+##         if(any(round(gen, 10)==1)) return(1)
+##         return(0)
+##     }
 
-    ## get the table of binary hetero/homo data
-    if(truenames) {
-        X <- truenames(x)$tab
-    } else
-    X <- x$tab
+##     ## get the table of binary hetero/homo data
+##     if(truenames) {
+##         X <- truenames(x)$tab
+##     } else
+##     X <- x$tab
 
-    homotab <- t(apply(X, 1, tapply, x at loc.fac, f1))
+##     homotab <- t(apply(X, 1, tapply, x at loc.fac, f1))
 
 
-    ## get pi2 for the appropriate pop
-    if(truenames){
-    popx <- pop(x)
-    } else {
-        popx <- x$pop
-    }
+##     ## get pi2 for the appropriate pop
+##     if(truenames){
+##     popx <- pop(x)
+##     } else {
+##         popx <- x$pop
+##     }
 
-    popx <- as.character(popx)
-    tabpi2 <- sumpi2[popx, , drop=FALSE]
+##     popx <- as.character(popx)
+##     tabpi2 <- sumpi2[popx, , drop=FALSE]
 
 
-    ## COMPUTE FINAL RESULT ##
-    num <- homotab - tabpi2
-    ## denom <- tabpi2 * (1 - tabpi2) # does not actually compute a weighted mean
-    denom <- 1 - tabpi2
-    res <- num / denom
-    ## return values per locus ##
-    if(res.type=="byloc") return(res)
+##     ## COMPUTE FINAL RESULT ##
+##     num <- homotab - tabpi2
+##     ## denom <- tabpi2 * (1 - tabpi2) # does not actually compute a weighted mean
+##     denom <- 1 - tabpi2
+##     res <- num / denom
+##     ## return values per locus ##
+##     if(res.type=="byloc") return(res)
 
-    ## return mean weighted by effective nb of alleles ##
-    wtab <- 1/tabpi2
-    wtab[is.na(res)] <- NA
-    wtab <- t(apply(wtab, 1, function(e) return(e/sum(e,na.rm=TRUE))))
-    res <- wtab * res
+##     ## return mean weighted by effective nb of alleles ##
+##     wtab <- 1/tabpi2
+##     wtab[is.na(res)] <- NA
+##     wtab <- t(apply(wtab, 1, function(e) return(e/sum(e,na.rm=TRUE))))
+##     res <- wtab * res
 
-    res <- apply(res, 1, sum, na.rm=TRUE)
-    if(plot){
-        par(bg="grey")
-        nPop <- length(unique(popx))
-        myCol <- rainbow(nPop)[as.integer(pop(x))]
-        if(min(res)>0) ylim <- c(0, 1.1*max(res))
-        if(max(res)<0) ylim <- c(min(res), 0+abs(min(res))*0.1)
-        plot(res, col=myCol, type="h", ylab="Inbreeding", xlab="Individuals", ...)
-    }
+##     res <- apply(res, 1, sum, na.rm=TRUE)
+##     if(plot){
+##         par(bg="grey")
+##         nPop <- length(unique(popx))
+##         myCol <- rainbow(nPop)[as.integer(pop(x))]
+##         if(min(res)>0) ylim <- c(0, 1.1*max(res))
+##         if(max(res)<0) ylim <- c(min(res), 0+abs(min(res))*0.1)
+##         plot(res, col=myCol, type="h", ylab="Inbreeding", xlab="Individuals", ...)
+##     }
 
-    return(res)
+##     return(res)
 
-} # end inbreeding
+## } # end inbreeding
 
 
 
@@ -95,7 +95,8 @@
     checkType(x)
     res.type <- match.arg(res.type)
 
-    if(x$ploidy != 2) stop("this inbreeding coefficient is designed for diploid genotypes only")
+    ## if(x$ploidy != 2) stop("this inbreeding coefficient is designed for diploid genotypes only")
+    PLO <- ploidy(x)
 
     if(!is.null(pop)) pop(x) <- pop
     if(is.null(x at pop) && is.null(pop)) {
@@ -105,7 +106,9 @@
       ## COMPUTATIONS ##
 
     ## get allele frequencies and \sum p_i^2 by pop and loc ##
-    tabfreq2 <- (makefreq(x = genind2genpop(x, quiet = TRUE), quiet=TRUE, truenames=truenames)$tab) ^2
+    ## (generalized to any ploidy) ##
+    ## tabfreq2 <- (makefreq(x = genind2genpop(x, quiet = TRUE), quiet=TRUE, truenames=truenames)$tab) ^2
+    tabfreq2 <- (makefreq(x = genind2genpop(x, quiet = TRUE), quiet=TRUE, truenames=truenames)$tab) ^ PLO
     sumpi2 <- t(apply(tabfreq2, 1, tapply, x$loc.fac, sum))
 
     ## function to check a 1-locus genotype for homozigosity

Modified: pkg/inst/doc/adegenet-basics.Rnw
===================================================================
--- pkg/inst/doc/adegenet-basics.Rnw	2011-06-15 10:30:43 UTC (rev 911)
+++ pkg/inst/doc/adegenet-basics.Rnw	2011-06-15 15:05:04 UTC (rev 912)
@@ -482,7 +482,96 @@
 
 
 
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Handling presence/absence data}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+Adegenet was primarily suited to handle codominant, multiallelic markers like microsatellites.
+However, dominant binary markers, like AFLP, can be used as well.
+In such a case, only presence/absence of alleles can be deduced accurately from
+the genotypes.
+This has several consequences, like the inability to compute allele frequencies.
+Hence, some functionalities in adegenet won't be available for
+dominant markers.
+
+From version 1.2-3 of adegenet, the distinction between both types of markers is made by the slot
+'type' of genind or genpop objects, which equals "codom" for
+codominant markers, and "PA" for presence/absence data.
+In the latter case, the 'tab' slot of a genind object no longer contains allele
+frequencies, but only presence/absence of alleles in a genotype.
+Similarly, the 'tab' slot of a genpop object no longer contains
+counts of alleles in the populations; instead, it contains the number
+of genotypes in each population possessing at least one copy of the concerned alleles.
+Moreover, in the case of presence/absence, the slots 'loc.nall', 'loc.fac', and 'all.names'
+become useless, and are thus all set to NULL.
+\\
+
+
+Objects of type 'PA' are otherwise handled like usual (type 'codom')
+objects.
+Operations that are not available for PA type will issue an appropriate error message.
+
+Here is an example using a toy dataset 'AFLP.txt' that can be downloaded
+from the adegenet website, section 'Documentation':
+<<aflpread>>=
+dat <- read.table("http://adegenet.r-forge.r-project.org/files/AFLP.txt",header=TRUE)
+ dat
+@
+\noindent The function \texttt{df2genind} is used to obtain a genind object:
+<<>>=
+obj <- genind(dat, ploidy=1, type="PA")
+obj
+truenames(obj)
+@
+
+One can see that, for instance, the summary of this object is simpler (no numbers of alleles per locus, no heterozygosity):
+<<>>=
+pop(obj) <- rep(c('a','b'),4:3)
+summary(obj)
+@
+
+\noindent But we can still perform basic manipulation, like converting
+our object into a genpop:
+<<>>=
+obj2 <- genind2genpop(obj)
+obj2
+obj2 at tab
+@
+
+\noindent To continue with the toy example, we can proceed to a simple PCA.
+NAs are first replaced:
+<<>>=
+objNoNa <- na.replace(obj,met=0)
+objNoNa at tab
+@
+
+\noindent Now the PCA is performed:
+<<pcaaflp,fig=TRUE>>=
+library(ade4)
+pca1 <- dudi.pca(objNoNa,scannf=FALSE,scale=FALSE)
+scatter(pca1)
+@
+
+\noindent More generally, multivariate analyses from ade4, the sPCA (\texttt{spca}), the
+global and local tests (\texttt{global.rtest}, \texttt{local.rtest}), or
+the Monmonier's algorithm (\texttt{monmonier}) will work just fine
+with presence/absence data.
+However, it is clear that the usual Euclidean distance (used in PCA
+and sPCA), as well as many other distances, is not as accurate to measure genetic dissimilarity using
+presence/absence data as it is when using allele frequencies.
+The reason for this is that in presence/absence data, a part of the
+information is simply hidden.
+For instance, two individuals possessing the same allele will be
+considered at the same distance, whether they possess one or more
+copies of the allele.
+This might be especially problematic in organisms having a high degree
+of ploidy.
+
+
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{SNPs data}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 In adegenet, SNP data can be handled in two different ways.
@@ -723,7 +812,14 @@
 
 
 
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Basics of data analysis}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Manipulating data}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 Data manipulation is meant to be easy in \textit{adegenet} (if it is
@@ -856,48 +952,55 @@
 \subsection{Measuring and testing population structure (a.k.a F statistics)}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 Population structure is traditionally measured and tested using F statistics, in particular Fst.
-\emph{adegenet} proposes different tools in this respect: general F statistics (\texttt{fstat}), a test of overall
-population structure (\texttt{gstat.randtest}), and pairwise $Fst$ between all pairs of populations in a
-dataset (\texttt{pairwise.fst}).
-The first two are wrappers for functions implemented in the \emph{hierfstat} package; pairwise Fst
-is implemented in \emph{adegenet}.
+Since version 2.13.0 of R, the package \emph{hierfstat}, which implemented most F statistics and
+related tests, has been removed from CRAN for maintenance issues.
+As a consequence, \emph{adegenet} has lost a few functionalities, namely general F statistics
+(function \texttt{fstat}) and a test of overall population structure (\texttt{gstat.randtest}).
+\\
 
-We illustrate their use using the dataset of microsatellite of cats from Nancy:
-<<>>=
-library(hierfstat)
-data(nancycats)
-fstat(nancycats)
-@
-This table provides the three F statistics $Fst$ (pop/total), $Fit$ (Ind/total), and $Fis$ (ind/pop).
-These are overall measures which take into account all genotypes and all loci.
 
-Is the structure between populations significant?
-This question can be addressed using the G-statistic test \cite{tj511}; it is implemented for \texttt{genind} objects and produces a \texttt{randtest} object (package ade4).
-<<fig=TRUE>>=
-library(ade4)
-toto <- gstat.randtest(nancycats,nsim=99)
-toto
-plot(toto)
-@
+%% The first two are wrappers for functions implemented in the \emph{hierfstat} package; pairwise Fst
+%% is implemented in \emph{adegenet}.
 
-\noindent Yes, it is (the observed value is indicated on the right, while histograms correspond to
-the permuted values).
-Note that \emph{hierfstat} allows for more ellaborated tests, in particular when different levels of
-hierarchical clustering are available.
-Such tests are better done directly in \emph{hierfstat}; for this, \texttt{genind} objects can be
-converted to the adequat format using \texttt{genind2hierfstat}.
-For instance:
-<<>>=
-toto <- genind2hierfstat(nancycats)
-head(toto)
-varcomp.glob(toto$pop,toto[,-1])
-@
-F statistics are provided in \$F; for instance, here, $F_{st}$ is $0.083$.
+%% We illustrate their use using the dataset of microsatellite of cats from Nancy:
+%% <<>>=
+%% library(hierfstat)
+%% data(nancycats)
+%% fstat(nancycats)
+%% @
+%% This table provides the three F statistics $Fst$ (pop/total), $Fit$ (Ind/total), and $Fis$ (ind/pop).
+%% These are overall measures which take into account all genotypes and all loci.
 
+%% Is the structure between populations significant?
+%% This question can be addressed using the G-statistic test \cite{tj511}; it is implemented for \texttt{genind} objects and produces a \texttt{randtest} object (package ade4).
+%% <<fig=TRUE>>=
+%% library(ade4)
+%% toto <- gstat.randtest(nancycats,nsim=99)
+%% toto
+%% plot(toto)
+%% @
 
-~\\
-Lastly, pairwise $Fst$ is frequently used as a measure of distance between populations.
-The function \texttt{pairwise.fst} computes Nei's estimator \cite{tj814} of pairwise $Fst$, computed as:
+%% \noindent Yes, it is (the observed value is indicated on the right, while histograms correspond to
+%% the permuted values).
+%% Note that \emph{hierfstat} allows for more ellaborated tests, in particular when different levels of
+%% hierarchical clustering are available.
+%% Such tests are better done directly in \emph{hierfstat}; for this, \texttt{genind} objects can be
+%% converted to the adequat format using \texttt{genind2hierfstat}.
+%% For instance:
+%% <<>>=
+%% toto <- genind2hierfstat(nancycats)
+%% head(toto)
+%% varcomp.glob(toto$pop,toto[,-1])
+%% @
+%% F statistics are provided in \$F; for instance, here, $F_{st}$ is $0.083$.
+
+
+%% ~\\
+
+
+However, it is still possible to compute pairwise $Fst$ using \textit{adegenet}.
+Pairwise $Fst$ is frequently used as a measure of distance between populations.
+The function \texttt{pairwise.fst} computes Nei's estimator \cite{tj814} of pairwise $Fst$, defined as:
 $$
 Fst(A,B) = \frac{H_t - (n_AH_s(A) + n_BH_s(B))/(n_A + n_B)}{Ht}
 $$
@@ -910,6 +1013,7 @@
 illustrate this on a subset of individuals of \texttt{nancycats} (computations for the whole dataset
 would take a few tens of seconds):
 <<>>=
+data(nancycats)
 matFst <- pairwise.fst(nancycats[1:50, treatOther=FALSE])
 matFst
 @
@@ -956,9 +1060,52 @@
 
 
 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Estimating inbreeding}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+Inbreeding refers to an excess of homozygosity in a given individual due to the mating of
+genetically related parents.
+This excess of homozygosity is due to the fact that there are non-negligible chances of inheriting
+two identical alleles from a recent common ancestor.
+Inbreeding can be associated with a loss of fitness leading to "\textit{inbreeding depression}".
+Typically, loss of fitness is caused by recessive deleterious alleles which have usually low
+frequency in the population, but for which inbred individuals are more likely to be homozygotes.
+\\
 
 
+The inbreeding coefficient $F$ is defined as the probability that at a given locus, two identical
+alleles have been inherited from a common ancestor.
+In the absence of inbreeding, the probability of being homozygote at one locus is (for diploid
+individuals) simply $\sum_i p_i^2$, where $i$ indexes the alleles and $p_i$ is the frequency of
+allele $i$.
+This can be generalized incorporating $F$ as:
+$$
+p(\mbox{homozygote}) = F + (1-F) \sum_ip_i^2
+$$
+and even more generally, for any ploidy $\pi$:
+$$
+p(\mbox{homozygote}) = F + (1-F) \sum_ip_i^{\pi}
+$$
+This therefore allows for computing the likelihood of a given state (homozygote/heterozygote) in a
+given genotype (log-likelihoods are summed across loci for more than one marker).
+\\
+
+This estimation is achieved by \texttt{inbreeding}.
+Depending on the value of the argument \texttt{res.type}, the function returns a sample from the
+likelihood function (\texttt{res.type='sample'}) or the likelihood function itself, as an R function (\texttt{res.type='function'}).
+While likelihood functions are quickly obtained and easy to display graphically, sampling from the
+distributions is required to compute summary statistics of the distributions.
+
+
+
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Multivariate analysis}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Performing a Principal Component Analysis on \texttt{genind} objects}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 The tables contained in \texttt{genind} objects can be submitted to a Principal Component Analysis (PCA) to seek a typology of individuals.
@@ -1312,94 +1459,10 @@
 
 
 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\subsection{Handling presence/absence data}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-Adegenet was primarly suited to handle codominant, multiallelic markers like microsatellites.
-However, dominant binary markers, like AFLP, can be used as well.
-In such a case, only presence/absence of alleles can be deduced accurately from
-the genotypes.
-This has several consequences, like the unability to compute allele frequencies.
-Hence, some functionalities in adegenet won't be available for
-dominant markers.
 
-From version 1.2-3 of adegenet, the distinction between both types of markers is made by the slot
-'type' of genind or genpop objects, which equals "codom" for
-codominant markers, and "PA" for presence/absence data.
-In the latter case, the 'tab' slot of a genind object no longer contains allele
-frequencies, but only presence/absence of alleles in a genotype.
-Similarly, the 'tab' slot of a genpop object not longer contains
-counts of alleles in the populations; instead, it contains the number
-of genotypes in each population possessing at least one copy of the concerned alleles.
-Moreover, in the case of presence/absence, the slots 'loc.nall', 'loc.fac', and 'all.names'
-become useless, and are thus all set to NULL.
-\\
 
 
-Objects of type 'PA' are otherwise handled like usual (type 'codom')
-objects.
-Operations that are not available for PA type will issue an appropriate error message.
 
-Here is an example using a toy dataset 'AFLP.txt' that can be downloaded
-from the adegenet website, section 'Documentation':
-<<aflpread>>=
-dat <- read.table("http://adegenet.r-forge.r-project.org/files/AFLP.txt",header=TRUE)
-dat
-@
-\noindent The function \texttt{df2genind} is used to obtain a genind object:
-<<>>=
-obj <- genind(dat, ploidy=1, type="PA")
-obj
-truenames(obj)
-@
-
-One can see that for instance, the summary of this object is more simple (no numbers of alleles per locus, no heterozygosity):
-<<>>=
-pop(obj) <- rep(c('a','b'),4:3)
-summary(obj)
-@
-
-\noindent But we can still perform basic manipulation, like converting
-our object into a genpop:
-<<>>=
-obj2 <- genind2genpop(obj)
-obj2
-obj2 at tab
-@
-
-\noindent To continue with the toy example, we can proceed to a simple PCA.
-NAs are first replaced:
-<<>>=
-objNoNa <- na.replace(obj,met=0)
-objNoNa at tab
-@
-
-\noindent Now the PCA is performed:
-<<pcaaflp,fig=TRUE>>=
-library(ade4)
-pca1 <- dudi.pca(objNoNa,scannf=FALSE,scale=FALSE)
-scatter(pca1)
-@
-
-\noindent More generally, multivariate analyses from ade4, the sPCA (\texttt{spca}), the
-global and local tests (\texttt{global.rtest}, \texttt{local.rtest}), or
-the Monmonier's algorithm (\texttt{monmonier}) will work just fine
-with presence/absence data.
-However, it is clear that the usual Euclidean distance (used in PCA
-and sPCA), as well as many other distances, is not as accurate to measure genetic dissimilarity using
-presence/absence data as it is when using allele frequencies.
-The reason for this is that in presence/absence data, a part of the
-information is simply hidden.
-For instance, two individuals possessing the same allele will be
-considered at the same distance, whether they possess one or more
-copies of the allele.
-This might be especially problematic in organisms having a high degree
-of ploidy.
-
-
-
-
-
 \begin{thebibliography}{9}
 
 \bibitem{tjart19}

Deleted: pkg/man/inbreeding.Rd
===================================================================
--- pkg/man/inbreeding.Rd	2011-06-15 10:30:43 UTC (rev 911)
+++ pkg/man/inbreeding.Rd	2011-06-15 15:05:04 UTC (rev 912)
@@ -1,78 +0,0 @@
-\encoding{UTF-8}
-\name{Inbreeding}
-\alias{inbreeding}
-\title{Inbreeding coefficient for diploid genotypes}
-\description{
-  WARNING: this function is under development. Please contact the author
-  (\email{t.jombart at imperial.ac.uk}) before using it.
-  
-  The function \code{inbreeding} computes Balloux's inbreeding
-  coefficient for each individual of a \linkS4class{genind}
-  objects. Results can be averaged over loci or detailed per locus. By
-  default, \code{inbreeding} also produces a graphical output of the results.
-}
-\usage{
-inbreeding(x, pop=NULL, truenames=TRUE, res.type=c("mean","byloc"), plot=TRUE, \ldots)
-}
-\arguments{
-  \item{x}{an object of class \linkS4class{genind}.}
-  \item{pop}{a factor giving the 'population' of each individual. If NULL,
-    pop is seeked from \code{pop(x)}. Note that the term population refers in
-    fact to any grouping of individuals'.}
-  \item{truenames}{a logical indicating whether true names should be
-    used (TRUE, default) instead of generic labels (FALSE); used if
-    res.type is "matrix".}
- \item{res.type}{a character string matching "mean" or "byloc",
-   specifying whether results should be averaged over loci ("mean") or
-   detailed by locus ("byloc").}
- \item{plot}{a logical indicating whether a graphical
-   output should be produced (TRUE, default), or not (FALSE).}
- \item{\ldots}{other arguments to be passed to \code{plot}.}
-}
-\value{
-  A vector (if res.type is "mean"), or a matrix (if res.type is "byloc")
-  of inbreeding coefficient values.
-}
-\seealso{
-  \code{\link{inbreeding.ml}}: a maximum-likelihood estimation of
-  inbreeding.
-  
-  \code{\link{Hs}}%, \code{\link[hierfstat]{varcomp.glob}},
-%  \code{\link{gstat.randtest}}
-}
-\references{
-  Brown AR, Hosken DJ, Balloux F, et al. 2009 Genetic variation,
-  inbreeding and chemical exposure - combined effects in wildlife and
-  critical considerations for ecotoxicology. Philosophical Transactions
-  of the Royal Society B, London 364: 3377 - 3390
-}
-\details{
-  Let \eqn{p_i} refer to the allele frequencies in a population. Let
-  \eqn{h} be an variable which equates 1 if the individual is
-  homozygote, and 0 otherwise. For one locus, Balloux's inbreeding coefficient is
-  defined as:
-
-  \eqn{  \frac{h - \sum_i p_i^2}{ \sum_i p_i^2 (1- \sum_i p_i^2)} }
-
-  For multi-locus genotypes, inbreeding values are averaged over the
-  loci.
-
-  Important note: to estimate F, the probability of being homozygote at
-  a locus an individual has to be inferred from a single
-  observation. This can results in inaccuracy of the estimation of F,
-  and possible negative values. To circumvent such issues, use the
-  maximum-likelihood estimation of F (\code{\link{inbreeding.ml}}).
-}
-\author{
-  Implementation: Thibaut Jombart \email{t.jombart at imperial.ac.uk}\cr
-  Formula by Francois Balloux \email{f.balloux at imperial.ac.uk}
-}
-\examples{
-## cat colonies of Nancy
-data(nancycats)
-inbreeding(nancycats)
-
-## French/African cattle breeds
-data(microbov)
-inbreeding(microbov)
-}

Copied: pkg/man/inbreeding.Rd (from rev 907, pkg/man/inbreeding.ml.Rd)
===================================================================
--- pkg/man/inbreeding.Rd	                        (rev 0)
+++ pkg/man/inbreeding.Rd	2011-06-15 15:05:04 UTC (rev 912)
@@ -0,0 +1,89 @@
+\encoding{UTF-8}
+\name{Inbreeding estimation}
+\alias{inbreeding}
+\title{Likelihood-based estimation of inbreeding}
+\description{
+  The function \code{inbreeding} estimates the inbreeding coefficient
+  of an individual (F) by computing its likelihood function. It can
+  return either the density of probability of F, or a sample of F values
+  from this distribution. This operation is performed for all the
+  individuals of a \linkS4class{genind} object. Any ploidy greater than
+  1 is acceptable.
+}
+\usage{
+inbreeding(x, pop = NULL, truenames = TRUE, res.type = c("sample", "function"), N = 200, M = N * 10)
+}
+\arguments{
+  \item{x}{an object of class \linkS4class{genind}.}
+  \item{pop}{a factor giving the 'population' of each individual. If NULL,
+    pop is seeked from \code{pop(x)}. Note that the term population refers in
+    fact to any grouping of individuals'.}
+  \item{truenames}{a logical indicating whether true names should be
+    used (TRUE, default) instead of generic labels (FALSE); used if
+    res.type is "matrix".}
+ \item{res.type}{a character string matching "sample" or "function",
+   specifying whether the output should be a function giving the density of probability
+   of F values ("function") or a sample of F values taken from this
+   distribution ("sample", default).}
+ \item{N}{an integer indicating the size of the sample to be taken from
+   the distribution of F values.}
+ \item{M}{an integer indicating the number of different F values to be
+   used to generate the sample. Values larger than N are recommended to
+   avoid poor sampling of the distribution.}
+}
+\value{
+  A named list with one component for each individual, each of which is
+  a function or a vector of sampled F values (see \code{res.type} argument).
+}
+\seealso{
+  \code{\link{Hs}}: computation of expected heterozygosity.
+}
+\details{
+  Let \eqn{F} denote the inbreeding coefficient, defined as the
+  probability for an individual to inherit two identical alleles from a
+  single ancestor.
+
+  Let \eqn{p_i} refer to the frequency of allele \eqn{i} in the population. Let
+  \eqn{h} be a variable which equals 1 if the individual is
+  homozygote, and 0 otherwise. For one locus, the probability of being
+  homozygote is computed as:
+
+  \eqn{ F + (1-F) \sum_i p_i^2}
+
+  The probability of being heterozygote is:
+  \eqn{1 - (F + (1-F) \sum_i p_i^2)}
+
+  The likelihood of a genotype is defined as the probability of being
+  the observed state (homozygote or heterozygote). In the case of
+  multilocus genotypes, log-likelihoods are summed over the loci.
+}
+\author{
+  Thibaut Jombart \email{t.jombart at imperial.ac.uk}\cr
+}
+\examples{
+## cattle breed microsatellite data
+data(microbov)
+
+## isolate Lagunaire breed
+lagun <- seppop(microbov)$Lagunaire
+
+## estimate inbreeding - return sample of F values
+Fsamp <- inbreeding(lagun)
+
+## plot the first 10 results
+invisible(sapply(Fsamp[1:10], function(e) plot(density(e), xlab="F", xlim=c(0,1), main="Density of the sampled F values")))
+
+## compute means for all individuals
+Fmean=sapply(Fsamp, mean)
+hist(Fmean, col="orange", xlab="mean value of F", main="Distribution of mean F across individuals")
+
+## estimate inbreeding - return proba density functions
+Fdens <- inbreeding(lagun, res.type="function")
+
+## view function for the first individual
+Fdens[[1]]
+
+## plot the first 10 functions
+invisible(sapply(Fdens[1:10], plot, ylab="Density", main="Density of probability of F values"))
+
+}
\ No newline at end of file

Deleted: pkg/man/inbreeding.ml.Rd
===================================================================
--- pkg/man/inbreeding.ml.Rd	2011-06-15 10:30:43 UTC (rev 911)
+++ pkg/man/inbreeding.ml.Rd	2011-06-15 15:05:04 UTC (rev 912)
@@ -1,89 +0,0 @@
-\encoding{UTF-8}
-\name{Inbreeding estimation}
-\alias{inbreeding.ml}
-\title{Likelihood-based estimation of inbreeding}
-\description{
-  The function \code{inbreeding.ml} estimates the inbreeding coefficient
-  of an individuals (F) by computing its likelihood function. It can
-  return either the density of probability of F, or a sample of F values
-  from this distribution. This operation is performed for all the
-  individuals of a \linkS4class{genind} object.
-}
-\usage{
-inbreeding.ml(x, pop = NULL, truenames = TRUE, res.type = c("sample", "function"), N = 200, M = N * 10)
-}
-\arguments{
-  \item{x}{an object of class \linkS4class{genind}.}
-  \item{pop}{a factor giving the 'population' of each individual. If NULL,
-    pop is seeked from \code{pop(x)}. Note that the term population refers in
-    fact to any grouping of individuals'.}
-  \item{truenames}{a logical indicating whether true names should be
-    used (TRUE, default) instead of generic labels (FALSE); used if
-    res.type is "matrix".}
- \item{res.type}{a character string matching "sample" or "function",
-   specifying whether the output should be a function giving the density of probability
-   of F values ("function") or a sample of F values taken from this
-   distribution ("sample", default).}
- \item{N}{an integer indicating the size of the sample to be taken from
-   the distribution of F values.}
- \item{M}{an integer indicating the number of different F values to be
-   used to generate the sample. Values larger than N are recommended to
-   avoid poor sampling of the distribution.}
-}
-\value{
-  A named list with one component for each individual, each of which is
-  a function or a vector of sampled F values (see \code{res.type} argument).
-}
-\seealso{
-  \code{\link{inbreeding}}: a more basic estimator of F with graphical outputs.
-  \code{\link{Hs}}: computation of expected heterozygosity.
-}
-\details{
-  Let \eqn{F} denote the inbreeding coefficient, defined as the
-  probability for an individual to inherit two identical alleles from a
-  single ancestor.
-
-  Let \eqn{p_i} refer to the frequency of allele \eqn{i} in the population. Let
-  \eqn{h} be an variable which equates 1 if the individual is
-  homozygote, and 0 otherwise. For one locus, the probability of being
-  homozygote is computed as:
-
-  \eqn{ F + (1-F) \sum_i p_i^2}
-
-  The probability of being heterozygote is:
-  \eqn{1 - (F + (1-F) \sum_i p_i^2)}
-  
-  The likelihood of a genotype is defined as the probability of being
-  the observed state (homozygote or heterozygote). In the case of
-  multilocus genotypes, log-likelihood are summed over the loci.
-}
-\author{
-  Thibaut Jombart \email{t.jombart at imperial.ac.uk}\cr
-}
-\examples{
-## cattle breed microsatellite data
-data(microbov)
-
-## isolate Lagunaire breed
-lagun <- seppop(microbov)$Lagunaire
-
-## estimate inbreeding - return sample of F values
-Fsamp <- inbreeding.ml(lagun)
-
-## plot the first 10 results
-invisible(sapply(Fsamp[1:10], function(e) plot(density(e), xlab="F", xlim=c(0,1), main="Density of the sampled F values")))
-
-## compute means for all individuals
-Fmean=sapply(Fsamp, mean)
-hist(Fmean, col="orange", xlab="mean value of F", main="Distribution of mean F across individuals")
-
-## estimate inbreeding - return proba density functions
-Fdens <- inbreeding.ml(lagun, res.type="function")
-
-## view function for the first individual
-Fdens[[1]]
-
-## plot the first 10 functions
-invisible(sapply(Fdens[1:10], plot, ylab="Density", main="Density of probability of F values"))
-
-}
\ No newline at end of file

Copied: pkg/man/inbreedingBalloux.old.Rd (from rev 907, pkg/man/inbreeding.Rd)
===================================================================
--- pkg/man/inbreedingBalloux.old.Rd	                        (rev 0)
+++ pkg/man/inbreedingBalloux.old.Rd	2011-06-15 15:05:04 UTC (rev 912)
@@ -0,0 +1,78 @@
+% \encoding{UTF-8}
+% \name{Inbreeding}
+% \alias{inbreeding}
+% \title{Inbreeding coefficient for diploid genotypes}
+% \description{
+%   WARNING: this function is under development. Please contact the author
+%   (\email{t.jombart at imperial.ac.uk}) before using it.
+  
+%   The function \code{inbreeding} computes Balloux's inbreeding
+%   coefficient for each individual of a \linkS4class{genind}
+%   objects. Results can be averaged over loci or detailed per locus. By
+%   default, \code{inbreeding} also produces a graphical output of the results.
+% }
+% \usage{
+% inbreeding(x, pop=NULL, truenames=TRUE, res.type=c("mean","byloc"), plot=TRUE, \ldots)
+% }
+% \arguments{
+%   \item{x}{an object of class \linkS4class{genind}.}
+%   \item{pop}{a factor giving the 'population' of each individual. If NULL,
+%     pop is seeked from \code{pop(x)}. Note that the term population refers in
+%     fact to any grouping of individuals'.}
+%   \item{truenames}{a logical indicating whether true names should be
+%     used (TRUE, default) instead of generic labels (FALSE); used if
+%     res.type is "matrix".}
+%  \item{res.type}{a character string matching "mean" or "byloc",
+%    specifying whether results should be averaged over loci ("mean") or
+%    detailed by locus ("byloc").}
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/adegenet -r 912