[adegenet-commits] r912 - in pkg: R inst/doc man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Jun 15 17:05:05 CEST 2011
Author: jombart
Date: 2011-06-15 17:05:04 +0200 (Wed, 15 Jun 2011)
New Revision: 912
Added:
pkg/man/inbreeding.Rd
pkg/man/inbreedingBalloux.old.Rd
Removed:
pkg/man/inbreeding.Rd
pkg/man/inbreeding.ml.Rd
Modified:
pkg/R/inbreeding.R
pkg/inst/doc/adegenet-basics.Rnw
Log:
A few changes on inbreeding.ml, which now is the only inbreeding function.
Modified: pkg/R/inbreeding.R
===================================================================
--- pkg/R/inbreeding.R 2011-06-15 10:30:43 UTC (rev 911)
+++ pkg/R/inbreeding.R 2011-06-15 15:05:04 UTC (rev 912)
@@ -1,82 +1,82 @@
-#############
-## inbreeding
-#############
-inbreeding <- function(x, pop=NULL, truenames=TRUE, res.type=c("mean","byloc"), plot=TRUE, ...){
- ## CHECKS ##
- if(!is.genind(x)) stop("x is not a valid genind object")
- checkType(x)
- res.type <- match.arg(res.type)
+## #############
+## ## inbreeding
+## #############
+## inbreeding <- function(x, pop=NULL, truenames=TRUE, res.type=c("mean","byloc"), plot=TRUE, ...){
+## ## CHECKS ##
+## if(!is.genind(x)) stop("x is not a valid genind object")
+## checkType(x)
+## res.type <- match.arg(res.type)
- if(x$ploidy != 2) stop("this inbreeding coefficient is designed for diploid genotypes only")
+## if(x$ploidy != 2) stop("this inbreeding coefficient is designed for diploid genotypes only")
- if(!is.null(pop)) pop(x) <- pop
- if(is.null(x at pop) && is.null(pop)) {
- pop(x) <- factor(rep(1, nrow(x at tab)))
- }
+## if(!is.null(pop)) pop(x) <- pop
+## if(is.null(x at pop) && is.null(pop)) {
+## pop(x) <- factor(rep(1, nrow(x at tab)))
+## }
- ## COMPUTATIONS ##
+## ## COMPUTATIONS ##
- ## get allele frequencies and \sum p_i^2 by pop and loc ##
- tabfreq2 <- (makefreq(x = genind2genpop(x, quiet = TRUE), quiet=TRUE, truenames=truenames)$tab) ^2
- sumpi2 <- t(apply(tabfreq2, 1, tapply, x$loc.fac, sum))
+## ## get allele frequencies and \sum p_i^2 by pop and loc ##
+## tabfreq2 <- (makefreq(x = genind2genpop(x, quiet = TRUE), quiet=TRUE, truenames=truenames)$tab) ^2
+## sumpi2 <- t(apply(tabfreq2, 1, tapply, x$loc.fac, sum))
- ## function to check a 1-locus genotype for homozigosity
- ## returns 1 if homoz, 0 otherwise
- ## !!! NOTE : reverse the values returned by f1 to obtain a strange thing !!!
- f1 <- function(gen){
- if(any(is.na(gen))) return(NA)
- if(any(round(gen, 10)==1)) return(1)
- return(0)
- }
+## ## function to check a 1-locus genotype for homozigosity
+## ## returns 1 if homoz, 0 otherwise
+## ## !!! NOTE : reverse the values returned by f1 to obtain a strange thing !!!
+## f1 <- function(gen){
+## if(any(is.na(gen))) return(NA)
+## if(any(round(gen, 10)==1)) return(1)
+## return(0)
+## }
- ## get the table of binary hetero/homo data
- if(truenames) {
- X <- truenames(x)$tab
- } else
- X <- x$tab
+## ## get the table of binary hetero/homo data
+## if(truenames) {
+## X <- truenames(x)$tab
+## } else
+## X <- x$tab
- homotab <- t(apply(X, 1, tapply, x at loc.fac, f1))
+## homotab <- t(apply(X, 1, tapply, x at loc.fac, f1))
- ## get pi2 for the appropriate pop
- if(truenames){
- popx <- pop(x)
- } else {
- popx <- x$pop
- }
+## ## get pi2 for the appropriate pop
+## if(truenames){
+## popx <- pop(x)
+## } else {
+## popx <- x$pop
+## }
- popx <- as.character(popx)
- tabpi2 <- sumpi2[popx, , drop=FALSE]
+## popx <- as.character(popx)
+## tabpi2 <- sumpi2[popx, , drop=FALSE]
- ## COMPUTE FINAL RESULT ##
- num <- homotab - tabpi2
- ## denom <- tabpi2 * (1 - tabpi2) # does not actually compute a weighted mean
- denom <- 1 - tabpi2
- res <- num / denom
- ## return values per locus ##
- if(res.type=="byloc") return(res)
+## ## COMPUTE FINAL RESULT ##
+## num <- homotab - tabpi2
+## ## denom <- tabpi2 * (1 - tabpi2) # does not actually compute a weighted mean
+## denom <- 1 - tabpi2
+## res <- num / denom
+## ## return values per locus ##
+## if(res.type=="byloc") return(res)
- ## return mean weighted by effective nb of alleles ##
- wtab <- 1/tabpi2
- wtab[is.na(res)] <- NA
- wtab <- t(apply(wtab, 1, function(e) return(e/sum(e,na.rm=TRUE))))
- res <- wtab * res
+## ## return mean weighted by effective nb of alleles ##
+## wtab <- 1/tabpi2
+## wtab[is.na(res)] <- NA
+## wtab <- t(apply(wtab, 1, function(e) return(e/sum(e,na.rm=TRUE))))
+## res <- wtab * res
- res <- apply(res, 1, sum, na.rm=TRUE)
- if(plot){
- par(bg="grey")
- nPop <- length(unique(popx))
- myCol <- rainbow(nPop)[as.integer(pop(x))]
- if(min(res)>0) ylim <- c(0, 1.1*max(res))
- if(max(res)<0) ylim <- c(min(res), 0+abs(min(res))*0.1)
- plot(res, col=myCol, type="h", ylab="Inbreeding", xlab="Individuals", ...)
- }
+## res <- apply(res, 1, sum, na.rm=TRUE)
+## if(plot){
+## par(bg="grey")
+## nPop <- length(unique(popx))
+## myCol <- rainbow(nPop)[as.integer(pop(x))]
+## if(min(res)>0) ylim <- c(0, 1.1*max(res))
+## if(max(res)<0) ylim <- c(min(res), 0+abs(min(res))*0.1)
+## plot(res, col=myCol, type="h", ylab="Inbreeding", xlab="Individuals", ...)
+## }
- return(res)
+## return(res)
-} # end inbreeding
+## } # end inbreeding
@@ -95,7 +95,8 @@
checkType(x)
res.type <- match.arg(res.type)
- if(x$ploidy != 2) stop("this inbreeding coefficient is designed for diploid genotypes only")
+ ## if(x$ploidy != 2) stop("this inbreeding coefficient is designed for diploid genotypes only")
+ PLO <- ploidy(x)
if(!is.null(pop)) pop(x) <- pop
if(is.null(x at pop) && is.null(pop)) {
@@ -105,7 +106,9 @@
## COMPUTATIONS ##
## get allele frequencies and \sum p_i^2 by pop and loc ##
- tabfreq2 <- (makefreq(x = genind2genpop(x, quiet = TRUE), quiet=TRUE, truenames=truenames)$tab) ^2
+ ## (generalized to any ploidy) ##
+ ## tabfreq2 <- (makefreq(x = genind2genpop(x, quiet = TRUE), quiet=TRUE, truenames=truenames)$tab) ^2
+ tabfreq2 <- (makefreq(x = genind2genpop(x, quiet = TRUE), quiet=TRUE, truenames=truenames)$tab) ^ PLO
sumpi2 <- t(apply(tabfreq2, 1, tapply, x$loc.fac, sum))
## function to check a 1-locus genotype for homozigosity
Modified: pkg/inst/doc/adegenet-basics.Rnw
===================================================================
--- pkg/inst/doc/adegenet-basics.Rnw 2011-06-15 10:30:43 UTC (rev 911)
+++ pkg/inst/doc/adegenet-basics.Rnw 2011-06-15 15:05:04 UTC (rev 912)
@@ -482,7 +482,96 @@
+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Handling presence/absence data}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+Adegenet was primarily suited to handle codominant, multiallelic markers like microsatellites.
+However, dominant binary markers, like AFLP, can be used as well.
+In such a case, only presence/absence of alleles can be deduced accurately from
+the genotypes.
+This has several consequences, like the inability to compute allele frequencies.
+Hence, some functionalities in adegenet won't be available for
+dominant markers.
+
+From version 1.2-3 of adegenet, the distinction between both types of markers is made by the slot
+'type' of genind or genpop objects, which equals "codom" for
+codominant markers, and "PA" for presence/absence data.
+In the latter case, the 'tab' slot of a genind object no longer contains allele
+frequencies, but only presence/absence of alleles in a genotype.
+Similarly, the 'tab' slot of a genpop object no longer contains
+counts of alleles in the populations; instead, it contains the number
+of genotypes in each population possessing at least one copy of the concerned alleles.
+Moreover, in the case of presence/absence, the slots 'loc.nall', 'loc.fac', and 'all.names'
+become useless, and are thus all set to NULL.
+\\
+
+
+Objects of type 'PA' are otherwise handled like usual (type 'codom')
+objects.
+Operations that are not available for PA type will issue an appropriate error message.
+
+Here is an example using a toy dataset 'AFLP.txt' that can be downloaded
+from the adegenet website, section 'Documentation':
+<<aflpread>>=
+dat <- read.table("http://adegenet.r-forge.r-project.org/files/AFLP.txt",header=TRUE)
+ dat
+@
+\noindent The constructor \texttt{genind} is used to obtain a genind object:
+<<>>=
+obj <- genind(dat, ploidy=1, type="PA")
+obj
+truenames(obj)
+@
+
+One can see that, for instance, the summary of this object is simpler (no numbers of alleles per locus, no heterozygosity):
+<<>>=
+pop(obj) <- rep(c('a','b'),4:3)
+summary(obj)
+@
+
+\noindent But we can still perform basic manipulation, like converting
+our object into a genpop:
+<<>>=
+obj2 <- genind2genpop(obj)
+obj2
+obj2 at tab
+@
+
+\noindent To continue with the toy example, we can proceed to a simple PCA.
+NAs are first replaced:
+<<>>=
+objNoNa <- na.replace(obj,met=0)
+objNoNa at tab
+@
+
+\noindent Now the PCA is performed:
+<<pcaaflp,fig=TRUE>>=
+library(ade4)
+pca1 <- dudi.pca(objNoNa,scannf=FALSE,scale=FALSE)
+scatter(pca1)
+@
+
+\noindent More generally, multivariate analyses from ade4, the sPCA (\texttt{spca}), the
+global and local tests (\texttt{global.rtest}, \texttt{local.rtest}), or
+the Monmonier's algorithm (\texttt{monmonier}) will work just fine
+with presence/absence data.
+However, it is clear that the usual Euclidean distance (used in PCA
+and sPCA), as well as many other distances, is not as accurate to measure genetic dissimilarity using
+presence/absence data as it is when using allele frequencies.
+The reason for this is that in presence/absence data, a part of the
+information is simply hidden.
+For instance, two individuals possessing the same allele will be
+considered at the same distance, whether they possess one or more
+copies of the allele.
+This might be especially problematic in organisms having a high degree
+of ploidy.
+
+
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{SNPs data}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
In adegenet, SNP data can be handled in two different ways.
@@ -723,7 +812,14 @@
+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Basics of data analysis}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Manipulating data}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Data manipulation is meant to be easy in \textit{adegenet} (if it is
@@ -856,48 +952,55 @@
\subsection{Measuring and testing population structure (a.k.a F statistics)}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Population structure is traditionally measured and tested using F statistics, in particular Fst.
-\emph{adegenet} proposes different tools in this respect: general F statistics (\texttt{fstat}), a test of overall
-population structure (\texttt{gstat.randtest}), and pairwise $Fst$ between all pairs of populations in a
-dataset (\texttt{pairwise.fst}).
-The first two are wrappers for functions implemented in the \emph{hierfstat} package; pairwise Fst
-is implemented in \emph{adegenet}.
+Since version 2.13.0 of R, the package \emph{hierfstat}, which implemented most F statistics and
+related tests, has been removed from CRAN for maintenance issues.
+As a consequence, \emph{adegenet} has lost a few functionalities, namely general F statistics
+(function \texttt{fstat}) and a test of overall population structure (\texttt{gstat.randtest}).
+\\
-We illustrate their use using the dataset of microsatellite of cats from Nancy:
-<<>>=
-library(hierfstat)
-data(nancycats)
-fstat(nancycats)
-@
-This table provides the three F statistics $Fst$ (pop/total), $Fit$ (Ind/total), and $Fis$ (ind/pop).
-These are overall measures which take into account all genotypes and all loci.
-Is the structure between populations significant?
-This question can be addressed using the G-statistic test \cite{tj511}; it is implemented for \texttt{genind} objects and produces a \texttt{randtest} object (package ade4).
-<<fig=TRUE>>=
-library(ade4)
-toto <- gstat.randtest(nancycats,nsim=99)
-toto
-plot(toto)
-@
+%% The first two are wrappers for functions implemented in the \emph{hierfstat} package; pairwise Fst
+%% is implemented in \emph{adegenet}.
-\noindent Yes, it is (the observed value is indicated on the right, while histograms correspond to
-the permuted values).
-Note that \emph{hierfstat} allows for more ellaborated tests, in particular when different levels of
-hierarchical clustering are available.
-Such tests are better done directly in \emph{hierfstat}; for this, \texttt{genind} objects can be
-converted to the adequat format using \texttt{genind2hierfstat}.
-For instance:
-<<>>=
-toto <- genind2hierfstat(nancycats)
-head(toto)
-varcomp.glob(toto$pop,toto[,-1])
-@
-F statistics are provided in \$F; for instance, here, $F_{st}$ is $0.083$.
+%% We illustrate their use using the dataset of microsatellite of cats from Nancy:
+%% <<>>=
+%% library(hierfstat)
+%% data(nancycats)
+%% fstat(nancycats)
+%% @
+%% This table provides the three F statistics $Fst$ (pop/total), $Fit$ (Ind/total), and $Fis$ (ind/pop).
+%% These are overall measures which take into account all genotypes and all loci.
+%% Is the structure between populations significant?
+%% This question can be addressed using the G-statistic test \cite{tj511}; it is implemented for \texttt{genind} objects and produces a \texttt{randtest} object (package ade4).
+%% <<fig=TRUE>>=
+%% library(ade4)
+%% toto <- gstat.randtest(nancycats,nsim=99)
+%% toto
+%% plot(toto)
+%% @
-~\\
-Lastly, pairwise $Fst$ is frequently used as a measure of distance between populations.
-The function \texttt{pairwise.fst} computes Nei's estimator \cite{tj814} of pairwise $Fst$, computed as:
+%% \noindent Yes, it is (the observed value is indicated on the right, while histograms correspond to
+%% the permuted values).
+%% Note that \emph{hierfstat} allows for more ellaborated tests, in particular when different levels of
+%% hierarchical clustering are available.
+%% Such tests are better done directly in \emph{hierfstat}; for this, \texttt{genind} objects can be
+%% converted to the adequat format using \texttt{genind2hierfstat}.
+%% For instance:
+%% <<>>=
+%% toto <- genind2hierfstat(nancycats)
+%% head(toto)
+%% varcomp.glob(toto$pop,toto[,-1])
+%% @
+%% F statistics are provided in \$F; for instance, here, $F_{st}$ is $0.083$.
+
+
+%% ~\\
+
+
+However, it is still possible to compute pairwise $Fst$ using \textit{adegenet}.
+Pairwise $Fst$ is frequently used as a measure of distance between populations.
+The function \texttt{pairwise.fst} computes Nei's estimator \cite{tj814} of pairwise $Fst$, defined as:
$$
Fst(A,B) = \frac{H_t - (n_AH_s(A) + n_BH_s(B))/(n_A + n_B)}{Ht}
$$
@@ -910,6 +1013,7 @@
illustrate this on a subset of individuals of \texttt{nancycats} (computations for the whole dataset
would take a few tens of seconds):
<<>>=
+data(nancycats)
matFst <- pairwise.fst(nancycats[1:50, treatOther=FALSE])
matFst
@
@@ -956,9 +1060,52 @@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Estimating inbreeding}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+Inbreeding refers to an excess of homozygosity in a given individual due to the mating of
+genetically related parents.
+This excess of homozygosity is due to the fact that there are non-negligible chances of inheriting
+two identical alleles from a recent common ancestor.
+Inbreeding can be associated with a loss of fitness leading to ``\textit{inbreeding depression}''.
+Typically, loss of fitness is caused by recessive deleterious alleles which have usually low
+frequency in the population, but for which inbred individuals are more likely to be homozygotes.
+\\
+The inbreeding coefficient $F$ is defined as the probability that at a given locus, two identical
+alleles have been inherited from a common ancestor.
+In the absence of inbreeding, the probability of being homozygote at one locus is (for diploid
+individuals) simply $\sum_i p_i^2$, where $i$ indexes the alleles and $p_i$ is the frequency of
+allele $i$.
+This can be generalized incorporating $F$ as:
+$$
+p(\mbox{homozygote}) = F + (1-F) \sum_ip_i^2
+$$
+and even more generally, for any ploidy $\pi$:
+$$
+p(\mbox{homozygote}) = F + (1-F) \sum_ip_i^{\pi}
+$$
+This therefore allows for computing the likelihood of a given state (homozygote/heterozygote) in a
+given genotype (log-likelihoods are summed across loci for more than one marker).
+\\
+
+This estimation is achieved by \texttt{inbreeding}.
+Depending on the value of the argument \texttt{res.type}, the function returns a sample from the
+likelihood function (\texttt{res.type='sample'}) or the likelihood function itself, as an R function (\texttt{res.type='function'}).
+While likelihood functions are quickly obtained and easy to display graphically, sampling from the
+distributions is required to compute summary statistics of the distributions.
+
+
+
+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Multivariate analysis}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Performing a Principal Component Analysis on \texttt{genind} objects}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The tables contained in \texttt{genind} objects can be submitted to a Principal Component Analysis (PCA) to seek a typology of individuals.
@@ -1312,94 +1459,10 @@
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\subsection{Handling presence/absence data}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-Adegenet was primarly suited to handle codominant, multiallelic markers like microsatellites.
-However, dominant binary markers, like AFLP, can be used as well.
-In such a case, only presence/absence of alleles can be deduced accurately from
-the genotypes.
-This has several consequences, like the unability to compute allele frequencies.
-Hence, some functionalities in adegenet won't be available for
-dominant markers.
-From version 1.2-3 of adegenet, the distinction between both types of markers is made by the slot
-'type' of genind or genpop objects, which equals "codom" for
-codominant markers, and "PA" for presence/absence data.
-In the latter case, the 'tab' slot of a genind object no longer contains allele
-frequencies, but only presence/absence of alleles in a genotype.
-Similarly, the 'tab' slot of a genpop object not longer contains
-counts of alleles in the populations; instead, it contains the number
-of genotypes in each population possessing at least one copy of the concerned alleles.
-Moreover, in the case of presence/absence, the slots 'loc.nall', 'loc.fac', and 'all.names'
-become useless, and are thus all set to NULL.
-\\
-Objects of type 'PA' are otherwise handled like usual (type 'codom')
-objects.
-Operations that are not available for PA type will issue an appropriate error message.
-Here is an example using a toy dataset 'AFLP.txt' that can be downloaded
-from the adegenet website, section 'Documentation':
-<<aflpread>>=
-dat <- read.table("http://adegenet.r-forge.r-project.org/files/AFLP.txt",header=TRUE)
-dat
-@
-\noindent The function \texttt{df2genind} is used to obtain a genind object:
-<<>>=
-obj <- genind(dat, ploidy=1, type="PA")
-obj
-truenames(obj)
-@
-
-One can see that for instance, the summary of this object is more simple (no numbers of alleles per locus, no heterozygosity):
-<<>>=
-pop(obj) <- rep(c('a','b'),4:3)
-summary(obj)
-@
-
-\noindent But we can still perform basic manipulation, like converting
-our object into a genpop:
-<<>>=
-obj2 <- genind2genpop(obj)
-obj2
-obj2 at tab
-@
-
-\noindent To continue with the toy example, we can proceed to a simple PCA.
-NAs are first replaced:
-<<>>=
-objNoNa <- na.replace(obj,met=0)
-objNoNa at tab
-@
-
-\noindent Now the PCA is performed:
-<<pcaaflp,fig=TRUE>>=
-library(ade4)
-pca1 <- dudi.pca(objNoNa,scannf=FALSE,scale=FALSE)
-scatter(pca1)
-@
-
-\noindent More generally, multivariate analyses from ade4, the sPCA (\texttt{spca}), the
-global and local tests (\texttt{global.rtest}, \texttt{local.rtest}), or
-the Monmonier's algorithm (\texttt{monmonier}) will work just fine
-with presence/absence data.
-However, it is clear that the usual Euclidean distance (used in PCA
-and sPCA), as well as many other distances, is not as accurate to measure genetic dissimilarity using
-presence/absence data as it is when using allele frequencies.
-The reason for this is that in presence/absence data, a part of the
-information is simply hidden.
-For instance, two individuals possessing the same allele will be
-considered at the same distance, whether they possess one or more
-copies of the allele.
-This might be especially problematic in organisms having a high degree
-of ploidy.
-
-
-
-
-
\begin{thebibliography}{9}
\bibitem{tjart19}
Deleted: pkg/man/inbreeding.Rd
===================================================================
--- pkg/man/inbreeding.Rd 2011-06-15 10:30:43 UTC (rev 911)
+++ pkg/man/inbreeding.Rd 2011-06-15 15:05:04 UTC (rev 912)
@@ -1,78 +0,0 @@
-\encoding{UTF-8}
-\name{Inbreeding}
-\alias{inbreeding}
-\title{Inbreeding coefficient for diploid genotypes}
-\description{
- WARNING: this function is under development. Please contact the author
- (\email{t.jombart at imperial.ac.uk}) before using it.
-
- The function \code{inbreeding} computes Balloux's inbreeding
- coefficient for each individual of a \linkS4class{genind}
- objects. Results can be averaged over loci or detailed per locus. By
- default, \code{inbreeding} also produces a graphical output of the results.
-}
-\usage{
-inbreeding(x, pop=NULL, truenames=TRUE, res.type=c("mean","byloc"), plot=TRUE, \ldots)
-}
-\arguments{
- \item{x}{an object of class \linkS4class{genind}.}
- \item{pop}{a factor giving the 'population' of each individual. If NULL,
- pop is seeked from \code{pop(x)}. Note that the term population refers in
- fact to any grouping of individuals'.}
- \item{truenames}{a logical indicating whether true names should be
- used (TRUE, default) instead of generic labels (FALSE); used if
- res.type is "matrix".}
- \item{res.type}{a character string matching "mean" or "byloc",
- specifying whether results should be averaged over loci ("mean") or
- detailed by locus ("byloc").}
- \item{plot}{a logical indicating whether a graphical
- output should be produced (TRUE, default), or not (FALSE).}
- \item{\ldots}{other arguments to be passed to \code{plot}.}
-}
-\value{
- A vector (if res.type is "mean"), or a matrix (if res.type is "byloc")
- of inbreeding coefficient values.
-}
-\seealso{
- \code{\link{inbreeding.ml}}: a maximum-likelihood estimation of
- inbreeding.
-
- \code{\link{Hs}}%, \code{\link[hierfstat]{varcomp.glob}},
-% \code{\link{gstat.randtest}}
-}
-\references{
- Brown AR, Hosken DJ, Balloux F, et al. 2009 Genetic variation,
- inbreeding and chemical exposure - combined effects in wildlife and
- critical considerations for ecotoxicology. Philosophical Transactions
- of the Royal Society B, London 364: 3377 - 3390
-}
-\details{
- Let \eqn{p_i} refer to the allele frequencies in a population. Let
- \eqn{h} be an variable which equates 1 if the individual is
- homozygote, and 0 otherwise. For one locus, Balloux's inbreeding coefficient is
- defined as:
-
- \eqn{ \frac{h - \sum_i p_i^2}{ \sum_i p_i^2 (1- \sum_i p_i^2)} }
-
- For multi-locus genotypes, inbreeding values are averaged over the
- loci.
-
- Important note: to estimate F, the probability of being homozygote at
- a locus an individual has to be inferred from a single
- observation. This can results in inaccuracy of the estimation of F,
- and possible negative values. To circumvent such issues, use the
- maximum-likelihood estimation of F (\code{\link{inbreeding.ml}}).
-}
-\author{
- Implementation: Thibaut Jombart \email{t.jombart at imperial.ac.uk}\cr
- Formula by Francois Balloux \email{f.balloux at imperial.ac.uk}
-}
-\examples{
-## cat colonies of Nancy
-data(nancycats)
-inbreeding(nancycats)
-
-## French/African cattle breeds
-data(microbov)
-inbreeding(microbov)
-}
Copied: pkg/man/inbreeding.Rd (from rev 907, pkg/man/inbreeding.ml.Rd)
===================================================================
--- pkg/man/inbreeding.Rd (rev 0)
+++ pkg/man/inbreeding.Rd 2011-06-15 15:05:04 UTC (rev 912)
@@ -0,0 +1,89 @@
+\encoding{UTF-8}
+\name{Inbreeding estimation}
+\alias{inbreeding}
+\title{Likelihood-based estimation of inbreeding}
+\description{
+ The function \code{inbreeding} estimates the inbreeding coefficient
+ of an individual (F) by computing its likelihood function. It can
+ return either the density of probability of F, or a sample of F values
+ from this distribution. This operation is performed for all the
+ individuals of a \linkS4class{genind} object. Any ploidy greater than
+ 1 is acceptable.
+}
+\usage{
+inbreeding(x, pop = NULL, truenames = TRUE, res.type = c("sample", "function"), N = 200, M = N * 10)
+}
+\arguments{
+ \item{x}{an object of class \linkS4class{genind}.}
+ \item{pop}{a factor giving the 'population' of each individual. If NULL,
+ pop is sought from \code{pop(x)}. Note that the term population refers in
+ fact to any grouping of individuals.}
+ \item{truenames}{a logical indicating whether true names should be
+ used (TRUE, default) instead of generic labels (FALSE); used if
+ res.type is "matrix".}
+ \item{res.type}{a character string matching "sample" or "function",
+ specifying whether the output should be a function giving the density of probability
+ of F values ("function") or a sample of F values taken from this
+ distribution ("sample", default).}
+ \item{N}{an integer indicating the size of the sample to be taken from
+ the distribution of F values.}
+ \item{M}{an integer indicating the number of different F values to be
+ used to generate the sample. Values larger than N are recommended to
+ avoid poor sampling of the distribution.}
+}
+\value{
+ A named list with one component for each individual, each of which is
+ a function or a vector of sampled F values (see \code{res.type} argument).
+}
+\seealso{
+ \code{\link{Hs}}: computation of expected heterozygosity.
+}
+\details{
+ Let \eqn{F} denote the inbreeding coefficient, defined as the
+ probability for an individual to inherit two identical alleles from a
+ single ancestor.
+
+ Let \eqn{p_i} refer to the frequency of allele \eqn{i} in the population. Let
+ \eqn{h} be a variable which equals 1 if the individual is
+ homozygote, and 0 otherwise. For one locus, the probability of being
+ homozygote is computed as:
+
+ \eqn{ F + (1-F) \sum_i p_i^2}
+
+ The probability of being heterozygote is:
+ \eqn{1 - (F + (1-F) \sum_i p_i^2)}
+
+ The likelihood of a genotype is defined as the probability of being
+ the observed state (homozygote or heterozygote). In the case of
+ multilocus genotypes, log-likelihoods are summed over the loci.
+}
+\author{
+ Thibaut Jombart \email{t.jombart at imperial.ac.uk}\cr
+}
+\examples{
+## cattle breed microsatellite data
+data(microbov)
+
+## isolate Lagunaire breed
+lagun <- seppop(microbov)$Lagunaire
+
+## estimate inbreeding - return sample of F values
+Fsamp <- inbreeding(lagun)
+
+## plot the first 10 results
+invisible(sapply(Fsamp[1:10], function(e) plot(density(e), xlab="F", xlim=c(0,1), main="Density of the sampled F values")))
+
+## compute means for all individuals
+Fmean=sapply(Fsamp, mean)
+hist(Fmean, col="orange", xlab="mean value of F", main="Distribution of mean F across individuals")
+
+## estimate inbreeding - return proba density functions
+Fdens <- inbreeding(lagun, res.type="function")
+
+## view function for the first individual
+Fdens[[1]]
+
+## plot the first 10 functions
+invisible(sapply(Fdens[1:10], plot, ylab="Density", main="Density of probability of F values"))
+
+}
\ No newline at end of file
Deleted: pkg/man/inbreeding.ml.Rd
===================================================================
--- pkg/man/inbreeding.ml.Rd 2011-06-15 10:30:43 UTC (rev 911)
+++ pkg/man/inbreeding.ml.Rd 2011-06-15 15:05:04 UTC (rev 912)
@@ -1,89 +0,0 @@
-\encoding{UTF-8}
-\name{Inbreeding estimation}
-\alias{inbreeding.ml}
-\title{Likelihood-based estimation of inbreeding}
-\description{
- The function \code{inbreeding.ml} estimates the inbreeding coefficient
- of an individuals (F) by computing its likelihood function. It can
- return either the density of probability of F, or a sample of F values
- from this distribution. This operation is performed for all the
- individuals of a \linkS4class{genind} object.
-}
-\usage{
-inbreeding.ml(x, pop = NULL, truenames = TRUE, res.type = c("sample", "function"), N = 200, M = N * 10)
-}
-\arguments{
- \item{x}{an object of class \linkS4class{genind}.}
- \item{pop}{a factor giving the 'population' of each individual. If NULL,
- pop is seeked from \code{pop(x)}. Note that the term population refers in
- fact to any grouping of individuals'.}
- \item{truenames}{a logical indicating whether true names should be
- used (TRUE, default) instead of generic labels (FALSE); used if
- res.type is "matrix".}
- \item{res.type}{a character string matching "sample" or "function",
- specifying whether the output should be a function giving the density of probability
- of F values ("function") or a sample of F values taken from this
- distribution ("sample", default).}
- \item{N}{an integer indicating the size of the sample to be taken from
- the distribution of F values.}
- \item{M}{an integer indicating the number of different F values to be
- used to generate the sample. Values larger than N are recommended to
- avoid poor sampling of the distribution.}
-}
-\value{
- A named list with one component for each individual, each of which is
- a function or a vector of sampled F values (see \code{res.type} argument).
-}
-\seealso{
- \code{\link{inbreeding}}: a more basic estimator of F with graphical outputs.
- \code{\link{Hs}}: computation of expected heterozygosity.
-}
-\details{
- Let \eqn{F} denote the inbreeding coefficient, defined as the
- probability for an individual to inherit two identical alleles from a
- single ancestor.
-
- Let \eqn{p_i} refer to the frequency of allele \eqn{i} in the population. Let
- \eqn{h} be an variable which equates 1 if the individual is
- homozygote, and 0 otherwise. For one locus, the probability of being
- homozygote is computed as:
-
- \eqn{ F + (1-F) \sum_i p_i^2}
-
- The probability of being heterozygote is:
- \eqn{1 - (F + (1-F) \sum_i p_i^2)}
-
- The likelihood of a genotype is defined as the probability of being
- the observed state (homozygote or heterozygote). In the case of
- multilocus genotypes, log-likelihood are summed over the loci.
-}
-\author{
- Thibaut Jombart \email{t.jombart at imperial.ac.uk}\cr
-}
-\examples{
-## cattle breed microsatellite data
-data(microbov)
-
-## isolate Lagunaire breed
-lagun <- seppop(microbov)$Lagunaire
-
-## estimate inbreeding - return sample of F values
-Fsamp <- inbreeding.ml(lagun)
-
-## plot the first 10 results
-invisible(sapply(Fsamp[1:10], function(e) plot(density(e), xlab="F", xlim=c(0,1), main="Density of the sampled F values")))
-
-## compute means for all individuals
-Fmean=sapply(Fsamp, mean)
-hist(Fmean, col="orange", xlab="mean value of F", main="Distribution of mean F across individuals")
-
-## estimate inbreeding - return proba density functions
-Fdens <- inbreeding.ml(lagun, res.type="function")
-
-## view function for the first individual
-Fdens[[1]]
-
-## plot the first 10 functions
-invisible(sapply(Fdens[1:10], plot, ylab="Density", main="Density of probability of F values"))
-
-}
\ No newline at end of file
Copied: pkg/man/inbreedingBalloux.old.Rd (from rev 907, pkg/man/inbreeding.Rd)
===================================================================
--- pkg/man/inbreedingBalloux.old.Rd (rev 0)
+++ pkg/man/inbreedingBalloux.old.Rd 2011-06-15 15:05:04 UTC (rev 912)
@@ -0,0 +1,78 @@
+% \encoding{UTF-8}
+% \name{Inbreeding}
+% \alias{inbreeding}
+% \title{Inbreeding coefficient for diploid genotypes}
+% \description{
+% WARNING: this function is under development. Please contact the author
+% (\email{t.jombart at imperial.ac.uk}) before using it.
+
+% The function \code{inbreeding} computes Balloux's inbreeding
+% coefficient for each individual of a \linkS4class{genind}
+% objects. Results can be averaged over loci or detailed per locus. By
+% default, \code{inbreeding} also produces a graphical output of the results.
+% }
+% \usage{
+% inbreeding(x, pop=NULL, truenames=TRUE, res.type=c("mean","byloc"), plot=TRUE, \ldots)
+% }
+% \arguments{
+% \item{x}{an object of class \linkS4class{genind}.}
+% \item{pop}{a factor giving the 'population' of each individual. If NULL,
+% pop is seeked from \code{pop(x)}. Note that the term population refers in
+% fact to any grouping of individuals'.}
+% \item{truenames}{a logical indicating whether true names should be
+% used (TRUE, default) instead of generic labels (FALSE); used if
+% res.type is "matrix".}
+% \item{res.type}{a character string matching "mean" or "byloc",
+% specifying whether results should be averaged over loci ("mean") or
+% detailed by locus ("byloc").}
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/adegenet -r 912
More information about the adegenet-commits
mailing list