[adegenet-commits] r1113 - in pkg: . R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Apr 24 19:08:08 CEST 2013
Author: greatsage
Date: 2013-04-24 19:08:08 +0200 (Wed, 24 Apr 2013)
New Revision: 1113
Modified:
pkg/DESCRIPTION
pkg/R/dapc.R
pkg/man/adegenet.package.Rd
pkg/man/dapc.Rd
Log:
Package: adegenet
Version: 1.3-8
Date: 2013/04/24
Title: adegenet: an R package for the exploratory analysis of genetic and genomic data.
Author: Thibaut Jombart, Ismail Ahmed, Federico Calboli, Anne Cori, Tobias Erik Reiners, Peter Solymos
Maintainer: Thibaut Jombart <t.jombart at imperial.ac.uk>
Suggests: genetics, spdep, tripack, pegas, seqinr, adehabitat, multicore, akima, maps, splancs, hierfstat
Depends: R (>= 2.10), methods, MASS, ade4, igraph, ape
Description: Classes and functions for genetic data analysis within the multivariate framework.
Collate: classes.R basicMethods.R handling.R auxil.R setAs.R SNPbin.R glHandle.R glFunctions.R glSim.R find.clust.R hybridize.R scale.R fstat.R import.R seqTrack.R chooseCN.R genind2genpop.R loadingplot.R sequences.R gstat.randtest.R makefreq.R colorplot.R monmonier.R spca.R coords.monmonier.R haploGen.R old2new.R spca.rtests.R dapc.R haploPop.R PCtest.R dist.genpop.R Hs.R propShared.R export.R HWE.R propTyped.R inbreeding.R glPlot.R gengraph.R simOutbreak.R mutations.R zzz.R
License: GPL (>=2)
LazyLoad: yes
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2013-04-17 11:23:48 UTC (rev 1112)
+++ pkg/DESCRIPTION 2013-04-24 17:08:08 UTC (rev 1113)
@@ -1,12 +1,12 @@
Package: adegenet
-Version: 1.3-7
-Date: 2013/04/05
+Version: 1.3-8
+Date: 2013/04/24
Title: adegenet: an R package for the exploratory analysis of genetic and genomic data.
-Author: Thibaut Jombart, Ismail Ahmed, Anne Cori, Tobias Erik Reiners, Peter Solymos
+Author: Thibaut Jombart, Ismail Ahmed, Federico Calboli, Anne Cori, Tobias Erik Reiners, Peter Solymos
Maintainer: Thibaut Jombart <t.jombart at imperial.ac.uk>
Suggests: genetics, spdep, tripack, pegas, seqinr, adehabitat, multicore, akima, maps, splancs, hierfstat
Depends: R (>= 2.10), methods, MASS, ade4, igraph, ape
Description: Classes and functions for genetic data analysis within the multivariate framework.
-Collate: classes.R basicMethods.R handling.R auxil.R setAs.R SNPbin.R glHandle.R glFunctions.R glSim.R find.clust.R hybridize.R scale.R fstat.R import.R seqTrack.R chooseCN.R genind2genpop.R loadingplot.R sequences.R gstat.randtest.R makefreq.R colorplot.R monmonier.R spca.R coords.monmonier.R haploGen.R old2new.R spca.rtests.R dapc.R dapcXval.R haploPop.R PCtest.R dist.genpop.R Hs.R propShared.R export.R HWE.R propTyped.R inbreeding.R glPlot.R gengraph.R simOutbreak.R mutations.R zzz.R
+Collate: classes.R basicMethods.R handling.R auxil.R setAs.R SNPbin.R glHandle.R glFunctions.R glSim.R find.clust.R hybridize.R scale.R fstat.R import.R seqTrack.R chooseCN.R genind2genpop.R loadingplot.R sequences.R gstat.randtest.R makefreq.R colorplot.R monmonier.R spca.R coords.monmonier.R haploGen.R old2new.R spca.rtests.R dapc.R haploPop.R PCtest.R dist.genpop.R Hs.R propShared.R export.R HWE.R propTyped.R inbreeding.R glPlot.R gengraph.R simOutbreak.R mutations.R zzz.R
License: GPL (>=2)
LazyLoad: yes
Modified: pkg/R/dapc.R
===================================================================
--- pkg/R/dapc.R 2013-04-17 11:23:48 UTC (rev 1112)
+++ pkg/R/dapc.R 2013-04-24 17:08:08 UTC (rev 1113)
@@ -986,14 +986,30 @@
## ############
## ## crossval
## ############
-## crossval <- function (x, ...) UseMethod("crossval")
+#xval <- function (x, ...) UseMethod("xval")
-## crossval.dapc <- function(){
+xval.dapc <- function(object, n.pca, n.da, training.set = 90){
+ training.set = training.set/100
+ kept.id <- unlist(tapply(1:nInd(object), pop(object), function(e) {pop.size = length(e); pop.size.train = round(pop.size * training.set); sample(e, pop.size.train, replace=FALSE)}))
+ training <- object[kept.id]
+ validating <- object[-kept.id]
+ post = vector(mode = 'list', length = n.pca)
+ asgn = vector(mode = 'list', length = n.pca)
+ ind = vector(mode = 'list', length = n.pca)
+ mtch = vector(mode = 'list', length = n.pca)
+ for(i in 1:n.pca){
+ dapc.base = dapc(training, n.pca = i, n.da = 15)
+ dapc.p = predict.dapc(dapc.base, newdata = validating)
+ match.prp = mean(as.character(dapc.p$assign)==as.character(pop(validating)))
+ post[[i]] = dapc.p$posterior
+ asgn[[i]] = dapc.p$assign
+ ind[[i]] = dapc.p$ind.score
+ mtch[[i]] = match.prp
+ }
+ res = list(assign = asgn, posterior = post, ind.score = ind, match.prp = mtch)
+ return(res)
+} # end of xval.dapc
-## }
-
-
-
## ###############
## ## randtest.dapc
## ###############
Modified: pkg/man/adegenet.package.Rd
===================================================================
--- pkg/man/adegenet.package.Rd 2013-04-17 11:23:48 UTC (rev 1112)
+++ pkg/man/adegenet.package.Rd 2013-04-24 17:08:08 UTC (rev 1113)
@@ -190,14 +190,14 @@
\tabular{ll}{
Package: \tab adegenet\cr
Type: \tab Package\cr
- Version: \tab 1.3-7\cr
- Date: \tab 2013-04-05 \cr
+ Version: \tab 1.3-8\cr
+ Date: \tab 2013-04-24 \cr
License: \tab GPL (>=2)
}
}
\author{
Thibaut Jombart <t.jombart at imperial.ac.uk>\cr
- Developpers: Ismail Ahmed <ismail.ahmed at inserm.fr>, Tobias Erik Reiners, Peter Solymos, Anne Cori\cr
+ Developers: Ismail Ahmed <ismail.ahmed at inserm.fr>, Federico Calboli <f.calboli at imperial.ac.uk>, Tobias Erik Reiners, Peter Solymos, Anne Cori\cr
and contributed datasets from: Katayoun Moazami-Goudarzi, Denis Laloë,
Dominique Pontier, Daniel Maillard, Francois Balloux.
}
Modified: pkg/man/dapc.Rd
===================================================================
--- pkg/man/dapc.Rd 2013-04-17 11:23:48 UTC (rev 1112)
+++ pkg/man/dapc.Rd 2013-04-24 17:08:08 UTC (rev 1113)
@@ -9,6 +9,7 @@
\alias{print.dapc}
\alias{summary.dapc}
\alias{predict.dapc}
+\alias{xval.dapc}
\alias{as.lda}
\alias{as.lda.dapc}
\title{Discriminant Analysis of Principal Components (DAPC)}
@@ -36,7 +37,8 @@
- \code{print.dapc}: prints the content of a \code{dapc} object.\cr
- \code{summary.dapc}: extracts useful information from a \code{dapc} object.\cr
- - \code{predict.dapc}: predicts group memberships based on DAPC results.
+ - \code{predict.dapc}: predicts group memberships based on DAPC results.\cr
+ - \code{xval.dapc}: performs cross-validation of DAPC function varying the number of PCs and keeping the number of DAs fixed.
DAPC implementation calls upon \code{\link[ade4]{dudi.pca}} from the
@@ -73,6 +75,8 @@
\method{predict}{dapc}(object, newdata, prior = object$prior, dimen,
method = c("plug-in", "predictive", "debiased"), ...)
+
+\method{xval}{dapc}(object, n.pca, n.da, training.set = 90)
}
\arguments{
\item{x}{\code{a data.frame}, \code{matrix}, or \code{\linkS4class{genind}}
@@ -132,6 +136,10 @@
original ('training') data. In particular, variables must be exactly
the same as in the original data. For \linkS4class{genind}
objects, see \code{\link{repool}} to ensure matching of alleles.}
+ \item{training.set}{the percentage of individuals randomly chosen in each population
+ as the training set used for cross-validation. This value is applied to all groups/pops
+ defined in the object. The default is set to 90\%.
+ For meaningful cross-validation it is recommended not to go below 80\%}
\item{prior,dimen,method}{see \code{?predict.lda}.}
}
\details{
@@ -172,6 +180,8 @@
\item{var.contr}{(optional) a data.frame giving the contributions of original
variables (alleles in the case of genetic data) to the principal components
of DAPC.}
+ \item{match.prp}{a list, where each item is the proportion of individuals
+ correctly matched to their original population in cross-validation.}
=== other outputs ===\cr
@@ -181,7 +191,11 @@
\code{assign.prop} (proportion of overall correct assignment),
\code{assign.per.pop} (proportion of correct assignment per group),
\code{prior.grp.size} (prior group sizes), and \code{post.grp.size} (posterior
- group sizes).
+ group sizes), \code{xval.dapc} (returns a list of four lists, each one with as
+ many items as cross-validation runs. The first item is a list of \code{assign} components,
+ the second is a list of \code{posterior} components, the third is a list of \code{ind.score}
+ components and the fourth is a list of \code{match.prp} items, i.e. the proportion of the validation
+ set correctly matched to its original population)
}
\references{
Jombart T, Devillard S and Balloux F (2010) Discriminant analysis of
@@ -291,8 +305,28 @@
## image using compoplot
compoplot(dapc1, new.pred=hyb.pred, ncol=2)
title("30 indiv popA, 30 indiv pop B, 30 hybrids")
+
+## CROSS-VALIDATION ##
+# select dataset
+data(microbov)
+summary(microbov) # the dataset contains 15 populations of different sizes
+
+# we take a fixed number of discriminant functions (15 in this case)
+# and we test how the cross-validation does varying the number of PCs
+# we specify the *maximum* number of PCs, and we will test how
+# the cross-validation performs by going from 1 PC to the maximum
+# we specified in the function call
+
+crossval.test <- xval.dapc(microbov, n.pca = 40, n.da = 15, training.set = 90)
+
+attributes(crossval.test) # we get four lists of lists
+# namely "assign" "posterior" "ind.score" "match.prp"
+# a quick visual inspection of the cross-validation
+
+plot(unlist(crossval.test$match.prp))
+
}
}
-\keyword{multivariate}
\ No newline at end of file
+\keyword{multivariate}
More information about the adegenet-commits
mailing list