[adegenet-commits] r1113 - in pkg: . R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Apr 24 19:08:08 CEST 2013


Author: greatsage
Date: 2013-04-24 19:08:08 +0200 (Wed, 24 Apr 2013)
New Revision: 1113

Modified:
   pkg/DESCRIPTION
   pkg/R/dapc.R
   pkg/man/adegenet.package.Rd
   pkg/man/dapc.Rd
Log:
Package: adegenet
Version: 1.3-8
Date: 2013/04/24
Title: adegenet: an R package for the exploratory analysis of genetic and genomic data.
Author:  Thibaut Jombart, Ismail Ahmed, Federico Calboli, Anne Cori, Tobias Erik Reiners, Peter Solymos
Maintainer: Thibaut Jombart <t.jombart at imperial.ac.uk>
Suggests: genetics, spdep, tripack, pegas, seqinr, adehabitat, multicore, akima, maps, splancs, hierfstat
Depends: R (>= 2.10), methods, MASS, ade4, igraph, ape
Description: Classes and functions for genetic data analysis within the multivariate framework.
Collate: classes.R basicMethods.R handling.R auxil.R setAs.R SNPbin.R glHandle.R glFunctions.R glSim.R find.clust.R hybridize.R scale.R fstat.R import.R seqTrack.R chooseCN.R genind2genpop.R loadingplot.R sequences.R gstat.randtest.R makefreq.R colorplot.R monmonier.R spca.R coords.monmonier.R haploGen.R old2new.R spca.rtests.R dapc.R haploPop.R PCtest.R dist.genpop.R Hs.R propShared.R export.R HWE.R propTyped.R inbreeding.R glPlot.R gengraph.R simOutbreak.R mutations.R zzz.R
License: GPL (>=2)
LazyLoad: yes


Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION	2013-04-17 11:23:48 UTC (rev 1112)
+++ pkg/DESCRIPTION	2013-04-24 17:08:08 UTC (rev 1113)
@@ -1,12 +1,12 @@
 Package: adegenet
-Version: 1.3-7
-Date: 2013/04/05
+Version: 1.3-8
+Date: 2013/04/24
 Title: adegenet: an R package for the exploratory analysis of genetic and genomic data.
-Author:  Thibaut Jombart, Ismail Ahmed, Anne Cori, Tobias Erik Reiners, Peter Solymos
+Author:  Thibaut Jombart, Ismail Ahmed, Federico Calboli, Anne Cori, Tobias Erik Reiners, Peter Solymos
 Maintainer: Thibaut Jombart <t.jombart at imperial.ac.uk>
 Suggests: genetics, spdep, tripack, pegas, seqinr, adehabitat, multicore, akima, maps, splancs, hierfstat
 Depends: R (>= 2.10), methods, MASS, ade4, igraph, ape
 Description: Classes and functions for genetic data analysis within the multivariate framework.
-Collate: classes.R basicMethods.R handling.R auxil.R setAs.R SNPbin.R glHandle.R glFunctions.R glSim.R find.clust.R hybridize.R scale.R fstat.R import.R seqTrack.R chooseCN.R genind2genpop.R loadingplot.R sequences.R gstat.randtest.R makefreq.R colorplot.R monmonier.R spca.R coords.monmonier.R haploGen.R old2new.R spca.rtests.R dapc.R dapcXval.R haploPop.R PCtest.R dist.genpop.R Hs.R propShared.R export.R HWE.R propTyped.R inbreeding.R glPlot.R gengraph.R simOutbreak.R mutations.R zzz.R
+Collate: classes.R basicMethods.R handling.R auxil.R setAs.R SNPbin.R glHandle.R glFunctions.R glSim.R find.clust.R hybridize.R scale.R fstat.R import.R seqTrack.R chooseCN.R genind2genpop.R loadingplot.R sequences.R gstat.randtest.R makefreq.R colorplot.R monmonier.R spca.R coords.monmonier.R haploGen.R old2new.R spca.rtests.R dapc.R haploPop.R PCtest.R dist.genpop.R Hs.R propShared.R export.R HWE.R propTyped.R inbreeding.R glPlot.R gengraph.R simOutbreak.R mutations.R zzz.R
 License: GPL (>=2)
 LazyLoad: yes

Modified: pkg/R/dapc.R
===================================================================
--- pkg/R/dapc.R	2013-04-17 11:23:48 UTC (rev 1112)
+++ pkg/R/dapc.R	2013-04-24 17:08:08 UTC (rev 1113)
@@ -986,14 +986,30 @@
 ## ############
 ## ## crossval
 ## ############
-## crossval <- function (x, ...) UseMethod("crossval")
+#xval <- function (x, ...) UseMethod("xval")
 
-## crossval.dapc <- function(){
+## Cross-validation of DAPC: for each number of PCs in 1..n.pca, fits dapc() on a random
+## training subset ('training.set' percent of each pop(object) group) of 'object', using
+## 'n.da' discriminant functions, and predicts the held-out individuals with predict.dapc().
+## Returns four lists indexed by number of PCs: assign, posterior, ind.score, match.prp.
+xval.dapc <- function(object, n.pca, n.da, training.set = 90){
+  if(missing(n.da)) n.da <- 15 # 15 was previously hard-coded and 'n.da' was ignored
+  ## index into 'e': sample(e, k) would draw from 1:e when a group has a single individual
+  kept.id <- unlist(tapply(seq_len(nInd(object)), pop(object), function(e)
+    e[sample(length(e), round(length(e) * training.set/100), replace = FALSE)]))
+  training <- object[kept.id]
+  validating <- object[-kept.id]
+  post <- asgn <- ind <- mtch <- vector(mode = "list", length = n.pca)
+  for(i in seq_len(n.pca)){ # seq_len() yields no iteration when n.pca is 0
+    dapc.p <- predict.dapc(dapc(training, n.pca = i, n.da = n.da), newdata = validating)
+    post[[i]] <- dapc.p$posterior
+    asgn[[i]] <- dapc.p$assign
+    ind[[i]] <- dapc.p$ind.score
+    mtch[[i]] <- mean(as.character(dapc.p$assign) == as.character(pop(validating)))
+  }
+  list(assign = asgn, posterior = post, ind.score = ind, match.prp = mtch)
+} # end of xval.dapc
 
-## }
-
-
-
 ## ###############
 ## ## randtest.dapc
 ## ###############

Modified: pkg/man/adegenet.package.Rd
===================================================================
--- pkg/man/adegenet.package.Rd	2013-04-17 11:23:48 UTC (rev 1112)
+++ pkg/man/adegenet.package.Rd	2013-04-24 17:08:08 UTC (rev 1113)
@@ -190,14 +190,14 @@
   \tabular{ll}{
     Package: \tab adegenet\cr
     Type: \tab Package\cr
-    Version: \tab 1.3-7\cr
-    Date: \tab 2013-04-05 \cr
+    Version: \tab 1.3-8\cr
+    Date: \tab 2013-04-24 \cr
     License: \tab GPL (>=2)
   } 
 }
 \author{
   Thibaut Jombart <t.jombart at imperial.ac.uk>\cr
-  Developpers: Ismail Ahmed <ismail.ahmed at inserm.fr>, Tobias Erik Reiners, Peter Solymos, Anne Cori\cr
+  Developers: Ismail Ahmed <ismail.ahmed at inserm.fr>, Federico Calboli <f.calboli at imperial.ac.uk>, Tobias Erik Reiners, Peter Solymos, Anne Cori\cr
   and contributed datasets from: Katayoun Moazami-Goudarzi, Denis Laloë,
   Dominique Pontier, Daniel Maillard, Francois Balloux.
 }

Modified: pkg/man/dapc.Rd
===================================================================
--- pkg/man/dapc.Rd	2013-04-17 11:23:48 UTC (rev 1112)
+++ pkg/man/dapc.Rd	2013-04-24 17:08:08 UTC (rev 1113)
@@ -9,6 +9,7 @@
 \alias{print.dapc}
 \alias{summary.dapc}
 \alias{predict.dapc}
+\alias{xval.dapc}
 \alias{as.lda}
 \alias{as.lda.dapc}
 \title{Discriminant Analysis of Principal Components (DAPC)}
@@ -36,7 +37,8 @@
 
   - \code{print.dapc}: prints the content of a \code{dapc} object.\cr
   - \code{summary.dapc}: extracts useful information from a  \code{dapc} object.\cr
-  - \code{predict.dapc}: predicts group memberships based on DAPC results.
+  - \code{predict.dapc}: predicts group memberships based on DAPC results.\cr
+  - \code{xval.dapc}: performs cross-validation of DAPC function varying the number of PCs and keeping the number of DAs fixed. 
 
 
   DAPC implementation calls upon \code{\link[ade4]{dudi.pca}} from the
@@ -73,6 +75,8 @@
 
 \method{predict}{dapc}(object, newdata, prior = object$prior, dimen,
          method = c("plug-in", "predictive", "debiased"), ...)
+
+xval.dapc(object, n.pca, n.da, training.set = 90)
 }
 \arguments{
   \item{x}{\code{a data.frame}, \code{matrix}, or \code{\linkS4class{genind}}
@@ -132,6 +136,10 @@
     original ('training') data. In particular, variables must be exactly
     the same as in the original data. For  \linkS4class{genind}
     objects, see \code{\link{repool}} to ensure matching of alleles.}
+  \item{training.set}{the percentage of individuals randomly chosen in each population 
+    as the training set used for cross-validation. This value is applied to all groups/pops 
+    defined in the object. The default is set to 90\%. 
+    For meaningful cross-validation it is recommended not to go below 80\%.}
   \item{prior,dimen,method}{see \code{?predict.lda}.}
 }
 \details{
@@ -172,6 +180,8 @@
   \item{var.contr}{(optional) a data.frame giving the contributions of original
     variables (alleles in the case of genetic data) to the principal components
     of DAPC.}
+  \item{match.prp}{a list, where each item is the proportion of individuals 
+    correctly matched to their original population in cross-validation.}
 
   
   === other outputs ===\cr
@@ -181,7 +191,11 @@
   \code{assign.prop} (proportion of overall correct assignment),
   \code{assign.per.pop} (proportion of correct assignment per group),
   \code{prior.grp.size} (prior group sizes), and \code{post.grp.size} (posterior
-  group sizes).
+  group sizes),  \code{xval.dapc} (returns a list of four lists, each one with as
+  many items as cross-validation runs.  The first item is a list of \code{assign} components,
+  the second is a list of \code{posterior} components, the third is a list of \code{ind.score}
+  components and the fourth is a list of \code{match.prp} items, i.e. the proportion of the validation
+  set correctly matched to its original population)
 }
 \references{
   Jombart T, Devillard S and Balloux F  (2010) Discriminant analysis of
@@ -291,8 +305,28 @@
 ## image using compoplot
 compoplot(dapc1, new.pred=hyb.pred, ncol=2)
 title("30 indiv popA, 30 indiv pop B, 30 hybrids")
+
+## CROSS-VALIDATION ##
+# select dataset
+data(microbov)
+summary(microbov) # the dataset contains 15 populations of different sizes
+
+# we take a fixed number of discriminant functions (15 in this case)
+# and we test how the cross-validation does varying the number of PCs
+# we specify the *maximum* number of PCs, and we will test how
+# the cross-validation performs by going from 1 PC to the maximum
+# we specified in the function call
+
+crossval.test <- xval.dapc(microbov, n.pca = 40, n.da = 15, training.set = 90)
+
+attributes(crossval.test) # we get four lists of lists
+# namely "assign" "posterior" "ind.score" "match.prp"
+# a quick visual inspection of the cross-validation 
+
+plot(unlist(crossval.test$match.prp))
+
 }
 
 
 }
-\keyword{multivariate}
\ No newline at end of file
+\keyword{multivariate}



More information about the adegenet-commits mailing list