[adegenet-commits] r1128 - in pkg: . R man vignettes
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue May 14 23:54:19 CEST 2013
Author: jombart
Date: 2013-05-14 23:54:19 +0200 (Tue, 14 May 2013)
New Revision: 1128
Modified:
pkg/ChangeLog
pkg/R/dapc.R
pkg/R/find.clust.R
pkg/R/scale.R
pkg/R/spca.R
pkg/man/as.genlight.Rd
pkg/man/ascore.Rd
pkg/man/dapc.Rd
pkg/man/dapcGraphics.Rd
pkg/man/eHGDP.Rd
pkg/man/find.clusters.Rd
pkg/man/inbreeding.Rd
pkg/man/loadingplot.Rd
pkg/man/read.structure.Rd
pkg/man/scale.Rd
pkg/man/seqTrack.Rd
pkg/man/sequences.Rd
pkg/man/spca.Rd
pkg/man/spcaIllus.Rd
pkg/vignettes/adegenet-spca.Rnw
Log:
Fixed release 1.3-8; new version of xvalDapc;removed method argument in scaleGen
Modified: pkg/ChangeLog
===================================================================
--- pkg/ChangeLog 2013-05-14 15:21:10 UTC (rev 1127)
+++ pkg/ChangeLog 2013-05-14 21:54:19 UTC (rev 1128)
@@ -7,8 +7,8 @@
o new function any2col translates (numeric, factor, character)
vectors into colors, also providing information for a legend
- o new function xval.dapc (and its wrapper xval), that performs
- cross-validation for a dapc analysis.
+ o new function xvalDapc which performs cross-validation for a dapc
+ analysis.
Modified: pkg/R/dapc.R
===================================================================
--- pkg/R/dapc.R 2013-05-14 15:21:10 UTC (rev 1127)
+++ pkg/R/dapc.R 2013-05-14 21:54:19 UTC (rev 1128)
@@ -64,7 +64,7 @@
## keep relevant PCs - stored in XU
X.rank <- sum(pcaX$eig > 1e-14)
n.pca <- min(X.rank, n.pca)
- if(n.pca >= N) stop("number of retained PCs of PCA is greater than N")
+ if(n.pca >= N) n.pca <- N-1
if(n.pca > N/3) warning("number of retained PCs of PCA may be too large (> N /3)\n results may be unstable ")
n.pca <- round(n.pca)
@@ -152,7 +152,7 @@
## dapc.genind
#############
dapc.genind <- function(x, pop=NULL, n.pca=NULL, n.da=NULL,
- scale=FALSE, scale.method=c("sigma", "binom"), truenames=TRUE, var.contrib=TRUE, pca.info=TRUE,
+ scale=FALSE, truenames=TRUE, var.contrib=TRUE, pca.info=TRUE,
pca.select=c("nbEig","percVar"), perc.pca=NULL, ...){
## FIRST CHECKS
@@ -176,7 +176,7 @@
## PERFORM PCA ##
maxRank <- min(dim(x at tab))
- X <- scaleGen(x, center = TRUE, scale = scale, method = scale.method,
+ X <- scaleGen(x, center = TRUE, scale = scale,
missing = "mean", truenames = truenames)
## CALL DATA.FRAME METHOD ##
@@ -299,7 +299,7 @@
N <- nInd(x)
X.rank <- sum(pcaX$eig > 1e-14)
n.pca <- min(X.rank, n.pca)
- if(n.pca >= N) stop("number of retained PCs of PCA is greater than N")
+ if(n.pca >= N) n.pca <- N-1
if(n.pca > N/3) warning("number of retained PCs of PCA may be too large (> N /3)\n results may be unstable ")
U <- pcaX$loadings[, 1:n.pca, drop=FALSE] # principal axes
@@ -983,37 +983,89 @@
-## ############
-## ## crossval
-## ############
+############
+## crossval
+############
-xval.dapc <- function(object, n.pca, n.da, training.set = 90, ...){
- training.set = training.set/100
- kept.id <- unlist(tapply(1:nInd(object), pop(object), function(e) {pop.size = length(e); pop.size.train = round(pop.size * training.set); sample(e, pop.size.train, replace=FALSE)}))
- training <- object[kept.id]
- validating <- object[-kept.id]
- post = vector(mode = 'list', length = n.pca)
- asgn = vector(mode = 'list', length = n.pca)
- ind = vector(mode = 'list', length = n.pca)
- mtch = vector(mode = 'list', length = n.pca)
- for(i in 1:n.pca){
- dapc.base = dapc(training, n.pca = i, n.da = 15)
- dapc.p = predict.dapc(dapc.base, newdata = validating)
- match.prp = mean(as.character(dapc.p$assign)==as.character(pop(validating)))
- post[[i]] = dapc.p$posterior
- asgn[[i]] = dapc.p$assign
- ind[[i]] = dapc.p$ind.score
- mtch[[i]] = match.prp
- }
- res = list(assign = asgn, posterior = post, ind.score = ind, match.prp = mtch)
- return(res)
-} # end of xval.dapc
+xvalDapc <- function (x, ...) UseMethod("xvalDapc")
-xval <- function (object, n.pca, n.da, training.set, ...) UseMethod("xval")
-xval.genind <- function(object, n.pca, n.da, training.set = 90, ...){
- res = xval.dapc(object = object, n.pca = n.pca, n.da = n.da, training.set = training.set)
- return(res)
+xvalDapc.data.frame <- function(x, grp, n.pca.max, n.da=NULL, training.set = 1/2,
+ center=TRUE, scale=FALSE,
+ n.pca=NULL, n.rep=10, ...){
+
+ ## CHECKS ##
+ grp <- factor(grp)
+ n.pca <- n.pca[n.pca>0]
+ if(is.null(n.da)) {
+ n.da <- length(levels(grp))-1
+ }
+
+ ## GET TRAINING SET SIZE ##
+ N <- nrow(x)
+ N.training <- round(N*training.set)
+
+ ## GET FULL PCA ##
+ pcaX <- dudi.pca(x, nf=n.pca.max, scannf=FALSE, center=center, scale=scale)
+ n.pca.max <- min(n.pca.max,pcaX$rank,N.training-1)
+
+ ## DETERMINE N.PCA IF NEEDED ##
+ if(is.null(n.pca)){
+ n.pca <- round(pretty(1:n.pca.max,10))
+ }
+ n.pca <- n.pca[n.pca>0 & n.pca<(N.training-1)]
+
+ ## FUNCTION GETTING THE % OF ACCURATE PREDICTION FOR ONE NUMBER OF PCA PCs ##
+ ## n.pca is a number of retained PCA PCs
+ get.prop.pred <- function(n.pca){
+ f1 <- function(){
+ toKeep <- sample(1:N, N.training)
+ temp.pca <- pcaX
+ temp.pca$li <- temp.pca$li[toKeep,,drop=FALSE]
+ temp.dapc <- suppressWarnings(dapc(x[toKeep,,drop=FALSE], grp[toKeep], n.pca=n.pca, n.da=n.da, dudi=temp.pca))
+ temp.pred <- predict.dapc(temp.dapc, newdata=x[-toKeep,,drop=FALSE])
+ return(mean(temp.pred$assign==grp[-toKeep]))
+ }
+ return(replicate(n.rep, f1()))
+ }
+
+
+ ## GET %SUCCESSFUL OF ACCURATE PREDICTION FOR ALL VALUES ##
+ res.all <- unlist(lapply(n.pca, get.prop.pred))
+ res <- list(success=res.all, n.pca=factor(rep(n.pca, each=n.rep)))
+ return(res)
}
+
+
+xvalDapc.matrix <- xvalDapc.data.frame
+
+
+## There's a bunch of problems down there, commenting it for nowé
+## xval.dapc <- function(object, n.pca, n.da, training.set = 90, ...){
+## training.set = training.set/100
+## kept.id <- unlist(tapply(1:nInd(object), pop(object), function(e) {pop.size = length(e); pop.size.train = round(pop.size * training.set); sample(e, pop.size.train, replace=FALSE)})) # this can't work: nInd/pop not defined for DAPC objects
+## training <- object[kept.id]
+## validating <- object[-kept.id]
+## post = vector(mode = 'list', length = n.pca)
+## asgn = vector(mode = 'list', length = n.pca)
+## ind = vector(mode = 'list', length = n.pca)
+## mtch = vector(mode = 'list', length = n.pca)
+## for(i in 1:n.pca){
+## dapc.base = dapc(training, n.pca = i, n.da = 15) # Why 15??
+## dapc.p = predict.dapc(dapc.base, newdata = validating)
+## match.prp = mean(as.character(dapc.p$assign)==as.character(pop(validating)))
+## post[[i]] = dapc.p$posterior
+## asgn[[i]] = dapc.p$assign
+## ind[[i]] = dapc.p$ind.score
+## mtch[[i]] = match.prp
+## }
+## res = list(assign = asgn, posterior = post, ind.score = ind, match.prp = mtch)
+## return(res)
+## } # end of xval.dapc
+
+## xval.genind <- function(object, n.pca, n.da, training.set = 90, ...){
+## res = xval.dapc(object = object, n.pca = n.pca, n.da = n.da, training.set = training.set)
+## return(res)
+## }
## ###############
## ## randtest.dapc
## ###############
Modified: pkg/R/find.clust.R
===================================================================
--- pkg/R/find.clust.R 2013-05-14 15:21:10 UTC (rev 1127)
+++ pkg/R/find.clust.R 2013-05-14 21:54:19 UTC (rev 1128)
@@ -193,7 +193,7 @@
find.clusters.genind <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, stat=c("BIC", "AIC", "WSS"), choose.n.clust=TRUE,
criterion=c("diffNgroup", "min","goesup", "smoothNgoesup", "goodfit"),
max.n.clust=round(nrow(x at tab)/10), n.iter=1e5, n.start=10,
- scale=FALSE, scale.method=c("sigma", "binom"), truenames=TRUE, ...){
+ scale=FALSE, truenames=TRUE, ...){
## CHECKS ##
if(!require(ade4, quietly=TRUE)) stop("ade4 library is required.")
@@ -210,7 +210,7 @@
## PERFORM PCA ##
maxRank <- min(dim(x at tab))
- X <- scaleGen(x, center = TRUE, scale = scale, method = scale.method,
+ X <- scaleGen(x, center = TRUE, scale = scale,
missing = "mean", truenames = truenames)
## CALL DATA.FRAME METHOD
Modified: pkg/R/scale.R
===================================================================
--- pkg/R/scale.R 2013-05-14 15:21:10 UTC (rev 1127)
+++ pkg/R/scale.R 2013-05-14 21:54:19 UTC (rev 1128)
@@ -4,13 +4,11 @@
setGeneric("scaleGen", function(x,...){standardGeneric("scaleGen")})
setMethod("scaleGen", "genind", function(x, center=TRUE, scale=TRUE,
- method=c("sigma", "binom"), missing=c("NA","0","mean"), truenames=TRUE){
+ missing=c("NA","0","mean"), truenames=TRUE){
THRES <- 1e-10
- method <- match.arg(method)
missing <- match.arg(missing)
## checkType(x)
- if(method=="binom" & x at type=="PA") stop("This scaling is not available for presence/absence markers.")
## handle "missing" arg
if(missing %in% c("0","mean")){
@@ -18,7 +16,7 @@
}
## handle specific cases
- if(scale[1] & tolower(method)=="binom"){
+ if(scale[1]){
## get allele freq
temp <- apply(x$tab,2,mean,na.rm=TRUE)
if(x at type=="codom"){
@@ -59,13 +57,11 @@
setMethod("scaleGen", "genpop", function(x, center=TRUE, scale=TRUE,
- method=c("sigma", "binom"), missing=c("NA","0","mean"), truenames=TRUE){
+ missing=c("NA","0","mean"), truenames=TRUE){
THRES <- 1e-10
- method <- match.arg(method)
missing <- match.arg(missing)
## checkType(x)
- if(method=="binom" & x at type=="PA") stop("This scaling is not available for presence/absence markers.")
## make allele frequencies here
if(x at type=="codom"){
@@ -75,7 +71,7 @@
}
## handle specific cases
- if(scale[1] & tolower(method)=="binom"){
+ if(scale[1]){
## get allele freq
temp <- apply(X,2,mean,na.rm=TRUE)
if(x at type=="codom"){
Modified: pkg/R/spca.R
===================================================================
--- pkg/R/spca.R 2013-05-14 15:21:10 UTC (rev 1127)
+++ pkg/R/spca.R 2013-05-14 21:54:19 UTC (rev 1128)
@@ -16,7 +16,7 @@
# spca genind
################
spca <- function(obj, xy=NULL, cn=NULL, matWeight=NULL,
- scale=FALSE, scale.method=c("sigma","binom"),
+ scale=FALSE,
scannf=TRUE, nfposi=1, nfnega=1,
type=NULL, ask=TRUE, plot.nb=TRUE, edit.nb=FALSE,
truenames=TRUE, d1=NULL, d2=NULL, k=NULL, a=NULL, dmin=NULL){
@@ -89,7 +89,7 @@
}
## handle NAs, centring and scaling
- X <- scaleGen(obj, center=TRUE, scale=scale, method=scale.method, missing="mean", truenames=truenames)
+ X <- scaleGen(obj, center=TRUE, scale=scale, missing="mean", truenames=truenames)
## perform analyses
pcaX <- dudi.pca(X, center=FALSE, scale=FALSE, scannf=FALSE)
Modified: pkg/man/as.genlight.Rd
===================================================================
--- pkg/man/as.genlight.Rd 2013-05-14 15:21:10 UTC (rev 1127)
+++ pkg/man/as.genlight.Rd 2013-05-14 21:54:19 UTC (rev 1128)
@@ -45,6 +45,7 @@
- \code{\linkS4class{genind}}
}
\examples{
+\dontrun{
## data to be converted
dat <- list(toto=c(1,1,0,0,2,2,1,2,NA), titi=c(NA,1,1,0,1,1,1,0,0), tata=c(NA,0,3, NA,1,1,1,0,0))
@@ -58,7 +59,7 @@
identical(x1,x2)
identical(x1,x3)
+}
-
}
\keyword{classes}
Modified: pkg/man/ascore.Rd
===================================================================
--- pkg/man/ascore.Rd 2013-05-14 15:21:10 UTC (rev 1127)
+++ pkg/man/ascore.Rd 2013-05-14 21:54:19 UTC (rev 1128)
@@ -10,7 +10,8 @@
\usage{
a.score(x, n.sim=10, \ldots)
-optim.a.score(x, n.pca=1:ncol(x$tab), smart=TRUE, n=10, plot=TRUE, n.sim=10, n.da=length(levels(x$grp)), \ldots)
+optim.a.score(x, n.pca=1:ncol(x$tab), smart=TRUE, n=10, plot=TRUE,
+ n.sim=10, n.da=length(levels(x$grp)), \ldots)
}
\arguments{
\item{x}{a \code{dapc} object.}
Modified: pkg/man/dapc.Rd
===================================================================
--- pkg/man/dapc.Rd 2013-05-14 15:21:10 UTC (rev 1127)
+++ pkg/man/dapc.Rd 2013-05-14 21:54:19 UTC (rev 1128)
@@ -9,9 +9,9 @@
\alias{print.dapc}
\alias{summary.dapc}
\alias{predict.dapc}
-\alias{xval.dapc}
-\alias{xval}
-\alias{xval.genind}
+\alias{xvalDapc}
+\alias{xvalDapc.data.frame}
+\alias{xvalDapc.matrix}
\alias{as.lda}
\alias{as.lda.dapc}
\title{Discriminant Analysis of Principal Components (DAPC)}
@@ -31,18 +31,19 @@
- \code{matrix} (only numeric data)\cr
- \code{\linkS4class{genind}} objects (genetic markers)\cr
- \code{\linkS4class{genlight}} objects (genome-wide SNPs)
-
+
These methods all return an object with class \code{dapc}.
Functions that can be applied to these objects are (the ".dapc" can be
ommitted):
- \code{print.dapc}: prints the content of a \code{dapc} object.\cr
- - \code{summary.dapc}: extracts useful information from a \code{dapc} object.\cr
+ - \code{summary.dapc}: extracts useful information from a \code{dapc}
+ object.\cr
- \code{predict.dapc}: predicts group memberships based on DAPC results.\cr
- - \code{xval.dapc}: performs cross-validation of DAPC function varying the number of PCs and keeping the number of DAs fixed.
- - \code{xval}: performs cross-validation of DAPC function varying the number of PCs and keeping the number of DAs fixed.
- - \code{xval.genind}: performs cross-validation of DAPC function varying the number of PCs and keeping the number of DAs fixed.
+ - \code{xvalDapc}: performs cross-validation of DAPC using varying
+ numbers of PCs (and keeping the number of discriminant functions
+ fixed); it currently has methods for \code{data.frame} and \code{matrix}.\cr
@@ -55,7 +56,6 @@
\code{as.lda} is a generic with a method for \code{dapc} object which
converts these objects into outputs similar to that of
\code{lda.default}.
-
}
\usage{
\method{dapc}{data.frame}(x, grp, n.pca=NULL, n.da=NULL, center=TRUE,
@@ -65,8 +65,8 @@
\method{dapc}{matrix}(x, \ldots)
\method{dapc}{genind}(x, pop=NULL, n.pca=NULL, n.da=NULL, scale=FALSE,
- scale.method=c("sigma", "binom"), truenames=TRUE, var.contrib=TRUE,
- pca.info=TRUE, pca.select=c("nbEig","percVar"), perc.pca=NULL, \ldots)
+ truenames=TRUE, var.contrib=TRUE, pca.info=TRUE,
+ pca.select=c("nbEig","percVar"), perc.pca=NULL, \ldots)
\method{dapc}{genlight}(x, pop = NULL, n.pca = NULL, n.da = NULL, scale
= FALSE, var.contrib = TRUE, pca.info=TRUE, pca.select = c("nbEig", "percVar"),
@@ -81,9 +81,13 @@
\method{predict}{dapc}(object, newdata, prior = object$prior, dimen,
method = c("plug-in", "predictive", "debiased"), ...)
-\method{xval}{dapc}(object, n.pca, n.da, training.set = 90, \ldots)
+\method{xvalDapc}{data.frame}(x, grp, n.pca.max, n.da=NULL,
+ training.set = 1/2, center=TRUE, scale=FALSE,
+ n.pca=NULL, n.rep=10, \ldots)
-\method{xval}{genind}(object, n.pca, n.da, training.set = 90, \ldots)
+\method{xvalDapc}{matrix}(x, grp, n.pca.max, n.da=NULL,
+ training.set = 1/2, center=TRUE, scale=FALSE,
+ n.pca=NULL, n.rep=10, \ldots)
}
\arguments{
\item{x}{\code{a data.frame}, \code{matrix}, or \code{\linkS4class{genind}}
@@ -100,8 +104,7 @@
\item{scale}{a \code{logical} indicating whether variables should be scaled
(TRUE) or not (FALSE, default). Scaling consists in dividing variables by their
(estimated) standard deviation to account for trivial differences in
- variances. Further scaling options are available for \linkS4class{genind}
- objects (see argument \code{scale.method}).}
+ variances.}
\item{var.contrib}{a \code{logical} indicating whether the
contribution of original variables (alleles, for \linkS4class{genind} objects)
should be provided (TRUE, default) or not (FALSE). Such output can be useful,
@@ -127,10 +130,6 @@
dimension reduction is not performed (saving computational time) but
taken directly from this object.}
\item{object}{a \code{dapc} object.}
- \item{scale.method}{a \code{character} specifying the scaling method to be used
- for allele frequencies, which must match "sigma" (usual estimate of standard
- deviation) or "binom" (based on binomial distribution). See \code{\link{scaleGen}} for
- further details.}
\item{truenames}{a \code{logical} indicating whether true (i.e., user-specified)
labels should be used in object outputs (TRUE, default) or not (FALSE).}
\item{dudi}{optionally, a multivariate analysis with the class
@@ -143,10 +142,11 @@
original ('training') data. In particular, variables must be exactly
the same as in the original data. For \linkS4class{genind}
objects, see \code{\link{repool}} to ensure matching of alleles.}
- \item{training.set}{the percentage of individuals randomly chosen in each population
- as the training set used for cross-validation. This value is applied to all groups/pops
- defined in the object. The default is set to 90\%.
- For meaningful cross-validation it is recommended not to go below 80\%}
+ \item{n.pca.max}{maximum number of PCA components to retain.}
+ \item{training.set}{the proportion of data (individuals) to be used
+ for the training set; defaults to one half.}
+ \item{n.rep}{the number of replicate to be used for each number of PCA
+ components retained.}
\item{prior,dimen,method}{see \code{?predict.lda}.}
}
\details{
@@ -179,8 +179,9 @@
\item{tab}{matrix of retained principal components of PCA}
\item{loadings}{principal axes of DAPC, giving coefficients of the linear
combination of retained PCA axes.}
- \item{ind.coord}{principal components of DAPC, giving the coordinates of individuals onto
- principal axes of DAPC; also called the discriminant functions.}
+ \item{ind.coord}{principal components of DAPC, giving the coordinates
+ of individuals onto principal axes of DAPC; also called the
+ discriminant functions.}
\item{grp.coord}{coordinates of the groups onto the principal axes of DAPC.}
\item{posterior}{a data.frame giving posterior membership probabilities for
all individuals and all clusters.}
@@ -198,10 +199,12 @@
\code{assign.prop} (proportion of overall correct assignment),
\code{assign.per.pop} (proportion of correct assignment per group),
\code{prior.grp.size} (prior group sizes), and \code{post.grp.size} (posterior
- group sizes), \code{xval.dapc}, \code{xval.genind} and \code{xval} (all return a list of four lists, each one with as
- many items as cross-validation runs. The first item is a list of \code{assign} components,
- the secon is a list of \code{posterior} components, the thirs is a list of \code{ind.score}
- components and the fourth is a list of \code{match.prp} items, i.e. the prortion of the validation
+ group sizes), \code{xval.dapc}, \code{xval.genind} and \code{xval}
+ (all return a list of four lists, each one with as many items as
+ cross-validation runs. The first item is a list of \code{assign}
+ components, the secon is a list of \code{posterior} components, the
+ thirs is a list of \code{ind.score} components and the fourth is a
+ list of \code{match.prp} items, i.e. the prortion of the validation
set correctly matched to its original population)
}
\references{
@@ -314,30 +317,12 @@
title("30 indiv popA, 30 indiv pop B, 30 hybrids")
## CROSS-VALIDATION ##
-# select dataset
-data(microbov)
-summary(microbov) # the dataset contains 15 populations of different sizes
+data(sim2pop)
+xval <- xvalDapc(sim2pop at tab, pop(sim2pop), n.pca.max=100, n.rep=3)
+xval
+boxplot(xval$success~xval$n.pca, xlab="Number of PCA components",
+ylab="Classification succes", main="DAPC - cross-validation")
-# we take a fixed number of disriminant functions (15 in this case)
-# and we test how the cross-validation does varying the number of PCs
-# we specify the *maximum* number of PCs, and we will test how
-# the cross-validation performs by going from 1 PC to the maximum
-# we specified in the fucntion call
-
-crossval.test <- xval.dapc(microbov, n.pca = 40, n.da = 15, training.set = 90)
-
-attributes(crossval.test) # we get four lists of lists
-# namely "assign" "posterior" "ind.score" "match.prp"
-# a quick visual inspection of the cross-validation
-
-plot(unlist(crossval.test$match.prp))
-
-# the use can also just call xval:
-crossval.test2 <- xval(microbov, n.pca = 40, n.da = 15, training.set = 90)
-plot(unlist(crossval.test2$match.prp))
-
-
-
}
Modified: pkg/man/dapcGraphics.Rd
===================================================================
--- pkg/man/dapcGraphics.Rd 2013-05-14 15:21:10 UTC (rev 1127)
+++ pkg/man/dapcGraphics.Rd 2013-05-14 21:54:19 UTC (rev 1128)
@@ -22,23 +22,23 @@
}
\usage{
\method{scatter}{dapc}(x, xax=1, yax=2, grp=x$grp, col=rainbow(length(levels(grp))),
- pch=20, bg="lightgrey", solid=.7, scree.da=TRUE,
- scree.pca=FALSE, posi.da="bottomright",
- posi.pca="bottomleft", bg.inset="white", ratio.da=.25,
- ratio.pca=.25, inset.da=0.02, inset.pca=0.02,
- inset.solid=.5, onedim.filled=TRUE, mstree=FALSE, lwd=1,
- lty=1, segcol="black", legend=FALSE, posi.leg="topright",
- cleg=1, txt.leg=levels(grp), cstar = 1, cellipse = 1.5,
- axesell = FALSE, label = levels(grp), clabel = 1, xlim =
- NULL, ylim = NULL, grid = FALSE, addaxes = TRUE, origin =
- c(0,0), include.origin = TRUE, sub = "", csub = 1, possub =
- "bottomleft", cgrid = 1, pixmap = NULL, contour = NULL, area
- = NULL, \ldots)
+ pch=20, bg="lightgrey", solid=.7, scree.da=TRUE,
+ scree.pca=FALSE, posi.da="bottomright",
+ posi.pca="bottomleft", bg.inset="white", ratio.da=.25,
+ ratio.pca=.25, inset.da=0.02, inset.pca=0.02,
+ inset.solid=.5, onedim.filled=TRUE, mstree=FALSE, lwd=1,
+ lty=1, segcol="black", legend=FALSE, posi.leg="topright",
+ cleg=1, txt.leg=levels(grp), cstar = 1, cellipse = 1.5,
+ axesell = FALSE, label = levels(grp), clabel = 1, xlim =
+ NULL, ylim = NULL, grid = FALSE, addaxes = TRUE, origin =
+ c(0,0), include.origin = TRUE, sub = "", csub = 1, possub =
+ "bottomleft", cgrid = 1, pixmap = NULL, contour = NULL, area
+ = NULL, \ldots)
assignplot(x, only.grp=NULL, subset=NULL, new.pred=NULL, cex.lab=.75,pch=3)
compoplot(x, only.grp=NULL, subset=NULL, new.pred=NULL, col=NULL, lab=NULL,
- legend=TRUE, txt.leg=NULL, ncol=4, posi=NULL, cleg=.8, bg=transp("white"), ...)
+ legend=TRUE, txt.leg=NULL, ncol=4, posi=NULL, cleg=.8, bg=transp("white"), ...)
}
\arguments{
\item{x}{a \code{dapc} object.}
@@ -156,7 +156,8 @@
inset.pca=c(.01,.3), lab=paste("year\n",2001:2006), axesel=FALSE, col=terrain.colors(10))
## without ellipses, use legend for groups
-scatter(dapc1, cell=0, cstar=0, scree.da=FALSE, clab=0, cex=3, solid=.4, bg="white", leg=TRUE, posi.leg="topleft")
+scatter(dapc1, cell=0, cstar=0, scree.da=FALSE, clab=0, cex=3,
+solid=.4, bg="white", leg=TRUE, posi.leg="topleft")
## only one axis
scatter(dapc1,1,1,scree.da=FALSE, legend=TRUE, solid=.4,bg="white")
@@ -174,7 +175,8 @@
dapc2
## plot results
-scatter(dapc2, scree.da=FALSE, leg=TRUE, txt.leg=paste("group", c('A','B')), col=c("red","blue"))
+scatter(dapc2, scree.da=FALSE, leg=TRUE, txt.leg=paste("group",
+c('A','B')), col=c("red","blue"))
## SNP contributions
loadingplot(dapc2$var.contr)
Modified: pkg/man/eHGDP.Rd
===================================================================
--- pkg/man/eHGDP.Rd 2013-05-14 15:21:10 UTC (rev 1127)
+++ pkg/man/eHGDP.Rd 2013-05-14 21:54:19 UTC (rev 1128)
@@ -30,7 +30,7 @@
d'Etude du Polymorphisme Humain (CEPH). See reference [4] for Native
American populations.
- This copy of the dataset was prepared by Francois Balloux (f.balloux at imperial.ac.uk).
+ This copy of the dataset was prepared by Francois Balloux.
}
\references{
[1] Rosenberg NA, Pritchard JK, Weber JL, Cann HM, Kidd KK, et
@@ -62,7 +62,8 @@
## PERFORM DAPC - USE POPULATIONS AS CLUSTERS
## to reproduce exactly analyses from the paper, use "n.pca=1000"
-dapc1 <- dapc(eHGDP, all.contrib=TRUE, scale=FALSE, n.pca=200, n.da=80) # takes 2 minutes
+dapc1 <- dapc(eHGDP, all.contrib=TRUE, scale=FALSE,
+n.pca=200, n.da=80) # takes 2 minutes
dapc1
## (see ?dapc for details about the output)
@@ -70,7 +71,8 @@
## SCREEPLOT OF EIGENVALUES
-barplot(dapc1$eig, main="eHGDP - DAPC eigenvalues", col=c("red","green","blue", rep("grey", 1000)))
+barplot(dapc1$eig, main="eHGDP - DAPC eigenvalues",
+col=c("red","green","blue", rep("grey", 1000)))
@@ -111,7 +113,8 @@
## and then
## plot(grp$Kstat, type="b", col="blue")
-grp <- find.clusters(eHGDP, max.n=30, n.pca=200, scale=FALSE, n.clust=4) # takes about 2 minutes
+grp <- find.clusters(eHGDP, max.n=30, n.pca=200,
+scale=FALSE, n.clust=4) # takes about 2 minutes
names(grp)
## (see ?find.clusters for details about the output)
@@ -120,7 +123,8 @@
## PERFORM DAPC - USE POPULATIONS AS CLUSTERS
## to reproduce exactly analyses from the paper, use "n.pca=1000"
-dapc2 <- dapc(eHGDP, pop=grp$grp, all.contrib=TRUE, scale=FALSE, n.pca=200, n.da=80) # takes around a 1 minute
+dapc2 <- dapc(eHGDP, pop=grp$grp, all.contrib=TRUE,
+scale=FALSE, n.pca=200, n.da=80) # takes around a 1 minute
dapc2
@@ -131,7 +135,8 @@
## MAP DAPC2 RESULTS
if(require(maps)){
-xy <- cbind(eHGDP$other$popInfo$Longitude, eHGDP$other$popInfo$Latitude)
+xy <- cbind(eHGDP$other$popInfo$Longitude,
+eHGDP$other$popInfo$Latitude)
myCoords <- apply(dapc2$ind.coord, 2, tapply, pop(eHGDP), mean)
Modified: pkg/man/find.clusters.Rd
===================================================================
--- pkg/man/find.clusters.Rd 2013-05-14 15:21:10 UTC (rev 1127)
+++ pkg/man/find.clusters.Rd 2013-05-14 21:54:19 UTC (rev 1128)
@@ -50,8 +50,7 @@
stat=c("BIC","AIC", "WSS"), choose.n.clust=TRUE,
criterion=c("diffNgroup", "min","goesup", "smoothNgoesup",
"goodfit"), max.n.clust=round(nrow(x at tab)/10), n.iter=1e5,
- n.start=10, scale=FALSE, scale.method=c("sigma", "binom"),
- truenames=TRUE, \ldots)
+ n.start=10, scale=FALSE, truenames=TRUE, \ldots)
\method{find.clusters}{genlight}(x, clust=NULL, n.pca=NULL,
n.clust=NULL, stat=c("BIC", "AIC",
@@ -118,11 +117,7 @@
percentage (interactively, or via \code{perc.pca}). }
\item{perc.pca}{a \code{numeric} value between 0 and 100 indicating the
minimal percentage of the total variance of the data to be expressed by the
- retained axes of PCA.}
- \item{scale.method}{a \code{character} specifying the scaling method to be used
- for allele frequencies, which must match "sigma" (usual estimate of standard
- deviation) or "binom" (based on binomial distribution). See
- \code{\link{scaleGen}} for further details.}
+ retained axes of PCA.}
\item{truenames}{a \code{logical} indicating whether true (i.e., user-specified)
labels should be used in object outputs (TRUE, default) or not
(FALSE), in which case generic labels are used.}
@@ -231,7 +226,8 @@
data(eHGDP)
## here, n.clust is specified, so that only on K value is used
-grp <- find.clusters(eHGDP, max.n=30, n.pca=200, scale=FALSE, n.clust=4) # takes about 2 minutes
+grp <- find.clusters(eHGDP, max.n=30, n.pca=200, scale=FALSE,
+n.clust=4) # takes about 2 minutes
names(grp)
grp$Kstat
grp$stat
@@ -258,7 +254,8 @@
## DETECTION WITH AIC (less clear-cut)
foo.AIC <- find.clusters(sim2pop, n.pca=100, choose=FALSE, stat="AIC")
-plot(foo.AIC$Kstat, type="o", xlab="number of clusters (K)", ylab="AIC", col="purple", main="Detection based on AIC")
+plot(foo.AIC$Kstat, type="o", xlab="number of clusters (K)",
+ylab="AIC", col="purple", main="Detection based on AIC")
points(2, foo.AIC$Kstat[2], pch="x", cex=3)
mtext(3, tex="'X' indicates the actual number of clusters")
@@ -276,7 +273,8 @@
x
plot(x)
grp <- find.clusters(x, n.pca=100, choose=FALSE, stat="BIC")
-plot(grp$Kstat, type="o", xlab="number of clusters (K)",ylab="BIC",main="find.clusters on a genlight object\n(two groups)")
+plot(grp$Kstat, type="o", xlab="number of clusters (K)",
+ylab="BIC",main="find.clusters on a genlight object\n(two groups)")
}
}
\keyword{multivariate}
Modified: pkg/man/inbreeding.Rd
===================================================================
--- pkg/man/inbreeding.Rd 2013-05-14 15:21:10 UTC (rev 1127)
+++ pkg/man/inbreeding.Rd 2013-05-14 21:54:19 UTC (rev 1128)
@@ -11,7 +11,8 @@
1 is acceptable.
}
\usage{
-inbreeding(x, pop = NULL, truenames = TRUE, res.type = c("sample", "function"), N = 200, M = N * 10)
+inbreeding(x, pop = NULL, truenames = TRUE, res.type = c("sample","function"),
+ N = 200, M = N * 10)
}
\arguments{
\item{x}{an object of class \linkS4class{genind}.}
@@ -22,9 +23,9 @@
used (TRUE, default) instead of generic labels (FALSE); used if
res.type is "matrix".}
\item{res.type}{a character string matching "sample" or "function",
- specifying whether the output should be a function giving the density of probability
- of F values ("function") or a sample of F values taken from this
- distribution ("sample", default).}
+ specifying whether the output should be a function giving the density
+ of probability of F values ("function") or a sample of F values taken
+ from this distribution ("sample", default).}
\item{N}{an integer indicating the size of the sample to be taken from
the distribution of F values.}
\item{M}{an integer indicating the number of different F values to be
@@ -43,10 +44,10 @@
probability for an individual to inherit two identical alleles from a
single ancestor.
- Let \eqn{p_i} refer to the frequency of allele \eqn{i} in the population. Let
- \eqn{h} be an variable which equates 1 if the individual is
- homozygote, and 0 otherwise. For one locus, the probability of being
- homozygote is computed as:
+ Let \eqn{p_i} refer to the frequency of allele \eqn{i} in the
+ population. Let \eqn{h} be an variable which equates 1 if the
+ individual is homozygote, and 0 otherwise. For one locus, the
+ probability of being homozygote is computed as:
\eqn{ F + (1-F) \sum_i p_i^2}
@@ -72,11 +73,13 @@
Fsamp <- inbreeding(lagun, N=30)
## plot the first 10 results
-invisible(sapply(Fsamp[1:10], function(e) plot(density(e), xlab="F", xlim=c(0,1), main="Density of the sampled F values")))
+invisible(sapply(Fsamp[1:10], function(e) plot(density(e), xlab="F",
+xlim=c(0,1), main="Density of the sampled F values")))
## compute means for all individuals
Fmean=sapply(Fsamp, mean)
-hist(Fmean, col="orange", xlab="mean value of F", main="Distribution of mean F across individuals")
+hist(Fmean, col="orange", xlab="mean value of F",
+main="Distribution of mean F across individuals")
## estimate inbreeding - return proba density functions
Fdens <- inbreeding(lagun, res.type="function")
@@ -85,6 +88,7 @@
Fdens[[1]]
## plot the first 10 functions
-invisible(sapply(Fdens[1:10], plot, ylab="Density", main="Density of probability of F values"))
+invisible(sapply(Fdens[1:10], plot, ylab="Density",
+main="Density of probability of F values"))
}
}
\ No newline at end of file
Modified: pkg/man/loadingplot.Rd
===================================================================
--- pkg/man/loadingplot.Rd 2013-05-14 15:21:10 UTC (rev 1127)
+++ pkg/man/loadingplot.Rd 2013-05-14 21:54:19 UTC (rev 1128)
@@ -13,10 +13,11 @@
\usage{
loadingplot(x, \dots)
-\method{loadingplot}{default}(x, at=NULL, threshold=quantile(x,0.75), axis=1, fac=NULL, byfac=FALSE,
+\method{loadingplot}{default}(x, at=NULL, threshold=quantile(x,0.75),
+ axis=1, fac=NULL, byfac=FALSE,
lab=NULL, cex.lab=0.7, cex.fac=1, lab.jitter=0,
- main="Loading plot", xlab="Variables", ylab="Loadings", srt = 0, adj = NULL, \dots)
-
+ main="Loading plot", xlab="Variables", ylab="Loadings",
+ srt = 0, adj = NULL, \dots)
}
\arguments{
\item{x}{either a vector with numeric values to be plotted, or a
Modified: pkg/man/read.structure.Rd
===================================================================
--- pkg/man/read.structure.Rd 2013-05-14 15:21:10 UTC (rev 1127)
+++ pkg/man/read.structure.Rd 2013-05-14 21:54:19 UTC (rev 1128)
@@ -17,7 +17,9 @@
\code{\link{df2genind}}.
}
\usage{
-read.structure(file, n.ind=NULL, n.loc=NULL, onerowperind=NULL, col.lab=NULL, col.pop=NULL, col.others=NULL, row.marknames=NULL, NA.char="-9", pop=NULL, missing=NA, ask=TRUE, quiet=FALSE)
+read.structure(file, n.ind=NULL, n.loc=NULL, onerowperind=NULL,
+ col.lab=NULL, col.pop=NULL, col.others=NULL, row.marknames=NULL,
+ NA.char="-9", pop=NULL, missing=NA, ask=TRUE, quiet=FALSE)
}
\arguments{
\item{file}{ a character string giving the path to the file to
@@ -31,9 +33,9 @@
labels of genotypes. '0' if absent.}
\item{col.pop}{an integer giving the index of the column containing
population to which genotypes belong. '0' if absent.}
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/adegenet -r 1128
More information about the adegenet-commits
mailing list