[adegenet-commits] r851 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Mar 29 22:18:07 CEST 2011
Author: jombart
Date: 2011-03-29 22:18:07 +0200 (Tue, 29 Mar 2011)
New Revision: 851
Modified:
pkg/R/dapc.R
Log:
first attempt for predict.dapc
bit of a pain since new info must be stored in DAPC object
made this optional through argument pca.loadings
Modified: pkg/R/dapc.R
===================================================================
--- pkg/R/dapc.R 2011-03-11 18:09:56 UTC (rev 850)
+++ pkg/R/dapc.R 2011-03-29 20:18:07 UTC (rev 851)
@@ -7,7 +7,7 @@
## dapc.data.frame
###################
dapc.data.frame <- function(x, grp, n.pca=NULL, n.da=NULL,
- center=TRUE, scale=FALSE, var.contrib=TRUE,
+ center=TRUE, scale=FALSE, pca.loadings=TRUE, var.contrib=TRUE,
pca.select=c("nbEig","percVar"), perc.pca=NULL, ..., dudi=NULL){
## FIRST CHECKS
@@ -97,6 +97,7 @@
res$var <- XU.lambda
res$eig <- ldaX$svd^2
res$loadings <- ldaX$scaling[, 1:n.da, drop=FALSE]
+ res$means <- ldaX$means
res$ind.coord <-predX$x
res$grp.coord <- apply(res$ind.coord, 2, tapply, grp, mean)
res$prior <- ldaX$prior
@@ -104,7 +105,15 @@
res$assign <- predX$class
res$call <- match.call()
- ## optional: get loadings of alleles
+
+ ## optional: store loadings of variables
+ if(pca.loadings){
+ res$pca.loadings <- as.matrix(U)
+ res$pca.cent <- pcaX$cent
+ res$pca.norm <- pcaX$norm
+ }
+
+ ## optional: get loadings of variables
if(var.contrib){
res$var.contr <- as.matrix(U) %*% as.matrix(ldaX$scaling[,1:n.da,drop=FALSE])
f1 <- function(x){
@@ -137,7 +146,7 @@
## dapc.genind
#############
dapc.genind <- function(x, pop=NULL, n.pca=NULL, n.da=NULL,
- scale=FALSE, scale.method=c("sigma", "binom"), truenames=TRUE, var.contrib=TRUE,
+ scale=FALSE, scale.method=c("sigma", "binom"), truenames=TRUE, pca.loadings=TRUE, var.contrib=TRUE,
pca.select=c("nbEig","percVar"), perc.pca=NULL, ...){
## FIRST CHECKS
@@ -194,7 +203,7 @@
## dapc.genlight
#################
dapc.genlight <- function(x, pop=NULL, n.pca=NULL, n.da=NULL,
- scale=FALSE, var.contrib=TRUE,
+ scale=FALSE, pca.loadings=TRUE, var.contrib=TRUE,
pca.select=c("nbEig","percVar"), perc.pca=NULL, glPca=NULL, ...){
## FIRST CHECKS ##
if(!require(ade4, quiet=TRUE)) stop("ade4 library is required.")
@@ -240,7 +249,7 @@
} else {
myCol <- "black"
}
-
+
if(is.null(n.pca) & pca.select=="nbEig"){
plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
cat("Choose the number PCs to retain (>=1): ")
@@ -309,6 +318,7 @@
res$var <- XU.lambda
res$eig <- ldaX$svd^2
res$loadings <- ldaX$scaling[, 1:n.da, drop=FALSE]
+ res$means <- ldaX$means
res$ind.coord <-predX$x
res$grp.coord <- apply(res$ind.coord, 2, tapply, pop.fac, mean)
res$prior <- ldaX$prior
@@ -316,7 +326,15 @@
res$assign <- predX$class
res$call <- match.call()
- ## optional: get loadings of alleles
+
+ ## optional: store loadings of variables
+ if(pca.loadings){
+ res$pca.loadings <- as.matrix(U)
+ res$pca.cent <- glMean(toto,alleleAsUnit=FALSE)
+ res$pca.norm <- sqrt(glVar(toto,alleleAsUnit=FALSE))
+ }
+
+ ## optional: get loadings of variables
if(var.contrib){
res$var.contr <- as.matrix(U) %*% as.matrix(ldaX$scaling[,1:n.da,drop=FALSE])
f1 <- function(x){
@@ -357,37 +375,55 @@
cat(" ...\n\n")
## vectors
- sumry <- array("", c(4, 3), list(1:4, c("vector", "length", "content")))
+ TABDIM <- 4
+ if(!is.null(x$pca.loadings)){
+ TABDIM <- TABDIM + 2
+ }
+ sumry <- array("", c(TABDIM, 3), list(1:4, c("vector", "length", "content")))
sumry[1, ] <- c('$eig', length(x$eig), 'eigenvalues')
sumry[2, ] <- c('$grp', length(x$grp), 'prior group assignment')
sumry[3, ] <- c('$prior', length(x$prior), 'prior group probabilities')
sumry[4, ] <- c('$assign', length(x$assign), 'posterior group assignment')
+ if(!is.null(x$pca.loadings)){
+ sumry[5, ] <- c('$pca.cent', length(x$pca.cent), 'centring vector of PCA')
+ sumry[6, ] <- c('$pca.norm', length(x$pca.norm), 'scaling vector of PCA')
+ }
class(sumry) <- "table"
print(sumry)
## data.frames
cat("\n")
+ TABDIM <- 6
+ if(!is.null(x$pca.loadings)){
+ TABDIM <- TABDIM + 1
+ }
if(!is.null(x$var.contr)){
- sumry <- array("", c(6, 4), list(1:6, c("data.frame", "nrow", "ncol", "content")))
- } else {
- sumry <- array("", c(5, 4), list(1:5, c("data.frame", "nrow", "ncol", "content")))
+ TABDIM <- TABDIM + 1
}
+
+ sumry <- array("", c(TABDIM, 4), list(1:5, c("data.frame", "nrow", "ncol", "content")))
+
sumry[1, ] <- c("$tab", nrow(x$tab), ncol(x$tab), "retained PCs of PCA")
- sumry[2, ] <- c("$loadings", nrow(x$loadings), ncol(x$loadings), "loadings of variables")
- sumry[3, ] <- c("$ind.coord", nrow(x$ind.coord), ncol(x$ind.coord), "coordinates of individuals (principal components)")
- sumry[4, ] <- c("$grp.coord", nrow(x$grp.coord), ncol(x$grp.coord), "coordinates of groups")
- sumry[5, ] <- c("$posterior", nrow(x$posterior), ncol(x$posterior), "posterior membership probabilities")
+ sumry[2, ] <- c("$means", nrow(x$means), ncol(x$means), "group means")
+ sumry[3, ] <- c("$loadings", nrow(x$loadings), ncol(x$loadings), "loadings of variables")
+ sumry[4, ] <- c("$ind.coord", nrow(x$ind.coord), ncol(x$ind.coord), "coordinates of individuals (principal components)")
+ sumry[5, ] <- c("$grp.coord", nrow(x$grp.coord), ncol(x$grp.coord), "coordinates of groups")
+ sumry[6, ] <- c("$posterior", nrow(x$posterior), ncol(x$posterior), "posterior membership probabilities")
+ if(!is.null(x$pca.loadings)){
+ sumry[7, ] <- c("$pca.loadings", nrow(x$pca.loadings), ncol(x$pca.loadings), "PCA loadings of original variables")
+ }
if(!is.null(x$var.contr)){
- sumry[6, ] <- c("$var.contr", nrow(x$var.contr), ncol(x$var.contr), "contribution of original variables")
+ sumry[TABDIM, ] <- c("$var.contr", nrow(x$var.contr), ncol(x$var.contr), "contribution of original variables")
}
class(sumry) <- "table"
print(sumry)
- cat("\nother elements: ")
- if (length(names(x)) > 14)
- cat(names(x)[14:(length(names(x)))], "\n")
- else cat("NULL\n")
-}
+ ## cat("\nother elements: ")
+ ## if (length(names(x)) > 15)
+ ## cat(names(x)[15:(length(names(x)))], "\n")
+ ## else cat("NULL\n")
+ cat("\n")
+} # end print.dapc
@@ -682,6 +718,81 @@
+#############
+## as.lda.dapc
+#############
+as.lda <- function(...){
+ UseMethod("as.lda")
+}
+
+as.lda.dapc <- function(x){
+ if(!inherits(x,"dapc")) stop("x is not a dapc object")
+ res <- list()
+
+ res$N <- nrow(res$ind.coord)
+ res$call <- match.call()
+ res$counts <- as.integer(table(x$grp))
+ res$lev <- names(res$counts) <- levels(x$grp)
+ res$means <- x$means
+ res$prior <- x$prior
+ res$scaling <- x$loadings
+ res$svd <- sqrt(x$eig)
+
+ class(res) <- "lda"
+
+ return(res)
+} # end as.lda.dapc
+
+
+
+
+
+
+##############
+## predict.dapc
+##############
+predict.dapc <- function(object, newdata, prior = object$prior, dimen,
+ method = c("plug-in", "predictive", "debiased"), ...){
+ if(!inherits(x,"dapc")) stop("x is not a dapc object")
+
+ x <- as.lda(object)
+
+
+ ## HANDLE NEW DATA ##
+ if(!missing(newdata)){
+ ## make a few checks
+ if(is.null(object$pca.loadings)) stop("DAPC object does not contain loadings of original variables. \nPlease re-run DAPC using 'pca.loadings=TRUE'.")
+ newdata <- as.matrix(newdata) # to force conversion, notably from genlight objects
+ if(ncol(newdata) != nrow(object$pca.loadings)) stop("Number of variables in newdata does not match original data.")
+
+ ## centre/scale data
+ for(i in 1:nrow(newdata)){ # this is faster for large, flat matrices)
+ newdata[i,] <- (newdata[i,] - object$pca.cent) / object$pca.norm
+ }
+
+ ## project as supplementary individuals
+ XU <- newdata %*% object$pca.loadings
+ }
+
+
+ ## HANDLE DIMEN ##
+ if(!missing(dimen)){
+ if(dimen > object$n.da) stop(paste("Too many dimensions requested. \nOnly", object$n.da, "discriminant functions were saved in DAPC."))
+ }
+
+ ## CALL PREDICT.LDA ##
+ res <- predict(x, newdata, prior, dimen, method, ...)
+
+ return(res)
+
+} # end predict.dapc
+
+
+
+
+
+
+
## ############
## ## crossval
## ############
More information about the adegenet-commits
mailing list