[Adephylo-commits] r176 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Feb 13 19:10:12 CET 2012
Author: jombart
Date: 2012-02-13 19:10:12 +0100 (Mon, 13 Feb 2012)
New Revision: 176
Modified:
pkg/R/tree.group.R
Log:
first working version, but results odd
Modified: pkg/R/tree.group.R
===================================================================
--- pkg/R/tree.group.R 2012-02-09 19:30:32 UTC (rev 175)
+++ pkg/R/tree.group.R 2012-02-13 18:10:12 UTC (rev 176)
@@ -1,14 +1,25 @@
-treeGroup <- function(x, grp, dat, FUN, n.boot=10,
- n.dens=4096, plot=TRUE, check.lab=TRUE, ...){
+##############
+## treeGroup
+##############
+treeGroup <- function(x, grp, dat, FUN, boot=FALSE, n.boot=10,
+ n.dens=4096, plot=TRUE, warn.lab=FALSE, ...){
if(!require(ape)) stop("ape package is required")
if(!inherits(x,"phylo")) stop("x is not a phylo object")
- if(check.lab && !identical(x$tip.label,rownames(dat))) warning("Tip labels in x and rownames of dat differ \nplease make sure the same order is used in x, grp, and dat")
+ if(warn.lab && !identical(x$tip.label,rownames(dat))) warning("Tip labels in x and rownames of dat differ \nplease make sure the same order is used in x, grp, and dat")
grp <- factor(grp)
- K <- length(lev <- levels(grp))
+ K <- length(LEV <- levels(grp))
N <- nrow(dat)
D <- cophenetic.phylo(x)
+ THRES <- 1e-320 # densities < THRES will be set to THRES to avoid log(x)=-Inf
+ ## RE-ORDER GRP AND DATA MATRIX AFTER TIP LABELS ##
+ if(is.null(rownames(dat))) rownames(dat) <- x$tip.label
+ if(!all(x$tip.label %in% rownames(dat))) stop("some tips do not have data matching their label")
+ grp <- grp[match(x$tip.label, rownames(dat))] # grp is assumed to be in the same order as 'dat'
+ dat <- dat[x$tip.label,,drop=FALSE]
+
+
#### AUXILIARY FUNCTIONS ####
## FUNCTION TO ESTIMATE A DENSITY AT A SERIES OF POINTS ##
compute.dens <- function(dens, values){
@@ -17,39 +28,67 @@
}
- ## FUNCTION TO GET A VECTOR OF PAIRWISE DISTANCE FOR ONE GROUP ##
- getdist.grp <- function(M, g){
- temp <- M[grp==g,grp==g]
+ ## FUNCTION TO GET A VECTOR OF PAIRWISE DISTANCE WITHIN ONE GROUP ##
+ getdist.within.grp <- function(M, fac, val){ # val is one level of fac
+ temp <- M[fac==val,fac==val]
return(temp[lower.tri(temp)])
}
+ ## FUNCTION TO GET A VECTOR OF PAIRWISE DISTANCES WITHIN GROUP, FOR ALL GROUPS ##
+ getdist.within.allgrp <- function(M, fac){
+ res <- lapply(LEV, function(e) getdist.within.grp(M, fac, e))
+ names(res) <- LEV
+ return(res)
+ }
+
+
## FUNCTION TO GET PROBA FOR ONE INDIV / ONE GROUP ##
getprob.ind <- function(i, g){ # i: idx of indiv; g: idx of a group
temp <- 1:ncol(D)
- find.dens(list.dens[[g]], D[i,grp==lev[g] & temp!=i])
- res <- exp(sum(log(find.dens(list.dens[[g]], D[i,grp==lev[g] & temp!=i]))))
+ dens <- compute.dens(list.dens[[g]], D[i,grp==LEV[g] & temp!=i])
+ dens[dens < THRES] <- THRES
+ res <- exp(mean(log(dens)))
return(res)
}
+
## FUNCTION TO GET PROBA FOR ALL INDIV / ONE GROUP ##
getprob.grp <- function(g){ # g: idx of a group
return(sapply(1:N, function(i) getprob.ind(i,g)))
}
+ ## FUNCTION TO GET A BOOTSTRAPPED TREE AND MATCHING GRP ##
+ getboot.tree.grp <- function(){
+ samp <- sample(1:N,replace=TRUE)
+ tre <- FUN(dat[samp,,drop=FALSE])
+ newgrp <- factor(grp[samp])
+ return(list(tre=tre, grp=newgrp))
+ }
- ## GET BOOTSTRAPPED TREES ##
- list.trees <- lapply(1:n.boot, function(i) FUN(dat[sample(1:N,replace=TRUE),]))
+ if(boot){ # USE BOOTSTRAP
+ ## GET BOOTSTRAPPED TREES ##
+ list.trees.grp <- lapply(1:n.boot, function(i) getboot.tree.grp())
- ## GET WITHIN-GROUP DISTANCES FOR EACH BOOTSTRAP SAMPLE ##
- list.D <- lapply(list.trees, cophenetic.phylo)
- list.D <- lapply(lev, function(g) unlist(lapply(list.D, function(e) getdist.grp(e,g)))) # within dist. pooled across trees for each grp
+ ## GET WITHIN-GROUP DISTANCES FOR EACH BOOTSTRAP SAMPLE ##
+ list.D <- lapply(list.trees.grp, function(e) cophenetic.phylo(e$tre))
+ temp <- lapply(1:n.boot, function(i) getdist.within.allgrp(list.D[[i]], list.trees.grp[[i]]$grp)) # for each replicate, list of distances within for each grp
- ## COMPUTE DENSITIES ##
- list.dens <- lapply(list.D, density, n=n.dens, ...)
+
+ ## GET DENSITIES FOR EACH GROUP ##
+ dens.dat <- lapply(LEV, function(onegroup) unlist(lapply(temp, function(e) e[[onegroup]]))) # density data for each group
+ list.dens <- lapply(dens.dat, density, n=n.dens, ...) # densities for each group
+
+ } else { # DO NOT USE BOOTSTRAP
+ dens.dat <- getdist.within.allgrp(D, grp) # density data for each group
+ list.dens <- lapply(dens.dat, density, n=n.dens, ...) # densities for each group
+ }
+
+
+ ## PLOT DENSITIES ##
if(plot){
find.mfrow <- function(i) {
nrow <- ceiling(sqrt(i))
@@ -58,8 +97,8 @@
}
par(mfrow = find.mfrow(K))
for(i in 1:K){
- plot(list.dens[[i]], main=paste("Group:",lev[i]),xlab="Phylogenetic pairwise distance",ylab="Density", col="blue")
- points(list.D[[i]], rep(0,length(list.D[[i]])), pch="|", col="blue")
+ plot(list.dens[[i]], main=paste("Group:",LEV[i]),xlab="Within-group pairwise distance",ylab="Density", col="blue")
+ points(dens.dat[[i]], rep(0,length(dens.dat[[i]])), pch="|", col="blue")
}
}
@@ -67,7 +106,7 @@
## COMPUTE MEMBERSHIP PROBABILITIES ##
prob <- matrix(unlist(lapply(1:K, getprob.grp)), ncol=K)
prob <- prop.table(prob,1)
- colnames(prob) <- lev
+ colnames(prob) <- LEV
rownames(prob) <- x$tip.label
@@ -80,6 +119,48 @@
## BUILD / RETURN RESULT ##
- res <- list(prob,groups)
+ propcorrect <- mean(annot==" ")
+ ## propcorrect.bygroup <- tapply(annot==" ", grp, mean)
+ assignability <- mean((apply(prob,1,max)-.5)/.5)
+ ##res <- list(prob=prob,groups=groups, mean.correct=propcorrect, prop.correct=propcorrect.bygroup)
+ res <- list(prob=prob,groups=groups, assigndex=assignability, mean.correct=propcorrect)
+
return(res)
} # end treeGroup
+
+
+
+
+
+
+
+
+##############################
+## simulate data with groups
+##############################
+simDatGroups <- function(k=2, p=1000, n=10, mu=0, sigma=1){
+ ## RECYCLE ARGUMENTS ##
+ n <- rep(n, length=k)
+ mu <- rep(mu, length=k)
+ sigma <- rep(sigma, length=k)
+
+
+ ## GENERATE DATA ##
+ dat <- list()
+ for(i in 1:k){
+ dat[[i]] <- replicate(p, rnorm(n[i], mu[i], sigma[i]))
+ }
+
+ dat <- Reduce(rbind,dat)
+ rownames(dat) <- paste("ind", 1:nrow(dat))
+
+ ## SHAPE OUTPUT ##
+ grp <- factor(paste("grp", rep(1:k, n)))
+ res <- list(dat=dat, grp=grp)
+ return(res)
+}
+
+
+
+
+
More information about the Adephylo-commits
mailing list