[Adephylo-commits] r176 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Feb 13 19:10:12 CET 2012


Author: jombart
Date: 2012-02-13 19:10:12 +0100 (Mon, 13 Feb 2012)
New Revision: 176

Modified:
   pkg/R/tree.group.R
Log:
first working version, but results odd


Modified: pkg/R/tree.group.R
===================================================================
--- pkg/R/tree.group.R	2012-02-09 19:30:32 UTC (rev 175)
+++ pkg/R/tree.group.R	2012-02-13 18:10:12 UTC (rev 176)
@@ -1,14 +1,25 @@
-treeGroup <- function(x, grp, dat, FUN, n.boot=10,
-                      n.dens=4096, plot=TRUE, check.lab=TRUE, ...){
+##############
+## treeGroup
+##############
+treeGroup <- function(x, grp, dat, FUN, boot=FALSE, n.boot=10,
+                      n.dens=4096, plot=TRUE, warn.lab=FALSE, ...){
     if(!require(ape)) stop("ape package is required")
     if(!inherits(x,"phylo")) stop("x is not a phylo object")
-    if(check.lab && !identical(x$tip.label,rownames(dat))) warning("Tip labels in x and rownames of dat differ \nplease make sure the same order is used in x, grp, and dat")
+    if(warn.lab && !identical(x$tip.label,rownames(dat))) warning("Tip labels in x and rownames of dat differ \nplease make sure the same order is used in x, grp, and dat")
     grp <- factor(grp)
-    K <- length(lev <- levels(grp))
+    K <- length(LEV <- levels(grp))
     N <- nrow(dat)
     D <- cophenetic.phylo(x)
+    THRES <- 1e-320 # densities < THRES will be set to THRES to avoid log(x)=-Inf
 
 
+    ## RE-ORDER GRP AND DATA MATRIX AFTER TIP LABELS ##
+    if(is.null(rownames(dat))) rownames(dat) <- x$tip.label
+    if(!all(x$tip.label %in% rownames(dat))) stop("some tips do not have data matching their label")
+    grp <- grp[match(x$tip.label, rownames(dat))] # grp is assumed to be in the same order as 'dat'
+    dat <- dat[x$tip.label,,drop=FALSE]
+
+
     #### AUXILIARY FUNCTIONS ####
     ## FUNCTION TO ESTIMATE A DENSITY AT A SERIES OF POINTS ##
     compute.dens <- function(dens, values){
@@ -17,39 +28,67 @@
     }
 
 
-    ## FUNCTION TO GET A VECTOR OF PAIRWISE DISTANCE FOR ONE GROUP ##
-    getdist.grp <- function(M, g){
-        temp <- M[grp==g,grp==g]
+    ## FUNCTION TO GET A VECTOR OF PAIRWISE DISTANCE WITHIN ONE GROUP ##
+    getdist.within.grp <- function(M, fac, val){ # val is one level of fac
+        temp <- M[fac==val,fac==val]
         return(temp[lower.tri(temp)])
     }
 
 
+    ## FUNCTION TO GET A VECTOR OF PAIRWISE DISTANCES WITHIN GROUP, FOR ALL GROUPS ##
+    getdist.within.allgrp <- function(M, fac){
+        res <- lapply(LEV, function(e) getdist.within.grp(M, fac, e))
+        names(res) <- LEV
+        return(res)
+    }
+
+
     ## FUNCTION TO GET PROBA FOR ONE INDIV / ONE GROUP ##
     getprob.ind <- function(i, g){ # i: idx of indiv; g: idx of a group
         temp <- 1:ncol(D)
-        find.dens(list.dens[[g]], D[i,grp==lev[g] & temp!=i])
-        res <- exp(sum(log(find.dens(list.dens[[g]], D[i,grp==lev[g] & temp!=i]))))
+        dens <- compute.dens(list.dens[[g]], D[i,grp==LEV[g] & temp!=i])
+        dens[dens < THRES] <- THRES
+        res <- exp(mean(log(dens)))
         return(res)
     }
 
+
     ## FUNCTION TO GET PROBA FOR ALL INDIV / ONE GROUP ##
     getprob.grp <- function(g){ # g: idx of a group
         return(sapply(1:N, function(i) getprob.ind(i,g)))
     }
 
 
+    ## FUNCTION TO GET A BOOTSTRAPPED TREE AND MATCHING GRP ##
+    getboot.tree.grp <- function(){
+        samp <- sample(1:N,replace=TRUE)
+        tre <- FUN(dat[samp,,drop=FALSE])
+        newgrp <- factor(grp[samp])
+        return(list(tre=tre, grp=newgrp))
+    }
 
-    ## GET BOOTSTRAPPED TREES ##
-    list.trees <- lapply(1:n.boot, function(i) FUN(dat[sample(1:N,replace=TRUE),]))
 
+    if(boot){ # USE BOOTSTRAP
+        ## GET BOOTSTRAPPED TREES ##
+        list.trees.grp <- lapply(1:n.boot, function(i) getboot.tree.grp())
 
-    ## GET WITHIN-GROUP DISTANCES FOR EACH BOOTSTRAP SAMPLE ##
-    list.D <- lapply(list.trees, cophenetic.phylo)
-    list.D <- lapply(lev, function(g) unlist(lapply(list.D, function(e) getdist.grp(e,g)))) # within dist. pooled across trees for each grp
 
+        ## GET WITHIN-GROUP DISTANCES FOR EACH BOOTSTRAP SAMPLE ##
+        list.D <- lapply(list.trees.grp, function(e) cophenetic.phylo(e$tre))
+        temp <- lapply(1:n.boot, function(i) getdist.within.allgrp(list.D[[i]], list.trees.grp[[i]]$grp)) # for each replicate, list of distances within for each grp
 
-    ## COMPUTE DENSITIES ##
-    list.dens <- lapply(list.D, density, n=n.dens, ...)
+
+        ## GET DENSITIES FOR EACH GROUP ##
+        dens.dat <- lapply(LEV, function(onegroup) unlist(lapply(temp, function(e) e[[onegroup]]))) # density data for each group
+        list.dens <- lapply(dens.dat, density, n=n.dens, ...) # densities for each group
+
+    } else { # DO NOT USE BOOTSTRAP
+         dens.dat <- getdist.within.allgrp(D, grp) # density data for each group
+         list.dens <- lapply(dens.dat, density, n=n.dens, ...) # densities for each group
+    }
+
+
+    ## PLOT DENSITIES ##
     if(plot){
         find.mfrow <- function(i) {
             nrow <- ceiling(sqrt(i))
@@ -58,8 +97,8 @@
         }
         par(mfrow = find.mfrow(K))
         for(i in 1:K){
-            plot(list.dens[[i]], main=paste("Group:",lev[i]),xlab="Phylogenetic pairwise distance",ylab="Density", col="blue")
-            points(list.D[[i]], rep(0,length(list.D[[i]])), pch="|", col="blue")
+            plot(list.dens[[i]], main=paste("Group:",LEV[i]),xlab="Within-group pairwise distance",ylab="Density", col="blue")
+            points(dens.dat[[i]], rep(0,length(dens.dat[[i]])), pch="|", col="blue")
         }
     }
 
@@ -67,7 +106,7 @@
     ## COMPUTE MEMBERSHIP PROBABILITIES ##
     prob <- matrix(unlist(lapply(1:K, getprob.grp)), ncol=K)
     prob <- prop.table(prob,1)
-    colnames(prob) <- lev
+    colnames(prob) <- LEV
     rownames(prob) <- x$tip.label
 
 
@@ -80,6 +119,48 @@
 
 
     ## BUILD / RETURN RESULT ##
-    res <- list(prob,groups)
+    propcorrect <- mean(annot==" ")
+    ## propcorrect.bygroup <- tapply(annot==" ", grp, mean)
+    assignability <- mean((apply(prob,1,max)-.5)/.5)
+    ##res <- list(prob=prob,groups=groups, mean.correct=propcorrect, prop.correct=propcorrect.bygroup)
+    res <- list(prob=prob,groups=groups, assigndex=assignability, mean.correct=propcorrect)
+
     return(res)
 } # end treeGroup
+
+
+
+
+
+
+
+
+##############################
+## simulate data with groups
+##############################
+simDatGroups <- function(k=2, p=1000, n=10, mu=0, sigma=1){
+    ## RECYCLE ARGUMENTS ##
+    n <- rep(n, length=k)
+    mu <- rep(mu, length=k)
+    sigma <- rep(sigma, length=k)
+
+
+    ## GENERATE DATA ##
+    dat <- list()
+    for(i in 1:k){
+        dat[[i]] <- replicate(p, rnorm(n[i], mu[i], sigma[i]))
+    }
+
+    dat <- Reduce(rbind,dat)
+    rownames(dat) <- paste("ind", 1:nrow(dat))
+
+    ## SHAPE OUTPUT ##
+    grp <- factor(paste("grp", rep(1:k, n)))
+    res <- list(dat=dat, grp=grp)
+    return(res)
+}
+
+
+
+
+



More information about the Adephylo-commits mailing list