[Adephylo-commits] r175 - in pkg: . R src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Feb 9 20:30:33 CET 2012


Author: jombart
Date: 2012-02-09 20:30:32 +0100 (Thu, 09 Feb 2012)
New Revision: 175

Added:
   pkg/src/misc.c
Modified:
   pkg/DESCRIPTION
   pkg/R/tree.group.R
Log:
beta version, tree grouping algo implemented


Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION	2012-02-08 19:02:42 UTC (rev 174)
+++ pkg/DESCRIPTION	2012-02-09 19:30:32 UTC (rev 175)
@@ -9,4 +9,4 @@
 Description: Multivariate tools to analyze comparative data, i.e. a phylogeny and some traits measured for each taxa.
 License: GPL (>=2)
 LazyLoad: yes
-Collate: utils.R partition.R table.phylo4d.R distances.R proximities.R orthobasis.R ppca.R orthogram.R abouheif.R moran.R zzz.R
+Collate: utils.R partition.R table.phylo4d.R distances.R proximities.R orthobasis.R ppca.R orthogram.R abouheif.R moran.R tree.group.R zzz.R

Modified: pkg/R/tree.group.R
===================================================================
--- pkg/R/tree.group.R	2012-02-08 19:02:42 UTC (rev 174)
+++ pkg/R/tree.group.R	2012-02-09 19:30:32 UTC (rev 175)
@@ -1,20 +1,19 @@
 treeGroup <- function(x, grp, dat, FUN, n.boot=10,
-                      n.dens=4096,
-                      plot=TRUE, ...){
+                      n.dens=4096, plot=TRUE, check.lab=TRUE, ...){
     if(!require(ape)) stop("ape package is required")
     if(!inherits(x,"phylo")) stop("x is not a phylo object")
+    if(check.lab && !identical(x$tip.label,rownames(dat))) warning("Tip labels in x and rownames of dat differ \nplease make sure the same order is used in x, grp, and dat")
     grp <- factor(grp)
-    k <- length(lev <- levels(grp))
-    n <- nrow(dat)
+    K <- length(lev <- levels(grp))
+    N <- nrow(dat)
     D <- cophenetic.phylo(x)
 
 
-    ## FUNCTION TO ESTIMATE A DENSITY AT A GIVEN POINT ##
-    find.dens <- function(dens, value){
-        if(value<=min(dens$x)) return((dens$y[1]+0) / 2)
-        if(value>=max(dens$x)) return((dens$y[length(dens$y)]+0) / 2)
-        idx <- min(which(c(dens$x > value)))
-        return(mean(dens$y[c(idx,idx+1)]))
+    #### AUXILIARY FUNCTIONS ####
+    ## FUNCTION TO ESTIMATE A DENSITY AT A SERIES OF POINTS ##
+    compute.dens <- function(dens, values){
+        pred.y <- double(n <- length(values))
+        return(.C("predict_density", dens$x, dens$y, length(dens$x), as.double(values), pred.y, n, PACKAGE="adephylo")[[5]])
     }
 
 
@@ -26,33 +25,61 @@
 
 
     ## FUNCTION TO GET PROBA FOR ONE INDIV / ONE GROUP ##
-    getprob.grp <- function(i, g){ # g indicates a group
-        find.dens(list.dens[[g]] D[i,grp==lev[g]])
-        temp <- D[,grp==g]
+    getprob.ind <- function(i, g){ # i: idx of indiv; g: idx of a group
+        temp <- 1:ncol(D)
+        find.dens(list.dens[[g]], D[i,grp==lev[g] & temp!=i])
+        res <- exp(sum(log(find.dens(list.dens[[g]], D[i,grp==lev[g] & temp!=i]))))
+        return(res)
+    }
 
+    ## FUNCTION TO GET PROBA FOR ALL INDIV / ONE GROUP ##
+    getprob.grp <- function(g){ # g: idx of a group
+        return(sapply(1:N, function(i) getprob.ind(i,g)))
     }
 
 
-    ## PERFORM BOOTSTRAP TREES ##
-    list.trees <- lapply(1:n.boot, function(i) FUN(dat[sample(1:n,replace=TRUE),]))
 
+    ## GET BOOTSTRAPPED TREES ##
+    list.trees <- lapply(1:n.boot, function(i) FUN(dat[sample(1:N,replace=TRUE),]))
 
+
     ## GET WITHIN-GROUP DISTANCES FOR EACH BOOTSTRAP SAMPLE ##
     list.D <- lapply(list.trees, cophenetic.phylo)
-    list.D <- lapply(lev, function(g) unlist(lapply(listD, function(e) getdist.grp(e,g))))
+    list.D <- lapply(lev, function(g) unlist(lapply(list.D, function(e) getdist.grp(e,g)))) # within dist. pooled across trees for each grp
 
 
     ## COMPUTE DENSITIES ##
     list.dens <- lapply(list.D, density, n=n.dens, ...)
     if(plot){
-        par(mfrow = c(ceiling(sqrt(k)),ceiling(sqrt(k))) )
-        for(i in 1:k){
+        find.mfrow <- function(i) {
+            nrow <- ceiling(sqrt(i))
+            ncol <- ceiling(i/ceiling(sqrt(i)))
+            return(c(nrow,ncol))
+        }
+        par(mfrow = find.mfrow(K))
+        for(i in 1:K){
             plot(list.dens[[i]], main=paste("Group:",lev[i]),xlab="Phylogenetic pairwise distance",ylab="Density", col="blue")
             points(list.D[[i]], rep(0,length(list.D[[i]])), pch="|", col="blue")
         }
     }
 
 
+    ## COMPUTE MEMBERSHIP PROBABILITIES ##
+    prob <- matrix(unlist(lapply(1:K, getprob.grp)), ncol=K)
+    prob <- prop.table(prob,1)
+    colnames(prob) <- lev
+    rownames(prob) <- x$tip.label
 
 
-}
+    ## FIND GROUP ASSIGNMENTS ##
+    temp <- factor(colnames(prob)[apply(prob,1, which.max)])
+    annot <- rep(" ", N)
+    annot[as.character(grp)!=as.character(temp)] <- "!"
+    groups <- data.frame(observed=grp, inferred=temp, annot=annot)
+    rownames(groups) <- x$tip.labels
+
+
+    ## BUILD / RETURN RESULT ##
+    res <- list(prob,groups)
+    return(res)
+} # end treeGroup

Added: pkg/src/misc.c
===================================================================
--- pkg/src/misc.c	                        (rev 0)
+++ pkg/src/misc.c	2012-02-09 19:30:32 UTC (rev 175)
@@ -0,0 +1,23 @@
+#include <math.h>
+#include <time.h>
+#include <string.h>
+#include <stdlib.h>
+
+
+/* interpolate values in a density */
+/* x contais n values for which we want y=f(x) */
+void predict_density(double *densx, double *densy, int *densn, double *x, double *y, int *n){
+	int i, idx;
+
+	for(i=0;i<*n;i++){
+		idx=0;
+		while(idx < *densn && x[i]>densx[idx]){
+			idx++;
+		}
+		if(idx==0 || idx == *densn){
+			y[i] = densy[idx]/2;
+		} else {
+			y[i] = (densy[idx] + densy[idx+1])/2;
+		}
+	}
+}



More information about the Adephylo-commits mailing list