[Adephylo-commits] r175 - in pkg: . R src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Feb 9 20:30:33 CET 2012
Author: jombart
Date: 2012-02-09 20:30:32 +0100 (Thu, 09 Feb 2012)
New Revision: 175
Added:
pkg/src/misc.c
Modified:
pkg/DESCRIPTION
pkg/R/tree.group.R
Log:
beta version, tree grouping algo implemented
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2012-02-08 19:02:42 UTC (rev 174)
+++ pkg/DESCRIPTION 2012-02-09 19:30:32 UTC (rev 175)
@@ -9,4 +9,4 @@
Description: Multivariate tools to analyze comparative data, i.e. a phylogeny and some traits measured for each taxa.
License: GPL (>=2)
LazyLoad: yes
-Collate: utils.R partition.R table.phylo4d.R distances.R proximities.R orthobasis.R ppca.R orthogram.R abouheif.R moran.R zzz.R
+Collate: utils.R partition.R table.phylo4d.R distances.R proximities.R orthobasis.R ppca.R orthogram.R abouheif.R moran.R tree.group.R zzz.R
Modified: pkg/R/tree.group.R
===================================================================
--- pkg/R/tree.group.R 2012-02-08 19:02:42 UTC (rev 174)
+++ pkg/R/tree.group.R 2012-02-09 19:30:32 UTC (rev 175)
@@ -1,20 +1,19 @@
treeGroup <- function(x, grp, dat, FUN, n.boot=10,
- n.dens=4096,
- plot=TRUE, ...){
+ n.dens=4096, plot=TRUE, check.lab=TRUE, ...){
if(!require(ape)) stop("ape package is required")
if(!inherits(x,"phylo")) stop("x is not a phylo object")
+ if(check.lab && !identical(x$tip.label,rownames(dat))) warning("Tip labels in x and rownames of dat differ \nplease make sure the same order is used in x, grp, and dat")
grp <- factor(grp)
- k <- length(lev <- levels(grp))
- n <- nrow(dat)
+ K <- length(lev <- levels(grp))
+ N <- nrow(dat)
D <- cophenetic.phylo(x)
- ## FUNCTION TO ESTIMATE A DENSITY AT A GIVEN POINT ##
- find.dens <- function(dens, value){
- if(value<=min(dens$x)) return((dens$y[1]+0) / 2)
- if(value>=max(dens$x)) return((dens$y[length(dens$y)]+0) / 2)
- idx <- min(which(c(dens$x > value)))
- return(mean(dens$y[c(idx,idx+1)]))
+ #### AUXILIARY FUNCTIONS ####
+ ## FUNCTION TO ESTIMATE A DENSITY AT A SERIES OF POINTS ##
+ compute.dens <- function(dens, values){
+ pred.y <- double(n <- length(values))
+ return(.C("predict_density", dens$x, dens$y, length(dens$x), as.double(values), pred.y, n, PACKAGE="adephylo")[[5]])
}
@@ -26,33 +25,61 @@
## FUNCTION TO GET PROBA FOR ONE INDIV / ONE GROUP ##
- getprob.grp <- function(i, g){ # g indicates a group
- find.dens(list.dens[[g]] D[i,grp==lev[g]])
- temp <- D[,grp==g]
+ getprob.ind <- function(i, g){ # i: idx of indiv; g: idx of a group
+ temp <- 1:ncol(D)
+ find.dens(list.dens[[g]], D[i,grp==lev[g] & temp!=i])
+ res <- exp(sum(log(find.dens(list.dens[[g]], D[i,grp==lev[g] & temp!=i]))))
+ return(res)
+ }
+ ## FUNCTION TO GET PROBA FOR ALL INDIV / ONE GROUP ##
+ getprob.grp <- function(g){ # g: idx of a group
+ return(sapply(1:N, function(i) getprob.ind(i,g)))
}
- ## PERFORM BOOTSTRAP TREES ##
- list.trees <- lapply(1:n.boot, function(i) FUN(dat[sample(1:n,replace=TRUE),]))
+ ## GET BOOTSTRAPPED TREES ##
+ list.trees <- lapply(1:n.boot, function(i) FUN(dat[sample(1:N,replace=TRUE),]))
+
## GET WITHIN-GROUP DISTANCES FOR EACH BOOTSTRAP SAMPLE ##
list.D <- lapply(list.trees, cophenetic.phylo)
- list.D <- lapply(lev, function(g) unlist(lapply(listD, function(e) getdist.grp(e,g))))
+ list.D <- lapply(lev, function(g) unlist(lapply(list.D, function(e) getdist.grp(e,g)))) # within dist. pooled across trees for each grp
## COMPUTE DENSITIES ##
list.dens <- lapply(list.D, density, n=n.dens, ...)
if(plot){
- par(mfrow = c(ceiling(sqrt(k)),ceiling(sqrt(k))) )
- for(i in 1:k){
+ find.mfrow <- function(i) {
+ nrow <- ceiling(sqrt(i))
+ ncol <- ceiling(i/ceiling(sqrt(i)))
+ return(c(nrow,ncol))
+ }
+ par(mfrow = find.mfrow(K))
+ for(i in 1:K){
plot(list.dens[[i]], main=paste("Group:",lev[i]),xlab="Phylogenetic pairwise distance",ylab="Density", col="blue")
points(list.D[[i]], rep(0,length(list.D[[i]])), pch="|", col="blue")
}
}
+ ## COMPUTE MEMBERSHIP PROBABILITIES ##
+ prob <- matrix(unlist(lapply(1:K, getprob.grp)), ncol=K)
+ prob <- prop.table(prob,1)
+ colnames(prob) <- lev
+ rownames(prob) <- x$tip.label
-}
+ ## FIND GROUP ASSIGNMENTS ##
+ temp <- factor(colnames(prob)[apply(prob,1, which.max)])
+ annot <- rep(" ", N)
+ annot[as.character(grp)!=as.character(temp)] <- "!"
+ groups <- data.frame(observed=grp, inferred=temp, annot=annot)
+ rownames(groups) <- x$tip.labels
+
+
+ ## BUILD / RETURN RESULT ##
+ res <- list(prob,groups)
+ return(res)
+} # end treeGroup
Added: pkg/src/misc.c
===================================================================
--- pkg/src/misc.c (rev 0)
+++ pkg/src/misc.c 2012-02-09 19:30:32 UTC (rev 175)
@@ -0,0 +1,23 @@
+#include <math.h>
+#include <time.h>
+#include <string.h>
+#include <stdlib.h>
+
+
+/* interpolate values in a density */
+/* x contais n values for which we want y=f(x) */
+void predict_density(double *densx, double *densy, int *densn, double *x, double *y, int *n){
+ int i, idx;
+
+ for(i=0;i<*n;i++){
+ idx=0;
+ while(idx < *densn && x[i]>densx[idx]){
+ idx++;
+ }
+ if(idx==0 || idx == *densn){
+ y[i] = densy[idx]/2;
+ } else {
+ y[i] = (densy[idx] + densy[idx+1])/2;
+ }
+ }
+}
More information about the Adephylo-commits
mailing list