[Adephylo-commits] r153 - pkg/src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Mar 12 15:56:18 CET 2010


Author: jombart
Date: 2010-03-12 15:56:18 +0100 (Fri, 12 Mar 2010)
New Revision: 153

Modified:
   pkg/src/distPhylo.c
   pkg/src/sptips.c
   pkg/src/sptips.h
Log:
Finished first complete version of dist2tips.


Modified: pkg/src/distPhylo.c
===================================================================
--- pkg/src/distPhylo.c	2010-03-12 12:21:11 UTC (rev 152)
+++ pkg/src/distPhylo.c	2010-03-12 14:56:18 UTC (rev 153)
@@ -4,14 +4,8 @@
   Licence: GPL >=2.
 
    Notes:
-   these functions are used to find the shortest path between specified pairs of tips.
-   The algorithm proceeds as follows:
-   1) find the paths (pathA, pathB) to the root
-   2) find the MRCA, defined as the first term of pathA in pathB (same as the converse)
-   3) from A, go back to MRCA, adding crossed nodes to the result, not including the MRCA
-   4) from B, go back to MRCA, adding crossed nodes to the result, not including the MRCA
-   5) add the MRCA to the output
-   6) return the output
+   These functions implement several different phylogenetic distances between all pairs of tips in a phylogeny.
+   These functions require sptips.c and adesub.c.
 */
 
 
@@ -22,9 +16,9 @@
 #include <R.h>
 #include <R_ext/Utils.h>
 #include "adesub.h"
+#include "sptips.h"
 
 
-
 /* 
    =====================
    UTILITARY FUNCTIONS
@@ -32,333 +26,145 @@
 */
 
 
-/* 
-   === REPLICATE %IN% / MATCH OPERATOR FOR INTEGERS === 
+/*
+   === FIND THE LENGTH OF AN EDGE ===
    == for internal use only ==
-   - *b has to be a vector created using vecintalloc
-   - returns 0 if there are no matches, and the index of the first match otherwise
+  - the edge is identified by the descending node
+  - ances, desc, and brlength must be created using vecintalloc
+  - N is the number of edges to represent the tree
 */
-int intAinB(int a, int *b, int lengthB){
-	if(lengthB == 0) return(0); /* avoid comparison with empty vector */
+double findedgelength(int *ances, int *desc, int *brlength, int N, int myNode){
+	int posi=0;
 
-	int i=1;
 
-	/* printf("\n AinB debugging: a=%d", a); */
-	while(i <= lengthB){
-		/* printf("\t i=%d \t bi=%d ", a, b[i]); */
+	/* find the edge */
+	posi = intAinB(myNode, desc, N);
 
-		if(b[i]==a) {
-			return(i);
-		} else {
-			i++;
-		}
+	if(posi==0){
+		printf("\n Likely error in findedgelength: edge not found");
+		return(0.0);
 	}
 
-	return(0);
-} /* intAinB */
+	/* return corresponding length */
+	return(brlength[posi]);
+} /* end findedgelength */
 
 
 
 
 
-/* 
-   === REPLICATE SETDIFF MATCH OPERATOR FOR INTEGERS === 
-   == for internal use only ==
-   - *b has to be a vector created using vecintalloc
-   - finds (possibly duplicated) elements of a not in b
-*/
-void intANotInB(int *a, int *b, int lengthA, int lengthB, int *res, int *resSize){
-	int i;
-
-	/* a few checks */
-	if(lengthA==0) return;
-	if(lengthB==0){
-		*resSize = 0; 
-		return;
-	}
-
-
-	for(i=1; i<=lengthA; i++){
-		if(intAinB(a[i], b, lengthB)==0){
-			*resSize = *resSize+1;
-			res[*resSize] = a[i];
-		}
-	}
-
-} /* intANotInB */
-
-
-
-
-
-
 /*
-  === UNION OF TWO INTEGER VECTORS ===
-  == for internal use only ==
-  - a, b, and res have to be created by vecintalloc
-  - returns unique(c(a,b))
-*/
-void unionInt(int *a, int *b, int lengthA, int lengthB, int *res, int *resSize){
-	if(lengthA==0 && lengthB && 0) {
-		*res = 0;
-		*resSize = 0;
-		return;
-	}
-
-	int i, idx;
-
-	res[1] = a[1]; /* initialization of temp results */
-	*resSize = 1;
-
-        /* For a */
-	for(i=1;i<=lengthA;i++){
-		idx = intAinB(a[i], res, *resSize); /* check if element is in res */
-		if(idx==0) {
-			*resSize = *resSize + 1;
-			res[*resSize] = a[i];
-		}
-	}
-
-        /* For b */
-	for(i=1;i<=lengthB;i++){
-		idx = intAinB(b[i], res, *resSize); /* check if element is in res */
-		if(idx==0) {
-			*resSize = *resSize + 1;
-			res[*resSize] = b[i];
-		}
-	}
-} /* unionInt */
-
-
-
-
-
-
-/* 
-   === INTERSECTION OF TWO INTEGER VECTORS ===
+   === FIND THE NUMBER OF DIRECT DESCENDENTS (DD) OF A NODE ===
    == for internal use only ==
-   - a, b, and res have to be created by vecintalloc
+   - ances, desc, and brlength must be created using vecintalloc
+   - N is the number of edges to represent the tree
 */
-void intersectInt(int *a, int *b, int lengthA, int lengthB, int *res, int *resSize){
-	if((lengthA * lengthB) ==0) {
-		*res = 0;
-		*resSize = 0;
-		return;
-	}
-	int i, idx;
+int findNbDD(int *ances, int *desc, int N, int myNode){
+	int i, nbDD=0;
 
-	*resSize = 0;
 
-        /* store elements of a present in b */
-	for(i=1;i<=lengthA;i++){
-		idx = intAinB(a[i], b, lengthB) * intAinB(a[i], res, *resSize); /* a in b and not already in res */
-		if(idx != 0) {
-			*resSize = *resSize + 1;
-			res[*resSize] = a[i];
+	/* browse the ances vector */
+	for(i=1; i<=N; i++){
+		if(ances[i] == myNode) {
+			nbDD++;
 		}
 	}
-} /* intersectInt */
 
-
-
-
-
-/* 
-   === FIND THE PATH FROM A TIP TO THE ROOT === 
-   == for internal use only ==
-   - ances, desc and path must have been created using vecintalloc; their indices start at 1.
-   - N is the number of edges in the tree, i.e. number of rows of $edge
-*/
-void pathTipToRoot(int tip, int *ances, int *desc, int N, int *res, int *resSize){
-	int i, curNode=0, keepOn=1, nextNodeId;
-
-	curNode = tip;
-	*resSize = 0;
-
-	/* printf("\nInput inside pathTipTo...: \n"); */
-	/* for(i=1; i<= N;i++){ */
-	/* 	printf(" %d", res[i]); */
-	/* } */
-
-	while(keepOn==1){
-		nextNodeId = intAinB(curNode, desc, N);
-		/* printf("\n%d in desc: %d", curNode, nextNodeId); */
-
-		if(nextNodeId > 0){
-			*resSize = *resSize + 1;
-			curNode = ances[nextNodeId];
-			res[*resSize] = curNode;
-		} else {
-			keepOn = 0;
-		}
+	if(nbDD==0){
+		printf("\n Likely error in findNbDD: no direct descendent found.\n");
 	}
 
-	/* /\* debugging *\/ */
-	/* printf("\nOutput from pathTip... :"); */
-	/* printf("\nresSize: %d \n", *resSize); */
-	
-	/* for(i=1; i<= *resSize;i++){ */
-	/* 	printf(" %d", res[i]); */
-	/* } */
-	
-} /* pathTipToRoot */
+	/* return corresponding length */
+	return(nbDD);
+} /* end findedgelength */
 
 
 
 
 
-/*
-  === FIND THE MRCA BETWEEN TWO TIPS ===
-  == for internal use only ==
-  - a and b are two tips
-  - ances and desc must be created using vecintalloc
-  - N is the number of edges
-*/
-int mrca2tips(int *ances, int*desc, int a, int b, int N){
-	int i, res, idx;
-	int *pathAroot, *pathBroot, *lengthPathA, *lengthPathB;
 
-	/* allocate memory */
-	vecintalloc(&pathAroot, N);
-	vecintalloc(&pathBroot, N);
-	lengthPathA = (int *) calloc(1, sizeof(int));
-	lengthPathB = (int *) calloc(1, sizeof(int));
 
-	/* printf("\n N: %d", N); */
-	/* printf("\nEmpty res passed to pathTip...:\n"); */
-	/* for(i=1; i<= N;i++){ */
-	/* 	printf(" %d", pathAroot[i]); */
-	/* } */
-
-	/* find paths to the root */
-	pathTipToRoot(a, ances, desc, N, pathAroot, lengthPathA);
-	pathTipToRoot(b, ances, desc, N, pathBroot, lengthPathB);
-
-	/* debugging*/
-	/* printf("\n Information found within mrca2tips:\n"); */
-	/* printf("\nlengthPathA: %d \n", *lengthPathA); */
-	/* printf("\nlengthPathB: %d \n", *lengthPathB); */
-
-	/* printf("\nPath from %d to the root:\n", a); */
-	/* for(i=1; i<= *lengthPathA;i++){ */
-	/* 	printf(" %d", pathAroot[i]); */
-	/* } */
-
-	/* printf("\nPath from %d to the root\n", b); */
-	/* for(i=1; i<= *lengthPathB;i++){ */
-	/* 	printf(" %d", pathBroot[i]); */
-	/* } */
-
-	/* initialization */
-	i = 0;
-	idx = 0;
-
-	/* printf("\n - marker within mrca2tips - \n"); */
-	while(idx==0){
-		if(i == *lengthPathA){ /* that would indicate an error */
-			/* printf("\n Likely error: no MRCA found between specified tips."); */
-				/* free memory */
-			freeintvec(pathAroot);
-			freeintvec(pathBroot);
-			free(lengthPathA);
-			free(lengthPathB);
-			return(0);
-		}
-		i++;
-		idx = intAinB(pathAroot[i], pathBroot, *lengthPathB);
-		/* printf("\ni: %d    idx: %d    node: %d", i, idx, pathAroot[i]); */
-	}
-
-	/* store the result in a local variable */
-	res = pathBroot[idx];
-
-	/* free memory */
-	freeintvec(pathAroot);
-	freeintvec(pathBroot);
-	free(lengthPathA);
-	free(lengthPathB);
-
-	return(res);
-} /* end mrca */
-
-
-
-
-
-
 /*
-  === FIND SHORTEST PATH BETWEEN TWO TIPS ===
+  === DISTANCE(s) BETWEEN TWO TIPS ===
   == for internal use only ==
-  - ances and desc must be created using vecintalloc
+  - ances, desc, and brlength must be created using vecintalloc
   - N is the number of edges to represent the tree
+  - 'method' indicates the type of distance: 1) patristic 2) nNodes 3) Abouheif 4) sumDD
+  - edges are identified by their descending node
+  - for patristic distances, the set of edge used is: {output of sp2tips} U {tipA, tipB} \ {output of mrca2tips}
+  - for all others: {output of sp2tips}
 */
-void sp2tips(int *ances, int *desc, int N, int tipA, int tipB, int *res, int *resSize){
+int dist2tips(int *ances, int *desc, int *brlength, int N, int tipA, int tipB, int *res, int *resSize, int method){
 	/* declarations */
-	int *pathAroot, *pathBroot, *lengthPathA, *lengthPathB;
-	int k, myMrca;
+	int *path, *lengthPath, *myMrca;
+	int i, res;
 
 
 	/* allocate memory */
-	vecintalloc(&pathAroot, N);
-	vecintalloc(&pathBroot, N);
-	lengthPathA = (int *) calloc(1, sizeof(int));
-	lengthPathB = (int *) calloc(1, sizeof(int));
+	vecintalloc(&path, N);
+	lengthPath = (int *) calloc(1, sizeof(int));
+	myMrca = (int *) calloc(1, sizeof(int));
 
 
-	/* find paths to the root */
-	pathTipToRoot(tipA, ances, desc, N, pathAroot, lengthPathA);
-	pathTipToRoot(tipB, ances, desc, N, pathBroot, lengthPathB);
+	/* find the shortest path between the two tips */
+	 sp2tips(ances, desc, N, tipA, tipB, path, lengthPath);
 
-	/* find the MRCA between both tips */
-	myMrca = mrca2tips(ances, desc, tipA, tipB, N);
 
-	/* go back the paths and stop at MRCA (exclude MRCA) */
-	/* for A */
-	k = 1;
-	*resSize = 0;
-	while(pathAroot[k] != myMrca){
-		*resSize = *resSize + 1;
-		res[*resSize] = pathAroot[k];
-		k++;
-	}
+	 /* compute the distance */
+	 switch( method )
+	 {
+	 case 1: /* patristic distance */
+		 /* find the mrca */
+		 *myMrca = 0;
+		 *myMrca = mrca2tips(ances, desc, tipA, tipB, N);
 
-	/* printf("\nsp step a:"); */
-	/* int i; */
-	/* for(i=1; i<=*resSize; i++){ */
-	/* 	printf(" %d", res[i]); */
-	/* } */
+		 /* remove mrca from the path */
+		 intANotInB(path, myMrca, lengthPath, 1, path, lengthPath);
 
-	/* for B */
-	k = 1;
-	while(pathBroot[k] != myMrca){
-		*resSize = *resSize + 1;
-		res[*resSize] = pathBroot[k];
-		k++;
-	}
+		 /* add tips to the path */
+		 *lengthPath = *lengthPath + 1;
+		 path[*lengthPath] = tipA;
+		 *lengthPath = *lengthPath + 1;
+		 path[*lengthPath] = tipB;
 
+		 /* compute length */
+		 res=0.0;
+		 for(i=1; i<=*lengthPath; i++){
+			 res += findedgelength(ances, desc, brlength, N, path[i]);
+		 }
+		 break;
 
-	/* printf("\nsp step b:"); */
-	/* for(i=1; i<=*resSize; i++){ */
-	/* 	printf(" %d", res[i]); */
-	/* } */
+	 case 2: /* number of nodes */
+		 res = double(*lengthPath);
+		 break;
 
-	/* add the MRCA */
-	*resSize = *resSize + 1;
-	res[*resSize] = myMrca;
+	 case 3: /* prod DD (Abouheif) */
+		 res=1.0;
+		 for(i=1; i<=*lengthPath; i++){
+			 res *= findNbDD(ances, desc, N, path[i]);
+		 }
+		 break;
 
-	/* printf("\nsp step mrca (%d):", myMrca); */
-	/* for(i=1; i<=*resSize; i++){ */
-	/* 	printf(" %d", res[i]); */
-	/* } */
+	 case 4: /* sum DD */
+		 res=0.0;
+		 for(i=1; i<=*lengthPath; i++){
+			 res += findNbDD(ances, desc, N, path[i]);
+		 }
+		 break;
 
+	 default :
+		 res=0.0;
+		 printf("\n\n Likely error in dist2tips: unknown method (%d):", method);
+		 break;
+	 }
 
 	/* free memory */
-	freeintvec(pathAroot);
-	freeintvec(pathBroot);
-	free(lengthPathA);
-	free(lengthPathB);
+	freeintvec(path);
+	free(lengthPath);
+	free(myMrca);
 
-} /* end sp2tips */
+	return(res)
+} /* end dist2tips */
 
 
 
@@ -367,7 +173,7 @@
 
 
 /*
-  === FIND SHORTEST PATH BETWEEN ALL PAIRS OF TIPS ===
+  === ... BETWEEN ALL PAIRS OF TIPS ===
   == for internal/external uses ==
   - all arguments are passed from R
   - N is the number of edges to represent the tree
@@ -375,7 +181,7 @@
   - resSize is the total size of the output vector; it can't be known in advance, so a fake value has to be passed
   - resId indicates how the final result should be cut
 */
-void spalltips(int *ances, int *desc, int *N, int *nTips, int *res, int *resId, int *resSize){
+void distalltips(int *ances, int *desc, int *N, int *nTips, int *res, int *resId, int *resSize){
 	/* declarations */
 	int i, j, k, m, idPair;
 	int *ancesLoc, *descLoc, *tempRes, *tempResSize; /* must use dynamic allocation */

Modified: pkg/src/sptips.c
===================================================================
--- pkg/src/sptips.c	2010-03-12 12:21:11 UTC (rev 152)
+++ pkg/src/sptips.c	2010-03-12 14:56:18 UTC (rev 153)
@@ -77,6 +77,8 @@
 		return;
 	}
 
+	/* main code */
+	*resSize = 0;
 
 	for(i=1; i<=lengthA; i++){
 		if(intAinB(a[i], b, lengthB)==0){

Modified: pkg/src/sptips.h
===================================================================
--- pkg/src/sptips.h	2010-03-12 12:21:11 UTC (rev 152)
+++ pkg/src/sptips.h	2010-03-12 14:56:18 UTC (rev 153)
@@ -1,15 +1,3 @@
-/* Notes:
-   these functions are used to find the shortest path between specified pairs of tips.
-   The algorithm proceeds as follows:
-   1) find the paths (pathA, pathB) to the root
-   2) find the MRCA, defined as the first term of pathA in pathB (same as the converse)
-   3) from A, go back to MRCA, adding crossed nodes to the result, not including the MRCA
-   4) from B, go back to MRCA, adding crossed nodes to the result, not including the MRCA
-   5) add the MRCA to the output
-   6) return the output
-*/
-
-
 #include <math.h>
 #include <time.h>
 #include <string.h>



More information about the Adephylo-commits mailing list