[Adephylo-commits] r153 - pkg/src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Mar 12 15:56:18 CET 2010
Author: jombart
Date: 2010-03-12 15:56:18 +0100 (Fri, 12 Mar 2010)
New Revision: 153
Modified:
pkg/src/distPhylo.c
pkg/src/sptips.c
pkg/src/sptips.h
Log:
Finished first complete version of dist2tips.
Modified: pkg/src/distPhylo.c
===================================================================
--- pkg/src/distPhylo.c 2010-03-12 12:21:11 UTC (rev 152)
+++ pkg/src/distPhylo.c 2010-03-12 14:56:18 UTC (rev 153)
@@ -4,14 +4,8 @@
Licence: GPL >=2.
Notes:
- these functions are used to find the shortest path between specified pairs of tips.
- The algorithm proceeds as follows:
- 1) find the paths (pathA, pathB) to the root
- 2) find the MRCA, defined as the first term of pathA in pathB (same as the converse)
- 3) from A, go back to MRCA, adding crossed nodes to the result, not including the MRCA
- 4) from B, go back to MRCA, adding crossed nodes to the result, not including the MRCA
- 5) add the MRCA to the output
- 6) return the output
+ These functions implement several different phylogenetic distances between all pairs of tips in a phylogeny.
+ These functions require sptips.c and adesub.c.
*/
@@ -22,9 +16,9 @@
#include <R.h>
#include <R_ext/Utils.h>
#include "adesub.h"
+#include "sptips.h"
-
/*
=====================
UTILITARY FUNCTIONS
@@ -32,333 +26,145 @@
*/
-/*
- === REPLICATE %IN% / MATCH OPERATOR FOR INTEGERS ===
+/*
+ === FIND THE LENGTH OF AN EDGE ===
== for internal use only ==
- - *b has to be a vector created using vecintalloc
- - returns 0 if there are no matches, and the index of the first match otherwise
+ - the edge is identified by the descending node
+ - ances, desc, and brlength must be created using vecintalloc
+ - N is the number of edges to represent the tree
*/
-int intAinB(int a, int *b, int lengthB){
- if(lengthB == 0) return(0); /* avoid comparison with empty vector */
+double findedgelength(int *ances, int *desc, int *brlength, int N, int myNode){
+ int posi=0;
- int i=1;
- /* printf("\n AinB debugging: a=%d", a); */
- while(i <= lengthB){
- /* printf("\t i=%d \t bi=%d ", a, b[i]); */
+ /* find the edge */
+ posi = intAinB(myNode, desc, N);
- if(b[i]==a) {
- return(i);
- } else {
- i++;
- }
+ if(posi==0){
+ printf("\n Likely error in findedgelength: edge not found");
+ return(0.0);
}
- return(0);
-} /* intAinB */
+ /* return corresponding length */
+ return(brlength[posi]);
+} /* end findedgelength */
-/*
- === REPLICATE SETDIFF MATCH OPERATOR FOR INTEGERS ===
- == for internal use only ==
- - *b has to be a vector created using vecintalloc
- - finds (possibly duplicated) elements of a not in b
-*/
-void intANotInB(int *a, int *b, int lengthA, int lengthB, int *res, int *resSize){
- int i;
-
- /* a few checks */
- if(lengthA==0) return;
- if(lengthB==0){
- *resSize = 0;
- return;
- }
-
-
- for(i=1; i<=lengthA; i++){
- if(intAinB(a[i], b, lengthB)==0){
- *resSize = *resSize+1;
- res[*resSize] = a[i];
- }
- }
-
-} /* intANotInB */
-
-
-
-
-
-
/*
- === UNION OF TWO INTEGER VECTORS ===
- == for internal use only ==
- - a, b, and res have to be created by vecintalloc
- - returns unique(c(a,b))
-*/
-void unionInt(int *a, int *b, int lengthA, int lengthB, int *res, int *resSize){
- if(lengthA==0 && lengthB && 0) {
- *res = 0;
- *resSize = 0;
- return;
- }
-
- int i, idx;
-
- res[1] = a[1]; /* initialization of temp results */
- *resSize = 1;
-
- /* For a */
- for(i=1;i<=lengthA;i++){
- idx = intAinB(a[i], res, *resSize); /* check if element is in res */
- if(idx==0) {
- *resSize = *resSize + 1;
- res[*resSize] = a[i];
- }
- }
-
- /* For b */
- for(i=1;i<=lengthB;i++){
- idx = intAinB(b[i], res, *resSize); /* check if element is in res */
- if(idx==0) {
- *resSize = *resSize + 1;
- res[*resSize] = b[i];
- }
- }
-} /* unionInt */
-
-
-
-
-
-
-/*
- === INTERSECTION OF TWO INTEGER VECTORS ===
+ === FIND THE NUMBER OF DIRECT DESCENDENTS (DD) OF A NODE ===
== for internal use only ==
- - a, b, and res have to be created by vecintalloc
+ - ances, desc, and brlength must be created using vecintalloc
+ - N is the number of edges to represent the tree
*/
-void intersectInt(int *a, int *b, int lengthA, int lengthB, int *res, int *resSize){
- if((lengthA * lengthB) ==0) {
- *res = 0;
- *resSize = 0;
- return;
- }
- int i, idx;
+int findNbDD(int *ances, int *desc, int N, int myNode){
+ int i, nbDD=0;
- *resSize = 0;
- /* store elements of a present in b */
- for(i=1;i<=lengthA;i++){
- idx = intAinB(a[i], b, lengthB) * intAinB(a[i], res, *resSize); /* a in b and not already in res */
- if(idx != 0) {
- *resSize = *resSize + 1;
- res[*resSize] = a[i];
+ /* browse the ances vector */
+ for(i=1; i<=N; i++){
+ if(ances[i] == myNode) {
+ nbDD++;
}
}
-} /* intersectInt */
-
-
-
-
-/*
- === FIND THE PATH FROM A TIP TO THE ROOT ===
- == for internal use only ==
- - ances, desc and path must have been created using vecintalloc; their indices start at 1.
- - N is the number of edges in the tree, i.e. number of rows of $edge
-*/
-void pathTipToRoot(int tip, int *ances, int *desc, int N, int *res, int *resSize){
- int i, curNode=0, keepOn=1, nextNodeId;
-
- curNode = tip;
- *resSize = 0;
-
- /* printf("\nInput inside pathTipTo...: \n"); */
- /* for(i=1; i<= N;i++){ */
- /* printf(" %d", res[i]); */
- /* } */
-
- while(keepOn==1){
- nextNodeId = intAinB(curNode, desc, N);
- /* printf("\n%d in desc: %d", curNode, nextNodeId); */
-
- if(nextNodeId > 0){
- *resSize = *resSize + 1;
- curNode = ances[nextNodeId];
- res[*resSize] = curNode;
- } else {
- keepOn = 0;
- }
+ if(nbDD==0){
+ printf("\n Likely error in findNbDD: no direct descendent found.\n");
}
- /* /\* debugging *\/ */
- /* printf("\nOutput from pathTip... :"); */
- /* printf("\nresSize: %d \n", *resSize); */
-
- /* for(i=1; i<= *resSize;i++){ */
- /* printf(" %d", res[i]); */
- /* } */
-
-} /* pathTipToRoot */
+ /* return corresponding length */
+ return(nbDD);
+} /* end findedgelength */
-/*
- === FIND THE MRCA BETWEEN TWO TIPS ===
- == for internal use only ==
- - a and b are two tips
- - ances and desc must be created using vecintalloc
- - N is the number of edges
-*/
-int mrca2tips(int *ances, int*desc, int a, int b, int N){
- int i, res, idx;
- int *pathAroot, *pathBroot, *lengthPathA, *lengthPathB;
- /* allocate memory */
- vecintalloc(&pathAroot, N);
- vecintalloc(&pathBroot, N);
- lengthPathA = (int *) calloc(1, sizeof(int));
- lengthPathB = (int *) calloc(1, sizeof(int));
- /* printf("\n N: %d", N); */
- /* printf("\nEmpty res passed to pathTip...:\n"); */
- /* for(i=1; i<= N;i++){ */
- /* printf(" %d", pathAroot[i]); */
- /* } */
-
- /* find paths to the root */
- pathTipToRoot(a, ances, desc, N, pathAroot, lengthPathA);
- pathTipToRoot(b, ances, desc, N, pathBroot, lengthPathB);
-
- /* debugging*/
- /* printf("\n Information found within mrca2tips:\n"); */
- /* printf("\nlengthPathA: %d \n", *lengthPathA); */
- /* printf("\nlengthPathB: %d \n", *lengthPathB); */
-
- /* printf("\nPath from %d to the root:\n", a); */
- /* for(i=1; i<= *lengthPathA;i++){ */
- /* printf(" %d", pathAroot[i]); */
- /* } */
-
- /* printf("\nPath from %d to the root\n", b); */
- /* for(i=1; i<= *lengthPathB;i++){ */
- /* printf(" %d", pathBroot[i]); */
- /* } */
-
- /* initialization */
- i = 0;
- idx = 0;
-
- /* printf("\n - marker within mrca2tips - \n"); */
- while(idx==0){
- if(i == *lengthPathA){ /* that would indicate an error */
- /* printf("\n Likely error: no MRCA found between specified tips."); */
- /* free memory */
- freeintvec(pathAroot);
- freeintvec(pathBroot);
- free(lengthPathA);
- free(lengthPathB);
- return(0);
- }
- i++;
- idx = intAinB(pathAroot[i], pathBroot, *lengthPathB);
- /* printf("\ni: %d idx: %d node: %d", i, idx, pathAroot[i]); */
- }
-
- /* store the result in a local variable */
- res = pathBroot[idx];
-
- /* free memory */
- freeintvec(pathAroot);
- freeintvec(pathBroot);
- free(lengthPathA);
- free(lengthPathB);
-
- return(res);
-} /* end mrca */
-
-
-
-
-
-
/*
- === FIND SHORTEST PATH BETWEEN TWO TIPS ===
+ === DISTANCE(s) BETWEEN TWO TIPS ===
== for internal use only ==
- - ances and desc must be created using vecintalloc
+ - ances, desc, and brlength must be created using vecintalloc
- N is the number of edges to represent the tree
+ - 'method' indicates the type of distance: 1) patristic 2) nNodes 3) Abouheif 4) sumDD
+ - edges are identified by their descending node
+ - for patristic distances, the set of edge used is: {output of sp2tips} U {tipA, tipB} \ {output of mrca2tips}
+ - for all others: {output of sp2tips}
*/
-void sp2tips(int *ances, int *desc, int N, int tipA, int tipB, int *res, int *resSize){
+int dist2tips(int *ances, int *desc, int *brlength, int N, int tipA, int tipB, int *res, int *resSize, int method){
/* declarations */
- int *pathAroot, *pathBroot, *lengthPathA, *lengthPathB;
- int k, myMrca;
+ int *path, *lengthPath, *myMrca;
+ int i, res;
/* allocate memory */
- vecintalloc(&pathAroot, N);
- vecintalloc(&pathBroot, N);
- lengthPathA = (int *) calloc(1, sizeof(int));
- lengthPathB = (int *) calloc(1, sizeof(int));
+ vecintalloc(&path, N);
+ lengthPath = (int *) calloc(1, sizeof(int));
+ myMrca = (int *) calloc(1, sizeof(int));
- /* find paths to the root */
- pathTipToRoot(tipA, ances, desc, N, pathAroot, lengthPathA);
- pathTipToRoot(tipB, ances, desc, N, pathBroot, lengthPathB);
+ /* find the shortest path between the two tips */
+ sp2tips(ances, desc, N, tipA, tipB, path, lengthPath);
- /* find the MRCA between both tips */
- myMrca = mrca2tips(ances, desc, tipA, tipB, N);
- /* go back the paths and stop at MRCA (exclude MRCA) */
- /* for A */
- k = 1;
- *resSize = 0;
- while(pathAroot[k] != myMrca){
- *resSize = *resSize + 1;
- res[*resSize] = pathAroot[k];
- k++;
- }
+ /* compute the distance */
+ switch( method )
+ {
+ case 1: /* patristic distance */
+ /* find the mrca */
+ *myMrca = 0;
+ *myMrca = mrca2tips(ances, desc, tipA, tipB, N);
- /* printf("\nsp step a:"); */
- /* int i; */
- /* for(i=1; i<=*resSize; i++){ */
- /* printf(" %d", res[i]); */
- /* } */
+ /* remove mrca from the path */
+ intANotInB(path, myMrca, lengthPath, 1, path, lengthPath);
- /* for B */
- k = 1;
- while(pathBroot[k] != myMrca){
- *resSize = *resSize + 1;
- res[*resSize] = pathBroot[k];
- k++;
- }
+ /* add tips to the path */
+ *lengthPath = *lengthPath + 1;
+ path[*lengthPath] = tipA;
+ *lengthPath = *lengthPath + 1;
+ path[*lengthPath] = tipB;
+ /* compute length */
+ res=0.0;
+ for(i=1; i<=*lengthPath; i++){
+ res += findedgelength(ances, desc, brlength, N, path[i]);
+ }
+ break;
- /* printf("\nsp step b:"); */
- /* for(i=1; i<=*resSize; i++){ */
- /* printf(" %d", res[i]); */
- /* } */
+ case 2: /* number of nodes */
+ res = double(*lengthPath);
+ break;
- /* add the MRCA */
- *resSize = *resSize + 1;
- res[*resSize] = myMrca;
+ case 3: /* prod DD (Abouheif) */
+ res=1.0;
+ for(i=1; i<=*lengthPath; i++){
+ res *= findNbDD(ances, desc, N, path[i]);
+ }
+ break;
- /* printf("\nsp step mrca (%d):", myMrca); */
- /* for(i=1; i<=*resSize; i++){ */
- /* printf(" %d", res[i]); */
- /* } */
+ case 4: /* sum DD */
+ res=0.0;
+ for(i=1; i<=*lengthPath; i++){
+ res += findNbDD(ances, desc, N, path[i]);
+ }
+ break;
+ default :
+ res=0.0;
+ printf("\n\n Likely error in dist2tips: unknown method (%d):", method);
+ break;
+ }
/* free memory */
- freeintvec(pathAroot);
- freeintvec(pathBroot);
- free(lengthPathA);
- free(lengthPathB);
+ freeintvec(path);
+ free(lengthPath);
+ free(myMrca);
-} /* end sp2tips */
+ return(res)
+} /* end dist2tips */
@@ -367,7 +173,7 @@
/*
- === FIND SHORTEST PATH BETWEEN ALL PAIRS OF TIPS ===
+ === ... BETWEEN ALL PAIRS OF TIPS ===
== for internal/external uses ==
- all arguments are passed from R
- N is the number of edges to represent the tree
@@ -375,7 +181,7 @@
- resSize is the total size of the output vector; it can't be known in advance, so a fake value has to be passed
- resId indicates how the final result should be cut
*/
-void spalltips(int *ances, int *desc, int *N, int *nTips, int *res, int *resId, int *resSize){
+void distalltips(int *ances, int *desc, int *N, int *nTips, int *res, int *resId, int *resSize){
/* declarations */
int i, j, k, m, idPair;
int *ancesLoc, *descLoc, *tempRes, *tempResSize; /* must use dynamic allocation */
Modified: pkg/src/sptips.c
===================================================================
--- pkg/src/sptips.c 2010-03-12 12:21:11 UTC (rev 152)
+++ pkg/src/sptips.c 2010-03-12 14:56:18 UTC (rev 153)
@@ -77,6 +77,8 @@
return;
}
+ /* main code */
+ *resSize = 0;
for(i=1; i<=lengthA; i++){
if(intAinB(a[i], b, lengthB)==0){
Modified: pkg/src/sptips.h
===================================================================
--- pkg/src/sptips.h 2010-03-12 12:21:11 UTC (rev 152)
+++ pkg/src/sptips.h 2010-03-12 14:56:18 UTC (rev 153)
@@ -1,15 +1,3 @@
-/* Notes:
- these functions are used to find the shortest path between specified pairs of tips.
- The algorithm proceeds as follows:
- 1) find the paths (pathA, pathB) to the root
- 2) find the MRCA, defined as the first term of pathA in pathB (same as the converse)
- 3) from A, go back to MRCA, adding crossed nodes to the result, not including the MRCA
- 4) from B, go back to MRCA, adding crossed nodes to the result, not including the MRCA
- 5) add the MRCA to the output
- 6) return the output
-*/
-
-
#include <math.h>
#include <time.h>
#include <string.h>
More information about the Adephylo-commits
mailing list