[Adephylo-commits] r152 - pkg/src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Mar 12 13:21:11 CET 2010
Author: jombart
Date: 2010-03-12 13:21:11 +0100 (Fri, 12 Mar 2010)
New Revision: 152
Added:
pkg/src/distPhylo.c
Modified:
pkg/src/sptips.c
Log:
Added copyrights n stuff.
Added: pkg/src/distPhylo.c
===================================================================
--- pkg/src/distPhylo.c (rev 0)
+++ pkg/src/distPhylo.c 2010-03-12 12:21:11 UTC (rev 152)
@@ -0,0 +1,468 @@
+/*
+ Coded by Thibaut Jombart (tjombart at imperial.ac.uk), March 2010.
+ Distributed with the adephylo package for the R software.
+ Licence: GPL >=2.
+
+ Notes:
+ these functions are used to find the shortest path between specified pairs of tips.
+ The algorithm proceeds as follows:
+ 1) find the paths (pathA, pathB) to the root
+ 2) find the MRCA, defined as the first term of pathA in pathB (same as the converse)
+ 3) from A, go back to MRCA, adding crossed nodes to the result, not including the MRCA
+ 4) from B, go back to MRCA, adding crossed nodes to the result, not including the MRCA
+ 5) add the MRCA to the output
+ 6) return the output
+*/
+
+
+#include <math.h>
+#include <time.h>
+#include <string.h>
+#include <stdlib.h>
+#include <R.h>
+#include <R_ext/Utils.h>
+#include "adesub.h"
+
+
+
+/*
+ =====================
+ UTILITARY FUNCTIONS
+ =====================
+*/
+
+
+/*
+ === REPLICATE %IN% / MATCH OPERATOR FOR INTEGERS ===
+ == for internal use only ==
+ - *b has to be a vector created using vecintalloc
+ - returns 0 if there are no matches, and the index of the first match otherwise
+*/
+int intAinB(int a, int *b, int lengthB){
+ if(lengthB == 0) return(0); /* avoid comparison with empty vector */
+
+ int i=1;
+
+ /* printf("\n AinB debugging: a=%d", a); */
+ while(i <= lengthB){
+ /* printf("\t i=%d \t bi=%d ", a, b[i]); */
+
+ if(b[i]==a) {
+ return(i);
+ } else {
+ i++;
+ }
+ }
+
+ return(0);
+} /* intAinB */
+
+
+
+
+
+/*
+ === REPLICATE SETDIFF MATCH OPERATOR FOR INTEGERS ===
+ == for internal use only ==
+ - *b has to be a vector created using vecintalloc
+ - finds (possibly duplicated) elements of a not in b
+*/
+void intANotInB(int *a, int *b, int lengthA, int lengthB, int *res, int *resSize){
+ int i;
+
+ /* a few checks */
+ if(lengthA==0) return;
+ if(lengthB==0){
+ *resSize = 0;
+ return;
+ }
+
+
+ for(i=1; i<=lengthA; i++){
+ if(intAinB(a[i], b, lengthB)==0){
+ *resSize = *resSize+1;
+ res[*resSize] = a[i];
+ }
+ }
+
+} /* intANotInB */
+
+
+
+
+
+
+/*
+ === UNION OF TWO INTEGER VECTORS ===
+ == for internal use only ==
+ - a, b, and res have to be created by vecintalloc
+ - returns unique(c(a,b))
+*/
+void unionInt(int *a, int *b, int lengthA, int lengthB, int *res, int *resSize){
+ if(lengthA==0 && lengthB && 0) {
+ *res = 0;
+ *resSize = 0;
+ return;
+ }
+
+ int i, idx;
+
+ res[1] = a[1]; /* initialization of temp results */
+ *resSize = 1;
+
+ /* For a */
+ for(i=1;i<=lengthA;i++){
+ idx = intAinB(a[i], res, *resSize); /* check if element is in res */
+ if(idx==0) {
+ *resSize = *resSize + 1;
+ res[*resSize] = a[i];
+ }
+ }
+
+ /* For b */
+ for(i=1;i<=lengthB;i++){
+ idx = intAinB(b[i], res, *resSize); /* check if element is in res */
+ if(idx==0) {
+ *resSize = *resSize + 1;
+ res[*resSize] = b[i];
+ }
+ }
+} /* unionInt */
+
+
+
+
+
+
+/*
+ === INTERSECTION OF TWO INTEGER VECTORS ===
+ == for internal use only ==
+ - a, b, and res have to be created by vecintalloc
+*/
+void intersectInt(int *a, int *b, int lengthA, int lengthB, int *res, int *resSize){
+ if((lengthA * lengthB) ==0) {
+ *res = 0;
+ *resSize = 0;
+ return;
+ }
+ int i, idx;
+
+ *resSize = 0;
+
+ /* store elements of a present in b */
+ for(i=1;i<=lengthA;i++){
+ idx = intAinB(a[i], b, lengthB) * intAinB(a[i], res, *resSize); /* a in b and not already in res */
+ if(idx != 0) {
+ *resSize = *resSize + 1;
+ res[*resSize] = a[i];
+ }
+ }
+} /* intersectInt */
+
+
+
+
+
+/*
+ === FIND THE PATH FROM A TIP TO THE ROOT ===
+ == for internal use only ==
+ - ances, desc and path must have been created using vecintalloc; their indices start at 1.
+ - N is the number of edges in the tree, i.e. number of rows of $edge
+*/
+void pathTipToRoot(int tip, int *ances, int *desc, int N, int *res, int *resSize){
+ int i, curNode=0, keepOn=1, nextNodeId;
+
+ curNode = tip;
+ *resSize = 0;
+
+ /* printf("\nInput inside pathTipTo...: \n"); */
+ /* for(i=1; i<= N;i++){ */
+ /* printf(" %d", res[i]); */
+ /* } */
+
+ while(keepOn==1){
+ nextNodeId = intAinB(curNode, desc, N);
+ /* printf("\n%d in desc: %d", curNode, nextNodeId); */
+
+ if(nextNodeId > 0){
+ *resSize = *resSize + 1;
+ curNode = ances[nextNodeId];
+ res[*resSize] = curNode;
+ } else {
+ keepOn = 0;
+ }
+ }
+
+ /* /\* debugging *\/ */
+ /* printf("\nOutput from pathTip... :"); */
+ /* printf("\nresSize: %d \n", *resSize); */
+
+ /* for(i=1; i<= *resSize;i++){ */
+ /* printf(" %d", res[i]); */
+ /* } */
+
+} /* pathTipToRoot */
+
+
+
+
+
+/*
+ === FIND THE MRCA BETWEEN TWO TIPS ===
+ == for internal use only ==
+ - a and b are two tips
+ - ances and desc must be created using vecintalloc
+ - N is the number of edges
+*/
+int mrca2tips(int *ances, int*desc, int a, int b, int N){
+ int i, res, idx;
+ int *pathAroot, *pathBroot, *lengthPathA, *lengthPathB;
+
+ /* allocate memory */
+ vecintalloc(&pathAroot, N);
+ vecintalloc(&pathBroot, N);
+ lengthPathA = (int *) calloc(1, sizeof(int));
+ lengthPathB = (int *) calloc(1, sizeof(int));
+
+ /* printf("\n N: %d", N); */
+ /* printf("\nEmpty res passed to pathTip...:\n"); */
+ /* for(i=1; i<= N;i++){ */
+ /* printf(" %d", pathAroot[i]); */
+ /* } */
+
+ /* find paths to the root */
+ pathTipToRoot(a, ances, desc, N, pathAroot, lengthPathA);
+ pathTipToRoot(b, ances, desc, N, pathBroot, lengthPathB);
+
+ /* debugging*/
+ /* printf("\n Information found within mrca2tips:\n"); */
+ /* printf("\nlengthPathA: %d \n", *lengthPathA); */
+ /* printf("\nlengthPathB: %d \n", *lengthPathB); */
+
+ /* printf("\nPath from %d to the root:\n", a); */
+ /* for(i=1; i<= *lengthPathA;i++){ */
+ /* printf(" %d", pathAroot[i]); */
+ /* } */
+
+ /* printf("\nPath from %d to the root\n", b); */
+ /* for(i=1; i<= *lengthPathB;i++){ */
+ /* printf(" %d", pathBroot[i]); */
+ /* } */
+
+ /* initialization */
+ i = 0;
+ idx = 0;
+
+ /* printf("\n - marker within mrca2tips - \n"); */
+ while(idx==0){
+ if(i == *lengthPathA){ /* that would indicate an error */
+ /* printf("\n Likely error: no MRCA found between specified tips."); */
+ /* free memory */
+ freeintvec(pathAroot);
+ freeintvec(pathBroot);
+ free(lengthPathA);
+ free(lengthPathB);
+ return(0);
+ }
+ i++;
+ idx = intAinB(pathAroot[i], pathBroot, *lengthPathB);
+ /* printf("\ni: %d idx: %d node: %d", i, idx, pathAroot[i]); */
+ }
+
+ /* store the result in a local variable */
+ res = pathBroot[idx];
+
+ /* free memory */
+ freeintvec(pathAroot);
+ freeintvec(pathBroot);
+ free(lengthPathA);
+ free(lengthPathB);
+
+ return(res);
+} /* end mrca */
+
+
+
+
+
+
+/*
+ === FIND SHORTEST PATH BETWEEN TWO TIPS ===
+ == for internal use only ==
+ - ances and desc must be created using vecintalloc
+ - N is the number of edges to represent the tree
+*/
+void sp2tips(int *ances, int *desc, int N, int tipA, int tipB, int *res, int *resSize){
+ /* declarations */
+ int *pathAroot, *pathBroot, *lengthPathA, *lengthPathB;
+ int k, myMrca;
+
+
+ /* allocate memory */
+ vecintalloc(&pathAroot, N);
+ vecintalloc(&pathBroot, N);
+ lengthPathA = (int *) calloc(1, sizeof(int));
+ lengthPathB = (int *) calloc(1, sizeof(int));
+
+
+ /* find paths to the root */
+ pathTipToRoot(tipA, ances, desc, N, pathAroot, lengthPathA);
+ pathTipToRoot(tipB, ances, desc, N, pathBroot, lengthPathB);
+
+ /* find the MRCA between both tips */
+ myMrca = mrca2tips(ances, desc, tipA, tipB, N);
+
+ /* go back the paths and stop at MRCA (exclude MRCA) */
+ /* for A */
+ k = 1;
+ *resSize = 0;
+ while(pathAroot[k] != myMrca){
+ *resSize = *resSize + 1;
+ res[*resSize] = pathAroot[k];
+ k++;
+ }
+
+ /* printf("\nsp step a:"); */
+ /* int i; */
+ /* for(i=1; i<=*resSize; i++){ */
+ /* printf(" %d", res[i]); */
+ /* } */
+
+ /* for B */
+ k = 1;
+ while(pathBroot[k] != myMrca){
+ *resSize = *resSize + 1;
+ res[*resSize] = pathBroot[k];
+ k++;
+ }
+
+
+ /* printf("\nsp step b:"); */
+ /* for(i=1; i<=*resSize; i++){ */
+ /* printf(" %d", res[i]); */
+ /* } */
+
+ /* add the MRCA */
+ *resSize = *resSize + 1;
+ res[*resSize] = myMrca;
+
+ /* printf("\nsp step mrca (%d):", myMrca); */
+ /* for(i=1; i<=*resSize; i++){ */
+ /* printf(" %d", res[i]); */
+ /* } */
+
+
+ /* free memory */
+ freeintvec(pathAroot);
+ freeintvec(pathBroot);
+ free(lengthPathA);
+ free(lengthPathB);
+
+} /* end sp2tips */
+
+
+
+
+
+
+
+/*
+ === FIND SHORTEST PATH BETWEEN ALL PAIRS OF TIPS ===
+ == for internal/external uses ==
+ - all arguments are passed from R
+ - N is the number of edges to represent the tree
+ - nTips is the total number of tips in the tree
+ - resSize is the total size of the output vector; it can't be known in advance, so a fake value has to be passed
+ - resId indicates how the final result should be cut
+*/
+void spalltips(int *ances, int *desc, int *N, int *nTips, int *res, int *resId, int *resSize){
+ /* declarations */
+ int i, j, k, m, idPair;
+ int *ancesLoc, *descLoc, *tempRes, *tempResSize; /* must use dynamic allocation */
+
+ /* allocate memory for local variables */
+ vecintalloc(&ancesLoc, *N);
+ vecintalloc(&descLoc, *N);
+ vecintalloc(&tempRes, *N);
+ tempResSize = (int *) calloc(1, sizeof(int));
+
+
+ /* create local vectors for ancestors and descendents */
+ ancesLoc[0] = *N;
+ descLoc[0] = *N;
+ for(i=0; i< *N; i++){
+ ancesLoc[i+1] = ances[i];
+ descLoc[i+1] = desc[i];
+ }
+
+
+ /* perform computations for all pairs of tips (indexed 'i,j') */
+ *tempResSize = 0;
+ *resSize = 0;
+ m = 0; /* used to browse 'res' and 'resId' */
+ idPair = 0;
+
+ /* printf("\ngot to 1"); */
+ /* debugging*/
+/* printf("\nancesLoc:\n");
+ for(i=1; i<= *N;i++){
+ printf(" %d", ancesLoc[i]);
+ }
+
+ printf("\ndesc:\n");
+ for(i=1; i<= *N;i++){
+ printf(" %d", descLoc[i]);
+ }
+
+ printf("\nN: %d", *N);
+*/
+ for(i=0; i<=(*nTips-2); i++){
+ for(j=(i+1); j<=(*nTips-1); j++){
+ /* temp results are save in tempRes and tempResSize */
+ idPair++;
+ sp2tips(ancesLoc, descLoc, *N, i+1, j+1, tempRes, tempResSize); /* i+1 and j+1 are tips id */
+
+ /* copy temp results to returned results */
+ *resSize = *resSize + *tempResSize;
+ for(k=1; k <= *tempResSize; k++){
+ res[m] = tempRes[k];
+ resId[m] = idPair;
+ m++;
+ }
+ }
+ }
+ /* printf("\ngot to 4"); */
+
+ /* free memory */
+ freeintvec(ancesLoc);
+ freeintvec(descLoc);
+ freeintvec(tempRes);
+ free(tempResSize);
+
+} /* end sptips */
+
+
+
+
+/* TESTING */
+/*
+
+library(adephylo)
+tre=rtree(10)
+plot(tre)
+nodelabels()
+tiplabels()
+
+res <- resId <- integer(1e5)
+resSize=as.integer(1e5)
+
+# void spalltips(int *ances, int *desc, int *N, int *nTips, int *res, int *resId, int *resSize){
+
+toto <- .C("spalltips", as.integer(tre$edge[,1]), as.integer(tre$edge[,2]), nrow(tre$edge), as.integer(nTips(tre)), res, resId, resSize)
+toto[[5]] <- toto[[5]][1:toto[[7]]]
+toto[[6]] <- toto[[6]][1:toto[[7]]]
+
+res <- split(toto[[5]], toto[[6]])
+res
+
+*/
Modified: pkg/src/sptips.c
===================================================================
--- pkg/src/sptips.c 2010-03-12 12:17:26 UTC (rev 151)
+++ pkg/src/sptips.c 2010-03-12 12:21:11 UTC (rev 152)
@@ -1,4 +1,9 @@
-/* Notes:
+/*
+ Coded by Thibaut Jombart (tjombart at imperial.ac.uk), March 2010.
+ Distributed with the adephylo package for the R software.
+ Licence: GPL >=2.
+
+ Notes:
these functions are used to find the shortest path between specified pairs of tips.
The algorithm proceeds as follows:
1) find the paths (pathA, pathB) to the root
More information about the Adephylo-commits
mailing list