[Seqinr-commits] r1831 - pkg/src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Dec 2 17:34:12 CET 2014


Author: simonpenel
Date: 2014-12-02 17:34:12 +0100 (Tue, 02 Dec 2014)
New Revision: 1831

Added:
   pkg/src/Makevars
   pkg/src/Makevars.win
   pkg/src/alignment.c
   pkg/src/alignment.h
   pkg/src/fastacc.c
   pkg/src/getzlibsock.c
   pkg/src/kaks.c
   pkg/src/util.c
   pkg/src/zsockr.c
Log:
Removal of zlib code and headers  with help from Prof. Ripley


Added: pkg/src/Makevars
===================================================================
--- pkg/src/Makevars	                        (rev 0)
+++ pkg/src/Makevars	2014-12-02 16:34:12 UTC (rev 1831)
@@ -0,0 +1,2 @@
+PKG_CFLAGS =  -DUSE_TYPE_CHECKING_STRICT
+PKG_LIBS = -lz

Added: pkg/src/Makevars.win
===================================================================
--- pkg/src/Makevars.win	                        (rev 0)
+++ pkg/src/Makevars.win	2014-12-02 16:34:12 UTC (rev 1831)
@@ -0,0 +1,2 @@
+PKG_CFLAGS = -DUSE_TYPE_CHECKING_STRICT
+# PKG_LIBS = -lz -lws2_32 -mwindows

Added: pkg/src/alignment.c
===================================================================
--- pkg/src/alignment.c	                        (rev 0)
+++ pkg/src/alignment.c	2014-12-02 16:34:12 UTC (rev 1831)
@@ -0,0 +1,906 @@
+#include "alignment.h"
+
+
+void rem_blank(char *string)
+{
+  int ii;
+
+
+  ii = strlen(string);
+
+  for( ;ii >=0; ii--) {
+    if(string[ii] == 0 || string[ii] == '\n' ||
+       string[ii] == ' ' || string[ii] == '\t') string[ii] = 0;
+    else break;
+  }
+  
+
+}
+/**************************** end rem_blank ************************/
+
+void free_mase(struct SEQMASE * aln, int nbsq)
+     
+{
+  int ii;
+
+
+  for(ii = 0; ii <= nbsq; ii++) {
+    free(aln[ii].seq);
+    free(aln[ii].com);
+  }
+  
+  free((char *) aln);
+  
+  
+}
+
+/******************************** end free_mase ************************/
+
+
+int one_more_seq_found(int count1, char ***pseq, char ***pseqname, char ***pcomments)
+{
+  static int max_count;
+  char **seq, **seqname, **comments;
+
+  if(count1 == -1) max_count = 0;
+  
+  if(count1 + 1 < max_count) return count1 + 1;
+
+  count1++;
+  if(max_count == 0) {
+    max_count = 100;
+    seq = (char **)malloc(max_count * sizeof(char *));
+    if(seq == NULL) return -1;
+    seqname = (char **)malloc(max_count * sizeof(char *));
+    if(seqname == NULL) return -1;
+    comments = (char **)malloc(max_count * sizeof(char *));
+    if(comments == NULL) return -1;
+  }
+  else {
+    seq = *pseq; seqname = *pseqname; comments = *pcomments;
+    max_count = 3 * max_count;
+    seq = (char **)realloc(seq, max_count * sizeof(char *));
+    if(seq == NULL) return -1;
+    seqname = (char **)realloc(seqname, max_count * sizeof(char *));
+    if(seqname == NULL) return -1;
+    comments = (char **)realloc(comments, max_count * sizeof(char *));
+    if(comments == NULL) return -1;
+  }
+  
+  *pseq = seq; *pseqname = seqname; *pcomments = comments;
+  return count1;
+}
+
+/******************************** end one_more_seq_found ************************/
+
+/***********************************************************************************************************************/
+/*  lit un fichier MASE, renvoie une liste (objet R) contenant les séquences, les commentaies et les noms des espèces. */
+/***********************************************************************************************************************/
+
+
+SEXP read_mase(SEXP nomfic)
+{
+  char *fic_name;
+  FILE *fic;
+  struct SEQMASE *aln;
+  int nb_seq;
+  int lg_max = 0, lg, lgs, lgc;
+  char string[MAXSTRING + 1];
+  char c1, c2;
+  int i,ii, jj, kk = 0, numline, maxcom = 0;
+ 
+  SEXP listseq;
+  SEXP essai;
+  SEXP listcom;
+  SEXP listmn;
+  SEXP nombreseq;
+ 
+
+  /*Passages des objets R (paramètres) dans des variables C */ 
+  fic_name = (char *) CHAR(STRING_ELT(nomfic, 0)); 
+
+
+
+  if((fic = fopen(fic_name, "r")) == NULL) {
+    error("Can't open file");
+  }
+  
+  c1 = 0;
+  nb_seq = 0;
+  lg = lgc = 0;
+  while(fgets(string, MAXSTRING, fic) != NULL) {
+    string[MAXSTRING] = 0;
+   
+    lgs = strlen(string);
+    
+    if(lgs >= (MAXSTRING - 1)) {
+      REprintf("\n Fatal Error. Too long line in alignment (> %d).\n", MAXSTRING);
+      REprintf("Increase MAXSTRING and recompile.\n"); 
+    }
+    
+    c2 = string[0];
+    
+    if(string[0] == ';' && string[1] != ';') {
+      lgc += (lgs + 1);
+    }
+    
+    
+    if(c1 == ';' && c2 != c1) {
+      nb_seq++;
+     if(lg > lg_max) lg_max = lg;
+     if(lgc > maxcom) maxcom = lgc;
+     lg = lgc = 0;
+    }
+    
+    else if(c2 != ';') lg += lgs;
+    c1 = c2;
+    
+  }
+  if(lg > lg_max) lg_max = lg;
+  
+
+  /******************************************/
+  /* Création de 6 objets R qui seront      */
+  /******************************************/
+
+  PROTECT(listseq=allocVector(VECSXP,nb_seq));
+  PROTECT(essai=allocVector(VECSXP,5));
+  PROTECT(listcom=allocVector(VECSXP,nb_seq));
+  PROTECT(listmn=allocVector(VECSXP,nb_seq));
+  PROTECT(nombreseq=NEW_INTEGER(1));
+ 
+ 
+ 
+  aln = (struct SEQMASE *) calloc(nb_seq + 1, sizeof(struct SEQMASE));
+
+ 
+  for(ii = 0; ii <= nb_seq; ii++) {
+    aln[ii].seq = (char *) calloc(lg_max + 1, sizeof(char));
+    aln[ii].com = (char *) calloc(maxcom + 1, sizeof(char));
+    aln[ii].com[0] = 0;
+ }
+ 
+
+ rewind(fic);
+
+ numline = 0;
+ ii = -1;
+ while(fgets(string, MAXSTRING, fic) != NULL) {
+   numline++;
+   string[MAXSTRING] = 0;
+   if ((string[0] != ';') && (numline == 1))
+   	{
+	error("Not a MASE file"); /* check format, thanks to J.H. Troesemeier */
+	goto fini;
+	}
+   
+   c2 = string[0];
+   
+   if(string[0] == ';' && string[1] != ';') {
+     strcat(aln[ii + 1].com, string);
+        }
+   
+   if(c1 == ';' && c2 != c1) {
+     ii++;
+     kk = aln[ii].lg = 0;
+	
+     rem_blank(string);
+
+     if((int) strlen(string) >= (MAXMNMASE - 1)) {
+       REprintf("Error. Maximum sequance name is   %d characters\n", MAXMNMASE);
+       error("sequence name too long!");
+     } 
+     
+     strcpy(aln[ii].mn, string);
+     
+     lg = 0;
+   }
+   
+   else if(c2 != ';') {
+     for(jj = 0; jj < MAXSTRING; jj++) {
+       if(string[jj] == 0) break;
+       if(string[jj] == ' ') continue;
+       if(string[jj] == '\n') continue;
+       if(string[jj] == '\t') continue;
+            aln[ii].seq[kk++] = string[jj];
+            aln[ii].lg = kk;
+     }
+   }
+   c1 = c2;
+   
+ }
+ 
+
+ fclose(fic);
+ 
+ lg_max = aln[0].lg;
+
+ for(ii = 1; ii < nb_seq; ii++)
+   if(aln[ii].lg > lg_max) lg_max = aln[ii].lg;
+ 
+ INTEGER(nombreseq)[0]=(int)nb_seq;
+ 
+
+ for(i=0;i<nb_seq;i++){
+   SET_ELEMENT(listseq,i,mkChar(aln[i].seq));
+ }
+
+ for(i=0;i<nb_seq;i++){
+   SET_ELEMENT(listcom,i,mkChar(aln[i].com));
+ }
+
+for(i=0;i<nb_seq;i++){
+   SET_ELEMENT(listmn,i,mkChar(aln[i].mn));
+ }
+ 
+ SET_ELEMENT(essai,0,nombreseq);
+ SET_ELEMENT(essai,1,listmn);
+ SET_ELEMENT(essai,2,listseq);
+ SET_ELEMENT(essai,3,listcom);
+ 
+ fini: 
+ free_mase(aln,nb_seq);
+ UNPROTECT(5);
+ 
+ return(essai);
+
+}
+
+
+/********************** end read_mase ****************************/
+
+  
+
+
+/*************************************************************************/
+/* Compute distance between two aligned sequences using different matrix */
+/*************************************************************************/
+
+
+SEXP distance(SEXP sequences,SEXP nbseq, SEXP matNumber, SEXP seqtype, SEXP gapoption){
+
+  SEXP d;
+  int MAXNSEQS;
+  char **seq;
+  int gap_option;
+  int i, j, k, n,totseqs, seq_long, nbases;
+  int mat_number, seq_type;
+  int **ndiff;
+  double **dist;
+
+  int mat_pos[]  = { 17, -1, 15, 0, 1, 12, 18,  4,  9, -1,  2, 10, 16, 5, -1, 19,  6, 3, 7, 8, -1, 11, 13, -1, 14, -1 };
+
+  const char DNA[]  = "ACGTXN-";
+  const char Prot[] = "DEKRHNQSTILVFWYCMAGPX*-";
+  int matp[20][20] = { {1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		       {1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		       {2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		       {2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		       {2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		       {2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 1/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		       {2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 1/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		       {2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 1/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		       {2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 1/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		       {1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1},
+		       {1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1},
+		       {1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1},
+		       {1, 1, 1, 1, 1, 1, 1, 1, 1, 2/3, 2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 1, 1, 1},
+		       {1, 1, 1, 1, 1, 1, 1, 1, 1, 2/3, 2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 1, 1, 1},
+		       {1, 1, 1, 1, 1, 1, 1, 1, 1, 2/3, 2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 1, 1, 1 },
+		       {1, 1, 1, 1, 1, 1, 1, 1, 1, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 2/3, 1, 1, 1},
+		       {1, 1, 1, 1, 1, 1, 1, 1, 1, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 1, 1, 1},
+		       {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3, 1/3, 1},
+		       {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3, 1/3, 1},
+		       {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3} };
+  
+
+ 
+  
+  MAXNSEQS = INTEGER_VALUE(nbseq);
+  totseqs = INTEGER_VALUE(nbseq);
+  mat_number= INTEGER_VALUE(matNumber);
+  seq_type = INTEGER_VALUE(seqtype);
+  gap_option =  INTEGER_VALUE(gapoption);
+
+  PROTECT(d=NEW_NUMERIC(totseqs*totseqs));
+
+  seq = (char **)malloc(totseqs*sizeof(char *));
+   
+  for(i=0;i<totseqs;i++){
+    seq[i] = (char *) CHAR(STRING_ELT(sequences,i));
+  }
+
+  ndiff = (int **)malloc(MAXNSEQS*sizeof(int *));
+  
+  for(j=0; j < MAXNSEQS; j++){ 
+    ndiff[j] = (int *)malloc(MAXNSEQS*sizeof(int));
+  }
+  
+  dist = (double **)malloc(MAXNSEQS*sizeof(double *));
+  
+  for(j=0; j < MAXNSEQS; j++){ 
+    dist[j] = (double *)malloc(MAXNSEQS*sizeof(double));
+  }
+  
+  
+  
+  /*********************************************************/
+  /*     Computing distance between sequences i and j      */
+  /*********************************************************/
+  
+    seq_long = (int)strlen(seq[0]);
+    
+    for (i = 0; i < totseqs; i++)
+      {
+        dist[i][i] = 0.0;
+      }
+
+    for (i = 0; i < totseqs; i++)
+    {
+      for (j = 0; j < i; j++)
+        {
+	  ndiff[j][i] = ndiff[i][j] = 0;
+	  nbases = 0;
+	  for (k = 0; k < seq_long; k++)
+            {
+	      
+	      if(seq_type == 1){
+	 
+		/***************************/
+                /* DNA/RNA sequences       */
+		/***************************/
+		if ((strchr(DNA, seq[i][k]) == NULL) || (strchr(DNA, seq[j][k]) == NULL))
+		  seq[i][k] = seq[j][k] = 'X';
+		if (seq[i][k] != '-' && seq[i][k] != 'N' && seq[i][k] != 'X' &&
+		    seq[j][k] != '-' && seq[j][k] != 'N' && seq[j][k] != 'X')
+		  {
+		    nbases++;
+		    if (seq[i][k] != seq[j][k])
+		      {
+			ndiff[j][i]++;
+			ndiff[i][j]++;
+		      }
+		  }
+		  
+		  if (gap_option == 1)
+		  	{
+			if (seq[i][k] == '-'  && seq[j][k] != '-' && seq[j][k] != 'N' && seq[j][k] != 'X')
+				{
+				nbases++;
+				ndiff[j][i]++;
+				ndiff[i][j]++;
+				}
+			if (seq[j][k] == '-'  && seq[i][k] != '-' && seq[i][k] != 'N' && seq[i][k] != 'X')
+				{
+				nbases++;
+				ndiff[j][i]++;
+				ndiff[i][j]++;
+				}
+			
+			}
+	      }
+
+	      else if(mat_number == 1 && seq_type == 0){
+
+		/********************************************/
+		/*Protein sequences with similarity matrix  */
+		/********************************************/
+
+		  if ((strchr(Prot, seq[i][k]) == NULL) || (strchr(Prot, seq[j][k]) == NULL))
+		    seq[i][k] = seq[j][k] = 'X';
+		  if (seq[i][k] != '-' && seq[i][k] != '*' && seq[i][k] != 'X' &&
+		      seq[j][k] != '-' && seq[j][k] != '*' && seq[j][k] != 'X')
+                    {
+		      nbases++;
+		      ndiff[i][j] = ndiff[i][j] + matp[mat_pos[seq[i][k] - 65]][mat_pos[seq[j][k] - 65]];
+		      ndiff[j][i] = ndiff[i][j];
+                
+		    }
+	      }
+
+	      else if( mat_number == 2 && seq_type == 0){
+
+		/********************************************/
+		/*  Protein sequences with identity matrix  */
+		/********************************************/
+
+		if ((strchr(Prot, seq[i][k]) == NULL) || (strchr(Prot, seq[j][k]) == NULL))
+		  seq[i][k] = seq[j][k] = 'X';
+		if (seq[i][k] != '-' && seq[i][k] != '*' && seq[i][k] != 'X' &&
+		      seq[j][k] != '-' && seq[j][k] != '*' && seq[j][k] != 'X')
+		  {
+		      nbases++;
+		      if (seq[i][k] != seq[j][k])
+                        {
+			  ndiff[j][i]++;
+			  ndiff[i][j]++;
+                        }
+		  }	
+		
+	      }
+	      
+	      dist[i][j] = dist[j][i] = sqrt((double)ndiff[i][j]/nbases);
+	    }
+	}
+    }
+    
+    
+    /********************************************************************************/
+    /* Remplissage de l'objet R (matrice de taille nb_seq * nb_seq  avec dist       */
+    /********************************************************************************/
+
+    n=0;
+    
+    for(i=0;i<totseqs;i++){
+      for(j=0;j<totseqs;j++){
+	REAL(d)[n+j]=dist[i][j];
+      }
+      n=n+totseqs;
+    }
+
+    UNPROTECT(1);
+    
+    return(d);
+}
+
+/****************************************/
+/* Lecture  d'un fichier au format msf */
+/***************************************/
+
+SEXP read_msf_align(SEXP ficname)
+{
+
+  SEXP list;
+  SEXP listseq;
+  SEXP listname;
+  SEXP nombreseq;
+  char *fname;
+ FILE *in; 
+ char line[100], *p, *q;
+ int i,l, curr_spec, maxwidname=0, curr_len, tot_spec, wid_1_line, wid_block;
+ char **seq, **seqname, **comments;
+ 
+ fname = (char *) CHAR(STRING_ELT(ficname,0)); 
+
+ PROTECT(nombreseq=NEW_INTEGER(1));
+  PROTECT(list=allocVector(VECSXP,3));
+
+ in=fopen(fname,"r");
+ if(in==NULL) {
+   error("File not found");
+ }
+ 
+ /* compter le nbre de seqs dans le fichier */
+ tot_spec = 0;
+ while(fgets(line, sizeof(line), in) != NULL) {
+   if(strncmp(line, "//", 2) == 0) break;
+   if(strncmp(line, " Name: ", 7) == 0) tot_spec++;
+ }
+ rewind(in);
+ 
+ INTEGER(nombreseq)[0]=tot_spec;
+ 
+ PROTECT(listname=allocVector(VECSXP,tot_spec));
+ PROTECT(listseq=allocVector(VECSXP,tot_spec));
+   
+   seq = (char **)malloc(tot_spec * sizeof(char *));
+   if(seq == NULL) goto nomem;
+   comments = (char **)malloc(tot_spec * sizeof(char *));
+   if(comments == NULL) goto nomem;
+   seqname = (char **)malloc(tot_spec * sizeof(char *));
+   if(seqname == NULL) goto nomem;
+   
+   p = NULL;
+   while( fgets(line,sizeof(line),in) != NULL) {
+     if( (p = strstr(line, "MSF: ")) != NULL) break;
+   }
+   if(p == NULL) {
+     error("File not in MSF format!");
+     tot_spec = -1; goto fini;
+   }
+   tot_spec = -1;
+   do	{
+     fgets(line,sizeof(line),in);
+     if( (p = strstr(line, "Name:") ) == NULL) continue;
+     tot_spec++;
+     q = strstr(p, " Len: "); 
+     sscanf(q + 5, "%d", &l);
+     seq[tot_spec] = (char *)malloc(l + 1);
+     if(seq[tot_spec]==NULL) goto nomem;
+     p += 5; while(*p == ' ') p++;
+     q = p; while(*q != ' ') q++;
+     l = q - p;
+     seqname[tot_spec] = (char *)malloc(l + 1);
+     if(seqname[tot_spec]==NULL) goto nomem;
+     memcpy(seqname[tot_spec], p, l); seqname[tot_spec][l] = 0;
+     if(l > maxwidname) maxwidname = l;
+     comments[tot_spec] = NULL;
+   }
+   while(strncmp(line, "//", 2) != 0);
+   curr_spec = 0; curr_len = 0; wid_block = 0;
+   while( fgets(line, sizeof(line), in) != NULL ) {
+     p = line; while(*p == ' ') p++;
+     l = strlen(seqname[curr_spec]);
+     if(strncmp(p, seqname[curr_spec], l) != 0) continue;
+     p += l; while(*p == ' ') p++; p--;
+     q = seq[curr_spec] + curr_len;
+     while( *(++p) != '\n') {
+       if( *p == ' ') continue;
+       if(*p == '.') *p = '-';
+       *(q++) = *p;
+     }
+     *q = 0;
+     wid_1_line = q - (seq[curr_spec] + curr_len);
+     wid_block = (wid_1_line > wid_block ? wid_1_line : wid_block);
+     if(curr_spec == tot_spec) {
+       curr_len += wid_block;
+       curr_spec = 0;
+       wid_block = 0;
+     }
+     else	curr_spec++;
+   }
+   
+   for(i=0; i<tot_spec+1; i++) {
+     SET_ELEMENT(listname,i,mkChar(seqname[i]));
+     SET_ELEMENT(listseq,i,mkChar(seq[i]));
+   }
+   
+   SET_ELEMENT(list,0,nombreseq);
+   SET_ELEMENT(list,1,listname);
+   SET_ELEMENT(list,2,listseq);
+   
+   
+ fini:
+   fclose(in);
+   UNPROTECT(4);
+   return list;
+ nomem:
+   error("Not enough memory!");
+   tot_spec = -1;
+   goto fini;
+}
+
+
+/******************************************/
+/* Lecture  d'un fichier au format phylip */
+/******************************************/
+
+
+SEXP read_phylip_align(SEXP ficname)
+{
+
+  SEXP list;
+  SEXP listseq;
+  SEXP listname;
+  SEXP nombreseq;
+  char *fname;
+  FILE *in;
+  char *p, *q, line[PHYNAME + 200];
+  char **seq, **comments, **seqname;
+  int totseqs, lenseqs, i, l;
+  
+  q=0;
+  fname = (char *) CHAR(STRING_ELT(ficname,0)); 
+  
+  PROTECT(nombreseq=NEW_INTEGER(1));
+  PROTECT(list=allocVector(VECSXP,3));
+  
+  
+  in=fopen(fname,"r");
+  if(in==NULL) {
+    error("file not found");
+  }
+  fgets(line,sizeof(line),in);
+  if( sscanf(line, "%d%d", &totseqs, &lenseqs) != 2) {
+    error("Not a PHYLIP file");
+    totseqs = 0;
+    goto fini;
+  }
+
+  INTEGER(nombreseq)[0]=totseqs;
+
+  PROTECT(listname=allocVector(VECSXP,totseqs));
+  PROTECT(listseq=allocVector(VECSXP,totseqs));
+  
+  seq = (char **)malloc(totseqs * sizeof(char *));
+  if(seq == NULL) goto nomem;
+  seqname = (char **)malloc(totseqs * sizeof(char *));
+  if(seqname == NULL) goto nomem;
+  comments = (char **)malloc(totseqs * sizeof(char *));
+  if(comments == NULL) goto nomem;
+  for(i=0; i<totseqs; i++) {
+    if( (seq[i] = (char *)malloc(lenseqs+1) ) == NULL ) goto nomem;
+    if( (seqname[i] = (char *)malloc(PHYNAME+1) ) == NULL ) goto nomem;
+    comments[i] = NULL;
+  }
+  for(i=0; i<totseqs; i++) {
+    fgets(line,sizeof(line),in);
+    memcpy(seqname[i],line,PHYNAME); seqname[i][PHYNAME] = 0;
+    p = line+PHYNAME; q = seq[i];
+    while(*p != '\n') {
+      if(*p != ' ') *(q++) = *p;
+      p++;
+    }
+  }
+  l = q - seq[totseqs - 1];
+  while( l < lenseqs) {
+    fgets(line,sizeof(line),in);
+    for(i=0; i<totseqs; i++) {
+      fgets(line,sizeof(line),in);
+      p = line; q = seq[i] + l;
+      while(*p != '\n') {
+	if(*p != ' ') *(q++) = *p;
+	p++;
+      }
+    }
+    l = q - seq[totseqs - 1];
+  }
+  for(i=0; i<totseqs; i++) seq[i][l] = 0;
+
+
+
+  for(i=0; i<totseqs; i++) {
+    SET_ELEMENT(listname,i,mkChar(seqname[i]));
+    SET_ELEMENT(listseq,i,mkChar(seq[i]));
+  }
+
+  SET_ELEMENT(list,0,nombreseq);
+  SET_ELEMENT(list,1,listname);
+  SET_ELEMENT(list,2,listseq);
+
+
+ fini:
+ fclose(in);
+ UNPROTECT(4);
+ return list;
+ nomem:
+ error("Not enough memory!");
+ totseqs = 0;
+ goto fini;
+}
+
+
+
+/*************************************/
+/* Reading alignment in fasta format */
+/*************************************/
+
+SEXP read_fasta_align(SEXP ficname)
+{
+  SEXP list;
+  SEXP listseq;
+  SEXP listname;
+  SEXP nombreseq;
+  char *fname;
+  FILE *in;
+  int totseqs, lseq, l2, l, lenseqs;
+  char line[200], *p, *i;
+  char **seq, **seqname, **comments;
+
+  fname = (char *) CHAR(STRING_ELT(ficname, 0)); 
+
+  PROTECT(nombreseq = NEW_INTEGER(1));
+  PROTECT(list = allocVector(VECSXP, 3));
+
+  /* Check that the file is available */
+
+  if((in = fopen(fname, "r")) == NULL) error("File not found");
+  
+  /* How many sequences are in the file ? */
+
+  totseqs = 0;
+  while(fgets(line, sizeof(line), in) != NULL)
+    if(*line == '>') totseqs++;
+  rewind(in);
+
+  /* R objects creation */  
+  
+  INTEGER(nombreseq)[0] = totseqs;
+  PROTECT(listname = allocVector(VECSXP, totseqs));
+  PROTECT(listseq = allocVector(VECSXP, totseqs));
+
+  /* Memory allocation */
+
+  seq = (char **) R_alloc(totseqs, sizeof(char *));
+  comments = (char **) R_alloc(totseqs, sizeof(char *));
+  seqname = (char **) R_alloc(totseqs, sizeof(char *));
+ 
+  lenseqs = MAXLENSEQ;
+  totseqs = -1;
+  i = fgets(line, sizeof(line), in);
+  if(line[0] != '>')
+    error("File not in Fasta format!\n");
+
+  /* Main loop to read line by line the file */
+
+  while( i != NULL ){
+    totseqs++;
+    comments[totseqs] = NULL;
+    p = line + 1; 
+    while(*p != '\n')  
+      p++;
+    l = p - line - 1;
+
+    seqname[totseqs] = (char *) R_alloc(l + 1, sizeof(char));
+
+    memcpy(seqname[totseqs], line + 1, l); 
+    seqname[totseqs][l] = '\0';	
+    SET_ELEMENT(listname, totseqs, mkChar(seqname[totseqs]));
+
+    seq[totseqs] = (char *) R_alloc(lenseqs + 1, sizeof(char));
+    lseq = 0;
+
+    while( (i = fgets(line, sizeof(line), in)) !=  NULL && *i != '>' ) {
+      l2 = strlen(line);
+      if( line[l2 - 1] == '\n' ) l2--;
+      while(l2 > 0 && line[l2 - 1] == ' ')
+        l2--;
+      if(lseq + l2 > lenseqs) {
+        char *temp;
+	lenseqs += MAXLENSEQ;
+	temp = R_alloc(lenseqs + 1, sizeof(char));
+	memcpy(temp, seq[totseqs], lseq);
+	seq[totseqs] = temp;
+      }
+      memcpy(seq[totseqs] + lseq, line, l2);
+      lseq += l2;
+    }
+    seq[totseqs][lseq] = '\0';
+    SET_ELEMENT(listseq, totseqs, mkChar(seq[totseqs]));
+  }
+
+  SET_ELEMENT(list, 0, nombreseq);
+  SET_ELEMENT(list, 1, listname);
+  SET_ELEMENT(list, 2, listseq);
+ 
+  fclose(in);
+  UNPROTECT(4);
+  return list;
+}
+
+
+
+/*******************************************/
+/* Lecture  d'un fichier au format clustal */
+/*******************************************/
+
+
+
+SEXP read_clustal_align(SEXP ficname)
+{
+
+  SEXP list;
+  SEXP listseq;
+  SEXP listname;
+  SEXP nombreseq;
+  char *fname;
+  FILE *in;
+  char line[200], *p;
+  int i, l = 0, curr_spec, first=TRUEL, curr_len, next_len, tot_spec, curr_max_len =0, carac, wid_name = 0;
+  char **seq, **comments, **seqname = NULL;
+  
+
+  fname = (char *) CHAR(STRING_ELT(ficname,0)); 
+  
+  PROTECT(nombreseq=NEW_INTEGER(1));
+  PROTECT(list=allocVector(VECSXP,3));
+ 
+  in=fopen(fname,"r");
+
+  if(in==NULL) {
+    error("file not found");
+    return 0;
+  }
+
+  fgets(line,sizeof(line),in);
+  if(strncmp(line,"CLUSTAL",7) != 0) { /* skip 1st line with CLUSTAL in it */
+    error("File not in CLUSTAL format!");
+    tot_spec = -1; goto fini;
+  }
+ 
+  /* skip next empty lines */
+  do	{
+    carac = getc(in);
+    if(carac == ' ') {
+      fgets(line,sizeof(line),in);
+      carac = getc(in);
+    }
+  }
+  while(carac == '\n' || carac == '\r');
+  ungetc(carac, in); /* back to start of 1st non-empty line */
+  tot_spec = curr_spec = -1; curr_len = next_len = 0;
+  while( fgets(line, sizeof(line), in) != NULL ) {
+    if(*line == '\n' || *line == ' ') {
+      curr_spec = -1;
+      curr_len = next_len;
+      first = FALSE;
+      continue;
+    }
+    
+    else if(tot_spec >= 0 && curr_spec == -1 &&
+	    strncmp(line, seqname[0], strlen(seqname[0]) ) != 0) {
+      break;
+    }
+    else {
+	  if(first) {
+	    curr_spec = one_more_seq_found(curr_spec, &seq, &seqname, &comments);
+	    if(curr_spec == -1) goto nomem;
+	  }
+	  else	curr_spec++;
+    }
+    
+    
+    if(first && curr_spec == 0) {
+      /* calcul long partie nom: enlever tout ce qui n'est pas espace en fin */
+      p = line + strlen(line) - 2; 
+      while(*p == ' ' || isdigit(*p) ) p--; 
+      while (*p != ' ') p--;
+      wid_name = p - line + 1;
+    }   
+    
+    
+    if(first) {
+      seqname[curr_spec] = (char *)malloc(wid_name+1);
+      if(seqname[curr_spec]==NULL) {
+	goto nomem;
+      }
+      memcpy(seqname[curr_spec], line, wid_name);
+      p = seqname[curr_spec] + wid_name - 1;
+      while(*p==' ') p--; *(p+1)=0;
+      if(curr_spec > tot_spec) tot_spec = curr_spec;
+      seq[curr_spec] = (char *)malloc(CLU_BLOCK_LEN+1);
+      curr_max_len = CLU_BLOCK_LEN;
+      if(seq[curr_spec]==NULL) {
+	goto nomem;
+      }
+      comments[curr_spec] = NULL;
+    }
+    if(curr_spec == 0) {
+      l = strlen(line) - 1;
+      p = line + l - 1; 
+      while(*p == ' ' || isdigit(*p) ) { p--; l--; }
+      l -= wid_name;
+      if(curr_len + l > curr_max_len) {
+	curr_max_len += CLU_BLOCK_LEN;
+	for(i=0; i<=tot_spec; i++) {
+	  p = (char *)malloc(curr_max_len+1);
+	  if(p == NULL) goto nomem;
+	  memcpy(p, seq[i], curr_len);
+	  free(seq[i]);
+	  seq[i] = p;
+	}
+      }
+      next_len = curr_len + l;
+    }
+    memcpy(seq[curr_spec]+curr_len, line + wid_name, l);
+  }
+  for(i=0; i<=tot_spec; i++) seq[i][next_len] = 0;
+  seq = (char **)realloc(seq, (tot_spec + 1)*sizeof(char *));
+  seqname = (char **)realloc(seqname, (tot_spec + 1)*sizeof(char *));
+  comments = (char **)realloc(comments, (tot_spec + 1)*sizeof(char *));
+
+
+  INTEGER(nombreseq)[0]=tot_spec+1;
+
+  PROTECT(listname=allocVector(VECSXP,tot_spec+1));
+  PROTECT(listseq=allocVector(VECSXP,tot_spec+1));
+
+  for(i=0; i<tot_spec+1; i++) {
+    SET_ELEMENT(listname,i,mkChar(seqname[i]));
+    SET_ELEMENT(listseq,i,mkChar(seq[i]));
+  }
+
+  SET_ELEMENT(list,0,nombreseq);
+  SET_ELEMENT(list,1,listname);
+  SET_ELEMENT(list,2,listseq);
+  
+  
+ fini:
+  fclose(in);
+  UNPROTECT(4);
+  return list;
+ nomem:
+  error("Not enough memory!");
+  tot_spec = -1;
+  goto fini;
+}

Added: pkg/src/alignment.h
===================================================================
--- pkg/src/alignment.h	                        (rev 0)
+++ pkg/src/alignment.h	2014-12-02 16:34:12 UTC (rev 1831)
@@ -0,0 +1,33 @@
+#include <Rinternals.h>
+#include <R.h>
+#include <Rdefines.h>
+#include <ctype.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+
+#define FALSE 0
+#define TRUEL (!FALSE)
+#define MAXLENCOM 50000 /* long max des commentaires sous mase */
+#define MAX_SPECIES_SETS 50 /* nbre max de species sets */
+#define PHYNAME 10
+#define CLU_WID_NAME 16
+#define MSF_WID_NAME 15
+#define CLU_BLOCK_LEN 5000 /* block pour allocation mem format Clustal */
+#define MAX_GAP_SITES 1000
+#define MAXLENSEQ 10000
+#define MAXMNMASE 30
+#define MAXSTRING 10000
+
+struct SEQMASE
+{
+    char mn[MAXMNMASE];
+    char *com;
+    char *seq;
+    int lg;
+};

Added: pkg/src/fastacc.c
===================================================================
--- pkg/src/fastacc.c	                        (rev 0)
+++ pkg/src/fastacc.c	2014-12-02 16:34:12 UTC (rev 1831)
@@ -0,0 +1,39 @@
+#include <R.h>
+#include <Rdefines.h>
+
+
+SEXP fastacc(SEXP bits_in_char, SEXP target, SEXP database, SEXP noc, SEXP n){
+  int i,j;
+  SEXP res;
+  int *pbits_in_char, *pnoc, *pn, *pres;
+  unsigned char *ptarget, *pdatabase;
+  int ires;
+
+  PROTECT(bits_in_char = AS_INTEGER(bits_in_char));
+  pbits_in_char = INTEGER_POINTER(bits_in_char);
+
+  PROTECT(target = AS_RAW(target));
+  ptarget = RAW_POINTER(target);
+
+  PROTECT(database = AS_RAW(database));
+  pdatabase = RAW_POINTER(database);
+
+  PROTECT(noc = AS_INTEGER(noc));
+  pnoc = INTEGER_POINTER(noc);
+
+  PROTECT(n = AS_INTEGER(n));
+  pn = INTEGER_POINTER(n);
+
+  PROTECT(res = NEW_INTEGER(*pn));
+  pres = INTEGER_POINTER(res);
+
+  for(ires = i = 0 ; i < *pn * *pnoc; i += *pnoc, ires++){
+    pres[ires] = 0;
+    for(j = 0; j < *pnoc ; j++){
+      pres[ires] += pbits_in_char[pdatabase[i+j] & ptarget[j]];
+    }
+  }
+
+  UNPROTECT(6);
+  return(res);
+}

Added: pkg/src/getzlibsock.c
===================================================================
--- pkg/src/getzlibsock.c	                        (rev 0)
+++ pkg/src/getzlibsock.c	2014-12-02 16:34:12 UTC (rev 1831)
@@ -0,0 +1,241 @@
+#include <R.h>
+#include <Rdefines.h>
+#include <Rinternals.h>
+#ifndef WIN32
+#ifdef _WIN32
+#define WIN32 1
+#endif
+#endif
+#ifndef WIN32
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <string.h>
+
+void *prepare_sock_gz_r(int ncon );
+char *z_read_sock(void *v);
+int close_sock_gz_r(void *v);
+static void *extract_opaque = NULL;
+
+#define R_EOF	-1	
+#define MAXESSAIS 1	/*#define MAXESSAIS 1000000*/
+
+SEXP getzlibsock(SEXP sock, SEXP nmax, SEXP debug)
+{
+
+/* Variable de types SEXP :entree et sorties*/
+
+  SEXP ans = R_NilValue;
+  SEXP ans2 = R_NilValue;
+
+/* Quelles variable faut il proteger : s'appliqe uniquement aux objets R, cad SEXP : ans et ans2*/  
+  
+  int nprotect = 0;
+  int debugon;
+  int testc;
+  int numsoc;
+
+
+  int i,j, n, nn, nnn, ok, warn, nread, c;
+  int itest,itestd;
+  
+  char *test;
+  char *res;
+  
+  int flagend =0;
+  int nbseq =0;
+  
+  
+  debugon = INTEGER_VALUE(debug);
+  n=INTEGER_VALUE(nmax);
+  if (debugon)
+  	Rprintf("Running getzlibsock... \n");
+		  
+  if(!inherits(sock, "connection")) {
+  	Rprintf("Error!\n\n'con' is not a connection");
+	ans2 = PROTECT(allocVector(STRSXP, 1));
+	nprotect ++;
+	SET_STRING_ELT(ans2, 0,mkChar("Socket is not defined."));
+	PROTECT(ans = ans2);
+	nprotect ++;
+    	UNPROTECT(nprotect);
+	nprotect=0;
+	return ans ;
+	}
+  if (debugon)
+	Rprintf("'con' is a connection...\n");
+  numsoc = asInteger(sock);
+/* Pour  UNIX ( pbil):
+  numsoc = asInteger(sock) + 1;	
+  con=getConnection(numsoc);
+  scon= (Rsockconn)con->private;
+  numsoc = scon->fd;*/
+  
+  if (debugon)
+   	Rprintf("Socket number is %d....\n",numsoc);
+  extract_opaque=prepare_sock_gz_r(numsoc);
+  if (extract_opaque == NULL) {
+	Rprintf("Erreur dans prepare_sock_gz_r\n");
+	ans2 = PROTECT(allocVector(STRSXP, 1));
+	nprotect ++;
+	SET_STRING_ELT(ans2, 0,mkChar("Socket is not defined."));
+	PROTECT(ans = ans2);
+	nprotect ++;
+    	UNPROTECT(nprotect);
+	nprotect=0;
+	return ans ;
+	} 
+	
+   if (debugon)
+   	Rprintf("Trying to get answer from socket...\n");
+ 
+   res=z_read_sock(extract_opaque);
+   
+   /*AJOUT PATCHE CRADO ( devrait etre inutile)*/
+   itest=0;
+   itestd=0;
+   while ( res == NULL){ 
+   	res=z_read_sock(extract_opaque);
+	itest++;
+	itestd++;
+	if (debugon){	
+		if (itestd>10) {
+			Rprintf("*");
+			itestd=0;
+			}
+		}
+			
+	if (itest> MAXESSAIS) {
+		Rprintf("Socket error!\n");
+		Rprintf("No answer from socket after %d trials!\n",itest);
+		ans2 = PROTECT(allocVector(STRSXP, 1));
+		nprotect++;
+		SET_STRING_ELT(ans2, 0,mkChar("No answer from socket."));
+		PROTECT(ans = ans2);
+		nprotect++;
+    		UNPROTECT(nprotect);
+		nprotect=0;
+		testc=close_sock_gz_r(extract_opaque);
+		if (debugon)
+			Rprintf("Closing socket close_sock_gz_r  status = %d\n",testc);
+		return ans ;
+		}
+	}
+
+  if (debugon)
+  	Rprintf("\n-->[%s]\n",res);
+  if (strncmp(res,"code=0",6) != 0) {
+  	Rprintf("extractseqs error!\n");
+	Rprintf("[%s]\n",res);
+	ans2 = PROTECT(allocVector(STRSXP, 1));
+	nprotect++;
+	SET_STRING_ELT(ans2, 0,mkChar("Wrong answer from socket."));
+	PROTECT(ans = ans2);
+	nprotect++;
+    	UNPROTECT(nprotect);
+	nprotect=0;
+	testc=close_sock_gz_r(extract_opaque);
+	if (debugon)
+		Rprintf("Closing socket close_sock_gz_r  status = %d\n",testc);
+	return ans ;
+	}
+  if (debugon)
+  	Rprintf("Socket answer is ok %s(%d)\n",res, strlen(res));		 
+  nn = (n < 0) ? 1000 : n; /* initially allocate space for 1000 lines */
+  nnn = (n < 0) ? INT_MAX : n;
+  PROTECT(ans = allocVector(STRSXP, nn));
+  nprotect++;
+  nread=0;
+  if (debugon)
+   	Rprintf("n=%d, nn=%d,nnn=%d\n",n,nn, nnn);
+  res=z_read_sock(extract_opaque);	
+  while ((res != NULL) ) {
+  
+  	if (nread >=nnn) {
+	  	if (debugon)
+			Rprintf("Increasing memory...\n");
+	    	PROTECT(ans2 = (allocVector(STRSXP, 2*nn)));
+		nprotect++;
+	    	for(i = 0; i < nn; i++)
+		SET_STRING_ELT(ans2, i, STRING_ELT(ans, i));
+	    	nn *= 2;
+	    	nnn=nn;
+	    	UNPROTECT(nprotect); /* old ans et ans2 */
+	    	PROTECT(ans = ans2);
+		nprotect=1;
+		};
+	if  (strncmp(res,"extractseqs END.",16) == 0){
+		if (debugon)
+			Rprintf("extractseqs successfully ended ...\n");
+		flagend=1;
+		break;
+		}
+	if  ((strncmp(res,"code=0",6) == 0) && (nread >0)) {
+		Rprintf("-->[%s]\n",res);
+		Rprintf("WARNING!\nextractseqs unsuccessfully ended ...\n");
+		flagend=1;
+		break;
+		}				
+	 if  (strncmp(res,"\033count=", 7) == 0){
+	 	nbseq++;
+	 	} else {
+		SET_STRING_ELT(ans, nread, mkChar(res));
+		nread++;
+		}
+		
+	res=z_read_sock(extract_opaque);
+    	}
+  if (debugon)
+    Rprintf("Number of lines     : %d\n",nread-1);
+  if (debugon) 
+    Rprintf("Number of sequences : %d\n",nbseq);
+  if (flagend) {
+  	if (debugon)
+  		Rprintf("extractseqs OK, program carry on...\n");
+	if (debugon)
+	 	Rprintf("Ok, everything is fine!\n");
+	 } 
+	 else{
+	Rprintf("extractseqs error!\n");
+	ans2 = PROTECT(allocVector(STRSXP, 1));
+	nprotect++;
+	SET_STRING_ELT(ans2, 0,mkChar("Wrong answer from socket."));
+	PROTECT(ans = ans2);
+	nprotect++;
+	}
+		
+  testc=close_sock_gz_r(extract_opaque);
+  if (debugon)
+	Rprintf("Closing socket close_sock_gz_r  status = %d\n",testc);
+  UNPROTECT(nprotect);
+  return ans ;
+}
+#else
+SEXP getzlibsock(SEXP sock, SEXP nmax, SEXP debug)
+{
+
+/* Variable de types SEXP :entree et sorties*/
+
+  SEXP ans = R_NilValue;
+  SEXP ans2 = R_NilValue;
+
+/* Quelles variable faut il proteger : s'appliqe uniquement aux objets R, cad SEXP : ans et ans2*/  
+  
+  int nprotect = 0;
+  int debugon;
+  
+  
+  debugon = INTEGER_VALUE(debug);
+  if (debugon)
+  	Rprintf("Running getzlibsock... \n");	
+  Rprintf("Warning!\n\nCompressed sockets are not yet available on Windows.\n");
+  ans2 = PROTECT(allocVector(STRSXP, 1));
+  nprotect ++;
+  SET_STRING_ELT(ans2, 0,mkChar("Compressed sockets are not yet available on Windows."));  
+  PROTECT(ans = ans2);
+  nprotect ++;
+  UNPROTECT(nprotect);
+  nprotect=0;
+  return ans ;
+	}
+#endif

Added: pkg/src/kaks.c
===================================================================
--- pkg/src/kaks.c	                        (rev 0)
+++ pkg/src/kaks.c	2014-12-02 16:34:12 UTC (rev 1831)
@@ -0,0 +1,1125 @@
+#include <R.h>
+#include <Rdefines.h>
+
+int code_mt = 0; /* Not implemented yet */
+
+void reresh(char **, int, int);
+void prefastlwl(double **, double **, double **, double **, double **, double **, double **, double **, double **, double **);
+int fastlwl(char **, int, int, double **, double **, double **, double **, double **, double **, double **, double **, 
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/seqinr -r 1831


More information about the Seqinr-commits mailing list