[Seqinr-commits] r1830 - pkg/src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Dec 2 17:31:43 CET 2014
Author: simonpenel
Date: 2014-12-02 17:31:43 +0100 (Tue, 02 Dec 2014)
New Revision: 1830
Removed:
pkg/src/Makevars
pkg/src/Makevars.win
pkg/src/Rconnections.h
pkg/src/adler32.c
pkg/src/alignment.c
pkg/src/alignment.h
pkg/src/compress.c
pkg/src/crc32.c
pkg/src/crc32.h
pkg/src/deflate.c
pkg/src/deflate.h
pkg/src/fastacc.c
pkg/src/getzlibsock.c
pkg/src/gzio.c
pkg/src/infback.c
pkg/src/inffast.c
pkg/src/inffast.h
pkg/src/inffixed.h
pkg/src/inflate.c
pkg/src/inflate.h
pkg/src/inftrees.c
pkg/src/inftrees.h
pkg/src/kaks.c
pkg/src/trees.c
pkg/src/trees.h
pkg/src/uncompr.c
pkg/src/util.c
pkg/src/zconf.h
pkg/src/zlib.h
pkg/src/zsockr.c
pkg/src/zutil.c
pkg/src/zutil.h
Log:
Deleted: pkg/src/Makevars
===================================================================
--- pkg/src/Makevars 2014-11-27 15:41:19 UTC (rev 1829)
+++ pkg/src/Makevars 2014-12-02 16:31:43 UTC (rev 1830)
@@ -1 +0,0 @@
-PKG_CFLAGS = -DUSE_TYPE_CHECKING_STRICT
Deleted: pkg/src/Makevars.win
===================================================================
--- pkg/src/Makevars.win 2014-11-27 15:41:19 UTC (rev 1829)
+++ pkg/src/Makevars.win 2014-12-02 16:31:43 UTC (rev 1830)
@@ -1,2 +0,0 @@
-PKG_CFLAGS = -DUSE_TYPE_CHECKING_STRICT
-PKG_LIBS = -lws2_32 -mwindows
Deleted: pkg/src/Rconnections.h
===================================================================
--- pkg/src/Rconnections.h 2014-11-27 15:41:19 UTC (rev 1829)
+++ pkg/src/Rconnections.h 2014-12-02 16:31:43 UTC (rev 1830)
@@ -1,176 +0,0 @@
-/*
- * R : A Computer Language for Statistical Data Analysis
- * Copyright (C) 2000-2006 The R Development Core Team.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef R_CONNECTIONS_H_
-#define R_CONNECTIONS_H_
-#include <R_ext/Boolean.h>
-
-/* until we make connections more public this allows the opaque
- pointer definition to be made available in Rinternals.h */
-#ifndef HAVE_RCONNECTION_TYPEDEF
-typedef struct Rconn *Rconnection;
-#endif
-struct Rconn {
- char* class;
- char* description;
- char mode[5];
- Rboolean text, isopen, incomplete, canread, canwrite, canseek, blocking,
- isGzcon;
- Rboolean (*open)(struct Rconn *);
- void (*close)(struct Rconn *); /* routine closing after auto open */
- void (*destroy)(struct Rconn *); /* when closing connection */
- int (*vfprintf)(struct Rconn *, const char *, va_list);
- int (*fgetc)(struct Rconn *);
- int (*fgetc_internal)(struct Rconn *);
-/* int (*ungetc)(int c, struct Rconn *); */
- double (*seek)(struct Rconn *, double, int, int);
- void (*truncate)(struct Rconn *);
- int (*fflush)(struct Rconn *);
- size_t (*read)(void *, size_t, size_t, struct Rconn *);
- size_t (*write)(const void *, size_t, size_t, struct Rconn *);
-/* void (*onerror)(struct Rconn *); */
- int nPushBack, posPushBack; /* number of lines, position on top line */
- char **PushBack;
- int save, save2;
- /* unsigned char encoding[256];*/
- char encname[101];
- /* will be iconv_t, which is a pointer. NULL if not in use */
- void *inconv, *outconv;
- /* The idea here is that no MBCS char will ever not fit */
- char iconvbuff[25], oconvbuff[50], *next, init_out[25];
- short navail, inavail;
- Rboolean EOF_signalled;
- void *private;
-};
-
-typedef struct fileconn {
- FILE *fp;
-#if defined(HAVE_OFF_T) && defined(HAVE_SEEKO)
- off_t rpos, wpos;
-#else
-#ifdef Win32
- off64_t rpos, wpos;
-#else
- long rpos, wpos;
-#endif
-#endif
- Rboolean last_was_write;
-#ifdef Win32
- Rboolean anon_file;
- char name[PATH_MAX+1];
-#endif
-} *Rfileconn;
-
-typedef struct fifoconn {
- int fd;
-} *Rfifoconn;
-
-typedef struct gzfileconn {
- void *fp;
- int cp;
-} *Rgzfileconn;
-
-typedef struct textconn {
- char *data; /* all the data */
- int cur, nchars; /* current pos and number of chars */
- char save; /* pushback */
-} *Rtextconn;
-
-typedef struct outtextconn {
- int len; /* number of lines */
- SEXP namesymbol;
- SEXP data;
- char *lastline;
- int lastlinelength; /* buffer size */
-} *Routtextconn;
-
-typedef enum {HTTPsh, FTPsh} UrlScheme;
-
-typedef struct urlconn {
- void *ctxt;
- UrlScheme type;
-} *Rurlconn;
-
-typedef struct sockconn {
- int port;
- int server;
- int fd;
- char *host;
- char inbuf[4096], *pstart, *pend;
-} *Rsockconn;
-
-typedef struct unzconn {
- void *uf;
-} *Runzconn;
-
-typedef struct bzfileconn {
- FILE *fp;
- void *bfp;
-} *Rbzfileconn;
-
-typedef struct clpconn {
- char *buff;
- int pos, len, last, sizeKB;
- Rboolean warned;
-} *Rclpconn;
-
-/* zlib wants to use ZLIB_H without leading underscore in 1.2.1 */
-#if defined(_ZLIB_H) || defined(ZLIB_H)
-typedef struct gzconn {
- Rconnection con;
- int cp; /* compression level */
- z_stream s;
- int z_err, z_eof;
- uLong crc;
- Byte *inbuf, *outbuf;
- int nsaved;
- char saved[2];
- Rboolean allow;
-} *Rgzconn;
-#endif
-
-#define init_con Rf_init_con
-#define con_close Rf_con_close
-#define con_pushback Rf_con_pushback
-
-int Rconn_fgetc(Rconnection con);
-int Rconn_ungetc(int c, Rconnection con);
-int Rconn_getline(Rconnection con, char *buf, int bufsize);
-int Rconn_printf(Rconnection con, const char *format, ...);
-Rconnection getConnection(int n);
-Rconnection getConnection_no_err(int n);
-Rboolean switch_stdout(int icon, int closeOnExit);
-void con_close(int i);
-void init_con(Rconnection new, char *description, char *mode);
-Rconnection R_newurl(char *description, char *mode);
-Rconnection R_newsock(char *host, int port, int server, char *mode);
-Rconnection in_R_newsock(char *host, int port, int server, char *mode);
-Rconnection R_newunz(char *description, char *mode);
-int dummy_fgetc(Rconnection con);
-int dummy_vfprintf(Rconnection con, const char *format, va_list ap);
-int getActiveSink(int n);
-void con_pushback(Rconnection con, Rboolean newLine, char *line);
-
-int Rsockselect(int nsock, int *insockfd, int *ready, int *write,
- double timeout);
-
-#define set_iconv Rf_set_iconv
-void set_iconv(Rconnection con);
-#endif
-
Deleted: pkg/src/adler32.c
===================================================================
--- pkg/src/adler32.c 2014-11-27 15:41:19 UTC (rev 1829)
+++ pkg/src/adler32.c 2014-12-02 16:31:43 UTC (rev 1830)
@@ -1,153 +0,0 @@
-/* adler32.c -- compute the Adler-32 checksum of a data stream
- * Copyright (C) 1995-2004 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* @(#) $Id: adler32.c,v 1.1.2.1 2007-04-19 09:40:17 penel Exp $ */
-
-#define ZLIB_INTERNAL
-#include "zlib.h"
-
-#define BASE 65521UL /* largest prime smaller than 65536 */
-#define NMAX 5552
-/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
-
-#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;}
-#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
-#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
-#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
-#define DO16(buf) DO8(buf,0); DO8(buf,8);
-
-/* use NO_DIVIDE if your processor does not do division in hardware */
-#ifdef NO_DIVIDE
-# define MOD(a) \
- do { \
- if (a >= (BASE << 16)) a -= (BASE << 16); \
- if (a >= (BASE << 15)) a -= (BASE << 15); \
- if (a >= (BASE << 14)) a -= (BASE << 14); \
- if (a >= (BASE << 13)) a -= (BASE << 13); \
- if (a >= (BASE << 12)) a -= (BASE << 12); \
- if (a >= (BASE << 11)) a -= (BASE << 11); \
- if (a >= (BASE << 10)) a -= (BASE << 10); \
- if (a >= (BASE << 9)) a -= (BASE << 9); \
- if (a >= (BASE << 8)) a -= (BASE << 8); \
- if (a >= (BASE << 7)) a -= (BASE << 7); \
- if (a >= (BASE << 6)) a -= (BASE << 6); \
- if (a >= (BASE << 5)) a -= (BASE << 5); \
- if (a >= (BASE << 4)) a -= (BASE << 4); \
- if (a >= (BASE << 3)) a -= (BASE << 3); \
- if (a >= (BASE << 2)) a -= (BASE << 2); \
- if (a >= (BASE << 1)) a -= (BASE << 1); \
- if (a >= BASE) a -= BASE; \
- } while (0)
-# define MOD4(a) \
- do { \
- if (a >= (BASE << 4)) a -= (BASE << 4); \
- if (a >= (BASE << 3)) a -= (BASE << 3); \
- if (a >= (BASE << 2)) a -= (BASE << 2); \
- if (a >= (BASE << 1)) a -= (BASE << 1); \
- if (a >= BASE) a -= BASE; \
- } while (0)
-#else
-# define MOD(a) a %= BASE
-# define MOD4(a) a %= BASE
-#endif
-
-/* ========================================================================= */
-uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len)
-/*
- uLong adler;
- const Bytef *buf;
- uInt len;
-*/
-{
- unsigned long sum2;
- unsigned n;
-
- /* split Adler-32 into component sums */
- sum2 = (adler >> 16) & 0xffff;
- adler &= 0xffff;
-
- /* in case user likes doing a byte at a time, keep it fast */
- if (len == 1) {
- adler += buf[0];
- if (adler >= BASE)
- adler -= BASE;
- sum2 += adler;
- if (sum2 >= BASE)
- sum2 -= BASE;
- return adler | (sum2 << 16);
- }
-
- /* initial Adler-32 value (deferred check for len == 1 speed) */
- if (buf == Z_NULL)
- return 1L;
-
- /* in case short lengths are provided, keep it somewhat fast */
- if (len < 16) {
- while (len--) {
- adler += *buf++;
- sum2 += adler;
- }
- if (adler >= BASE)
- adler -= BASE;
- MOD4(sum2); /* only added so many BASE's */
- return adler | (sum2 << 16);
- }
-
- /* do length NMAX blocks -- requires just one modulo operation */
- while (len >= NMAX) {
- len -= NMAX;
- n = NMAX / 16; /* NMAX is divisible by 16 */
- do {
- DO16(buf); /* 16 sums unrolled */
- buf += 16;
- } while (--n);
- MOD(adler);
- MOD(sum2);
- }
-
- /* do remaining bytes (less than NMAX, still just one modulo) */
- if (len) { /* avoid modulos if none remaining */
- while (len >= 16) {
- len -= 16;
- DO16(buf);
- buf += 16;
- }
- while (len--) {
- adler += *buf++;
- sum2 += adler;
- }
- MOD(adler);
- MOD(sum2);
- }
-
- /* return recombined sums */
- return adler | (sum2 << 16);
-}
-
-/* ========================================================================= */
-uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2, z_off_t len2)
-/*
- uLong adler1;
- uLong adler2;
- z_off_t len2;
-*/
-{
- unsigned long sum1;
- unsigned long sum2;
- unsigned rem;
-
- /* the derivation of this formula is left as an exercise for the reader */
- rem = (unsigned)(len2 % BASE);
- sum1 = adler1 & 0xffff;
- sum2 = rem * sum1;
- MOD(sum2);
- sum1 += (adler2 & 0xffff) + BASE - 1;
- sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
- if (sum1 > BASE) sum1 -= BASE;
- if (sum1 > BASE) sum1 -= BASE;
- if (sum2 > (BASE << 1)) sum2 -= (BASE << 1);
- if (sum2 > BASE) sum2 -= BASE;
- return sum1 | (sum2 << 16);
-}
Deleted: pkg/src/alignment.c
===================================================================
--- pkg/src/alignment.c 2014-11-27 15:41:19 UTC (rev 1829)
+++ pkg/src/alignment.c 2014-12-02 16:31:43 UTC (rev 1830)
@@ -1,906 +0,0 @@
-#include "alignment.h"
-
-
-void rem_blank(char *string)
-{
- int ii;
-
-
- ii = strlen(string);
-
- for( ;ii >=0; ii--) {
- if(string[ii] == 0 || string[ii] == '\n' ||
- string[ii] == ' ' || string[ii] == '\t') string[ii] = 0;
- else break;
- }
-
-
-}
-/**************************** end rem_blank ************************/
-
-void free_mase(struct SEQMASE * aln, int nbsq)
-
-{
- int ii;
-
-
- for(ii = 0; ii <= nbsq; ii++) {
- free(aln[ii].seq);
- free(aln[ii].com);
- }
-
- free((char *) aln);
-
-
-}
-
-/******************************** end free_mase ************************/
-
-
-int one_more_seq_found(int count1, char ***pseq, char ***pseqname, char ***pcomments)
-{
- static int max_count;
- char **seq, **seqname, **comments;
-
- if(count1 == -1) max_count = 0;
-
- if(count1 + 1 < max_count) return count1 + 1;
-
- count1++;
- if(max_count == 0) {
- max_count = 100;
- seq = (char **)malloc(max_count * sizeof(char *));
- if(seq == NULL) return -1;
- seqname = (char **)malloc(max_count * sizeof(char *));
- if(seqname == NULL) return -1;
- comments = (char **)malloc(max_count * sizeof(char *));
- if(comments == NULL) return -1;
- }
- else {
- seq = *pseq; seqname = *pseqname; comments = *pcomments;
- max_count = 3 * max_count;
- seq = (char **)realloc(seq, max_count * sizeof(char *));
- if(seq == NULL) return -1;
- seqname = (char **)realloc(seqname, max_count * sizeof(char *));
- if(seqname == NULL) return -1;
- comments = (char **)realloc(comments, max_count * sizeof(char *));
- if(comments == NULL) return -1;
- }
-
- *pseq = seq; *pseqname = seqname; *pcomments = comments;
- return count1;
-}
-
-/******************************** end one_more_seq_found ************************/
-
-/***********************************************************************************************************************/
-/* lit un fichier MASE, renvoie une liste (objet R) contenant les séquences, les commentaies et les noms des espèces. */
-/***********************************************************************************************************************/
-
-
-SEXP read_mase(SEXP nomfic)
-{
- char *fic_name;
- FILE *fic;
- struct SEQMASE *aln;
- int nb_seq;
- int lg_max = 0, lg, lgs, lgc;
- char string[MAXSTRING + 1];
- char c1, c2;
- int i,ii, jj, kk = 0, numline, maxcom = 0;
-
- SEXP listseq;
- SEXP essai;
- SEXP listcom;
- SEXP listmn;
- SEXP nombreseq;
-
-
- /*Passages des objets R (paramètres) dans des variables C */
- fic_name = (char *) CHAR(STRING_ELT(nomfic, 0));
-
-
-
- if((fic = fopen(fic_name, "r")) == NULL) {
- error("Can't open file");
- }
-
- c1 = 0;
- nb_seq = 0;
- lg = lgc = 0;
- while(fgets(string, MAXSTRING, fic) != NULL) {
- string[MAXSTRING] = 0;
-
- lgs = strlen(string);
-
- if(lgs >= (MAXSTRING - 1)) {
- REprintf("\n Fatal Error. Too long line in alignment (> %d).\n", MAXSTRING);
- REprintf("Increase MAXSTRING and recompile.\n");
- }
-
- c2 = string[0];
-
- if(string[0] == ';' && string[1] != ';') {
- lgc += (lgs + 1);
- }
-
-
- if(c1 == ';' && c2 != c1) {
- nb_seq++;
- if(lg > lg_max) lg_max = lg;
- if(lgc > maxcom) maxcom = lgc;
- lg = lgc = 0;
- }
-
- else if(c2 != ';') lg += lgs;
- c1 = c2;
-
- }
- if(lg > lg_max) lg_max = lg;
-
-
- /******************************************/
- /* Création de 6 objets R qui seront */
- /******************************************/
-
- PROTECT(listseq=allocVector(VECSXP,nb_seq));
- PROTECT(essai=allocVector(VECSXP,5));
- PROTECT(listcom=allocVector(VECSXP,nb_seq));
- PROTECT(listmn=allocVector(VECSXP,nb_seq));
- PROTECT(nombreseq=NEW_INTEGER(1));
-
-
-
- aln = (struct SEQMASE *) calloc(nb_seq + 1, sizeof(struct SEQMASE));
-
-
- for(ii = 0; ii <= nb_seq; ii++) {
- aln[ii].seq = (char *) calloc(lg_max + 1, sizeof(char));
- aln[ii].com = (char *) calloc(maxcom + 1, sizeof(char));
- aln[ii].com[0] = 0;
- }
-
-
- rewind(fic);
-
- numline = 0;
- ii = -1;
- while(fgets(string, MAXSTRING, fic) != NULL) {
- numline++;
- string[MAXSTRING] = 0;
- if ((string[0] != ';') && (numline == 1))
- {
- error("Not a MASE file"); /* check format, thanks to J.H. Troesemeier */
- goto fini;
- }
-
- c2 = string[0];
-
- if(string[0] == ';' && string[1] != ';') {
- strcat(aln[ii + 1].com, string);
- }
-
- if(c1 == ';' && c2 != c1) {
- ii++;
- kk = aln[ii].lg = 0;
-
- rem_blank(string);
-
- if((int) strlen(string) >= (MAXMNMASE - 1)) {
- REprintf("Error. Maximum sequance name is %d characters\n", MAXMNMASE);
- error("sequence name too long!");
- }
-
- strcpy(aln[ii].mn, string);
-
- lg = 0;
- }
-
- else if(c2 != ';') {
- for(jj = 0; jj < MAXSTRING; jj++) {
- if(string[jj] == 0) break;
- if(string[jj] == ' ') continue;
- if(string[jj] == '\n') continue;
- if(string[jj] == '\t') continue;
- aln[ii].seq[kk++] = string[jj];
- aln[ii].lg = kk;
- }
- }
- c1 = c2;
-
- }
-
-
- fclose(fic);
-
- lg_max = aln[0].lg;
-
- for(ii = 1; ii < nb_seq; ii++)
- if(aln[ii].lg > lg_max) lg_max = aln[ii].lg;
-
- INTEGER(nombreseq)[0]=(int)nb_seq;
-
-
- for(i=0;i<nb_seq;i++){
- SET_ELEMENT(listseq,i,mkChar(aln[i].seq));
- }
-
- for(i=0;i<nb_seq;i++){
- SET_ELEMENT(listcom,i,mkChar(aln[i].com));
- }
-
-for(i=0;i<nb_seq;i++){
- SET_ELEMENT(listmn,i,mkChar(aln[i].mn));
- }
-
- SET_ELEMENT(essai,0,nombreseq);
- SET_ELEMENT(essai,1,listmn);
- SET_ELEMENT(essai,2,listseq);
- SET_ELEMENT(essai,3,listcom);
-
- fini:
- free_mase(aln,nb_seq);
- UNPROTECT(5);
-
- return(essai);
-
-}
-
-
-/********************** end read_mase ****************************/
-
-
-
-
-/*************************************************************************/
-/* Compute distance between two aligned sequences using different matrix */
-/*************************************************************************/
-
-
-SEXP distance(SEXP sequences,SEXP nbseq, SEXP matNumber, SEXP seqtype, SEXP gapoption){
-
- SEXP d;
- int MAXNSEQS;
- char **seq;
- int gap_option;
- int i, j, k, n,totseqs, seq_long, nbases;
- int mat_number, seq_type;
- int **ndiff;
- double **dist;
-
- int mat_pos[] = { 17, -1, 15, 0, 1, 12, 18, 4, 9, -1, 2, 10, 16, 5, -1, 19, 6, 3, 7, 8, -1, 11, 13, -1, 14, -1 };
-
- const char DNA[] = "ACGTXN-";
- const char Prot[] = "DEKRHNQSTILVFWYCMAGPX*-";
- int matp[20][20] = { {1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 1/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 1/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 1/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 1/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- {1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1},
- {1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1},
- {1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1},
- {1, 1, 1, 1, 1, 1, 1, 1, 1, 2/3, 2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 1, 1, 1},
- {1, 1, 1, 1, 1, 1, 1, 1, 1, 2/3, 2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 1, 1, 1},
- {1, 1, 1, 1, 1, 1, 1, 1, 1, 2/3, 2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 1, 1, 1 },
- {1, 1, 1, 1, 1, 1, 1, 1, 1, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 2/3, 1, 1, 1},
- {1, 1, 1, 1, 1, 1, 1, 1, 1, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 1, 1, 1},
- {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3, 1/3, 1},
- {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3, 1/3, 1},
- {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3} };
-
-
-
-
- MAXNSEQS = INTEGER_VALUE(nbseq);
- totseqs = INTEGER_VALUE(nbseq);
- mat_number= INTEGER_VALUE(matNumber);
- seq_type = INTEGER_VALUE(seqtype);
- gap_option = INTEGER_VALUE(gapoption);
-
- PROTECT(d=NEW_NUMERIC(totseqs*totseqs));
-
- seq = (char **)malloc(totseqs*sizeof(char *));
-
- for(i=0;i<totseqs;i++){
- seq[i] = (char *) CHAR(STRING_ELT(sequences,i));
- }
-
- ndiff = (int **)malloc(MAXNSEQS*sizeof(int *));
-
- for(j=0; j < MAXNSEQS; j++){
- ndiff[j] = (int *)malloc(MAXNSEQS*sizeof(int));
- }
-
- dist = (double **)malloc(MAXNSEQS*sizeof(double *));
-
- for(j=0; j < MAXNSEQS; j++){
- dist[j] = (double *)malloc(MAXNSEQS*sizeof(double));
- }
-
-
-
- /*********************************************************/
- /* Computing distance between sequences i and j */
- /*********************************************************/
-
- seq_long = (int)strlen(seq[0]);
-
- for (i = 0; i < totseqs; i++)
- {
- dist[i][i] = 0.0;
- }
-
- for (i = 0; i < totseqs; i++)
- {
- for (j = 0; j < i; j++)
- {
- ndiff[j][i] = ndiff[i][j] = 0;
- nbases = 0;
- for (k = 0; k < seq_long; k++)
- {
-
- if(seq_type == 1){
-
- /***************************/
- /* DNA/RNA sequences */
- /***************************/
- if ((strchr(DNA, seq[i][k]) == NULL) || (strchr(DNA, seq[j][k]) == NULL))
- seq[i][k] = seq[j][k] = 'X';
- if (seq[i][k] != '-' && seq[i][k] != 'N' && seq[i][k] != 'X' &&
- seq[j][k] != '-' && seq[j][k] != 'N' && seq[j][k] != 'X')
- {
- nbases++;
- if (seq[i][k] != seq[j][k])
- {
- ndiff[j][i]++;
- ndiff[i][j]++;
- }
- }
-
- if (gap_option == 1)
- {
- if (seq[i][k] == '-' && seq[j][k] != '-' && seq[j][k] != 'N' && seq[j][k] != 'X')
- {
- nbases++;
- ndiff[j][i]++;
- ndiff[i][j]++;
- }
- if (seq[j][k] == '-' && seq[i][k] != '-' && seq[i][k] != 'N' && seq[i][k] != 'X')
- {
- nbases++;
- ndiff[j][i]++;
- ndiff[i][j]++;
- }
-
- }
- }
-
- else if(mat_number == 1 && seq_type == 0){
-
- /********************************************/
- /*Protein sequences with similarity matrix */
- /********************************************/
-
- if ((strchr(Prot, seq[i][k]) == NULL) || (strchr(Prot, seq[j][k]) == NULL))
- seq[i][k] = seq[j][k] = 'X';
- if (seq[i][k] != '-' && seq[i][k] != '*' && seq[i][k] != 'X' &&
- seq[j][k] != '-' && seq[j][k] != '*' && seq[j][k] != 'X')
- {
- nbases++;
- ndiff[i][j] = ndiff[i][j] + matp[mat_pos[seq[i][k] - 65]][mat_pos[seq[j][k] - 65]];
- ndiff[j][i] = ndiff[i][j];
-
- }
- }
-
- else if( mat_number == 2 && seq_type == 0){
-
- /********************************************/
- /* Protein sequences with identity matrix */
- /********************************************/
-
- if ((strchr(Prot, seq[i][k]) == NULL) || (strchr(Prot, seq[j][k]) == NULL))
- seq[i][k] = seq[j][k] = 'X';
- if (seq[i][k] != '-' && seq[i][k] != '*' && seq[i][k] != 'X' &&
- seq[j][k] != '-' && seq[j][k] != '*' && seq[j][k] != 'X')
- {
- nbases++;
- if (seq[i][k] != seq[j][k])
- {
- ndiff[j][i]++;
- ndiff[i][j]++;
- }
- }
-
- }
-
- dist[i][j] = dist[j][i] = sqrt((double)ndiff[i][j]/nbases);
- }
- }
- }
-
-
- /********************************************************************************/
- /* Remplissage de l'objet R (matrice de taille nb_seq * nb_seq avec dist */
- /********************************************************************************/
-
- n=0;
-
- for(i=0;i<totseqs;i++){
- for(j=0;j<totseqs;j++){
- REAL(d)[n+j]=dist[i][j];
- }
- n=n+totseqs;
- }
-
- UNPROTECT(1);
-
- return(d);
-}
-
-/****************************************/
-/* Lecture d'un fichier au format msf */
-/***************************************/
-
-SEXP read_msf_align(SEXP ficname)
-{
-
- SEXP list;
- SEXP listseq;
- SEXP listname;
- SEXP nombreseq;
- char *fname;
- FILE *in;
- char line[100], *p, *q;
- int i,l, curr_spec, maxwidname=0, curr_len, tot_spec, wid_1_line, wid_block;
- char **seq, **seqname, **comments;
-
- fname = (char *) CHAR(STRING_ELT(ficname,0));
-
- PROTECT(nombreseq=NEW_INTEGER(1));
- PROTECT(list=allocVector(VECSXP,3));
-
- in=fopen(fname,"r");
- if(in==NULL) {
- error("File not found");
- }
-
- /* compter le nbre de seqs dans le fichier */
- tot_spec = 0;
- while(fgets(line, sizeof(line), in) != NULL) {
- if(strncmp(line, "//", 2) == 0) break;
- if(strncmp(line, " Name: ", 7) == 0) tot_spec++;
- }
- rewind(in);
-
- INTEGER(nombreseq)[0]=tot_spec;
-
- PROTECT(listname=allocVector(VECSXP,tot_spec));
- PROTECT(listseq=allocVector(VECSXP,tot_spec));
-
- seq = (char **)malloc(tot_spec * sizeof(char *));
- if(seq == NULL) goto nomem;
- comments = (char **)malloc(tot_spec * sizeof(char *));
- if(comments == NULL) goto nomem;
- seqname = (char **)malloc(tot_spec * sizeof(char *));
- if(seqname == NULL) goto nomem;
-
- p = NULL;
- while( fgets(line,sizeof(line),in) != NULL) {
- if( (p = strstr(line, "MSF: ")) != NULL) break;
- }
- if(p == NULL) {
- error("File not in MSF format!");
- tot_spec = -1; goto fini;
- }
- tot_spec = -1;
- do {
- fgets(line,sizeof(line),in);
- if( (p = strstr(line, "Name:") ) == NULL) continue;
- tot_spec++;
- q = strstr(p, " Len: ");
- sscanf(q + 5, "%d", &l);
- seq[tot_spec] = (char *)malloc(l + 1);
- if(seq[tot_spec]==NULL) goto nomem;
- p += 5; while(*p == ' ') p++;
- q = p; while(*q != ' ') q++;
- l = q - p;
- seqname[tot_spec] = (char *)malloc(l + 1);
- if(seqname[tot_spec]==NULL) goto nomem;
- memcpy(seqname[tot_spec], p, l); seqname[tot_spec][l] = 0;
- if(l > maxwidname) maxwidname = l;
- comments[tot_spec] = NULL;
- }
- while(strncmp(line, "//", 2) != 0);
- curr_spec = 0; curr_len = 0; wid_block = 0;
- while( fgets(line, sizeof(line), in) != NULL ) {
- p = line; while(*p == ' ') p++;
- l = strlen(seqname[curr_spec]);
- if(strncmp(p, seqname[curr_spec], l) != 0) continue;
- p += l; while(*p == ' ') p++; p--;
- q = seq[curr_spec] + curr_len;
- while( *(++p) != '\n') {
- if( *p == ' ') continue;
- if(*p == '.') *p = '-';
- *(q++) = *p;
- }
- *q = 0;
- wid_1_line = q - (seq[curr_spec] + curr_len);
- wid_block = (wid_1_line > wid_block ? wid_1_line : wid_block);
- if(curr_spec == tot_spec) {
- curr_len += wid_block;
- curr_spec = 0;
- wid_block = 0;
- }
- else curr_spec++;
- }
-
- for(i=0; i<tot_spec+1; i++) {
- SET_ELEMENT(listname,i,mkChar(seqname[i]));
- SET_ELEMENT(listseq,i,mkChar(seq[i]));
- }
-
- SET_ELEMENT(list,0,nombreseq);
- SET_ELEMENT(list,1,listname);
- SET_ELEMENT(list,2,listseq);
-
-
- fini:
- fclose(in);
- UNPROTECT(4);
- return list;
- nomem:
- error("Not enough memory!");
- tot_spec = -1;
- goto fini;
-}
-
-
-/******************************************/
-/* Lecture d'un fichier au format phylip */
-/******************************************/
-
-
-SEXP read_phylip_align(SEXP ficname)
-{
-
- SEXP list;
- SEXP listseq;
- SEXP listname;
- SEXP nombreseq;
- char *fname;
- FILE *in;
- char *p, *q, line[PHYNAME + 200];
- char **seq, **comments, **seqname;
- int totseqs, lenseqs, i, l;
-
- q=0;
- fname = (char *) CHAR(STRING_ELT(ficname,0));
-
- PROTECT(nombreseq=NEW_INTEGER(1));
- PROTECT(list=allocVector(VECSXP,3));
-
-
- in=fopen(fname,"r");
- if(in==NULL) {
- error("file not found");
- }
- fgets(line,sizeof(line),in);
- if( sscanf(line, "%d%d", &totseqs, &lenseqs) != 2) {
- error("Not a PHYLIP file");
- totseqs = 0;
- goto fini;
- }
-
- INTEGER(nombreseq)[0]=totseqs;
-
- PROTECT(listname=allocVector(VECSXP,totseqs));
- PROTECT(listseq=allocVector(VECSXP,totseqs));
-
- seq = (char **)malloc(totseqs * sizeof(char *));
- if(seq == NULL) goto nomem;
- seqname = (char **)malloc(totseqs * sizeof(char *));
- if(seqname == NULL) goto nomem;
- comments = (char **)malloc(totseqs * sizeof(char *));
- if(comments == NULL) goto nomem;
- for(i=0; i<totseqs; i++) {
- if( (seq[i] = (char *)malloc(lenseqs+1) ) == NULL ) goto nomem;
- if( (seqname[i] = (char *)malloc(PHYNAME+1) ) == NULL ) goto nomem;
- comments[i] = NULL;
- }
- for(i=0; i<totseqs; i++) {
- fgets(line,sizeof(line),in);
- memcpy(seqname[i],line,PHYNAME); seqname[i][PHYNAME] = 0;
- p = line+PHYNAME; q = seq[i];
- while(*p != '\n') {
- if(*p != ' ') *(q++) = *p;
- p++;
- }
- }
- l = q - seq[totseqs - 1];
- while( l < lenseqs) {
- fgets(line,sizeof(line),in);
- for(i=0; i<totseqs; i++) {
- fgets(line,sizeof(line),in);
- p = line; q = seq[i] + l;
- while(*p != '\n') {
- if(*p != ' ') *(q++) = *p;
- p++;
- }
- }
- l = q - seq[totseqs - 1];
- }
- for(i=0; i<totseqs; i++) seq[i][l] = 0;
-
-
-
- for(i=0; i<totseqs; i++) {
- SET_ELEMENT(listname,i,mkChar(seqname[i]));
- SET_ELEMENT(listseq,i,mkChar(seq[i]));
- }
-
- SET_ELEMENT(list,0,nombreseq);
- SET_ELEMENT(list,1,listname);
- SET_ELEMENT(list,2,listseq);
-
-
- fini:
- fclose(in);
- UNPROTECT(4);
- return list;
- nomem:
- error("Not enough memory!");
- totseqs = 0;
- goto fini;
-}
-
-
-
-/*************************************/
-/* Reading alignment in fasta format */
-/*************************************/
-
-SEXP read_fasta_align(SEXP ficname)
-{
- SEXP list;
- SEXP listseq;
- SEXP listname;
- SEXP nombreseq;
- char *fname;
- FILE *in;
- int totseqs, lseq, l2, l, lenseqs;
- char line[200], *p, *i;
- char **seq, **seqname, **comments;
-
- fname = (char *) CHAR(STRING_ELT(ficname, 0));
-
- PROTECT(nombreseq = NEW_INTEGER(1));
- PROTECT(list = allocVector(VECSXP, 3));
-
- /* Check that the file is available */
-
- if((in = fopen(fname, "r")) == NULL) error("File not found");
-
- /* How many sequences are in the file ? */
-
- totseqs = 0;
- while(fgets(line, sizeof(line), in) != NULL)
- if(*line == '>') totseqs++;
- rewind(in);
-
- /* R objects creation */
-
- INTEGER(nombreseq)[0] = totseqs;
- PROTECT(listname = allocVector(VECSXP, totseqs));
- PROTECT(listseq = allocVector(VECSXP, totseqs));
-
- /* Memory allocation */
-
- seq = (char **) R_alloc(totseqs, sizeof(char *));
- comments = (char **) R_alloc(totseqs, sizeof(char *));
- seqname = (char **) R_alloc(totseqs, sizeof(char *));
-
- lenseqs = MAXLENSEQ;
- totseqs = -1;
- i = fgets(line, sizeof(line), in);
- if(line[0] != '>')
- error("File not in Fasta format!\n");
-
- /* Main loop to read line by line the file */
-
- while( i != NULL ){
- totseqs++;
- comments[totseqs] = NULL;
- p = line + 1;
- while(*p != '\n')
- p++;
- l = p - line - 1;
-
- seqname[totseqs] = (char *) R_alloc(l + 1, sizeof(char));
-
- memcpy(seqname[totseqs], line + 1, l);
- seqname[totseqs][l] = '\0';
- SET_ELEMENT(listname, totseqs, mkChar(seqname[totseqs]));
-
- seq[totseqs] = (char *) R_alloc(lenseqs + 1, sizeof(char));
- lseq = 0;
-
- while( (i = fgets(line, sizeof(line), in)) != NULL && *i != '>' ) {
- l2 = strlen(line);
- if( line[l2 - 1] == '\n' ) l2--;
- while(l2 > 0 && line[l2 - 1] == ' ')
- l2--;
- if(lseq + l2 > lenseqs) {
- char *temp;
- lenseqs += MAXLENSEQ;
- temp = R_alloc(lenseqs + 1, sizeof(char));
- memcpy(temp, seq[totseqs], lseq);
- seq[totseqs] = temp;
- }
- memcpy(seq[totseqs] + lseq, line, l2);
- lseq += l2;
- }
- seq[totseqs][lseq] = '\0';
- SET_ELEMENT(listseq, totseqs, mkChar(seq[totseqs]));
- }
-
- SET_ELEMENT(list, 0, nombreseq);
- SET_ELEMENT(list, 1, listname);
- SET_ELEMENT(list, 2, listseq);
-
- fclose(in);
- UNPROTECT(4);
- return list;
-}
-
-
-
-/*******************************************/
-/* Lecture d'un fichier au format clustal */
-/*******************************************/
-
-
-
-SEXP read_clustal_align(SEXP ficname)
-{
-
- SEXP list;
- SEXP listseq;
- SEXP listname;
- SEXP nombreseq;
- char *fname;
- FILE *in;
- char line[200], *p;
- int i, l = 0, curr_spec, first=TRUEL, curr_len, next_len, tot_spec, curr_max_len =0, carac, wid_name = 0;
- char **seq, **comments, **seqname = NULL;
-
-
- fname = (char *) CHAR(STRING_ELT(ficname,0));
-
- PROTECT(nombreseq=NEW_INTEGER(1));
- PROTECT(list=allocVector(VECSXP,3));
-
- in=fopen(fname,"r");
-
- if(in==NULL) {
- error("file not found");
- return 0;
- }
-
- fgets(line,sizeof(line),in);
- if(strncmp(line,"CLUSTAL",7) != 0) { /* skip 1st line with CLUSTAL in it */
- error("File not in CLUSTAL format!");
- tot_spec = -1; goto fini;
- }
-
- /* skip next empty lines */
- do {
- carac = getc(in);
- if(carac == ' ') {
- fgets(line,sizeof(line),in);
- carac = getc(in);
- }
- }
- while(carac == '\n' || carac == '\r');
- ungetc(carac, in); /* back to start of 1st non-empty line */
- tot_spec = curr_spec = -1; curr_len = next_len = 0;
- while( fgets(line, sizeof(line), in) != NULL ) {
- if(*line == '\n' || *line == ' ') {
- curr_spec = -1;
- curr_len = next_len;
- first = FALSE;
- continue;
- }
-
- else if(tot_spec >= 0 && curr_spec == -1 &&
- strncmp(line, seqname[0], strlen(seqname[0]) ) != 0) {
- break;
- }
- else {
- if(first) {
- curr_spec = one_more_seq_found(curr_spec, &seq, &seqname, &comments);
- if(curr_spec == -1) goto nomem;
- }
- else curr_spec++;
- }
-
-
- if(first && curr_spec == 0) {
- /* calcul long partie nom: enlever tout ce qui n'est pas espace en fin */
- p = line + strlen(line) - 2;
- while(*p == ' ' || isdigit(*p) ) p--;
- while (*p != ' ') p--;
- wid_name = p - line + 1;
- }
-
-
- if(first) {
- seqname[curr_spec] = (char *)malloc(wid_name+1);
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/seqinr -r 1830
More information about the Seqinr-commits
mailing list