[adegenet-commits] r104 - in pkg: . R man src

Wed Apr 30 18:03:29 CEST 2008

Author: jombart
Date: 2008-04-30 18:03:29 +0200 (Wed, 30 Apr 2008)
New Revision: 104

Added:
   pkg/R/propShared.R
   pkg/man/propShared.Rd
   pkg/src/sharedAll.c
Modified:
   pkg/DESCRIPTION
Log:
Added the function propShared, its C code and its documentation.
Works, check passes.


Modified: pkg/DESCRIPTION
===================================================================

--- pkg/DESCRIPTION	2008-04-19 13:27:29 UTC (rev 103)
+++ pkg/DESCRIPTION	2008-04-30 16:03:29 UTC (rev 104)
@@ -9,4 +9,4 @@
 Description: Classes and functions for genetic data analysis within the multivariate framework.
 License: GPL (>=2)
 LazyLoad: yes
-Collate: classes.R auxil.R makefreq.R chooseCN.R dist.genpop.R export.R gstat.randtest.R HWE.R import.R monmonier.R coords.monmonier.R spca.R spca.rtests.R zzz.R hybridize.R fstat.R
\ No newline at end of file
+Collate: classes.R auxil.R makefreq.R chooseCN.R dist.genpop.R export.R gstat.randtest.R HWE.R import.R monmonier.R coords.monmonier.R spca.R spca.rtests.R zzz.R hybridize.R fstat.R propShared.R
\ No newline at end of file

Added: pkg/R/propShared.R
===================================================================
--- pkg/R/propShared.R	                        (rev 0)
+++ pkg/R/propShared.R	2008-04-30 16:03:29 UTC (rev 104)
@@ -0,0 +1,95 @@
+## propShared computes the proportion of shared alleles
+## in a genind object
+
+
+######################
+# Function propShared
+######################
+propShared <- function(obj){
+    x <- obj
+    ## check that this is a valid genind
+    if(!inherits(x,"genind")) stop("obj must be a genind object.")
+    invisible(validObject(x))
+    
+    ## build a matrix of genotypes (in rows) coded by integers
+    ## NAs are coded by 0
+    ## The matrix is a cbind of two matrices, storing respectively the
+    ## first and the second allele.
+    temp <- genind2df(x)
+    alleleSize <- max(apply(temp,1:2,nchar))/2
+    mat1 <- apply(temp, 1:2, substr, 1, alleleSize)
+    mat2 <- apply(temp, 1:2, substr, alleleSize+1, alleleSize*2)
+    matAll <- cbind(mat1,mat2)
+    matAll <- apply(matAll,1:2,as.integer)
+    matAll[is.na(matAll)] <- 0
+
+    n <- nrow(matAll)
+    resVec <- double(n*(n-1)/2)
+    res <- .C("sharedAll", as.integer(as.matrix(matAll)),
+              n, ncol(matAll), resVec, PACKAGE="adegenet")[[4]]
+
+    attr(res,"Size") <- n
+    attr(res,"Diag") <- FALSE
+    attr(res,"Upper") <- FALSE
+    class(res) <- "dist"
+    res <- as.matrix(res)
+
+    diag(res) <- 1
+    rownames(res) <- x at ind.names
+    colnames(res) <- x at ind.names
+    return(res)
+}
+
+
+
+
+## ######################
+## # Function propShared (old, pure-R version)
+## ######################
+## propShared <- function(obj){
+
+##     x <- obj
+##     ## check that this is a valid genind
+##     if(!inherits(x,"genind")) stop("obj must be a genind object.")
+##     invisible(validObject(x))
+    
+##     ## replace NAs
+##     x <- na.replace(x, method="0")
+    
+##     ## some useful variables
+##     nloc <- length(x at loc.names)
+    
+##     ## fnorm: auxiliary function for scaling
+##     fnorm <- function(vec){
+## 	norm <- sqrt(sum(vec*vec))
+## 	if(length(norm) > 0 && norm > 0) {return(vec/norm)}
+## 	return(vec)	
+##     }
+    
+##     ## auxiliary function f1
+##     ## computes the proportion of shared alleles in one locus
+##     f1 <- function(X){
+## 	X <- t(apply(X, 1, fnorm))
+## 	res <- X %*% t(X)
+## 	res[res>0.51 & res<0.9] <- 0.5 # remap case one heteroZ shares the allele of on homoZ
+## 	return(res)
+##     }
+    
+##     ## separate data per locus
+##     temp <- seploc(x)
+##     listProp <- lapply(temp, function(e) f1(e at tab))
+    
+##     ## produce the final result
+##     res <- listProp[[1]]
+##     if(nloc>2){
+## 	for(i in 2:nloc){
+##             res <- res + listProp[[i]]
+## 	}
+##     }
+    
+##     res <- res/nloc
+##     rownames(res) <- x at ind.names
+##     colnames(res) <- x at ind.names
+    
+##     return(res)
+## }

Added: pkg/man/propShared.Rd
===================================================================
--- pkg/man/propShared.Rd	                        (rev 0)
+++ pkg/man/propShared.Rd	2008-04-30 16:03:29 UTC (rev 104)
@@ -0,0 +1,47 @@
+\encoding{UTF-8}
+\name{propShared}
+\alias{propShared}
+\title{Compute proportion of shared alleles}
+\description{The function \code{propShared} computes the proportion of
+  shared alleles in a set of genotypes (i.e. from a \linkS4class{genind}
+  object).
+}
+\usage{
+propShared(obj)
+}
+\arguments{
+  \item{obj}{a \linkS4class{genind} object.}
+ }
+ \details{
+   Computations of the proportion of shared alleles are computed in C.
+   Proportions are computed from all available data, i.e. proportion can
+   be computed as far as there is at least one typed locus in common
+   between two genotypes.
+}
+\value{Returns a matrix of proportions}
+\seealso{\code{\link{dist.genpop}}
+}
+\author{ Thibaut Jombart \email{jombart at biomserv.univ-lyon1.fr} }
+\examples{
+## make a small object
+data(microbov)
+obj <- microbov[1:5,microbov at loc.fac \%in\% c("L01","L02")]
+
+## verify results
+propShared(obj)
+genind2df(obj,sep="|")
+
+## Use this similarity measure inside a PCoA
+## This is for illustration only:
+## the distance should be rendered Euclidean before
+## (e.g. using quasieuclid from package ade4).
+if(require(ade4)){
+matSimil <- propShared(microbov)
+matDist <- exp(-matSimil)
+D <- as.dist(matDist)
+pcoa1 <- dudi.pco(D,scannf=FALSE,nf=3)
+s.class(pcoa1$li,microbov$pop,lab=microbov$pop.names)
+}
+}
+\keyword{manip}
+\keyword{multivariate}

Added: pkg/src/sharedAll.c
===================================================================
--- pkg/src/sharedAll.c	                        (rev 0)
+++ pkg/src/sharedAll.c	2008-04-30 16:03:29 UTC (rev 104)
@@ -0,0 +1,111 @@
+/* 
+*** THIS CODE IS PART OF THE *adegenet* PACKAGE FOR R. ***
+
+Code used to compute the proportion of alleles shared among a set of individuals.
+The arguments are: 
+- matAll a matrix containing alleles coded by integers, with genotypes in rows and loci in columns. Results from the binding by columns of A1 and A2, where A1 stores the one allele and A2 the other allele.
+- nRow: the number of rows of matAll, i.e. number of genotypes
+- nCol: the number of columns of matAll, i.e. twice the number of loci
+- resVec: a vector of length (n(n-1)/2) storing the proportion of shared alleles for each couple of genotypes.
+
+Thibaut Jombart (jombart at biomserv.univ-lyon1.fr), 2008.
+*/ 
+
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+#include <string.h>
+#include <stdlib.h>
+#include "adesub.h"
+
+
+void sharedAll(int *matAll, int *nRow, int *nCol, double *resVec)
+{
+/* Declare local C variables */
+	int i, i1, i2, j, k, n, p, nbAll, **mat, temp;
+	n = *nRow;
+	p = *nCol;
+
+	int nLoc=p/2;
+
+/* Memory allocation for local C variables */
+
+	tabintalloc(&mat, n, p); /* function from ade4 */
+
+/* Local reconstruction of the matrix of alleles
+   ! beware: this matrix is to be used from 1 to n and 1 to p included,
+   and not from zero to n and p excluded as it is common in C */
+	k = 0;
+	for (j=1; j<=p; j++) {
+		for (i=1; i<=n; i++) {
+			mat[i][j] = matAll[k];
+			k = k + 1;
+		}
+	}
+
+/* == Main Computations: ==
+   - i1, i2: indices of genotypes
+   - j: index of allele
+   - n: number of genotypes
+   - p number of columns in mat (i.e. twice the number of loci)
+   - each term in mat codes an allele (NAs are coded by 0)
+*/
+
+	k=0; /* counter used to browse resVec */
+	for(i1=1; i1<=(n-1); i1++){
+		for(i2=(i1+1); i2<=n; i2++){
+			/* Used for debugging
+			printf("\n\n debug: ## %d-%d ##",i1,i2);
+			*/
+
+			resVec[k] = 0.0; /* Initialization of the result */
+			nbAll = 0; /* counts the number of types alleles */
+			for(j=1; j<=nLoc; j++){
+				/* Used for debugging
+				   printf("\n debug: j=%d",j);
+				   printf("\n debug: mat[i1,j]=%d",mat[i1][j]);
+				   printf("\n debug: mat[i1,j]=%d",mat[i1][j+nLoc]);
+				   printf("\n debug: mat[i2,j]=%d",mat[i2][j]);
+				   printf("\n debug: mat[i2,j+nLoc]=%d",mat[i2][j+nLoc]);
+				*/
+				if(mat[i1][j] != 0 && mat[i1][j+nLoc] != 0 && 
+				   mat[i2][j] != 0 && mat[i2][j+nLoc] != 0){
+					/* Used for debugging 
+					   printf("\n debug: alleles are typed");
+					*/
+					nbAll+=2;
+					/* Used for debugging
+					   printf("\n debug: nbAll=%d", nbAll);
+					*/
+					/* Compare alleles: 
+					   -> either both alleles are in common, 
+					   -> or no allele are common, 
+					   -> or there is one common allele */
+					/* both alleles common */
+					if((mat[i1][j] == mat[i2][j] 
+					    && mat[i1][j+nLoc] == mat[i2][j+nLoc])
+					   || (mat[i1][j] == mat[i2][j+nLoc]
+					       && mat[i1][j+nLoc] == mat[i2][j])){
+						resVec[k] += 2.0;
+					} else if(!( /* if not 'all alleles differe' */
+							  mat[i1][j] != mat[i2][j] 
+							  && mat[i1][j] != mat[i2][j+nLoc]
+							  && mat[i1][j+nLoc] != mat[i2][j] 
+							  && mat[i1][j+nLoc] != mat[i2][j+nLoc])
+						) resVec[k]++;
+
+				} /* end if */
+			} /* end for j in 1:(nLoc) */
+
+			/* Divide the number of shared alleles by the number of typed alleles */
+			if(nbAll > 0) resVec[k] = resVec[k]/nbAll;
+			/*printf("\n debug: resVec[i1,i2]/nbAll (%d,%d)=# %f #", i1,i2,resVec[k]);*/
+			k++;
+
+		} /* end for i2 */
+	} /* end for i1*/
+
+	/* Free allocated memory */
+	freeinttab(mat);
+
+} /* end sharedAll */