[adegenet-commits] r1048 - in pkg: . R man

Thu Nov 8 15:13:56 CET 2012

Author: jombart
Date: 2012-11-08 15:13:56 +0100 (Thu, 08 Nov 2012)
New Revision: 1048

Added:
   pkg/man/findMutations.Rd
Modified:
   pkg/DESCRIPTION
   pkg/R/gengraph.R
   pkg/R/sequences.R
Log:
adding a new generic function to identify mutations between pairs of sequences

Modified: pkg/DESCRIPTION
===================================================================

--- pkg/DESCRIPTION	2012-11-08 11:57:17 UTC (rev 1047)
+++ pkg/DESCRIPTION	2012-11-08 14:13:56 UTC (rev 1048)
@@ -1,5 +1,5 @@
 Package: adegenet
-Version: 1.3-5
+Version: 1.3-6
 Date: 2012/10/31
 Title: adegenet: an R package for the exploratory analysis of genetic and genomic data.
 Author:  Thibaut Jombart <t.jombart at imperial.ac.uk>

Modified: pkg/R/gengraph.R
===================================================================
--- pkg/R/gengraph.R	2012-11-08 11:57:17 UTC (rev 1047)
+++ pkg/R/gengraph.R	2012-11-08 14:13:56 UTC (rev 1048)
@@ -10,7 +10,8 @@
 #############
 ## DEFAULT ##
 #############
-gengraph.default <- function(x, cutoff=NULL, ngrp=NULL, computeAll=FALSE, plot=TRUE, show.graph=TRUE, col.pal=funky, ...){
+gengraph.default <- function(x, cutoff=NULL, ngrp=NULL, computeAll=FALSE, plot=TRUE, show.graph=TRUE, col.pal=funky,
+                             truenames=TRUE, ...){
     stop(paste("No method for objects of class",class(x)))
 } # end gengraph.default
 
@@ -21,7 +22,8 @@
 ############
 ## MATRIX ##
 ############
-gengraph.matrix <- function(x, cutoff=NULL, ngrp=NULL, computeAll=FALSE, plot=TRUE, show.graph=TRUE, col.pal=funky, ...){
+gengraph.matrix <- function(x, cutoff=NULL, ngrp=NULL, computeAll=FALSE, plot=TRUE, show.graph=TRUE, col.pal=funky,
+                            truenames=TRUE, ...){
     ## CHECKS ##
     if(!require("igraph")) stop("igraph is required")
 
@@ -50,7 +52,11 @@
             if(plot){
                 abline(v=ans,col="red",lty=2, lwd=2)
             }
-            res <- gengraph.matrix(x, cutoff=ans)
+            res <- gengraph.matrix(x, cutoff=ans, truenames=truenames)
+            if(truenames){
+                V(res$graph)$label <- rownames(x)
+            }
+
             cat(paste("\nNumber of clusters found:  ", res$clust$no, sep=""))
             if(plot && show.graph) plot(res$graph)
             ans <- ""
@@ -74,6 +80,9 @@
         col <- col.pal(clust$no)[1:clust$no]
         names(col) <- 1:clust$no
         res <- list(graph=g, clust=clusters(g), cutoff=cutoff, col=col)
+        if(truenames){
+            V(res$graph)$label <- rownames(x)
+        }
 
     } else { ## IF CUT-OFF POINT NEEDS TO BE FOUND ##
         if(ngrp>=nrow(x)) stop("ngrp is greater than or equal to the number of individuals")
@@ -93,7 +102,6 @@
 
         ## FIND THE LOWEST CUTOFF GIVING NGRP ##
         res <- gengraph.matrix(x,cutoff=cutoff)
-
         while(res$clust$no>ngrp){
             cutoff <- cutoff+1
             res <- gengraph.matrix(x,cutoff=cutoff)
@@ -104,6 +112,12 @@
 
 
     ## RETURN ##
+    if(truenames){
+        V(res$graph)$label <- rownames(x)
+    } else {
+        V(res$graph)$label <- 1:nrow(x)
+    }
+
     return(res)
 
 } # end gengraph.matrix
@@ -122,7 +136,8 @@
     if(!require("igraph")) stop("igraph is required")
 
     ## USE MATRIX METHOD ##
-    res <- gengraph(as.matrix(x), cutoff=cutoff, ngrp=ngrp, computeAll=computeAll, plot=plot, show.graph=show.graph, col.pal=col.pal, ...)
+    res <- gengraph(as.matrix(x), cutoff=cutoff, ngrp=ngrp, computeAll=computeAll, plot=plot, show.graph=show.graph, col.pal=col.pal,
+                    truenames=truenames, ...)
     return(res)
 } # end gengraph.dist
 
@@ -135,7 +150,8 @@
 ############
 ## GENIND ##
 ############
-gengraph.genind <- function(x, cutoff=NULL, ngrp=NULL, computeAll=FALSE, plot=TRUE, show.graph=TRUE, col.pal=funky, ...){
+gengraph.genind <- function(x, cutoff=NULL, ngrp=NULL, computeAll=FALSE, plot=TRUE, show.graph=TRUE, col.pal=funky,
+                            truenames=TRUE, ...){
     ## CHECKS ##
     if(!require("igraph")) stop("igraph is required")
 
@@ -144,7 +160,12 @@
     D <- (1-propShared(x))*nLoc(x)*ploidy(x)
 
     ## USE MATRIX METHOD ##
-    res <- gengraph(D, cutoff=cutoff, ngrp=ngrp, computeAll=computeAll, plot=plot, show.graph=show.graph, col.pal=col.pal, ...)
+    res <- gengraph(D, cutoff=cutoff, ngrp=ngrp, computeAll=computeAll, plot=plot, show.graph=show.graph, col.pal=col.pal,
+                    truenames=truenames, ...)
+    if(truenames){
+        V(res$graph)$label <- indNames(x)
+    }
+
     return(res)
 } # end gengraph.genind
 
@@ -158,7 +179,8 @@
 ############
 ## GENPOP ##
 ############
-gengraph.genpop <- function(x, cutoff=NULL, ngrp=NULL, computeAll=FALSE, plot=TRUE, show.graph=TRUE, col.pal=funky, method=1, ...){
+gengraph.genpop <- function(x, cutoff=NULL, ngrp=NULL, computeAll=FALSE, plot=TRUE, show.graph=TRUE, col.pal=funky, method=1,
+                            truenames=TRUE, ...){
     ## CHECKS ##
     if(!require("igraph")) stop("igraph is required")
 
@@ -171,7 +193,12 @@
     }
 
     ## USE MATRIX METHOD ##
-    res <- gengraph(D, cutoff=cutoff, ngrp=ngrp, computeAll=computeAll, plot=plot, show.graph=show.graph, col.pal=col.pal, ...)
+    res <- gengraph(D, cutoff=cutoff, ngrp=ngrp, computeAll=computeAll, plot=plot, show.graph=show.graph, col.pal=col.pal,
+                    truenames=truenames, ...)
+    if(truenames){
+        V(res$graph)$label <- x at pop.names
+    }
+
     return(res)
 } # end gengraph.genpop
 
@@ -184,7 +211,8 @@
 ############
 ## DNABIN ##
 ############
-gengraph.DNAbin <- function(x, cutoff=NULL, ngrp=NULL, computeAll=FALSE, plot=TRUE, show.graph=TRUE, col.pal=funky, ...){
+gengraph.DNAbin <- function(x, cutoff=NULL, ngrp=NULL, computeAll=FALSE, plot=TRUE, show.graph=TRUE, col.pal=funky,
+                            truenames=TRUE, ...){
     ## CHECKS ##
     if(!require("igraph")) stop("igraph is required")
     if(!require("ape")) stop("ape is required")
@@ -193,7 +221,8 @@
     D <- as.matrix(round(dist.dna(x,model="raw", pairwise.deletion = TRUE)*ncol(x)))
 
     ## USE MATRIX METHOD ##
-    res <- gengraph(D, cutoff=cutoff, ngrp=ngrp, computeAll=computeAll, plot=plot, show.graph=show.graph, col.pal=col.pal, ...)
+    res <- gengraph(D, cutoff=cutoff, ngrp=ngrp, computeAll=computeAll, plot=plot, show.graph=show.graph, col.pal=col.pal,
+                    truenames=truenames, ...)
     return(res)
 } # end gengraph.DNAbin
 

Modified: pkg/R/sequences.R
===================================================================
--- pkg/R/sequences.R	2012-11-08 11:57:17 UTC (rev 1047)
+++ pkg/R/sequences.R	2012-11-08 14:13:56 UTC (rev 1048)
@@ -76,9 +76,9 @@
 
 
 
-################
-# alignment2genind
-################
+####################
+## alignment2genind
+####################
 alignment2genind <- function(x, pop=NULL, exp.char=c("a","t","g","c"), na.char="-", polyThres=1/100){
 
     ## misc checks
@@ -155,13 +155,51 @@
 
 
 
+#################
+## findMutations
+#################
 
+## generic
+findMutations <- function(...){
+    UseMethod("findMutations")
+}
 
+## method for DNAbin
+findMutations.DNAbin <- function(x, pairs=NULL, ...){
+    ## CHECKS ##
+    if(!require(ape)) stop("the ape package is needed")
+    if(!inherits(x,"DNAbin")) stop("x is not a DNAbin object")
+    x <- as.matrix(x)
 
+    ## function to pull out mutations from sequence a to b ##
+    f1 <- function(a,b){
+        seqa <- as.character(x[a,])
+        seqb <- as.character(x[b,])
+        temp <- which(seqa != seqb)
+        ori <- seqa[temp]
+        mut <- seqb[temp]
+        names(ori) <- names(mut) <- temp
+        toRemove <- !ori %in% c('a','t','g','c') | !mut %in% c('a','t','g','c')
+        ori <- ori[!toRemove]
+        mut <- mut[!toRemove]
+        res <- data.frame(ori,mut)
+        names(res) <- rownames(x)[c(a,b)]
+        res$short <- paste(row.names(res),":",res[,1],"->",res[,2],sep="")
+        return(res)
+    }
 
+    ## get list of pairs to compare ##
+    if(is.null(pairs)){
+        pairs <- expand.grid(1:nrow(x),1:nrow(x))
+        pairs <- pairs[pairs[,1]!=pairs[,2],,drop=FALSE]
+    }
 
+    out <- lapply(1:nrow(pairs), function(i) f1(pairs[i,1], pairs[i,2]))
+    names(out) <- paste(rownames(x)[pairs[,1]], rownames(x)[pairs[,2]],sep="->")
 
+    return(out)
 
+} # end findMutations
 
 
 
@@ -176,6 +214,12 @@
 
 
 
+
+
+
+
+
+
 ## ###############
 ## ## transiProb
 ## ###############

Added: pkg/man/findMutations.Rd
===================================================================
--- pkg/man/findMutations.Rd	                        (rev 0)
+++ pkg/man/findMutations.Rd	2012-11-08 14:13:56 UTC (rev 1048)
@@ -0,0 +1,49 @@
+\encoding{UTF-8}
+\name{findMutations}
+\alias{findMutations}
+\alias{findMutations.DNAbin}
+\title{Identify mutations between DNA sequences}
+\description{
+  This function compares pairs of aligned DNA sequences and identify
+  mutations (position and nature).\cr
+
+  The function \code{findMutations} is a generic, but the only method
+  implemented in adegenet so far is for \code{\link[ape]{DNAbin}}
+  objects.
+}
+\usage{
+findMutations(x, pairs=NULL, \dots)
+
+\method{findMutations}{DNAbin}(x, pairs=NULL, \dots)
+
+}
+\arguments{
+  \item{x}{a \code{DNAbin} object containing aligned sequences, as a matrix.}
+  \item{pairs}{a matrix with two columns specifying which
+    sequences to compare. Mutations will be identified from the first
+    column to the second. Any way of refering to the rows of \code{x} is
+    acceptable. If \code{NULL}, all pairs are considered.}
+}
+\value{
+  A named list indicating the mutations from one sequence to another.
+  For each comparison, a three-column matrix is provided, corresponding
+  to the nucleotides in first and second sequence, and a summary of the
+  mutation provided as:
+  [position]:[nucleotide in first sequence]->[nucleotide in second sequence].
+}
+\seealso{
+  The \code{\link{fasta2DNAbin}} to read fasta alignments with minimum
+  RAM use.
+}
+\author{
+  Thibaut Jombart \email{t.jombart at imperial.ac.uk}.
+ }
+\examples{
+\dontrun{
+if(require(ape)){
+data(woodmouse)
+findMutations(woodmouse[1:5,])
+findMutations(woodmouse, pairs=cbind(c(1,1),c(2,3)))
+}
+}
+}