[adegenet-commits] r704 - in www: . files files/patches

Wed Nov 3 15:22:42 CET 2010

Author: jombart
Date: 2010-11-03 15:22:42 +0100 (Wed, 03 Nov 2010)
New Revision: 704

Added:
   www/files/adegenet_1.2-7.tgz
   www/files/adegenet_1.2-7.zip
   www/files/patches/export.R
Removed:
   www/files/patches/chooseCN.R
   www/files/patches/loadingplot.R
Modified:
   www/download.html
Log:
Fix for genind2hierfstat


Modified: www/download.html
===================================================================

--- www/download.html	2010-11-03 12:24:37 UTC (rev 703)
+++ www/download.html	2010-11-03 14:22:42 UTC (rev 704)
@@ -21,8 +21,8 @@
 (adegenet_1.2-7)
 is available as:<br>
 - <a href="files/adegenet_1.2-7.tar.gz">linux/unix sources</a><br>
-- MacOS X binary<br>
-- Windows binary<br>
+- <a href="files/adegenet_1.2-7.tgz">MacOS X binary</a><br>
+- <a href="files/adegenet_1.2-7.zip">Windows binary</a><br>
 <br>
 <img alt="" src="images/bullet.png" style="width: 10px; height: 10px;">
 The
@@ -47,6 +47,10 @@
 use
 a
 patch.<br>
+<a href="files/patches/export.R"><span style="font-family: monospace;">export.R</span></a>:
+fixes issues in data conversion from adegenet to hierfstat
+(genind2hierfstat); also fixes <span style="font-family: monospace;">fstat</span>
+function.<br>
 <br>
 <img alt="" src="images/bullet.png" style="width: 10px; height: 10px;">
 <span style="font-weight: bold;">Older

Added: www/files/adegenet_1.2-7.tgz
===================================================================
(Binary files differ)


Property changes on: www/files/adegenet_1.2-7.tgz
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: www/files/adegenet_1.2-7.zip
===================================================================
(Binary files differ)


Property changes on: www/files/adegenet_1.2-7.zip
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Deleted: www/files/patches/chooseCN.R
===================================================================
--- www/files/patches/chooseCN.R	2010-11-03 12:24:37 UTC (rev 703)
+++ www/files/patches/chooseCN.R	2010-11-03 14:22:42 UTC (rev 704)
@@ -1,218 +0,0 @@
-#####################
-# Function chooseCN
-#####################
-chooseCN <- function(xy,ask=TRUE, type=NULL, result.type="nb", d1=NULL, d2=NULL, k=NULL,
-                     a=NULL, dmin=NULL, plot.nb=TRUE, edit.nb=FALSE){
-
-  if(is.data.frame(xy)) xy <- as.matrix(xy)
-  if(ncol(xy) != 2) stop("xy does not have two columns.")
-  if(any(is.na(xy))) stop("NA entries in xy.")
-  result.type <- tolower(result.type)
-   if(is.null(type) & !ask) stop("Non-interactive mode but no graph chosen; please provide a value for 'type' argument.")
-
-  if(!require(spdep, quiet=TRUE)) stop("spdep library is required.")
-
-  res <- list()
-
-  if(!is.null(d2)){
-      if(d2=="dmin"){
-          tempmat <- as.matrix(dist(xy))
-          d2min <- max(apply(tempmat, 1, function(r) min(r[r>1e-12])))
-          d2min <- d2min * 1.0001 # to avoid exact number problem
-          d2 <- d2min
-      } else if(d2=="dmax"){
-          d2max <- max(dist(xy))
-          d2max <- d2max * 1.0001 # to avoid exact number problem
-          d2 <- d2max
-      }
-  } # end handle d2
-
-  d1.first <- d1
-  d2.first <- d2
-  k.first <- k
-
-  ## handle type argument
-  if(!is.null(type)){
-      type <- as.integer(type)
-      if(type < 1 |type > 7) stop("type must be between 1 and 7")
-      ask <- FALSE
-  }
-
-  ## check for uniqueness of coordinates
-  if(any(xyTable(xy)$number>1)){ # if duplicate coords
-      DUPLICATE.XY <- TRUE
-  } else {
-      DUPLICATE.XY <- FALSE
-  }
-
-
-  ## if(is.null(type) & !ask) { type <- 1 }
-
-  ### begin large while ###
-  chooseAgain <- TRUE
-  while(chooseAgain){
-    # re-initialisation of some variables
-    d1 <- d1.first
-    d2 <- d2.first
-    k <- k.first
-
-  ## read type from console
-    if(ask){
-      temp <- TRUE
-      while(temp){
-        cat("\nChoose a connection network:\n")
-        cat("\t Delaunay triangulation (type 1)\n")
-        cat("\t Gabriel graph (type 2)\n")
-        cat("\t Relative neighbours (type 3)\n")
-        cat("\t Minimum spanning tree (type 4)\n")
-        cat("\t Neighbourhood by distance (type 5)\n")
-        cat("\t K nearest neighbours (type 6)\n")
-        cat("\t Inverse distances (type 7)\n")
-        cat("Answer: ")
-
-        type <- as.integer(readLines(n = 1))
-        temp <- type < 1 |type > 7
-        if(temp) cat("\nWrong answer\n")
-
-        if(type %in% 1:4 & DUPLICATE.XY){
-            cat("\n\n== PROBLEM DETECTED ==")
-            cat("\nDuplicate locations detected\nPlease choose another graph (5-7) or add random noise to locations (see ?jitter).\n")
-            temp <- TRUE
-        }
-
-      } # end while
-    }
-    ##
-
-    ## warning about duplicate xy coords
-    if(type %in% 1:4 & DUPLICATE.XY){
-        stop("Duplicate locations detected and incompatible with graph type 1-4.\nPlease choose another graph (5-7) or add random noise to locations (see ?jitter).")
-    }
-
-    ## graph types
-    ## type 1: Delaunay
-    if(type==1){
-      if(!require(tripack, quiet=TRUE)) stop("tripack library is required.")
-      cn <- tri2nb(xy)
-    }
-
-    # type 2: Gabriel
-    if(type==2){
-      cn <- gabrielneigh(xy)
-      cn <- graph2nb(cn, sym=TRUE)
-    }
-
-    ## type 3: Relative neighbours
-    if(type==3){
-      cn <- relativeneigh(xy)
-      cn <- graph2nb(cn, sym=TRUE)
-    }
-
-    ## type 4: Minimum spanning tree
-    if(type==4){
-      if(!require(ade4, quiet=TRUE)) stop("ade4 library is required.")
-      cn <- ade4::mstree(dist(xy)) # there is also a spdep::mstree
-      cn <- neig2nb(cn)
-    }
-
-    ## type 5: Neighbourhood by distance
-    if(type==5){
-      if(is.null(d1) |is.null(d2)){
-        tempmat <- as.matrix(dist(xy))
-        d2min <- max(apply(tempmat, 1, function(r) min(r[r>1e-12])))
-        d2min <- d2min * 1.0001 # to avoid exact number problem
-        d2max <- max(dist(xy))
-        d2max <- d2max * 1.0001 # to avoid exact number problem
-        dig <- options("digits")
-        options("digits=5")
-        cat("\n Enter minimum distance: ")
-        d1 <- as.numeric(readLines(n = 1))
-        cat("\n Enter maximum distance \n(dmin=", d2min, ", dmax=", d2max, "): ")
-        d2 <- readLines(n = 1)
-        ## handle character
-        if(d2=="dmin") {
-            d2 <- d2min
-        } else if(d2=="dmax") {
-            d2 <- d2max
-        } else {
-            d2 <- as.numeric(d2)
-        }
-        ## restore initial digit option
-        options(dig)
-      }
-    # avoid that a point is its neighbour
-      dmin <- mean(dist(xy))/100000
-      if(d1<dmin) d1 <- dmin
-      if(d2<d1) stop("d2 < d1")
-      cn <- dnearneigh(x=xy, d1=d1, d2=d2)
-    }
-
-    ## type 6: K nearests
-    if(type==6){
-      if(is.null(k)) {
-        cat("\n Enter the number of neighbours: ")
-        k <- as.numeric(readLines(n = 1))
-      }
-      cn <- knearneigh(x=xy, k=k)
-      cn <- knn2nb(cn, sym=TRUE)
-    }
-
-    ## type 7: inverse distances
-    if(type==7){
-        if(is.null(a)) {
-            cat("\n Enter the exponent: ")
-            a <- as.numeric(readLines(n = 1))
-        }
-        cn <- as.matrix(dist(xy))
-        if(is.null(dmin)) {
-            cat("\n Enter the minimum distance \n(range = 0 -", max(cn),"): ")
-            dmin <- as.numeric(readLines(n = 1))
-        }
-        if(a<1) { a <- 1 }
-        thres <- mean(cn)/1e8
-        if(dmin > thres) dmin <- thres
-        cn[cn < dmin] <- dmin
-        cn <- 1/(cn^a)
-        diag(cn) <- 0
-        cn <- prop.table(cn,1)
-        plot.nb <- FALSE
-        edit.nb <- FALSE
-        result.type <- "listw"
-    } # end type 7
-
-## end graph types
-
-    if(ask & plot.nb) {
-      plot(cn,xy)
-      cat("\nKeep this graph (y/n)? ")
-    ans <- tolower(readLines(n=1))
-      if(ans=="n") {chooseAgain <- TRUE} else {chooseAgain <- FALSE}
-    }
-    else if(plot.nb){
-      plot(cn,xy)
-      chooseAgain <- FALSE
-    }
-  else {chooseAgain <- FALSE}
-
-  }
-### end large while
-
-  if(edit.nb) {cn <- edit(cn,xy)}
-
-  if(result.type == "listw") {
-      if(type!=7) {
-          cn <- nb2listw(cn, style="W", zero.policy=TRUE)
-      } else {
-          cn <- mat2listw(cn)
-          cn$style <- "W"
-      }
-  }
-
-  res <- cn
-
-  attr(res,"xy") <- xy
-
-  return(res)
-
-} # end chooseCN
-

Added: www/files/patches/export.R
===================================================================
--- www/files/patches/export.R	                        (rev 0)
+++ www/files/patches/export.R	2010-11-03 14:22:42 UTC (rev 704)
@@ -0,0 +1,198 @@
+############################################
+#
+# Functions to transform a genind object
+# into other R classes
+#
+# Thibaut Jombart
+# t.jombart at imperial.ac.uk
+#
+############################################
+
+
+
+###########################
+# Function genind2genotype
+###########################
+genind2genotype <- function(x,pop=NULL,res.type=c("matrix","list")){
+
+  if(!is.genind(x)) stop("x is not a valid genind object")
+  if(x at ploidy != as.integer(2)) stop("not implemented for non-diploid genotypes")
+  checkType(x)
+
+  if(!require(genetics)) stop("genetics package is not required but not installed.")
+  if(is.null(pop)) pop <- x at pop
+  if(is.null(pop)) pop <- as.factor(rep("P1",nrow(x at tab)))
+  res.type <- tolower(res.type[1])
+
+  # make one table by locus from x at tab
+  kX <- seploc(x,res.type="matrix")
+  # kX is a list of nloc tables
+
+  # function to recode a genotype in form "A1/A2" from frequencies
+  recod <- function(vec,lab){
+    if(all(is.na(vec))) return(NA)
+    if(round(sum(vec),10) != 1) return(NA)
+    temp <- c(which(vec==0.5),which(vec==1))
+    if(length(temp)==0) return(NA)
+    lab <- lab[temp]
+    res <- paste(lab[1],lab[length(lab)],sep="/")
+    return(res)
+  }
+
+  # function which converts data of a locus into a list of genotypes per population ## no longer used
+  # f1 <- function(X){
+  #  tapply(X,pop,function(mat) apply(mat,1,recod))
+  #}
+
+  # kGen is a list of nloc vectors of genotypes
+  kGen <- lapply(1:length(kX), function(i) apply(kX[[i]],1,recod,x at all.names[[i]]))
+  names(kGen) <- x at loc.names
+
+  if(res.type=="list"){ # list type
+    # each genotype is splited per population
+
+    # correction of an error due to a change in as.genotype
+    # error occurs in list type when a population is entierly untyped for a locus,
+    # that is, all values are NA.
+    res <- lapply(kGen,split,pop)
+
+    f2 <- function(x){# x is a vector of character to be converted into genotype
+      if(all(is.na(x))) return(NA)
+      return(as.genotype(x))
+    }
+    res <- lapply(res,function(e) lapply(e,f2))
+  } else if(res.type=="matrix"){ # matrix type
+    res <- cbind.data.frame(kGen)
+    res <- makeGenotypes(res,convert=1:ncol(res))
+  } else stop("Unknown res.type requested.")
+
+  return(res)
+}
+
+
+
+
+
+############################
+# Function genind2hierfstat
+############################
+genind2hierfstat <- function(x,pop=NULL){
+    ##  if(!inherits(x,"genind")) stop("x must be a genind object (see ?genind)")
+    ##   invisible(validObject(x))
+    if(!is.genind(x)) stop("x is not a valid genind object")
+    if(x at ploidy != as.integer(2)) stop("not implemented for non-diploid genotypes")
+    checkType(x)
+
+    if(is.null(pop)) pop <- pop(x)
+    if(is.null(pop)) pop <- as.factor(rep("P1",nrow(x at tab)))
+
+    ## ## NOTES ON THE CODING IN HIERFSTAT ##
+    ## - interpreting function is genot2al
+    ## - same coding has to be used for all loci
+    ## (i.e., all based on the maximum number of digits to be used)
+    ## - alleles have to be coded as integers
+    ## - alleles have to be sorted by increasing order when coding a genotype
+    ## - for instance, 121 is 1/21, 101 is 1/1, 11 is 1/1
+
+    ## find max number of alleles ##
+    max.nall <- max(x at loc.nall)
+    x at all.names <- lapply(x$all.names, function(e) .genlab("",max.nall)[1:length(e)])
+
+
+    ## VERSION USING GENIND2DF ##
+    gen <- genind2df(x, sep="", usepop=FALSE)
+    gen <- as.matrix(data.frame(lapply(gen, as.numeric)))
+    res <- cbind(as.numeric(pop),as.data.frame(gen))
+    colnames(res) <- c("pop",x at loc.names)
+    if(!any(table(x at ind.names)>1)){
+        rownames(res) <- x at ind.names
+    } else {
+        warning("non-unique labels for individuals; using generic labels")
+        rownames(res) <- 1:nrow(res)
+    }
+
+    return(res)
+} # end genind2hierfstat
+
+
+
+
+
+#####################
+# Function genind2df
+#####################
+genind2df <- function(x, pop=NULL, sep="", usepop=TRUE, oneColPerAll=FALSE){
+
+  if(!is.genind(x)) stop("x is not a valid genind object")
+  ## checkType(x)
+
+  if(is.null(pop)) {
+      pop <- x at pop
+      levels(pop) <- x at pop.names
+  }
+
+  if(oneColPerAll){
+      sep <- "/"
+  }
+
+  ## PA case ##
+  if(x at type=="PA"){
+      temp <- truenames(x)
+      if(is.list(temp) & usepop){
+          res <- cbind.data.frame(pop=temp[[2]],temp[[1]])
+      } else{
+          if(is.list(temp)) {
+              res <- temp[[1]]
+          } else{
+              res <- temp
+          }
+      }
+
+      return(res) # exit here
+  }
+
+  ## codom case ##
+  # make one table by locus from x at tab
+  kX <- seploc(x,res.type="matrix")
+  kX <- lapply(kX, function(X) round(X*x at ploidy)) # take data as numbers of alleles
+  ## (kX is a list of nloc tables)
+
+  ## function to recode a genotype in form "A1[sep]...[sep]Ak" from frequencies
+  recod <- function(vec,lab){
+      if(any(is.na(vec))) return(NA)
+      res <- paste( rep(lab,vec), collapse=sep)
+      return(res)
+  }
+
+
+  # kGen is a list of nloc vectors of genotypes
+  kGen <- lapply(1:length(kX), function(i) apply(kX[[i]],1,recod,x at all.names[[i]]))
+  names(kGen) <- x at loc.names
+
+  ## if use one column per allele
+  if(oneColPerAll){
+      f1 <- function(vec){ # to repeat NA with seperators
+          vec[is.na(vec)] <- paste(rep("NA",x at ploidy), collapse=sep)
+          return(vec)
+      }
+      temp <- lapply(kGen, f1)
+      temp <- lapply(temp, strsplit,sep)
+
+      res <- lapply(temp, function(e) matrix(unlist(e), ncol=x at ploidy, byrow=TRUE))
+      res <- data.frame(res,stringsAsFactors=FALSE)
+      names(res) <- paste(rep(locNames(x),each=x at ploidy), 1:x at ploidy, sep=".")
+
+      ## handle pop here
+      if(!is.null(pop) & usepop) res <- cbind.data.frame(pop,res,stringsAsFactors=FALSE)
+
+      return(res) # exit here
+  } # end if oneColPerAll
+
+  ## build the final data.frame
+  res <- cbind.data.frame(kGen,stringsAsFactors=FALSE)
+
+  ## handle pop here
+  if(!is.null(pop) & usepop) res <- cbind.data.frame(pop,res,stringsAsFactors=FALSE)
+
+  return(res)
+}

Deleted: www/files/patches/loadingplot.R
===================================================================
--- www/files/patches/loadingplot.R	2010-11-03 12:24:37 UTC (rev 703)
+++ www/files/patches/loadingplot.R	2010-11-03 14:22:42 UTC (rev 704)
@@ -1,76 +0,0 @@
-##############
-# loadingplot
-##############
-loadingplot <- function(x, at=NULL, threshold=quantile(x,0.75), axis=1, fac=NULL, byfac=FALSE,
-                        lab=names(x), cex.lab=0.7, cex.fac=1, lab.jitter=0,
-                        main="Loading plot", xlab="Variables", ylab="Loadings",...){
-    ## some checks
-    if(is.data.frame(x) || is.matrix(x)){
-        temp <- rownames(x)
-        x <- x[,axis]
-        names(x) <- temp
-    }
-    if(!is.numeric(x)) stop("x is not numeric")
-    if(any(is.na(x))) stop("NA entries in x")
-    if(any(x<0)) {
-        warning("Some values in x are less than 0\n Using abs(x) instead, but this might not be optimal.")
-        x <- abs(x)
-    }
-    if(is.null(at)){
-        at <- 1:length(x)
-    } else {
-        if(length(at) != length(x)) stop("x and at do not have the same length.")
-    }
-
-    ## preliminary computations
-    y.min <- min(min(x),0)
-    y.max <- max(max(x),0)
-    y.offset <- (y.max-y.min)*0.02
-    if(is.null(lab)) {lab <- 1:length(x)}
-
-    if(!is.null(fac)){
-        if(byfac){
-            x <- tapply(x, fac, mean)
-            if(length(lab) != length(x)) lab <- names(x)
-        } else {
-            fac <- factor(fac, levels=unique(fac))
-            grp.idx <- cumsum(table(fac)) + 0.5
-            grp.lab.idx <- tapply(1:length(x), fac, mean)
-            grp.lab <- names(grp.idx)
-            grp.idx <- grp.idx[-length(grp.idx)]
-    }
-    } # end fac handling
-
-
-    ## start the plot
-    dat <- cbind(at, x)
-    plot(dat, type="h", xlab=xlab, ylab=ylab,
-         main=main, xaxt="n", ylim=c(y.min,y.max*1.2), ...)
-
-    ## add groups of variables (optional)
-    if(!is.null(fac) & !byfac) {
-        abline(v=grp.idx,lty=2) # split groups of variables
-        text(x=grp.lab.idx,y=y.max*1.15, labels=grp.lab, cex=cex.fac) # annotate groups
-    }
-
-    ## annotate variables that are above the threshold
-    x.ann <- at[x > threshold]
-    x.ann <- jitter(x.ann,fac=lab.jitter)
-
-    y.ann <- x[x > threshold] + y.offset
-    y.ann <- jitter(y.ann,fac=lab.jitter)
-
-    txt.ann <- lab[x > threshold]
-    text(x=x.ann, y=y.ann, label=txt.ann, cex=cex.lab)
-
-    ## indicate the threshold
-    abline(h=threshold, col="grey")
-
-    ## build the result
-    res <- list(threshold=threshold,
-                var.names=txt.ann,
-                var.idx=which(x > threshold),
-                var.values=x[x > threshold])
-    return(invisible(res))
-
-} # end loadingplot