[adegenet-commits] r117 - in pkg: . R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu May 22 16:57:42 CEST 2008


Author: jombart
Date: 2008-05-22 16:57:42 +0200 (Thu, 22 May 2008)
New Revision: 117

Modified:
   pkg/R/auxil.R
   pkg/R/export.R
   pkg/R/hybridize.R
   pkg/R/import.R
   pkg/R/propShared.R
   pkg/R/spca.R
   pkg/TODO
   pkg/man/df2genind.Rd
Log:
Fixed some problems with genind2df and pop argument;
recoded genind2df (now twice as fast).
Had to precise usepop=FALSE where genind2df was called.


Modified: pkg/R/auxil.R
===================================================================
--- pkg/R/auxil.R	2008-05-22 12:33:52 UTC (rev 116)
+++ pkg/R/auxil.R	2008-05-22 14:57:42 UTC (rev 117)
@@ -423,7 +423,7 @@
     
     
     ## extract info
-    listTab <- lapply(x,genind2df)
+    listTab <- lapply(x,genind2df,usepop=FALSE)
     getPop <- function(obj){
         if(is.null(obj$pop)) return(factor(rep(NA,nrow(obj$tab))))
       pop <- obj$pop

Modified: pkg/R/export.R
===================================================================
--- pkg/R/export.R	2008-05-22 12:33:52 UTC (rev 116)
+++ pkg/R/export.R	2008-05-22 14:57:42 UTC (rev 117)
@@ -117,12 +117,15 @@
 #####################
 # Function genind2df
 #####################
-genind2df <- function(x,pop=NULL, sep=""){
+genind2df <- function(x, pop=NULL, sep="", usepop=TRUE){
 
   if(!is.genind(x)) stop("x is not a valid genind object")
 
-  if(is.null(pop)) pop <- x at pop
-  if(is.null(pop)) pop <- as.factor(rep("P1",nrow(x at tab)))
+  if(is.null(pop)) {
+      pop <- x at pop
+      levels(pop) <- x at pop.names
+  }
+  ## if(is.null(pop)) pop <- as.factor(rep("P1",nrow(x at tab))) # no longer used
   
   # make one table by locus from x at tab
   kX <- seploc(x,res.type="matrix")
@@ -131,14 +134,26 @@
   
   # function to recode a genotype in form "A1/A2" from frequencies
   recod <- function(vec,lab){
-    if(all(is.na(vec))) return(NA)
-    if(round(sum(vec),10) != 1) return(NA)
-    temp <- c(which(vec==0.5),which(vec==1))
-    if(length(temp)==0) return(NA)
-    lab <- lab[temp]
-    res <- paste(lab[1],lab[length(lab)],sep=sep)
-    return(res)
+      vec <- as.logical(vec)
+      sumVec <- sum(vec)
+      if(is.na(sumVec)) {
+          return(NA)
+      } else if(sumVec==2){ # heteroZ
+          return(paste(lab[vec], collapse=sep))
+      } else if(sumVec==1){ # homoZ
+          return(paste(lab[vec],lab[vec],sep=sep))
+      } else return(NA)
   }
+  
+  ##  recod <- function(vec,lab){ ## old version, new one is faster
+  ##     if(all(is.na(vec))) return(NA)
+  ##     if(round(sum(vec),10) != 1) return(NA)
+  ##     temp <- c(which(vec==0.5),which(vec==1))
+  ##     if(length(temp)==0) return(NA)
+  ##     lab <- lab[temp]
+  ##     res <- paste(lab[1],lab[length(lab)],sep=sep)
+  ##     return(res)
+  ##   }
  
   # kGen is a list of nloc vectors of genotypes
   kGen <- lapply(1:length(kX), function(i) apply(kX[[i]],1,recod,x at all.names[[i]]))
@@ -146,5 +161,8 @@
 
   res <- cbind.data.frame(kGen,stringsAsFactors=FALSE)
 
+  ## handle pop here
+  if(!is.null(pop) & usepop) res <- cbind.data.frame(pop,res)
+  
   return(res)
 }

Modified: pkg/R/hybridize.R
===================================================================
--- pkg/R/hybridize.R	2008-05-22 12:33:52 UTC (rev 116)
+++ pkg/R/hybridize.R	2008-05-22 14:57:42 UTC (rev 117)
@@ -44,7 +44,7 @@
     gam1 at loc.fac <- x1 at loc.fac
     gam1 at all.names <- x1 at all.names
     gam1 at loc.nall <- x1 at loc.nall
-    gam1 <- genind2df(gam1,sep="/")
+    gam1 <- genind2df(gam1,sep="/",usepop=FALSE)
     gam1 <- as.matrix(gam1)
     
     ## gam 2
@@ -55,7 +55,7 @@
     gam2 at loc.fac <- x2 at loc.fac
     gam2 at all.names <- x2 at all.names
     gam2 at loc.nall <- x2 at loc.nall
-    gam2 <- genind2df(gam2,sep="/")
+    gam2 <- genind2df(gam2,sep="/",usepop=FALSE)
     gam2 <- as.matrix(gam2)
 
     #### construction of zygotes
@@ -68,8 +68,8 @@
         res <- as.data.frame(matrix(res,ncol=k))
         names(res) <- x1 at loc.names
         row.names(res) <- .genlab(hyb.label,n)
-        df1 <- genind2df(x1,sep=" ") # make df with parents and hybrids
-        df2 <- genind2df(x2,sep=" ")
+        df1 <- genind2df(x1,sep=" ",usepop=FALSE) # make df with parents and hybrids
+        df2 <- genind2df(x2,sep=" ",usepop=FALSE)
         res <- rbind.data.frame(df1,df2,res) # rbind the three df
         res[is.na(res)] <- "-9 -9" # this is two missing alleles for STRUCTURE
         pop <- rep(1:3,c(nrow(x1 at tab), nrow(x2 at tab), n)) # make a pop identifier

Modified: pkg/R/import.R
===================================================================
--- pkg/R/import.R	2008-05-22 12:33:52 UTC (rev 116)
+++ pkg/R/import.R	2008-05-22 14:57:42 UTC (rev 117)
@@ -84,6 +84,8 @@
     temp <- apply(X,2,function(c) all(c==missTyp))
     if(any(temp)){
         X <- X[,!temp]
+        loc.names <- loc.names[!temp]
+        nloc <- ncol(X)
         warning("entirely non-type marker(s) deleted")
     }
     
@@ -93,6 +95,7 @@
         X <- X[!temp,]
         pop <- pop[!temp]
         ind.names <- ind.names[!temp]
+        n <- nrow(X)
         warning("entirely non-type individual(s) deleted")        
     }
     

Modified: pkg/R/propShared.R
===================================================================
--- pkg/R/propShared.R	2008-05-22 12:33:52 UTC (rev 116)
+++ pkg/R/propShared.R	2008-05-22 14:57:42 UTC (rev 117)
@@ -15,7 +15,7 @@
     ## NAs are coded by 0
     ## The matrix is a cbind of two matrices, storing respectively the
     ## first and the second allele.
-    temp <- genind2df(x)
+    temp <- genind2df(x,usepop=FALSE)
     alleleSize <- max(apply(temp,1:2,nchar))/2
     mat1 <- apply(temp, 1:2, substr, 1, alleleSize)
     mat2 <- apply(temp, 1:2, substr, alleleSize+1, alleleSize*2)

Modified: pkg/R/spca.R
===================================================================
--- pkg/R/spca.R	2008-05-22 12:33:52 UTC (rev 116)
+++ pkg/R/spca.R	2008-05-22 14:57:42 UTC (rev 117)
@@ -65,7 +65,11 @@
   if(is.genind(obj)) { X <- obj at tab }
   if(is.genpop(obj)) { X <- makefreq(obj, quiet=TRUE)$tab }
 
-  X <- apply(X,2,f1)
+  ## handle NAs
+  if(any(is.na(X))){
+      warning("NAs in data are automatically replaced (to mean allele frequency")
+      X <- apply(X,2,f1)
+  }
 
   if(truenames){
     rownames(X) <- rownames(truenames(obj))

Modified: pkg/TODO
===================================================================
--- pkg/TODO	2008-05-22 12:33:52 UTC (rev 116)
+++ pkg/TODO	2008-05-22 14:57:42 UTC (rev 117)
@@ -26,8 +26,8 @@
 
 # CODE ISSUES:
 ==============
-* genind2df does not handle the pop argument correctly.
-* check the behavior of the constructor when one of several loci are entirely non-typed (same for individuals/populations)
+* genind2df does not handle the pop argument correctly. -- fixed (TJ)
+* check the behavior of the constructor when one of several loci are entirely non-typed (same for individuals/populations) -- done, was a pb in df2genind, now fixed (TJ)
 
 # DOCUMENTATION ISSUES:
 =======================
@@ -35,7 +35,6 @@
 
 # NEW IMPLEMENTATIONS:
 =====================
-* genind2df: add an option to give NA per allele instead of per locus: NA[sep]NA instead of NA.
 * implement different levels of ploidy in genind / genpop objects.
 
 # TESTING:
@@ -54,7 +53,8 @@
 * Implement a method to merge different markers for the same individuals
 * Build accessors for marker names, indiv names, pop names, spatial coords, ...
 * Return a spatial object from monmonier (class sp?)
-* Issue a warning when NAs exist in input of sPCA
+* Issue a warning when NAs exist in input of sPCA -- done (TJ)
+* genind2df: add an option to give NA per allele instead of per locus: NA[sep]NA instead of NA.
 
 # LONG TERM
 ==========================

Modified: pkg/man/df2genind.Rd
===================================================================
--- pkg/man/df2genind.Rd	2008-05-22 12:33:52 UTC (rev 116)
+++ pkg/man/df2genind.Rd	2008-05-22 14:57:42 UTC (rev 117)
@@ -22,7 +22,7 @@
 \usage{
 df2genind(X, ncode=NULL, ind.names=NULL, loc.names=NULL, pop=NULL,
  missing=NA)
-genind2df(x,pop=NULL, sep="")
+genind2df(x,pop=NULL, sep="",usepop=TRUE)
 }
 \arguments{
   \item{X}{a matrix or a data.frame (see decription)}
@@ -37,7 +37,9 @@
   \item{missing}{can be NA, 0 or "mean". See details section.}
   \item{x}{a \linkS4class{genind} object}
   \item{sep}{a character used to separate two alleles}
- }
+  \item{usepop}{a logical stating whether the population (argument \code{pop}
+    or \code{x at pop} should be used (TRUE, default) or not (FALSE).}
+}
    
 \details{There are 3 treatments for missing values: \cr
   - NA: kept as NA.\cr



More information about the adegenet-commits mailing list