[adegenet-commits] r225 - in pkg: . R data man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sat Jan 24 19:43:22 CET 2009


Author: jombart
Date: 2009-01-24 19:43:22 +0100 (Sat, 24 Jan 2009)
New Revision: 225

Modified:
   pkg/DESCRIPTION
   pkg/R/basicMethods.R
   pkg/R/classes.R
   pkg/R/import.R
   pkg/R/makefreq.R
   pkg/R/scale.R
   pkg/R/spca.R
   pkg/data/microbov.RData
   pkg/data/nancycats.RData
   pkg/data/rupica.RData
   pkg/data/spcaIllus.RData
   pkg/man/as.genind.Rd
   pkg/man/as.genpop.Rd
   pkg/man/df2genind.Rd
   pkg/man/genind.Rd
   pkg/man/genpop.Rd
   pkg/man/import.Rd
   pkg/man/read.structure.Rd
Log:
Merge devel with current version



Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION	2009-01-23 13:46:53 UTC (rev 224)
+++ pkg/DESCRIPTION	2009-01-24 18:43:22 UTC (rev 225)
@@ -1,5 +1,5 @@
 Package: adegenet
-Version: 1.2-2
+Version: 1.2-3
 Date: 2008/07/30
 Title: adegenet: a R package for the multivariate analysis of genetic markers.
 Author: Thibaut Jombart <jombart at biomserv.univ-lyon1.fr>, with contributions from Peter Solymos

Modified: pkg/R/basicMethods.R
===================================================================
--- pkg/R/basicMethods.R	2009-01-23 13:46:53 UTC (rev 224)
+++ pkg/R/basicMethods.R	2009-01-24 18:43:22 UTC (rev 225)
@@ -9,7 +9,7 @@
   cat("   #####################")
   cat("\n- genotypes of individuals - \n")
   cat("\nS4 class: ", as.character(class(x)))
-  
+
   cat("\n at call: ")
   print(x at call)
 
@@ -17,18 +17,19 @@
   len <- 7
 
   cat("\n at tab: ", nrow(x at tab), "x", ncol(x at tab), "matrix of genotypes\n" )
- 
+
   cat("\n at ind.names: vector of ", length(x at ind.names), "individual names")
   cat("\n at loc.names: vector of ", length(x at loc.names), "locus names")
   cat("\n at loc.nall: number of alleles per locus")
   cat("\n at loc.fac: locus factor for the ", ncol(x at tab), "columns of @tab")
   cat("\n at all.names: list of ", length(x at all.names), "components yielding allele names for each locus")
   cat("\n at ploidy: ",x at ploidy)
-  
+  cat("\n at type: ",x at type)
+
   cat("\n\nOptionnal contents: ")
   cat("\n at pop: ", ifelse(is.null(x at pop), "- empty -", "factor giving the population of each individual"))
   cat("\n at pop.names: ", ifelse(is.null(x at pop.names), "- empty -", "factor giving the population of each individual"))
- 
+
   cat("\n\n at other: ")
   if(!is.null(x at other)){
     cat("a list containing: ")
@@ -36,9 +37,9 @@
   } else {
     cat("- empty -\n")
   }
-  
+
   cat("\n")
-} 
+}
 ) # end show method for genind
 
 
@@ -55,20 +56,22 @@
   cat("       #####################")
   cat("\n- Alleles counts for populations - \n")
   cat("\nS4 class: ", as.character(class(x)))
-  
+
   cat("\n at call: ")
   print(x at call)
 
   p <- ncol(x at tab)
 
   cat("\n at tab: ", nrow(x at tab), "x", ncol(x at tab), "matrix of alleles counts\n" )
-  
+
   cat("\n at pop.names: vector of ", length(x at pop.names), "population names")
   cat("\n at loc.names: vector of ", length(x at loc.names), "locus names")
   cat("\n at loc.nall: number of alleles per locus")
   cat("\n at loc.fac: locus factor for the ", ncol(x at tab), "columns of @tab")
   cat("\n at all.names: list of ", length(x at all.names), "components yielding allele names for each locus")
- 
+  cat("\n at ploidy: ",x at ploidy)
+  cat("\n at type: ",x at type)
+
   cat("\n\n at other: ")
   if(!is.null(x at other)){
     cat("a list containing: ")
@@ -76,10 +79,10 @@
   } else {
     cat("- empty -\n")
   }
-  
+
   cat("\n")
-  
-} 
+
+}
 ) # end show method for genpop
 
 
@@ -123,16 +126,16 @@
           H <- mean(H,na.rm=TRUE)
           return(H)
       }
-      
+
       res$Hobs <- unlist(lapply(temp,f1))
-      
+
       ## auxiliary function to compute expected heterozygosity
       ## freq is a vector of frequencies
       f2 <- function(freq){
           H <- 1-sum(freq*freq,na.rm=TRUE)
           return(H)
       }
-      
+
       temp <- genind2genpop(x,pop=rep(1,nrow(x at tab)),quiet=TRUE)
       temp <- makefreq(temp,quiet=TRUE)$tab
       temp.names <- colnames(temp)
@@ -140,13 +143,13 @@
       names(temp) <- temp.names
       temp <- split(temp,x at loc.fac)
       ## temp is a list of alleles frequencies (one element per locus)
-      
-      res$Hexp <- unlist(lapply(temp,f2))  
+
+      res$Hexp <- unlist(lapply(temp,f2))
   } else { # no possible heterozygosity for haploid genotypes
       res$Hobs <- 0
       res$Xexp <- 0
   }
-  
+
   ## print to screen
   listlab <- c("# Total number of genotypes: ",
                "# Population sample sizes: ",
@@ -160,7 +163,7 @@
     cat("\n",listlab[i],"\n")
     print(res[[i]])
   }
-  
+
   return(invisible(res))
 }) # end summary.genind
 
@@ -174,7 +177,7 @@
 setMethod ("summary", "genpop", function(object, ...){
   x <- object
   if(!inherits(x,"genpop")) stop("To be used with a genpop object")
-  
+
   res <- list()
 
   res$npop <- nrow(x at tab)
@@ -190,7 +193,7 @@
       w <- w/sum(w)
       return(sum(x*w))
   }
-  
+
   w <- apply(x at tab,1,sum,na.rm=TRUE) # weights for populations
   res$NA.perc <- 100*(1-mean.w(propTyped(x), w=w))
   ## res$NA.perc <- 100*(1-mean(propTyped(x,by="both"))) <- old
@@ -205,10 +208,10 @@
     cat("\n",listlab[i],"\n")
     print(res[[i]])
   }
-  
+
   return(invisible(res))
 
-} 
+}
 )# end summary.genpop
 
 

Modified: pkg/R/classes.R
===================================================================
--- pkg/R/classes.R	2009-01-23 13:46:53 UTC (rev 224)
+++ pkg/R/classes.R	2009-01-24 18:43:22 UTC (rev 225)
@@ -6,7 +6,7 @@
 ########################################################################
 
 ###############################
-# Two classes of R object are 
+# Two classes of R object are
 # defined :
 # gen - common part to genind and genpop
 # genind - genotypes of individuals
@@ -71,7 +71,7 @@
 setClassUnion("factorOrNULL", c("factor","NULL"))
 setClassUnion("charOrNULL", c("character","NULL"))
 setClassUnion("callOrNULL", c("call","NULL"))
-setClassUnion("intOrNum", c("integer","numeric"))
+setClassUnion("intOrNum", c("integer","numeric","NULL"))
 
 
 
@@ -84,19 +84,23 @@
   p <- ncol(object at tab)
   k <- length(unique(object at loc.names))
 
-  if(length(object at loc.fac) != p) {
-    cat("\ninvalid length for loc.fac\n")
-    return(FALSE)
-  }
+  if(!is.null(object at loc.fac)){
+      if(length(object at loc.fac) != p) {
+          cat("\ninvalid length for loc.fac\n")
+          return(FALSE)
+      }
 
-  if(length(levels(object at loc.fac)) != k) {
-    cat("\ninvalid number of levels in loc.fac\n")
-    return(FALSE)
+      if(length(levels(object at loc.fac)) != k) {
+          cat("\ninvalid number of levels in loc.fac\n")
+          return(FALSE)
+      }
   }
 
-  if(length(object at loc.nall) != k) {
-    cat("\ninvalid length in loc.nall\n")
-    return(FALSE)
+  if(!is.null(object at loc.nall)){
+      if(length(object at loc.nall) != k) {
+          cat("\ninvalid length in loc.nall\n")
+          return(FALSE)
+      }
   }
 
   temp <- table(object at loc.names[object at loc.names!=""])
@@ -105,9 +109,11 @@
       print(temp[temp>1])
   }
 
-  if(length(unlist(object at all.names)) != p) {
-    cat("\ninvalid length in all.names\n")
-    return(FALSE)
+  if(!is.null(object at all.names)){
+      if(length(unlist(object at all.names)) != p) {
+          cat("\ninvalid length in all.names\n")
+          return(FALSE)
+      }
   }
 
   return(TRUE)
@@ -117,9 +123,9 @@
 
 setClass("gen", representation(tab = "matrix",
                                loc.names = "character",
-                               loc.fac = "factor",
+                               loc.fac = "factorOrNULL",
                                loc.nall = "intOrNum",
-                               all.names = "list",
+                               all.names = "listOrNULL",
                                call = "callOrNULL",
                                "VIRTUAL"),
          prototype(tab=matrix(ncol=0,nrow=0), loc.nall=integer(0), call=NULL))
@@ -137,8 +143,9 @@
                                    pop = "factorOrNULL",
                                    pop.names = "charOrNULL",
                                    ploidy = "integer",
+                                   type = "character",
                                    other = "listOrNULL", "VIRTUAL"),
-         prototype(pop=NULL, pop.names = NULL, ploidy = as.integer(2), other = NULL))
+         prototype(pop=NULL, pop.names = NULL, type = "codom", ploidy = as.integer(2), other = NULL))
 
 
 
@@ -160,7 +167,7 @@
         warning("\nduplicate names in ind.names:\n")
         print(temp[temp>1])
     }
-    
+
     if(!is.null(object at pop)){ # check pop
 
         if(length(object at pop) != nrow(object at tab)) {
@@ -191,7 +198,14 @@
         cat("\nploidy inferior to 1\n")
         return(FALSE)
     }
-    
+
+    ## check type of marker
+    if(!object at type %in% c("codom","PA") ){
+        cat("\nunknowon type of marker\n")
+        return(FALSE)
+    }
+
+
     return(TRUE)
 } #end .genind.valid
 
@@ -203,8 +217,9 @@
 ########################
 # virtual class popInfo
 ########################
-setClass("popInfo", representation(pop.names = "character", other = "listOrNULL", "VIRTUAL"),
-         prototype(other = NULL))
+setClass("popInfo", representation(pop.names = "character", ploidy = "integer",
+                                   type = "character", other = "listOrNULL", "VIRTUAL"),
+         prototype(type = "codom", ploidy = as.integer(2), other = NULL))
 
 
 
@@ -263,53 +278,64 @@
 # Function genind
 ##################
 ## constructor of a genind object
-genind <- function(tab,pop=NULL,prevcall=NULL,ploidy=2){
+genind <- function(tab,pop=NULL,prevcall=NULL,ploidy=2,type=c("codom","PA")){
 
   X <- as.matrix(tab)
   if(is.null(colnames(X))) stop("tab columns have no name.")
   if(is.null(rownames(X))) {rownames(X) <- 1:nrow(X)}
- 
-  
+
+  type <- match.arg(type)
+
   # labels for individuals
   nind <- nrow(X)
   ind.names <- .rmspaces(rownames(X))
   ind.codes <- .genlab("", nind)
   names(ind.names) <- ind.codes
-  
+
   # labels for loci
   # and loc.nall
-  temp <- colnames(X)
-  temp <- gsub("[.].*$","",temp)
-  temp <- .rmspaces(temp)
-  # beware !!! Function 'table' gives ordred output.
-  loc.names <- unique(temp)
-  loc.nall <-  table(temp)[match(loc.names,names(table(temp)))]
-  loc.nall <- as.integer(loc.nall)
+  if(type=="codom"){
+      temp <- colnames(X)
+      temp <- gsub("[.].*$","",temp)
+      temp <- .rmspaces(temp)
+      ## beware !!! Function 'table' gives ordred output.
+      loc.names <- unique(temp)
+      loc.nall <-  table(temp)[match(loc.names,names(table(temp)))]
+      loc.nall <- as.integer(loc.nall)
 
-  nloc <- length(loc.names)
-  loc.codes <- .genlab("L",nloc)
+      nloc <- length(loc.names)
+      loc.codes <- .genlab("L",nloc)
 
-  names(loc.names) <- loc.codes
+      names(loc.names) <- loc.codes
 
-  names(loc.nall) <- loc.codes
+      names(loc.nall) <- loc.codes
 
-  # loc.fac
-  loc.fac <- rep(loc.codes,loc.nall)
+      ## loc.fac
+      loc.fac <- rep(loc.codes,loc.nall)
 
-  # alleles name
-  temp <- colnames(X)
-  temp <- gsub("^.*[.]","",temp)
-  temp <- .rmspaces(temp)
-  all.names <- split(temp,loc.fac)
-  all.codes <- lapply(all.names,function(e) .genlab("",length(e)))
-  for(i in 1:length(all.names)){
-    names(all.names[[i]]) <- all.codes[[i]]
+      ## alleles name
+      temp <- colnames(X)
+      temp <- gsub("^.*[.]","",temp)
+      temp <- .rmspaces(temp)
+      all.names <- split(temp,loc.fac)
+      all.codes <- lapply(all.names,function(e) .genlab("",length(e)))
+      for(i in 1:length(all.names)){
+          names(all.names[[i]]) <- all.codes[[i]]
+      }
+
+      rownames(X) <- ind.codes
+      colnames(X) <- paste(loc.fac,unlist(all.codes),sep=".")
+      loc.fac <- as.factor(loc.fac)
+  } else { # end if type=="codom"
+      nloc <- ncol(X)
+      loc.codes <- .genlab("N", nloc)
+      colnames(X) <- loc.codes
+      loc.names <-colnames(X)
+      names(loc.names) <- loc.codes
+      loc.fac <- NULL
+      all.names <- NULL
+      loc.nall <- NULL
   }
-  
-  rownames(X) <- ind.codes
-  colnames(X) <- paste(loc.fac,unlist(all.codes),sep=".")
-  loc.fac <- as.factor(loc.fac)
-  
   # This was used in S3 version
   #
   #res <- list( tab=X, ind.names=ind.names, loc.names=loc.names,
@@ -323,7 +349,7 @@
   res at loc.nall <- loc.nall
   res at loc.fac <- loc.fac
   res at all.names <- all.names
-  
+
   # populations name (optional)
   # beware, keep levels of pop sorted in
   # there order of appearance
@@ -348,11 +374,14 @@
   if(plo < as.integer(1)) stop("ploidy inferior to 1")
   res at ploidy <- plo
 
+  ## type of marker
+  res at type <- as.character(type)
+
   if(is.null(prevcall)) {prevcall <- match.call()}
   res at call <- prevcall
- 
+
   return(res)
-  
+
 } # end genind
 
 ######################
@@ -365,18 +394,18 @@
 ##################
 # Function genpop
 ##################
-genpop <- function(tab,prevcall=NULL){
+genpop <- function(tab,prevcall=NULL,ploidy=as.integer(2),type=c("codom","PA")){
 
   X <- as.matrix(tab)
   if(is.null(colnames(X))) stop("tab columns have no name.")
   if(is.null(rownames(X))) {rownames(X) <- 1:nrow(X)}
- 
+
   # labels for populations
   npop <- nrow(X)
   pop.names <- .rmspaces(rownames(X))
   pop.codes <- .genlab("P", npop)
   names(pop.names) <- pop.codes
-  
+
   # labels for loci
   # and loc.nall
   temp <- colnames(X)
@@ -406,7 +435,7 @@
   for(i in 1:length(all.names)){
     names(all.names[[i]]) <- all.codes[[i]]
   }
-  
+
   rownames(X) <- pop.codes
   colnames(X) <- paste(loc.fac,unlist(all.codes),sep=".")
   loc.fac <- as.factor(loc.fac)
@@ -424,12 +453,14 @@
   res at loc.nall <- loc.nall
   res at loc.fac <- loc.fac
   res at all.names <- all.names
- 
+  res at ploidy <- ploidy
+  res at type <- as.character(type)
+
   if(is.null(prevcall)) {prevcall <- match.call()}
   res at call <- prevcall
-  
+
   return(res)
-  
+
 } # end genpop
 
 

Modified: pkg/R/import.R
===================================================================
--- pkg/R/import.R	2009-01-23 13:46:53 UTC (rev 224)
+++ pkg/R/import.R	2009-01-24 18:43:22 UTC (rev 225)
@@ -1,16 +1,17 @@
+###################################################################
+## Fonctions designed to import files from other softwares
+## into genind objects
+##
+## currently supported formats are :
+## .gtx (GENETIX)
+## .dat (Fstat)
+## .gen (Genepop)
+## .stru (STRUCTURE)
+##
+## Thibaut Jombart, avril 2006
+## jombart at biomserv.univ-lyon1.fr
+##
 ##################################################################
-# Fonctions designed to import files from other softwares
-# into genind objects
-#
-# currently supported formats are :
-# .gtx (GENETIX)
-# .dat (Fstat)
-# .gen (Genepop)
-#
-# Thibaut Jombart, avril 2006
-# jombart at biomserv.univ-lyon1.fr
-#
-##################################################################
 
 #######################
 # Function rmspaces
@@ -39,13 +40,61 @@
 #####################
 # Function df2genind
 #####################
-df2genind <- function(X, sep=NULL, ncode=NULL, ind.names=NULL, loc.names=NULL, pop=NULL, missing=NA, ploidy=2){
+df2genind <- function(X, sep=NULL, ncode=NULL, ind.names=NULL, loc.names=NULL, pop=NULL, missing=NA, ploidy=2, type=c("codom","PA")){
 
     if(is.data.frame(X)) X <- as.matrix(X)
     if (!inherits(X, "matrix")) stop ("X is not a matrix")
 
     res <- list()
+    type <- match.arg(type)
 
+    ## type PA
+    if(toupper(type)=="PA"){
+        warning("experimental mode for presence/absence")
+        mode(X) <- "numeric"
+
+        ## pop optionnelle
+        if(!is.null(pop)){
+            if(length(pop)!= n) stop("length of factor pop differs from nrow(X)")
+            pop <- as.factor(pop)
+        }
+
+        if(!is.null(ind.names)) rownames(X) <- ind.names
+        if(!is.null(loc.names)) colnames(X) <- loc.names
+
+        ## handle entirely non-typed loci and individuals
+        X <- gsub("^0*$",NA,X)
+        X <- gsub("(NA)+",NA,X)
+
+        ## Erase entierely non-typed loci
+        temp <- apply(X,2,function(c) all(is.na(c)))
+        if(any(temp)){
+            X <- X[,!temp]
+            warning("entirely non-type marker(s) deleted")
+        }
+
+        ## Erase entierely non-type individuals
+        temp <- apply(X,1,function(r) all(is.na(r)))
+        if(any(temp)){
+            X <- X[!temp,]
+            pop <- pop[!temp]
+            warning("entirely non-type individual(s) deleted")
+        }
+
+        ## erase non-polymorphic loci
+        temp <- apply(X, 2, function(loc) length(unique(loc[!is.na(loc)]))==1)
+        if(any(temp)){
+            X <- X[,!temp]
+            warning("non-polymorphic marker(s) deleted")
+        }
+
+        prevcall <- match.call()
+
+        res <- genind( tab=X, pop=pop, prevcall=prevcall, ploidy=ploidy, type="PA")
+
+        return(res)
+    } # end type PA
+
     ## make sure X is in character mode
     mode(X) <- "character"
 
@@ -229,7 +278,7 @@
 
     prevcall <- match.call()
 
-    res <- genind( tab=mat, pop=pop, prevcall=prevcall, ploidy=ploidy )
+    res <- genind( tab=mat, pop=pop, prevcall=prevcall, ploidy=ploidy, type=type)
 
     return(res)
 } # end df2genind
@@ -247,7 +296,8 @@
 
 
     ## read from file
-    if(!file.exists(file)) stop("Specified file does not exist.")
+    ## if(!file.exists(file)) stop("Specified file does not exist.") <- not needed
+
     if(toupper(.readExt(file)) != "GTX") stop("File extension .gtx expected")
     ## retrieve first infos
     nloc <- as.integer(scan(file,nlines=1,what="character",quiet=TRUE)[1])
@@ -313,43 +363,43 @@
 # Function read.fstat
 ##########################
 read.fstat <- function(file,missing=NA,quiet=FALSE){
-  if(!file.exists(file)) stop("Specified file does not exist.")
-  if(toupper(.readExt(file)) != "DAT") stop("File extension .dat expected")
+    ##if(!file.exists(file)) stop("Specified file does not exist.") <- not needed
+    if(toupper(.readExt(file)) != "DAT") stop("File extension .dat expected")
 
-  if(!quiet) cat("\n Converting data from a FSTAT .dat file to a genind object... \n\n")
+    if(!quiet) cat("\n Converting data from a FSTAT .dat file to a genind object... \n\n")
 
-  call <- match.call()
-  txt <- scan(file,what="character",sep="\n",quiet=TRUE)
-  txt <- gsub("\t"," ",txt)
+    call <- match.call()
+    txt <- scan(file,what="character",sep="\n",quiet=TRUE)
+    txt <- gsub("\t"," ",txt)
 
-  # read first infos
-  info <- unlist(strsplit(txt[1],"([[:space:]]+)"))
-  # npop <- as.numeric(info[1]) ## no longer used
-  nloc <- as.numeric(info[2])
+                                        # read first infos
+    info <- unlist(strsplit(txt[1],"([[:space:]]+)"))
+                                        # npop <- as.numeric(info[1]) ## no longer used
+    nloc <- as.numeric(info[2])
 
-  loc.names <- txt[2:(nloc+1)]
+    loc.names <- txt[2:(nloc+1)]
 
-  # build genotype matrix
-  txt <- txt[-(1:(nloc+1))]
-  txt <- .rmspaces(txt)
-  txt <- sapply(1:length(txt),function(i) unlist(strsplit(txt[i],"([[:space:]]+)|([[:blank:]]+)")) )
-  X <- t(txt)
-  pop <- factor(X[,1])
-  if(length(levels(pop)) == 1 ) pop <- NULL
-  X <- X[,-1]
+                                        # build genotype matrix
+    txt <- txt[-(1:(nloc+1))]
+    txt <- .rmspaces(txt)
+    txt <- sapply(1:length(txt),function(i) unlist(strsplit(txt[i],"([[:space:]]+)|([[:blank:]]+)")) )
+    X <- t(txt)
+    pop <- factor(X[,1])
+    if(length(levels(pop)) == 1 ) pop <- NULL
+    X <- X[,-1]
 
-  colnames(X) <- loc.names
-  rownames(X) <- 1:nrow(X)
+    colnames(X) <- loc.names
+    rownames(X) <- 1:nrow(X)
 
-  res <- df2genind(X=X,pop=pop,missing=missing, ploidy=2)
-  # beware : fstat files do not yield ind names
-  res at ind.names <- rep("",length(res at ind.names))
-  names(res at ind.names) <- rownames(res at tab)
-  res at call <- call
+    res <- df2genind(X=X,pop=pop,missing=missing, ploidy=2)
+                                        # beware : fstat files do not yield ind names
+    res at ind.names <- rep("",length(res at ind.names))
+    names(res at ind.names) <- rownames(res at tab)
+    res at call <- call
 
-  if(!quiet) cat("\n...done.\n\n")
+    if(!quiet) cat("\n...done.\n\n")
 
-  return(res)
+    return(res)
 
 } # end read.fstat
 
@@ -361,17 +411,17 @@
 # Function read.genepop
 ##########################
 read.genepop <- function(file,missing=NA,quiet=FALSE){
-  if(!file.exists(file)) stop("Specified file does not exist.")
-  if(toupper(.readExt(file)) != "GEN") stop("File extension .gen expected")
+    ## if(!file.exists(file)) stop("Specified file does not exist.") <- not needed
+    if(toupper(.readExt(file)) != "GEN") stop("File extension .gen expected")
 
-  if(!quiet) cat("\n Converting data from a Genepop .gen file to a genind object... \n\n")
+    if(!quiet) cat("\n Converting data from a Genepop .gen file to a genind object... \n\n")
 
-  prevcall <- match.call()
+    prevcall <- match.call()
 
-  txt <- scan(file,sep="\n",what="character",quiet=TRUE)
-  if(!quiet) cat("\nFile description: ",txt[1], "\n")
-  txt <- txt[-1]
-  txt <- gsub("\t", " ", txt)
+    txt <- scan(file,sep="\n",what="character",quiet=TRUE)
+    if(!quiet) cat("\nFile description: ",txt[1], "\n")
+    txt <- txt[-1]
+    txt <- gsub("\t", " ", txt)
 
   # two cases for locus names:
   # 1) all on the same row, separated by ","
@@ -402,57 +452,57 @@
   #  txt <- txt[-(1:nloc)]
   #}
 
-  # new strategy (shorter): isolate the 'locus names' part and then parse it.
-  locinfo.idx <- 1:(min(grep("POP",toupper(txt)))-1)
-  locinfo <- txt[locinfo.idx]
-  locinfo <- paste(locinfo,collapse=",")
-  loc.names <- unlist(strsplit(locinfo,"([,]|[\n])+"))
-  loc.names <- .rmspaces(loc.names)
-  nloc <- length(loc.names)
-  txt <- txt[-locinfo.idx]
+    ## new strategy (shorter): isolate the 'locus names' part and then parse it.
+    locinfo.idx <- 1:(min(grep("POP",toupper(txt)))-1)
+    locinfo <- txt[locinfo.idx]
+    locinfo <- paste(locinfo,collapse=",")
+    loc.names <- unlist(strsplit(locinfo,"([,]|[\n])+"))
+    loc.names <- .rmspaces(loc.names)
+    nloc <- length(loc.names)
+    txt <- txt[-locinfo.idx]
 
-  # locus names have been retreived
+    ## locus names have been retreived
 
-  # build the pop factor
-  # and correct the genotypes splited on more than 1 line
-  pop.idx <- grep("^([[:space:]]*)POP([[:space:]]*)$",toupper(txt))
-  npop <- length(pop.idx)
-  # correction for splited genotype
-  # isolated by the absence of comma on a line not containing "pop"
-  nocomma <- which(! (1:length(txt)) %in% grep(",",txt))
-  splited <- nocomma[which(! nocomma %in% pop.idx)]
-  if(length(splited)>0){
-    for(i in sort(splited,dec=TRUE)){
-      txt[i-1] <- paste(txt[i-1],txt[i],sep=" ")
+    ## build the pop factor
+    ## and correct the genotypes splited on more than 1 line
+    pop.idx <- grep("^([[:space:]]*)POP([[:space:]]*)$",toupper(txt))
+    npop <- length(pop.idx)
+    ## correction for splited genotype
+    ## isolated by the absence of comma on a line not containing "pop"
+    nocomma <- which(! (1:length(txt)) %in% grep(",",txt))
+    splited <- nocomma[which(! nocomma %in% pop.idx)]
+    if(length(splited)>0){
+        for(i in sort(splited,dec=TRUE)){
+            txt[i-1] <- paste(txt[i-1],txt[i],sep=" ")
+        }
+        txt <- txt[-splited]
     }
-    txt <- txt[-splited]
-  }
-  # end correction
+    ## end correction
 
-  # reevaluate pop index
-  pop.idx <- grep("^([[:space:]]*)POP([[:space:]]*)$",toupper(txt))
+    ## reevaluate pop index
+    pop.idx <- grep("^([[:space:]]*)POP([[:space:]]*)$",toupper(txt))
 
-  txt[length(txt)+1] <- "POP"
-  nind.bypop <- diff(grep("^([[:space:]]*)POP([[:space:]]*)$",toupper(txt)))-1
-  pop <- factor(rep(1:npop,nind.bypop))
+    txt[length(txt)+1] <- "POP"
+    nind.bypop <- diff(grep("^([[:space:]]*)POP([[:space:]]*)$",toupper(txt)))-1
+    pop <- factor(rep(1:npop,nind.bypop))
 
-  txt <- txt[-c(pop.idx,length(txt))]
+    txt <- txt[-c(pop.idx,length(txt))]
 
-  temp <- sapply(1:length(txt),function(i) strsplit(txt[i],","))
-  # temp is a list with nind elements, first being ind. name and 2nd, genotype
+    temp <- sapply(1:length(txt),function(i) strsplit(txt[i],","))
+    ## temp is a list with nind elements, first being ind. name and 2nd, genotype
 
-  ind.names <- sapply(temp,function(e) e[1])
-  ind.names <- .rmspaces(ind.names)
-  # individuals' name are now clean
+    ind.names <- sapply(temp,function(e) e[1])
+    ind.names <- .rmspaces(ind.names)
+    ## individuals' name are now clean
 
-  vec.genot <- sapply(temp,function(e) e[2])
-  vec.genot <- .rmspaces(vec.genot)
+    vec.genot <- sapply(temp,function(e) e[2])
+    vec.genot <- .rmspaces(vec.genot)
 
-  # X is a individual x locus genotypes matrix
-  X <- matrix(unlist(strsplit(vec.genot,"[[:space:]]+")),ncol=nloc,byrow=TRUE)
+    ## X is a individual x locus genotypes matrix
+    X <- matrix(unlist(strsplit(vec.genot,"[[:space:]]+")),ncol=nloc,byrow=TRUE)
 
-  rownames(X) <- ind.names
-  colnames(X) <- loc.names
+    rownames(X) <- ind.names
+    colnames(X) <- loc.names
 
  ##  # correct X to fulfill the genetix format
 ##   f1 <- function(char){
@@ -466,18 +516,18 @@
 ##   if(all(nchar(X)==2)) {X <- apply(X,c(1,2),f1)}
 ##   if(all(nchar(X)==4)) {X <- apply(X,c(1,2),f2)}
 
-  # give right pop names
-  # beware: genepop takes the name of the last individual of a sample as this sample's name
-  pop.names.idx <- cumsum(table(pop))
-  pop.names <- ind.names[pop.names.idx]
-  levels(pop) <- pop.names
+    ## give right pop names
+    ## beware: genepop takes the name of the last individual of a sample as this sample's name
+    pop.names.idx <- cumsum(table(pop))
+    pop.names <- ind.names[pop.names.idx]
+    levels(pop) <- pop.names
 
-  res <- df2genind(X=X,pop=pop,missing=missing, ploidy=2)
-  res at call <- prevcall
+    res <- df2genind(X=X,pop=pop,missing=missing, ploidy=2)
+    res at call <- prevcall
 
-  if(!quiet) cat("\n...done.\n\n")
+    if(!quiet) cat("\n...done.\n\n")
 
-  return(res)
+    return(res)
 
 } # end read.genepop
 
@@ -490,162 +540,162 @@
 ############################
 read.structure <- function(file, n.ind=NULL, n.loc=NULL,  onerowperind=NULL, col.lab=NULL, col.pop=NULL, col.others=NULL, row.marknames=NULL, NA.char="-9", pop=NULL, missing=NA, ask=TRUE, quiet=FALSE){
 
-  if(!file.exists(file)) stop("Specified file does not exist.")
-  if(!toupper(.readExt(file)) %in% c("STR","STRU")) stop("File extension .stru expected")
+    ## if(!file.exists(file)) stop("Specified file does not exist.") <- not needed
+    if(!toupper(.readExt(file)) %in% c("STR","STRU")) stop("File extension .stru expected")
 
-  ## set defaults for non optional arguments without default values
-  if(!ask){
-      if(is.null(col.lab)) col.lab <- as.integer(0)
-      if(is.null(col.pop)) col.pop <- as.integer(0)
-      if(is.null(row.marknames)) row.marknames <- as.integer(0)
-  }
+    ## set defaults for non optional arguments without default values
+    if(!ask){
+        if(is.null(col.lab)) col.lab <- as.integer(0)
+        if(is.null(col.pop)) col.pop <- as.integer(0)
+        if(is.null(row.marknames)) row.marknames <- as.integer(0)
+    }
 
-  ## required questions
-  if(is.null(n.ind)){
-    cat("\n How many genotypes are there? ")
-    n.ind <- as.integer(readLines(n = 1))
-  }
+    ## required questions
+    if(is.null(n.ind)){
+        cat("\n How many genotypes are there? ")
+        n.ind <- as.integer(readLines(n = 1))
+    }
 
-  if(is.null(n.loc)){
-    cat("\n How many markers are there? ")
-    n.loc <- as.integer(readLines(n = 1))
-  }
+    if(is.null(n.loc)){
+        cat("\n How many markers are there? ")
+        n.loc <- as.integer(readLines(n = 1))
+    }
 
-  if(is.null(col.lab)){
-    cat("\n Which column contains labels for genotypes ('0' if absent)? ")
-    col.lab <- as.integer(readLines(n = 1))
-  }
+    if(is.null(col.lab)){
+        cat("\n Which column contains labels for genotypes ('0' if absent)? ")
+        col.lab <- as.integer(readLines(n = 1))
+    }
 
-  if(is.null(col.pop)){
-    cat("\n Which column contains the population factor ('0' if absent)? ")
-    col.pop <- as.integer(readLines(n = 1))
-  }
+    if(is.null(col.pop)){
+        cat("\n Which column contains the population factor ('0' if absent)? ")
+        col.pop <- as.integer(readLines(n = 1))
+    }
 
-  if(is.null(col.others) & ask){
-    cat("\n Which other optional columns should be read (press 'return' when done)? ")
-    col.others <- scan(quiet=TRUE)
-    if(length(col.others) == 0)  col.others <- NULL
-  }
+    if(is.null(col.others) & ask){
+        cat("\n Which other optional columns should be read (press 'return' when done)? ")
+        col.others <- scan(quiet=TRUE)
+        if(length(col.others) == 0)  col.others <- NULL
+    }
 
-  if(is.null(row.marknames)){
-    cat("\n Which row contains the marker names ('0' if absent)? ")
-    row.marknames <- as.integer(readLines(n = 1))
-  }
+    if(is.null(row.marknames)){
+        cat("\n Which row contains the marker names ('0' if absent)? ")
+        row.marknames <- as.integer(readLines(n = 1))
+    }
 
-  if(is.null(onerowperind)){
-    cat("\n Are genotypes coded by a single row (y/n)? ")
-    onerowperind <- toupper(readLines(n = 1))
-    if(onerowperind == "Y") {
-      onerowperind <- TRUE
-    } else {
-      onerowperind <- FALSE
+    if(is.null(onerowperind)){
+        cat("\n Are genotypes coded by a single row (y/n)? ")
+        onerowperind <- toupper(readLines(n = 1))
+        if(onerowperind == "Y") {
+            onerowperind <- TRUE
+        } else {
+            onerowperind <- FALSE
+        }
     }
-  }
 
-  if(is.null(NA.char)){
-    cat("\n What is the code for missing data (default is '-9')? ")
-    NA.char <- as.character(readLines(n = 1))
-  }
+    if(is.null(NA.char)){
+        cat("\n What is the code for missing data (default is '-9')? ")
+        NA.char <- as.character(readLines(n = 1))
+    }
 
-  # message to console
-  if(!quiet) cat("\n Converting data from a STRUCTURE .stru file to a genind object... \n\n")
+    ## message to console
+    if(!quiet) cat("\n Converting data from a STRUCTURE .stru file to a genind object... \n\n")
 
-  # read the file
-  txt <- scan(file,sep="\n",what="character",quiet=TRUE)
+    ## read the file
+    txt <- scan(file,sep="\n",what="character",quiet=TRUE)
 
-  # remove empty lines and spaces/tabs at the end of a line
-  temp <- grep("^[[:space:]]*$",txt)
-  if(length(temp) > 0) {
-    txt <- txt[-temp]
-  }
+    ## remove empty lines and spaces/tabs at the end of a line
+    temp <- grep("^[[:space:]]*$",txt)
+    if(length(temp) > 0) {
+        txt <- txt[-temp]
+    }
 
-  txt <- gsub("([[:blank:]]+)$","",txt)
+    txt <- gsub("([[:blank:]]+)$","",txt)
 
-  ## isolate each useful component of the file
-  # matrix of data
-  if(onerowperind) {
-    n <- n.ind
-    p <- 2*n.loc
-  } else{
-    n <- 2*n.ind
-    p <- n.loc
-  }
+    ## isolate each useful component of the file
+    ## matrix of data
+    if(onerowperind) {
+        n <- n.ind
+        p <- 2*n.loc
+    } else{
+        n <- 2*n.ind
+        p <- n.loc
+    }
 
-  lastline <- length(txt)
-  mat <- txt[(lastline-n+1):lastline]
-  mat <- t(as.data.frame(strsplit(mat,"[[:blank:]]+")))
-  rownames(mat) <- 1:n
-  gen <- mat[, (ncol(mat)-p+1):ncol(mat)]
+    lastline <- length(txt)
+    mat <- txt[(lastline-n+1):lastline]
+    mat <- t(as.data.frame(strsplit(mat,"[[:blank:]]+")))
+    rownames(mat) <- 1:n
+    gen <- mat[, (ncol(mat)-p+1):ncol(mat)]
 
 
-  # markers names
-  if(row.marknames != 0) {
-    loc.names <- .rmspaces(txt[row.marknames])
-    loc.names <- unlist(strsplit(loc.names,"[[:blank:]]+"))
-  } else {
-    loc.names <- .genlab("L",n.loc)
-  }
+    ## markers names
+    if(row.marknames != 0) {
+        loc.names <- .rmspaces(txt[row.marknames])
+        loc.names <- unlist(strsplit(loc.names,"[[:blank:]]+"))
+    } else {
+        loc.names <- .genlab("L",n.loc)
+    }
 
-  # genotypes labels
-  if(col.lab !=0) {
-    ind.names <- mat[, col.lab]
-  } else {
-    ind.names <- .genlab("",n.ind)
-  }
+    ## genotypes labels
+    if(col.lab !=0) {
+        ind.names <- mat[, col.lab]
+    } else {
+        ind.names <- .genlab("",n.ind)
+    }
 
-  # population factor
-  if(col.pop !=0) {
-     pop <- factor(mat[, col.pop])
-  } else {
-    pop <- NULL
-  }
+    ## population factor
+    if(col.pop !=0) {
+        pop <- factor(mat[, col.pop])
+    } else {
+        pop <- NULL
+    }
 
-  # other variables
-  if(!is.null(col.others)){
-    X.other <- mat[, col.others]
-  }
+    ## other variables
+    if(!is.null(col.others)){
+        X.other <- mat[,col.others]
+    }
 
-  ## transformations if onerowperind is FALSE
-  if(!onerowperind) {
-    temp <- seq(1,n,by=2)
-    ind.names <- ind.names[temp]
-    if(length(ind.names) < n.ind) warning("Duplicated identifier for genotypes")
-    pop <- pop[temp]
-    if(exists("X.other")) X.other <- X.other[temp]
+    ## transformations if onerowperind is FALSE
+    if(!onerowperind) {
+        temp <- seq(1,n,by=2)
+        ind.names <- ind.names[temp]
+        if(length(ind.names) < n.ind) warning("Duplicated identifier for genotypes")
+        pop <- pop[temp]
+        if(exists("X.other")) X.other <- X.other[temp]
 
-    ## make sur that all strings in gen have the same number of characters
-    ncode <- max(nchar(gen))
-    keepCheck <- any(nchar(gen) < ncode)
-
-    while(keepCheck){
-        mat0 <- matrix("", ncol=ncol(gen), nrow=nrow(gen))
-        mat0[nchar(gen) < ncode] <- "0"
-        gen <-  matrix(paste(mat0, gen, sep=""), nrow=nrow(mat0))
+        ## make sur that all strings in gen have the same number of characters
+        ncode <- max(nchar(gen))
         keepCheck <- any(nchar(gen) < ncode)
-    }
 
-    # reorder matrix of genotypes
-    X <- t(sapply(temp, function(i) paste(gen[i,],gen[i+1,],sep="") ))
+        while(keepCheck){
+            mat0 <- matrix("", ncol=ncol(gen), nrow=nrow(gen))
+            mat0[nchar(gen) < ncode] <- "0"
+            gen <-  matrix(paste(mat0, gen, sep=""), nrow=nrow(mat0))
+            keepCheck <- any(nchar(gen) < ncode)
+        }
 
-  } else { # else of "if(!onerowperind)"
-      temp <- seq(1,p-1,by=2)
-      X <- paste(gen[,temp] , gen[,temp+1], sep="")
-      X <- matrix(X, nrow=n.ind)
-  }
+        ## reorder matrix of genotypes
+        X <- t(sapply(temp, function(i) paste(gen[i,],gen[i+1,],sep="") ))
 
-  # replace missing values by NAs
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/adegenet -r 225


More information about the adegenet-commits mailing list