[adegenet-commits] r156 - pkg/R www www/files

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Jul 16 19:29:00 CEST 2008


Author: jombart
Date: 2008-07-16 19:29:00 +0200 (Wed, 16 Jul 2008)
New Revision: 156

Added:
   www/files/import.R
Modified:
   pkg/R/import.R
   www/download.html
Log:
Correct a bug of non-detection of NAs while reading objects.


Modified: pkg/R/import.R
===================================================================
--- pkg/R/import.R	2008-07-13 22:29:35 UTC (rev 155)
+++ pkg/R/import.R	2008-07-16 17:29:00 UTC (rev 156)
@@ -75,13 +75,14 @@
     tempX <- X
     if(!is.null(sep)) tempX <- gsub(sep,"",X)
     ## turn NANANA, 00000, ... into NA
-    tempX <- gsub("^0*$",NA,X)
+    tempX <- gsub("^0*$",NA,tempX)
     tempX <- gsub("(NA)+",NA,tempX)
-    
+
     ## Erase entierely non-typed loci
     temp <- apply(tempX,2,function(c) all(is.na(c)))
     if(any(temp)){
         X <- X[,!temp]
+        tempX <- tempX[,!temp]
         loc.names <- loc.names[!temp]
         nloc <- ncol(X)
         warning("entirely non-type marker(s) deleted")
@@ -91,6 +92,7 @@
     temp <- apply(tempX,1,function(r) all(is.na(r)))
     if(any(temp)){
         X <- X[!temp,]
+        tempX <- tempX[!temp,]
         pop <- pop[!temp]
         ind.names <- ind.names[!temp]
         n <- nrow(X)
@@ -98,7 +100,8 @@
     }
 
     n <- nrow(X)
-    X <- gsub("^.*NA.*$",NA,X) # set correct NAs in X
+    ## SET NAs IN X
+    X[is.na(tempX)] <- NA
     
     # ind.names <- rownames(X) this erases the real labels
     # note: if X is kept as a matrix, duplicate row names are no problem

Modified: www/download.html
===================================================================
--- www/download.html	2008-07-13 22:29:35 UTC (rev 155)
+++ www/download.html	2008-07-16 17:29:00 UTC (rev 156)
@@ -43,6 +43,9 @@
 (CRAN) ones. Simply download the file in your working directory and
 type <span style="font-family: monospace;">source("[your-patch-file.R]")</span>&nbsp;
 to use a patch.<br>
+- <a href="files/import.R"><span style="font-family: monospace;">import.R</span></a>:
+corrects an important bug arising in adegenet_1.2-0: NA were not
+detected.<br>
 <br>
 <img alt="" src="images/bullet.png" style="width: 10px; height: 10px;">
 <span style="font-weight: bold;">Older

Added: www/files/import.R
===================================================================
--- www/files/import.R	                        (rev 0)
+++ www/files/import.R	2008-07-16 17:29:00 UTC (rev 156)
@@ -0,0 +1,670 @@
+##################################################################
+# Fonctions designed to import files from other softwares
+# into genind objects
+#
+# currently supported formats are :
+# .gtx (GENETIX)
+# .dat (Fstat)
+# .gen (Genepop)
+#
+# Thibaut Jombart, avril 2006
+# jombart at biomserv.univ-lyon1.fr
+# 
+##################################################################
+
+#######################
+# Function rmspaces
+#######################
+# removes spaces and tab at the begining and the end of each element of charvec
+.rmspaces <- function(charvec){
+    charvec <- gsub("^([[:blank:]]+)","",charvec)
+    charvec <- gsub("([[:blank:]]+)$","",charvec)
+    return(charvec)
+}
+
+
+
+###################
+# Function readExt
+###################
+.readExt <- function(char){
+    temp <- as.character(char)
+    temp <- unlist(strsplit(char,"[.]"))
+    res <- temp[length(temp)]
+    return(res)
+}
+
+
+
+#####################
+# Function df2genind
+#####################
+df2genind <- function(X, sep=NULL, ncode=NULL, ind.names=NULL, loc.names=NULL, pop=NULL, missing=NA, ploidy=2){
+
+    if(is.data.frame(X)) X <- as.matrix(X)
+    if (!inherits(X, "matrix")) stop ("X is not a matrix")
+
+    res <- list()
+
+    ## make sure X is in character mode
+    mode(X) <- "character"
+    
+    n <- nrow(X)
+    nloc <- ncol(X)
+    ploidy <- as.integer(ploidy)
+    if(ploidy < as.integer(1)) stop("ploidy cannot be less than 1")
+
+    if(is.null(ind.names)) {ind.names <- rownames(X)}
+    if(is.null(loc.names)) {loc.names <- colnames(X)}
+
+    ## pop optionnelle
+    if(!is.null(pop)){
+      if(length(pop)!= n) stop("length of factor pop differs from nrow(X)")
+      pop <- as.factor(pop)
+    }
+
+    ## find or check the number of coding characters, 'ncode'
+    if(is.null(sep)){
+        if(!is.null(ncode)) {if(ncode <  max(nchar(X)) ) stop("some character strings exceed the provided ncode.")}
+        if(is.null(ncode)) { ncode <- max(nchar(X)) }
+        if((ncode %% ploidy)>0) stop(paste(ploidy,"alleles cannot be coded by a total of",
+                                           ncode,"characters", sep=" "))
+    }
+
+    ## ERASE ENTIRELY NON-TYPE LOCI AND INDIVIDUALS
+    tempX <- X
+    if(!is.null(sep)) tempX <- gsub(sep,"",X)
+    ## turn NANANA, 00000, ... into NA
+    tempX <- gsub("^0*$",NA,tempX)
+    tempX <- gsub("(NA)+",NA,tempX)
+
+    ## Erase entierely non-typed loci
+    temp <- apply(tempX,2,function(c) all(is.na(c)))
+    if(any(temp)){
+        X <- X[,!temp]
+        tempX <- tempX[,!temp]
+        loc.names <- loc.names[!temp]
+        nloc <- ncol(X)
+        warning("entirely non-type marker(s) deleted")
+    }
+
+    ## Erase entierely non-type individuals
+    temp <- apply(tempX,1,function(r) all(is.na(r)))
+    if(any(temp)){
+        X <- X[!temp,]
+        tempX <- tempX[!temp,]
+        pop <- pop[!temp]
+        ind.names <- ind.names[!temp]
+        n <- nrow(X)
+        warning("entirely non-type individual(s) deleted")
+    }
+
+    n <- nrow(X)
+    ## SET NAs IN X
+    X[is.na(tempX)] <- NA
+    
+    # ind.names <- rownames(X) this erases the real labels
+    # note: if X is kept as a matrix, duplicate row names are no problem
+ 
+
+    ## function to fill a matrix of char 'M' with the required
+    ## number of zero, targetN being the total number of char required
+    fillWithZero <- function(M, targetN){
+        naIdx <- is.na(M)
+        keepCheck <- any(nchar(M) < targetN)
+        while(keepCheck){
+            mat0 <- matrix("", ncol=ncol(M), nrow=nrow(M))
+            mat0[nchar(M) < targetN] <- "0"
+            M <-  matrix(paste(mat0, M, sep=""), nrow=nrow(mat0))
+            keepCheck <- any(nchar(M) < targetN)
+        }
+
+        ## restore NA (otherwise we're left with "NA")
+        M[naIdx] <- NA
+        return(M)
+    }
+
+    ## CHECK STRING LENGTH IF NO SEPARATOR PROVIDED
+    if(is.null(sep) | ploidy==as.integer(1)){
+        ##     ## now check all strings and make sure they all have 'ncode' characters
+        ##         ## NA are temporarily coded as "00", "000" or "000000" to fit the check
+        ##         keepCheck <- any(nchar(X) < ncode)
+        ##         missAll <- paste(rep("0",ncode/ploidy),collapse="")
+        ##         missTyp <- paste(rep("0",ncode),collapse="")
+        ##         X[is.na(X)] <- missTyp
+
+        ##         while(keepCheck){
+        ##             mat0 <- matrix("", ncol=ncol(X), nrow=nrow(X))
+        ##             mat0[nchar(X) < ncode] <- "0"
+        ##             X <-  matrix(paste(mat0, X, sep=""), nrow=nrow(mat0))
+        ##             keepCheck <- any(nchar(X) < ncode)
+        ##         }
+
+        X <- fillWithZero(X,targetN=ncode)
+
+        ## now split X by allele
+        splitX <- list()
+        for(i in 1:ploidy){
+            splitX[[i]] <- substr(X,1,ncode/ploidy)
+            X <- sub(paste("^.{",ncode/ploidy,"}",sep=""),"",X)
+        }
+
+    } # END CHECK STRING LENGTH WITHOUT SEP
+
+
+    ## CHECK STRING LENGTH WITH SEPARATOR PROVIDED
+    if(!is.null(sep)){
+        if(ploidy > 1){
+            temp <- t(as.matrix(as.data.frame(strsplit(X,sep))))
+            splitX <- list()
+            for(i in 1:ncol(temp)){
+                splitX[[i]] <- matrix(temp[,i], nrow=n)
+            } # each matrix of splitX contains typing for 1 allele
+        } else {
+            splitX <- list()
+            splitX[[1]] <- X
+        }
+
+        ## get the right ncode
+        temp <- unlist(splitX)
+        temp <- temp[!is.na(temp)]
+        ncode <- max(nchar(temp))*ploidy
+        splitX <- lapply(splitX, function(Y) fillWithZero(Y,targetN=ncode/ploidy))
+    } # END CHECK STRING LENGTH WITH SEP
+
+
+    ## AT THIS STAGE, splitX IS A LIST OF MATRICES,
+    ## EACH GIVING TYPING FOR AN ALLELE
+
+    ## fetch all possible alleles per locus
+    loc.all <- list()
+    for(i in 1:nloc){
+        temp <- unlist(lapply(splitX,function(e) e[,i]))
+        loc.all[[i]] <- sort(unique(temp[!is.na(temp)]))
+    }
+
+    names(loc.all) <- loc.names
+    ## loc.all is a list whose element are vectors of sorted possible alleles at a locus
+    temp <- lapply(1:nloc, function(i) matrix(0,nrow=n,ncol=length(loc.all[[i]]),
+       dimnames=list(NULL,loc.all[[i]])) )
+
+    names(temp) <- loc.names
+    # note: keep rownames as NULL in case of duplicates
+    ## temp is a list whose elements are one matrix (indiv x alleles) for each marker
+
+    ## now tables in 'temp' are filled up
+    findall <- function(cha,loc.all){
+        if(is.na(cha)) return(NULL)
+        return(which(cha==loc.all))
+    }
+
+    for(k in 1:ploidy){
+        for(i in 1:n){
+            for(j in 1:nloc){
+                allIdx <- findall(splitX[[k]][i,j],loc.all[[j]])
+                temp[[j]][i,allIdx] <- temp[[j]][i,allIdx] + 1
+                if(is.null(allIdx)) {temp[[j]][i,] <- NA}
+            }
+        }
+    }
+
+    ## beware: colnames are wrong when there is only one allele in a locus
+    ## right colnames are first generated
+    nall <- unlist(lapply(temp,ncol))
+    loc.rep <- rep(names(nall),nall)
+    col.lab <- paste(loc.rep,unlist(loc.all,use.names=FALSE),sep=".")
+
+    mat <- as.matrix(cbind.data.frame(temp))
+    mat <- mat/ploidy
+    colnames(mat) <- col.lab
+    rownames(mat) <- ind.names
+    
+    if(!is.na(missing)){
+      if(missing==0) {mat[is.na(mat)] <- 0}
+      if(toupper(missing)=="MEAN") {
+        moy <- apply(mat,2,function(c) mean(c,na.rm=TRUE))
+        for(j in 1:ncol(mat)) {mat[,j][is.na(mat[,j])] <- moy[j]}
+      }
+    }
+     
+    prevcall <- match.call()
+
+    res <- genind( tab=mat, pop=pop, prevcall=prevcall, ploidy=ploidy )
+    
+    return(res)
+} # end df2genind
+
+
+
+
+
+########################################
+# Function read.genetix
+# code based on previous ade4 functions
+########################################
+read.genetix <- function(file=NULL,missing=NA,quiet=FALSE) {
+    if(!quiet) cat("\n Converting data from GENETIX to a genind object... \n")
+
+      
+    ## read from file
+    if(!file.exists(file)) stop("Specified file does not exist.")
+    if(toupper(.readExt(file)) != "GTX") stop("File extension .gtx expected")
+      # retrieve first infos
+    nloc <- as.numeric(scan(file,nlines=1,what="character",quiet=TRUE)[1])
+    npop <- as.numeric(scan(file,nlines=1,skip=1,what="character",quiet=TRUE)[1])
+    txt <- scan(file,skip=2,what="character",sep="\n",quiet=TRUE)
+    txt <- gsub("\t"," ",txt)
+    loc.names <- txt[seq(1,by=2,length=nloc)]
+    txt <- txt[-(1:(nloc*2))]
+
+    ## retrieve populations infos
+    pop.names <- vector(mode="character",length=npop)
+    pop.nind <- vector(mode="integer",length=npop)
+    index <- 1
+    temp <- vector(mode="integer",length=npop)
+    for(i in 1:npop){
+        pop.names[i] <- txt[index]
+        pop.nind[i] <- as.numeric(txt[index+1])
+        temp[i] <- index
+        index <- index + pop.nind[i] + 2
+    }
+    pop.names <- .rmspaces(pop.names)
+      
+    ## retrieve genotypes infos
+    txt <- txt[-c(temp,temp+1)]
+    txt <- .rmspaces(txt)
+    txt <- sapply(1:length(txt),function(i) unlist(strsplit(txt[i],"([[:space:]]+)|([[:blank:]]+)")) )
+    X <- t(txt)
+    if(ncol(X) == (nloc+1)){
+        rownames(X) <- X[,1]
+        X <- X[,-1]
+    } else{
+        rownames(X) <- 1:nrow(X)
+    }
+    
+    colnames(X) <- loc.names
+    
+    ## make a factor "pop" if there is more than one population
+    pop <- factor(rep(pop.names,pop.nind))
+    
+    ## pass X to df2genind
+    res <- df2genind(X=X, ncode=6, pop=pop, missing=missing, ploidy=2)
+    res at call <- match.call()
+    
+    if(!quiet) cat("\n...done.\n\n")
+    
+    return(res)
+} # end read.genetix
+
+
+
+##########################
+# Function read.fstat
+##########################
+read.fstat <- function(file,missing=NA,quiet=FALSE){
+  if(!file.exists(file)) stop("Specified file does not exist.")
+  if(toupper(.readExt(file)) != "DAT") stop("File extension .dat expected")
+
+  if(!quiet) cat("\n Converting data from a FSTAT .dat file to a genind object... \n\n")
+
+  call <- match.call()
+  txt <- scan(file,what="character",sep="\n",quiet=TRUE)
+  txt <- gsub("\t"," ",txt)
+
+  # read first infos
+  info <- unlist(strsplit(txt[1],"([[:space:]]+)"))
+  # npop <- as.numeric(info[1]) ## no longer used
+  nloc <- as.numeric(info[2]) 
+  
+  loc.names <- txt[2:(nloc+1)]
+
+  # build genotype matrix
+  txt <- txt[-(1:(nloc+1))]
+  txt <- .rmspaces(txt)
+  txt <- sapply(1:length(txt),function(i) unlist(strsplit(txt[i],"([[:space:]]+)|([[:blank:]]+)")) )
+  X <- t(txt)
+  pop <- factor(X[,1])
+  if(length(levels(pop)) == 1 ) pop <- NULL
+  X <- X[,-1]
+    
+  colnames(X) <- loc.names
+  rownames(X) <- 1:nrow(X)
+
+  res <- df2genind(X=X,pop=pop,missing=missing, ploidy=2)
+  # beware : fstat files do not yield ind names
+  res at ind.names <- rep("",length(res at ind.names))
+  names(res at ind.names) <- rownames(res at tab)
+  res at call <- call
+  
+  if(!quiet) cat("\n...done.\n\n")
+
+  return(res)
+  
+} # end read.fstat
+
+
+
+
+
+##########################
+# Function read.genepop 
+##########################
+read.genepop <- function(file,missing=NA,quiet=FALSE){
+  if(!file.exists(file)) stop("Specified file does not exist.")
+  if(toupper(.readExt(file)) != "GEN") stop("File extension .gen expected")
+
+  if(!quiet) cat("\n Converting data from a Genepop .gen file to a genind object... \n\n")
+
+  prevcall <- match.call()
+  
+  txt <- scan(file,sep="\n",what="character",quiet=TRUE)
+  if(!quiet) cat("\nFile description: ",txt[1], "\n")
+  txt <- txt[-1]
+  txt <- gsub("\t", " ", txt)
+
+  # two cases for locus names:
+  # 1) all on the same row, separated by ","
+  # 2) one per row
+  # ! spaces and tab allowed
+  # a bug was reported by S. Devillard, occuring
+  # when the two cases occur together,
+  # that is:
+  # loc1,
+  # loc2,
+  # ...
+
+  ### former version
+  #1
+  #if(length(grep(",",txt[1])) > 0){
+  #  loc.names <- unlist(strsplit(txt[1],","))
+  #  loc.names <- gsub("^([[:blank:]]*)([[:space:]]*)","",loc.names)
+  #  loc.names <- gsub("([[:blank:]]*)([[:space:]]*)$","",loc.names)
+  #  nloc <- length(loc.names)
+
+  #  txt <- txt[-1]
+  #} else { #2
+  #  nloc <- min(grep("POP",toupper(txt)))-1
+  #  loc.names <- txt[1:nloc]
+  #  loc.names <- gsub("^([[:blank:]]*)([[:space:]]*)","",loc.names)
+  #  loc.names <- gsub("([[:blank:]]*)([[:space:]]*)$","",loc.names)
+  
+  #  txt <- txt[-(1:nloc)]
+  #}
+
+  # new strategy (shorter): isolate the 'locus names' part and then parse it.
+  locinfo.idx <- 1:(min(grep("POP",toupper(txt)))-1)
+  locinfo <- txt[locinfo.idx]
+  locinfo <- paste(locinfo,collapse=",")
+  loc.names <- unlist(strsplit(locinfo,"([,]|[\n])+"))
+  loc.names <- .rmspaces(loc.names)
+  nloc <- length(loc.names)
+  txt <- txt[-locinfo.idx]
+  
+  # locus names have been retreived  
+
+  # build the pop factor
+  # and correct the genotypes splited on more than 1 line
+  pop.idx <- grep("^([[:space:]]*)POP([[:space:]]*)$",toupper(txt))
+  npop <- length(pop.idx)
+  # correction for splited genotype
+  # isolated by the absence of comma on a line not containing "pop"
+  nocomma <- which(! (1:length(txt)) %in% grep(",",txt))
+  splited <- nocomma[which(! nocomma %in% pop.idx)]
+  if(length(splited)>0){
+    for(i in sort(splited,dec=TRUE)){
+      txt[i-1] <- paste(txt[i-1],txt[i],sep=" ")
+    }
+    txt <- txt[-splited]
+  }
+  # end correction
+
+  # reevaluate pop index
+  pop.idx <- grep("^([[:space:]]*)POP([[:space:]]*)$",toupper(txt))
+  
+  txt[length(txt)+1] <- "POP"
+  nind.bypop <- diff(grep("^([[:space:]]*)POP([[:space:]]*)$",toupper(txt)))-1
+  pop <- factor(rep(1:npop,nind.bypop))
+  
+  txt <- txt[-c(pop.idx,length(txt))]
+
+  temp <- sapply(1:length(txt),function(i) strsplit(txt[i],","))
+  # temp is a list with nind elements, first being ind. name and 2nd, genotype
+
+  ind.names <- sapply(temp,function(e) e[1])
+  ind.names <- .rmspaces(ind.names)
+  # individuals' name are now clean
+
+  vec.genot <- sapply(temp,function(e) e[2])
+  vec.genot <- .rmspaces(vec.genot)
+  
+  # X is a individual x locus genotypes matrix
+  X <- matrix(unlist(strsplit(vec.genot,"[[:space:]]+")),ncol=nloc,byrow=TRUE)
+ 
+  rownames(X) <- ind.names
+  colnames(X) <- loc.names
+
+ ##  # correct X to fulfill the genetix format
+##   f1 <- function(char){
+##     paste("00", substr(char,1,1), "00", substr(char,2,2), sep="")
+##   }
+
+##   f2 <- function(char){
+##     paste("0", substr(char,1,2), "0", substr(char,3,4), sep="")
+##   }
+
+##   if(all(nchar(X)==2)) {X <- apply(X,c(1,2),f1)}
+##   if(all(nchar(X)==4)) {X <- apply(X,c(1,2),f2)}
+
+  # give right pop names
+  # beware: genepop takes the name of the last individual of a sample as this sample's name
+  pop.names.idx <- cumsum(table(pop))
+  pop.names <- ind.names[pop.names.idx]
+  levels(pop) <- pop.names
+  
+  res <- df2genind(X=X,pop=pop,missing=missing, ploidy=2)
+  res at call <- prevcall
+  
+  if(!quiet) cat("\n...done.\n\n")
+
+  return(res)
+    
+} # end read.genepop
+
+
+
+
+
+############################
+# Function read.structure
+############################
+read.structure <- function(file, n.ind=NULL, n.loc=NULL,  onerowperind=FALSE, col.lab=NULL, col.pop=NULL, col.others=NULL, row.marknames=NULL, NA.char="-9", pop=NULL, missing=NA, ask=TRUE, quiet=FALSE){
+  
+  if(!file.exists(file)) stop("Specified file does not exist.")
+  if(!toupper(.readExt(file)) %in% c("STR","STRU")) stop("File extension .stru expected")
+
+  ## set defaults for non optional arguments without default values
+  if(!ask){
+      if(is.null(col.lab)) col.lab <- 0
+      if(is.null(col.pop)) col.pop <- 0
+      if(is.null(row.marknames)) row.marknames <- 0
+  }
+  
+  ## required questions
+  if(is.null(n.ind)){
+    cat("\n How many genotypes are there? ")
+    n.ind <- as.integer(readLines(n = 1))
+  }
+
+  if(is.null(n.loc)){
+    cat("\n How many markers are there? ")
+    n.loc <- as.integer(readLines(n = 1))
+  }
+
+  if(is.null(col.lab)){
+    cat("\n Which column contains labels for genotypes ('0' if absent)? ")
+    col.lab <- as.integer(readLines(n = 1))
+  }
+
+  if(is.null(col.pop)){
+    cat("\n Which column contains the population factor ('0' if absent)? ")
+    col.pop <- as.integer(readLines(n = 1))
+  }
+
+  if(is.null(col.others) & ask){
+    cat("\n Which other optional columns should be read (press 'return' when done)? ")
+    col.others <- scan(quiet=TRUE)
+    if(length(col.others) == 0)  col.others <- NULL
+  }
+
+  if(is.null(row.marknames)){
+    cat("\n Which row contains the marker names ('0' if absent)? ")
+    row.marknames <- as.integer(readLines(n = 1))
+  }  
+
+  if(is.null(onerowperind)){
+    cat("\n Use the option 'onerowperind' (y/n)? ")
+    onerowperind <- toupper(readLines(n = 1))
+    if(onerowperind == "Y") {
+      onerowperind <- TRUE
+    } else {
+      onerowperind <- FALSE
+    }
+  }
+  
+  if(is.null(NA.char)){
+    cat("\n What is the code for missing data (default is '-9')? ")
+    NA.char <- as.character(readLines(n = 1))
+  }
+
+  # message to console
+  if(!quiet) cat("\n Converting data from a STRUCTURE .stru file to a genind object... \n\n")
+
+  # read the file
+  txt <- scan(file,sep="\n",what="character",quiet=TRUE)
+
+  # remove empty lines and spaces/tabs at the end of a line
+  temp <- grep("^[[:space:]]*$",txt)
+  if(length(temp) > 0) {
+    txt <- txt[-temp]
+  }
+
+  txt <- gsub("([[:blank:]]+)$","",txt)
+  
+  ## isolate each useful component of the file
+  # matrix of data
+  if(onerowperind) {
+    n <- n.ind
+    p <- 2*n.loc
+  } else{
+    n <- 2*n.ind
+    p <- n.loc
+  }
+
+  lastline <- length(txt)
+  mat <- txt[(lastline-n+1):lastline]
+  mat <- t(as.data.frame(strsplit(mat,"[[:blank:]]+")))
+  rownames(mat) <- 1:n
+  gen <- mat[, (ncol(mat)-p+1):ncol(mat)]
+  
+  
+  # markers names
+  if(row.marknames != 0) {
+    loc.names <- .rmspaces(txt[row.marknames])
+    loc.names <- unlist(strsplit(loc.names,"[[:blank:]]+"))
+  } else {
+    loc.names <- .genlab("L",n.loc)
+  }
+
+  # genotypes labels
+  if(col.lab !=0) {
+    ind.names <- mat[, col.lab]
+  } else {
+    ind.names <- .genlab("",n.ind)
+  }
+
+  # population factor
+  if(col.pop !=0) {
+     pop <- factor(mat[, col.pop])
+  } else {
+    pop <- NULL
+  }
+
+  # other variables
+  if(!is.null(col.others)){
+    X.other <- mat[col.others]
+  }
+  
+  ## transformations if onerowperind is FALSE
+  if(!onerowperind) {
+    temp <- seq(1,n,by=2)
+    ind.names <- ind.names[temp]
+    if(length(ind.names) < n.ind) warning("Duplicated identifier for genotypes")
+    pop <- pop[temp]
+    if(exists("X.other")) X.other <- X.other[temp]
+
+    ## make sur that all strings in gen have the same number of characters
+    ncode <- max(nchar(gen))
+    keepCheck <- any(nchar(gen) < ncode)
+    
+    while(keepCheck){
+        mat0 <- matrix("", ncol=ncol(gen), nrow=nrow(gen))
+        mat0[nchar(gen) < ncode] <- "0"
+        gen <-  matrix(paste(mat0, gen, sep=""), nrow=nrow(mat0))
+        keepCheck <- any(nchar(gen) < ncode)
+    }
+    
+    # reorder matrix of genotypes
+    X <- t(sapply(temp, function(i) paste(gen[i,],gen[i+1,],sep="") ))
+    
+  } else { # else of "if(!onerowperind)"
+      temp <- seq(1,p-1,by=2)
+      X <- paste(gen[,temp] , gen[,temp+1], sep="")
+      X <- matrix(X, nrow=n.ind)
+  }
+  
+  # replace missing values by NAs
+  X <- gsub(NA.char,NA,X)
+  rownames(X) <- ind.names
+  colnames(X) <- loc.names
+
+  res <- df2genind(X=X,pop=pop,missing=missing, ploidy=2)
+
+  res at call <- match.call()
+
+  if(exists("X.other")) {res at other <- list(X=X.other)}
+
+  return(res)
+  
+}
+
+
+
+
+#########################
+# Function import2genind
+#########################
+import2genind <- function(file,missing=NA,quiet=FALSE, ...){
+  if(!file.exists(file)) stop("Specified file does not exist.")
+  ext <- .readExt(file)
+  ext <- toupper(ext)
+  
+  if(ext == "GTX")
+    return(read.genetix(file,missing=missing,quiet=quiet))
+
+  if(ext == "DAT")
+    return(read.fstat(file,missing=missing,quiet=quiet))
+
+  if(ext == "GEN")
+    return(read.genepop(file,missing=missing,quiet=quiet))
+
+  if(ext %in% c("STR","STRU"))
+    return(read.structure(file,missing=missing,quiet=quiet, ...))
+  
+  # evaluated only if extension is not supported
+  cat("\n File format (",ext,") not supported.\n")
+  cat("\nSupported formats are:\nGENETIX (.gtx) \nFSTAT (.dat) \nGenepop (.gen)\n \nSTRUCTURE (.str)\n")
+       
+  return(invisible())    
+}
+
+



More information about the adegenet-commits mailing list