[adegenet-commits] r225 - in pkg: . R data man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sat Jan 24 19:43:22 CET 2009
Author: jombart
Date: 2009-01-24 19:43:22 +0100 (Sat, 24 Jan 2009)
New Revision: 225
Modified:
pkg/DESCRIPTION
pkg/R/basicMethods.R
pkg/R/classes.R
pkg/R/import.R
pkg/R/makefreq.R
pkg/R/scale.R
pkg/R/spca.R
pkg/data/microbov.RData
pkg/data/nancycats.RData
pkg/data/rupica.RData
pkg/data/spcaIllus.RData
pkg/man/as.genind.Rd
pkg/man/as.genpop.Rd
pkg/man/df2genind.Rd
pkg/man/genind.Rd
pkg/man/genpop.Rd
pkg/man/import.Rd
pkg/man/read.structure.Rd
Log:
Merge devel with current version
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2009-01-23 13:46:53 UTC (rev 224)
+++ pkg/DESCRIPTION 2009-01-24 18:43:22 UTC (rev 225)
@@ -1,5 +1,5 @@
Package: adegenet
-Version: 1.2-2
+Version: 1.2-3
Date: 2008/07/30
Title: adegenet: a R package for the multivariate analysis of genetic markers.
Author: Thibaut Jombart <jombart at biomserv.univ-lyon1.fr>, with contributions from Peter Solymos
Modified: pkg/R/basicMethods.R
===================================================================
--- pkg/R/basicMethods.R 2009-01-23 13:46:53 UTC (rev 224)
+++ pkg/R/basicMethods.R 2009-01-24 18:43:22 UTC (rev 225)
@@ -9,7 +9,7 @@
cat(" #####################")
cat("\n- genotypes of individuals - \n")
cat("\nS4 class: ", as.character(class(x)))
-
+
cat("\n at call: ")
print(x at call)
@@ -17,18 +17,19 @@
len <- 7
cat("\n at tab: ", nrow(x at tab), "x", ncol(x at tab), "matrix of genotypes\n" )
-
+
cat("\n at ind.names: vector of ", length(x at ind.names), "individual names")
cat("\n at loc.names: vector of ", length(x at loc.names), "locus names")
cat("\n at loc.nall: number of alleles per locus")
cat("\n at loc.fac: locus factor for the ", ncol(x at tab), "columns of @tab")
cat("\n at all.names: list of ", length(x at all.names), "components yielding allele names for each locus")
cat("\n at ploidy: ",x at ploidy)
-
+ cat("\n at type: ",x at type)
+
cat("\n\nOptionnal contents: ")
cat("\n at pop: ", ifelse(is.null(x at pop), "- empty -", "factor giving the population of each individual"))
cat("\n at pop.names: ", ifelse(is.null(x at pop.names), "- empty -", "factor giving the population of each individual"))
-
+
cat("\n\n at other: ")
if(!is.null(x at other)){
cat("a list containing: ")
@@ -36,9 +37,9 @@
} else {
cat("- empty -\n")
}
-
+
cat("\n")
-}
+}
) # end show method for genind
@@ -55,20 +56,22 @@
cat(" #####################")
cat("\n- Alleles counts for populations - \n")
cat("\nS4 class: ", as.character(class(x)))
-
+
cat("\n at call: ")
print(x at call)
p <- ncol(x at tab)
cat("\n at tab: ", nrow(x at tab), "x", ncol(x at tab), "matrix of alleles counts\n" )
-
+
cat("\n at pop.names: vector of ", length(x at pop.names), "population names")
cat("\n at loc.names: vector of ", length(x at loc.names), "locus names")
cat("\n at loc.nall: number of alleles per locus")
cat("\n at loc.fac: locus factor for the ", ncol(x at tab), "columns of @tab")
cat("\n at all.names: list of ", length(x at all.names), "components yielding allele names for each locus")
-
+ cat("\n at ploidy: ",x at ploidy)
+ cat("\n at type: ",x at type)
+
cat("\n\n at other: ")
if(!is.null(x at other)){
cat("a list containing: ")
@@ -76,10 +79,10 @@
} else {
cat("- empty -\n")
}
-
+
cat("\n")
-
-}
+
+}
) # end show method for genpop
@@ -123,16 +126,16 @@
H <- mean(H,na.rm=TRUE)
return(H)
}
-
+
res$Hobs <- unlist(lapply(temp,f1))
-
+
## auxiliary function to compute expected heterozygosity
## freq is a vector of frequencies
f2 <- function(freq){
H <- 1-sum(freq*freq,na.rm=TRUE)
return(H)
}
-
+
temp <- genind2genpop(x,pop=rep(1,nrow(x at tab)),quiet=TRUE)
temp <- makefreq(temp,quiet=TRUE)$tab
temp.names <- colnames(temp)
@@ -140,13 +143,13 @@
names(temp) <- temp.names
temp <- split(temp,x at loc.fac)
## temp is a list of alleles frequencies (one element per locus)
-
- res$Hexp <- unlist(lapply(temp,f2))
+
+ res$Hexp <- unlist(lapply(temp,f2))
} else { # no possible heterozygosity for haploid genotypes
res$Hobs <- 0
res$Xexp <- 0
}
-
+
## print to screen
listlab <- c("# Total number of genotypes: ",
"# Population sample sizes: ",
@@ -160,7 +163,7 @@
cat("\n",listlab[i],"\n")
print(res[[i]])
}
-
+
return(invisible(res))
}) # end summary.genind
@@ -174,7 +177,7 @@
setMethod ("summary", "genpop", function(object, ...){
x <- object
if(!inherits(x,"genpop")) stop("To be used with a genpop object")
-
+
res <- list()
res$npop <- nrow(x at tab)
@@ -190,7 +193,7 @@
w <- w/sum(w)
return(sum(x*w))
}
-
+
w <- apply(x at tab,1,sum,na.rm=TRUE) # weights for populations
res$NA.perc <- 100*(1-mean.w(propTyped(x), w=w))
## res$NA.perc <- 100*(1-mean(propTyped(x,by="both"))) <- old
@@ -205,10 +208,10 @@
cat("\n",listlab[i],"\n")
print(res[[i]])
}
-
+
return(invisible(res))
-}
+}
)# end summary.genpop
Modified: pkg/R/classes.R
===================================================================
--- pkg/R/classes.R 2009-01-23 13:46:53 UTC (rev 224)
+++ pkg/R/classes.R 2009-01-24 18:43:22 UTC (rev 225)
@@ -6,7 +6,7 @@
########################################################################
###############################
-# Two classes of R object are
+# Two classes of R object are
# defined :
# gen - common part to genind and genpop
# genind - genotypes of individuals
@@ -71,7 +71,7 @@
setClassUnion("factorOrNULL", c("factor","NULL"))
setClassUnion("charOrNULL", c("character","NULL"))
setClassUnion("callOrNULL", c("call","NULL"))
-setClassUnion("intOrNum", c("integer","numeric"))
+setClassUnion("intOrNum", c("integer","numeric","NULL"))
@@ -84,19 +84,23 @@
p <- ncol(object at tab)
k <- length(unique(object at loc.names))
- if(length(object at loc.fac) != p) {
- cat("\ninvalid length for loc.fac\n")
- return(FALSE)
- }
+ if(!is.null(object at loc.fac)){
+ if(length(object at loc.fac) != p) {
+ cat("\ninvalid length for loc.fac\n")
+ return(FALSE)
+ }
- if(length(levels(object at loc.fac)) != k) {
- cat("\ninvalid number of levels in loc.fac\n")
- return(FALSE)
+ if(length(levels(object at loc.fac)) != k) {
+ cat("\ninvalid number of levels in loc.fac\n")
+ return(FALSE)
+ }
}
- if(length(object at loc.nall) != k) {
- cat("\ninvalid length in loc.nall\n")
- return(FALSE)
+ if(!is.null(object at loc.nall)){
+ if(length(object at loc.nall) != k) {
+ cat("\ninvalid length in loc.nall\n")
+ return(FALSE)
+ }
}
temp <- table(object at loc.names[object at loc.names!=""])
@@ -105,9 +109,11 @@
print(temp[temp>1])
}
- if(length(unlist(object at all.names)) != p) {
- cat("\ninvalid length in all.names\n")
- return(FALSE)
+ if(!is.null(object at all.names)){
+ if(length(unlist(object at all.names)) != p) {
+ cat("\ninvalid length in all.names\n")
+ return(FALSE)
+ }
}
return(TRUE)
@@ -117,9 +123,9 @@
setClass("gen", representation(tab = "matrix",
loc.names = "character",
- loc.fac = "factor",
+ loc.fac = "factorOrNULL",
loc.nall = "intOrNum",
- all.names = "list",
+ all.names = "listOrNULL",
call = "callOrNULL",
"VIRTUAL"),
prototype(tab=matrix(ncol=0,nrow=0), loc.nall=integer(0), call=NULL))
@@ -137,8 +143,9 @@
pop = "factorOrNULL",
pop.names = "charOrNULL",
ploidy = "integer",
+ type = "character",
other = "listOrNULL", "VIRTUAL"),
- prototype(pop=NULL, pop.names = NULL, ploidy = as.integer(2), other = NULL))
+ prototype(pop=NULL, pop.names = NULL, type = "codom", ploidy = as.integer(2), other = NULL))
@@ -160,7 +167,7 @@
warning("\nduplicate names in ind.names:\n")
print(temp[temp>1])
}
-
+
if(!is.null(object at pop)){ # check pop
if(length(object at pop) != nrow(object at tab)) {
@@ -191,7 +198,14 @@
cat("\nploidy inferior to 1\n")
return(FALSE)
}
-
+
+ ## check type of marker
+ if(!object at type %in% c("codom","PA") ){
+ cat("\nunknowon type of marker\n")
+ return(FALSE)
+ }
+
+
return(TRUE)
} #end .genind.valid
@@ -203,8 +217,9 @@
########################
# virtual class popInfo
########################
-setClass("popInfo", representation(pop.names = "character", other = "listOrNULL", "VIRTUAL"),
- prototype(other = NULL))
+setClass("popInfo", representation(pop.names = "character", ploidy = "integer",
+ type = "character", other = "listOrNULL", "VIRTUAL"),
+ prototype(type = "codom", ploidy = as.integer(2), other = NULL))
@@ -263,53 +278,64 @@
# Function genind
##################
## constructor of a genind object
-genind <- function(tab,pop=NULL,prevcall=NULL,ploidy=2){
+genind <- function(tab,pop=NULL,prevcall=NULL,ploidy=2,type=c("codom","PA")){
X <- as.matrix(tab)
if(is.null(colnames(X))) stop("tab columns have no name.")
if(is.null(rownames(X))) {rownames(X) <- 1:nrow(X)}
-
-
+
+ type <- match.arg(type)
+
# labels for individuals
nind <- nrow(X)
ind.names <- .rmspaces(rownames(X))
ind.codes <- .genlab("", nind)
names(ind.names) <- ind.codes
-
+
# labels for loci
# and loc.nall
- temp <- colnames(X)
- temp <- gsub("[.].*$","",temp)
- temp <- .rmspaces(temp)
- # beware !!! Function 'table' gives ordred output.
- loc.names <- unique(temp)
- loc.nall <- table(temp)[match(loc.names,names(table(temp)))]
- loc.nall <- as.integer(loc.nall)
+ if(type=="codom"){
+ temp <- colnames(X)
+ temp <- gsub("[.].*$","",temp)
+ temp <- .rmspaces(temp)
+ ## beware !!! Function 'table' gives ordred output.
+ loc.names <- unique(temp)
+ loc.nall <- table(temp)[match(loc.names,names(table(temp)))]
+ loc.nall <- as.integer(loc.nall)
- nloc <- length(loc.names)
- loc.codes <- .genlab("L",nloc)
+ nloc <- length(loc.names)
+ loc.codes <- .genlab("L",nloc)
- names(loc.names) <- loc.codes
+ names(loc.names) <- loc.codes
- names(loc.nall) <- loc.codes
+ names(loc.nall) <- loc.codes
- # loc.fac
- loc.fac <- rep(loc.codes,loc.nall)
+ ## loc.fac
+ loc.fac <- rep(loc.codes,loc.nall)
- # alleles name
- temp <- colnames(X)
- temp <- gsub("^.*[.]","",temp)
- temp <- .rmspaces(temp)
- all.names <- split(temp,loc.fac)
- all.codes <- lapply(all.names,function(e) .genlab("",length(e)))
- for(i in 1:length(all.names)){
- names(all.names[[i]]) <- all.codes[[i]]
+ ## alleles name
+ temp <- colnames(X)
+ temp <- gsub("^.*[.]","",temp)
+ temp <- .rmspaces(temp)
+ all.names <- split(temp,loc.fac)
+ all.codes <- lapply(all.names,function(e) .genlab("",length(e)))
+ for(i in 1:length(all.names)){
+ names(all.names[[i]]) <- all.codes[[i]]
+ }
+
+ rownames(X) <- ind.codes
+ colnames(X) <- paste(loc.fac,unlist(all.codes),sep=".")
+ loc.fac <- as.factor(loc.fac)
+ } else { # end if type=="codom"
+ nloc <- ncol(X)
+ loc.codes <- .genlab("N", nloc)
+ colnames(X) <- loc.codes
+ loc.names <-colnames(X)
+ names(loc.names) <- loc.codes
+ loc.fac <- NULL
+ all.names <- NULL
+ loc.nall <- NULL
}
-
- rownames(X) <- ind.codes
- colnames(X) <- paste(loc.fac,unlist(all.codes),sep=".")
- loc.fac <- as.factor(loc.fac)
-
# This was used in S3 version
#
#res <- list( tab=X, ind.names=ind.names, loc.names=loc.names,
@@ -323,7 +349,7 @@
res at loc.nall <- loc.nall
res at loc.fac <- loc.fac
res at all.names <- all.names
-
+
# populations name (optional)
# beware, keep levels of pop sorted in
# there order of appearance
@@ -348,11 +374,14 @@
if(plo < as.integer(1)) stop("ploidy inferior to 1")
res at ploidy <- plo
+ ## type of marker
+ res at type <- as.character(type)
+
if(is.null(prevcall)) {prevcall <- match.call()}
res at call <- prevcall
-
+
return(res)
-
+
} # end genind
######################
@@ -365,18 +394,18 @@
##################
# Function genpop
##################
-genpop <- function(tab,prevcall=NULL){
+genpop <- function(tab,prevcall=NULL,ploidy=as.integer(2),type=c("codom","PA")){
X <- as.matrix(tab)
if(is.null(colnames(X))) stop("tab columns have no name.")
if(is.null(rownames(X))) {rownames(X) <- 1:nrow(X)}
-
+
# labels for populations
npop <- nrow(X)
pop.names <- .rmspaces(rownames(X))
pop.codes <- .genlab("P", npop)
names(pop.names) <- pop.codes
-
+
# labels for loci
# and loc.nall
temp <- colnames(X)
@@ -406,7 +435,7 @@
for(i in 1:length(all.names)){
names(all.names[[i]]) <- all.codes[[i]]
}
-
+
rownames(X) <- pop.codes
colnames(X) <- paste(loc.fac,unlist(all.codes),sep=".")
loc.fac <- as.factor(loc.fac)
@@ -424,12 +453,14 @@
res at loc.nall <- loc.nall
res at loc.fac <- loc.fac
res at all.names <- all.names
-
+ res at ploidy <- ploidy
+ res at type <- as.character(type)
+
if(is.null(prevcall)) {prevcall <- match.call()}
res at call <- prevcall
-
+
return(res)
-
+
} # end genpop
Modified: pkg/R/import.R
===================================================================
--- pkg/R/import.R 2009-01-23 13:46:53 UTC (rev 224)
+++ pkg/R/import.R 2009-01-24 18:43:22 UTC (rev 225)
@@ -1,16 +1,17 @@
+###################################################################
+## Fonctions designed to import files from other softwares
+## into genind objects
+##
+## currently supported formats are :
+## .gtx (GENETIX)
+## .dat (Fstat)
+## .gen (Genepop)
+## .stru (STRUCTURE)
+##
+## Thibaut Jombart, avril 2006
+## jombart at biomserv.univ-lyon1.fr
+##
##################################################################
-# Fonctions designed to import files from other softwares
-# into genind objects
-#
-# currently supported formats are :
-# .gtx (GENETIX)
-# .dat (Fstat)
-# .gen (Genepop)
-#
-# Thibaut Jombart, avril 2006
-# jombart at biomserv.univ-lyon1.fr
-#
-##################################################################
#######################
# Function rmspaces
@@ -39,13 +40,61 @@
#####################
# Function df2genind
#####################
-df2genind <- function(X, sep=NULL, ncode=NULL, ind.names=NULL, loc.names=NULL, pop=NULL, missing=NA, ploidy=2){
+df2genind <- function(X, sep=NULL, ncode=NULL, ind.names=NULL, loc.names=NULL, pop=NULL, missing=NA, ploidy=2, type=c("codom","PA")){
if(is.data.frame(X)) X <- as.matrix(X)
if (!inherits(X, "matrix")) stop ("X is not a matrix")
res <- list()
+ type <- match.arg(type)
+ ## type PA
+ if(toupper(type)=="PA"){
+ warning("experimental mode for presence/absence")
+ mode(X) <- "numeric"
+
+ ## pop optionnelle
+ if(!is.null(pop)){
+ if(length(pop)!= n) stop("length of factor pop differs from nrow(X)")
+ pop <- as.factor(pop)
+ }
+
+ if(!is.null(ind.names)) rownames(X) <- ind.names
+ if(!is.null(loc.names)) colnames(X) <- loc.names
+
+ ## handle entirely non-typed loci and individuals
+ X <- gsub("^0*$",NA,X)
+ X <- gsub("(NA)+",NA,X)
+
+ ## Erase entierely non-typed loci
+ temp <- apply(X,2,function(c) all(is.na(c)))
+ if(any(temp)){
+ X <- X[,!temp]
+ warning("entirely non-type marker(s) deleted")
+ }
+
+ ## Erase entierely non-type individuals
+ temp <- apply(X,1,function(r) all(is.na(r)))
+ if(any(temp)){
+ X <- X[!temp,]
+ pop <- pop[!temp]
+ warning("entirely non-type individual(s) deleted")
+ }
+
+ ## erase non-polymorphic loci
+ temp <- apply(X, 2, function(loc) length(unique(loc[!is.na(loc)]))==1)
+ if(any(temp)){
+ X <- X[,!temp]
+ warning("non-polymorphic marker(s) deleted")
+ }
+
+ prevcall <- match.call()
+
+ res <- genind( tab=X, pop=pop, prevcall=prevcall, ploidy=ploidy, type="PA")
+
+ return(res)
+ } # end type PA
+
## make sure X is in character mode
mode(X) <- "character"
@@ -229,7 +278,7 @@
prevcall <- match.call()
- res <- genind( tab=mat, pop=pop, prevcall=prevcall, ploidy=ploidy )
+ res <- genind( tab=mat, pop=pop, prevcall=prevcall, ploidy=ploidy, type=type)
return(res)
} # end df2genind
@@ -247,7 +296,8 @@
## read from file
- if(!file.exists(file)) stop("Specified file does not exist.")
+ ## if(!file.exists(file)) stop("Specified file does not exist.") <- not needed
+
if(toupper(.readExt(file)) != "GTX") stop("File extension .gtx expected")
## retrieve first infos
nloc <- as.integer(scan(file,nlines=1,what="character",quiet=TRUE)[1])
@@ -313,43 +363,43 @@
# Function read.fstat
##########################
read.fstat <- function(file,missing=NA,quiet=FALSE){
- if(!file.exists(file)) stop("Specified file does not exist.")
- if(toupper(.readExt(file)) != "DAT") stop("File extension .dat expected")
+ ##if(!file.exists(file)) stop("Specified file does not exist.") <- not needed
+ if(toupper(.readExt(file)) != "DAT") stop("File extension .dat expected")
- if(!quiet) cat("\n Converting data from a FSTAT .dat file to a genind object... \n\n")
+ if(!quiet) cat("\n Converting data from a FSTAT .dat file to a genind object... \n\n")
- call <- match.call()
- txt <- scan(file,what="character",sep="\n",quiet=TRUE)
- txt <- gsub("\t"," ",txt)
+ call <- match.call()
+ txt <- scan(file,what="character",sep="\n",quiet=TRUE)
+ txt <- gsub("\t"," ",txt)
- # read first infos
- info <- unlist(strsplit(txt[1],"([[:space:]]+)"))
- # npop <- as.numeric(info[1]) ## no longer used
- nloc <- as.numeric(info[2])
+ # read first infos
+ info <- unlist(strsplit(txt[1],"([[:space:]]+)"))
+ # npop <- as.numeric(info[1]) ## no longer used
+ nloc <- as.numeric(info[2])
- loc.names <- txt[2:(nloc+1)]
+ loc.names <- txt[2:(nloc+1)]
- # build genotype matrix
- txt <- txt[-(1:(nloc+1))]
- txt <- .rmspaces(txt)
- txt <- sapply(1:length(txt),function(i) unlist(strsplit(txt[i],"([[:space:]]+)|([[:blank:]]+)")) )
- X <- t(txt)
- pop <- factor(X[,1])
- if(length(levels(pop)) == 1 ) pop <- NULL
- X <- X[,-1]
+ # build genotype matrix
+ txt <- txt[-(1:(nloc+1))]
+ txt <- .rmspaces(txt)
+ txt <- sapply(1:length(txt),function(i) unlist(strsplit(txt[i],"([[:space:]]+)|([[:blank:]]+)")) )
+ X <- t(txt)
+ pop <- factor(X[,1])
+ if(length(levels(pop)) == 1 ) pop <- NULL
+ X <- X[,-1]
- colnames(X) <- loc.names
- rownames(X) <- 1:nrow(X)
+ colnames(X) <- loc.names
+ rownames(X) <- 1:nrow(X)
- res <- df2genind(X=X,pop=pop,missing=missing, ploidy=2)
- # beware : fstat files do not yield ind names
- res at ind.names <- rep("",length(res at ind.names))
- names(res at ind.names) <- rownames(res at tab)
- res at call <- call
+ res <- df2genind(X=X,pop=pop,missing=missing, ploidy=2)
+ # beware : fstat files do not yield ind names
+ res at ind.names <- rep("",length(res at ind.names))
+ names(res at ind.names) <- rownames(res at tab)
+ res at call <- call
- if(!quiet) cat("\n...done.\n\n")
+ if(!quiet) cat("\n...done.\n\n")
- return(res)
+ return(res)
} # end read.fstat
@@ -361,17 +411,17 @@
# Function read.genepop
##########################
read.genepop <- function(file,missing=NA,quiet=FALSE){
- if(!file.exists(file)) stop("Specified file does not exist.")
- if(toupper(.readExt(file)) != "GEN") stop("File extension .gen expected")
+ ## if(!file.exists(file)) stop("Specified file does not exist.") <- not needed
+ if(toupper(.readExt(file)) != "GEN") stop("File extension .gen expected")
- if(!quiet) cat("\n Converting data from a Genepop .gen file to a genind object... \n\n")
+ if(!quiet) cat("\n Converting data from a Genepop .gen file to a genind object... \n\n")
- prevcall <- match.call()
+ prevcall <- match.call()
- txt <- scan(file,sep="\n",what="character",quiet=TRUE)
- if(!quiet) cat("\nFile description: ",txt[1], "\n")
- txt <- txt[-1]
- txt <- gsub("\t", " ", txt)
+ txt <- scan(file,sep="\n",what="character",quiet=TRUE)
+ if(!quiet) cat("\nFile description: ",txt[1], "\n")
+ txt <- txt[-1]
+ txt <- gsub("\t", " ", txt)
# two cases for locus names:
# 1) all on the same row, separated by ","
@@ -402,57 +452,57 @@
# txt <- txt[-(1:nloc)]
#}
- # new strategy (shorter): isolate the 'locus names' part and then parse it.
- locinfo.idx <- 1:(min(grep("POP",toupper(txt)))-1)
- locinfo <- txt[locinfo.idx]
- locinfo <- paste(locinfo,collapse=",")
- loc.names <- unlist(strsplit(locinfo,"([,]|[\n])+"))
- loc.names <- .rmspaces(loc.names)
- nloc <- length(loc.names)
- txt <- txt[-locinfo.idx]
+ ## new strategy (shorter): isolate the 'locus names' part and then parse it.
+ locinfo.idx <- 1:(min(grep("POP",toupper(txt)))-1)
+ locinfo <- txt[locinfo.idx]
+ locinfo <- paste(locinfo,collapse=",")
+ loc.names <- unlist(strsplit(locinfo,"([,]|[\n])+"))
+ loc.names <- .rmspaces(loc.names)
+ nloc <- length(loc.names)
+ txt <- txt[-locinfo.idx]
- # locus names have been retreived
+ ## locus names have been retreived
- # build the pop factor
- # and correct the genotypes splited on more than 1 line
- pop.idx <- grep("^([[:space:]]*)POP([[:space:]]*)$",toupper(txt))
- npop <- length(pop.idx)
- # correction for splited genotype
- # isolated by the absence of comma on a line not containing "pop"
- nocomma <- which(! (1:length(txt)) %in% grep(",",txt))
- splited <- nocomma[which(! nocomma %in% pop.idx)]
- if(length(splited)>0){
- for(i in sort(splited,dec=TRUE)){
- txt[i-1] <- paste(txt[i-1],txt[i],sep=" ")
+ ## build the pop factor
+ ## and correct the genotypes splited on more than 1 line
+ pop.idx <- grep("^([[:space:]]*)POP([[:space:]]*)$",toupper(txt))
+ npop <- length(pop.idx)
+ ## correction for splited genotype
+ ## isolated by the absence of comma on a line not containing "pop"
+ nocomma <- which(! (1:length(txt)) %in% grep(",",txt))
+ splited <- nocomma[which(! nocomma %in% pop.idx)]
+ if(length(splited)>0){
+ for(i in sort(splited,dec=TRUE)){
+ txt[i-1] <- paste(txt[i-1],txt[i],sep=" ")
+ }
+ txt <- txt[-splited]
}
- txt <- txt[-splited]
- }
- # end correction
+ ## end correction
- # reevaluate pop index
- pop.idx <- grep("^([[:space:]]*)POP([[:space:]]*)$",toupper(txt))
+ ## reevaluate pop index
+ pop.idx <- grep("^([[:space:]]*)POP([[:space:]]*)$",toupper(txt))
- txt[length(txt)+1] <- "POP"
- nind.bypop <- diff(grep("^([[:space:]]*)POP([[:space:]]*)$",toupper(txt)))-1
- pop <- factor(rep(1:npop,nind.bypop))
+ txt[length(txt)+1] <- "POP"
+ nind.bypop <- diff(grep("^([[:space:]]*)POP([[:space:]]*)$",toupper(txt)))-1
+ pop <- factor(rep(1:npop,nind.bypop))
- txt <- txt[-c(pop.idx,length(txt))]
+ txt <- txt[-c(pop.idx,length(txt))]
- temp <- sapply(1:length(txt),function(i) strsplit(txt[i],","))
- # temp is a list with nind elements, first being ind. name and 2nd, genotype
+ temp <- sapply(1:length(txt),function(i) strsplit(txt[i],","))
+ ## temp is a list with nind elements, first being ind. name and 2nd, genotype
- ind.names <- sapply(temp,function(e) e[1])
- ind.names <- .rmspaces(ind.names)
- # individuals' name are now clean
+ ind.names <- sapply(temp,function(e) e[1])
+ ind.names <- .rmspaces(ind.names)
+ ## individuals' name are now clean
- vec.genot <- sapply(temp,function(e) e[2])
- vec.genot <- .rmspaces(vec.genot)
+ vec.genot <- sapply(temp,function(e) e[2])
+ vec.genot <- .rmspaces(vec.genot)
- # X is a individual x locus genotypes matrix
- X <- matrix(unlist(strsplit(vec.genot,"[[:space:]]+")),ncol=nloc,byrow=TRUE)
+ ## X is a individual x locus genotypes matrix
+ X <- matrix(unlist(strsplit(vec.genot,"[[:space:]]+")),ncol=nloc,byrow=TRUE)
- rownames(X) <- ind.names
- colnames(X) <- loc.names
+ rownames(X) <- ind.names
+ colnames(X) <- loc.names
## # correct X to fulfill the genetix format
## f1 <- function(char){
@@ -466,18 +516,18 @@
## if(all(nchar(X)==2)) {X <- apply(X,c(1,2),f1)}
## if(all(nchar(X)==4)) {X <- apply(X,c(1,2),f2)}
- # give right pop names
- # beware: genepop takes the name of the last individual of a sample as this sample's name
- pop.names.idx <- cumsum(table(pop))
- pop.names <- ind.names[pop.names.idx]
- levels(pop) <- pop.names
+ ## give right pop names
+ ## beware: genepop takes the name of the last individual of a sample as this sample's name
+ pop.names.idx <- cumsum(table(pop))
+ pop.names <- ind.names[pop.names.idx]
+ levels(pop) <- pop.names
- res <- df2genind(X=X,pop=pop,missing=missing, ploidy=2)
- res at call <- prevcall
+ res <- df2genind(X=X,pop=pop,missing=missing, ploidy=2)
+ res at call <- prevcall
- if(!quiet) cat("\n...done.\n\n")
+ if(!quiet) cat("\n...done.\n\n")
- return(res)
+ return(res)
} # end read.genepop
@@ -490,162 +540,162 @@
############################
read.structure <- function(file, n.ind=NULL, n.loc=NULL, onerowperind=NULL, col.lab=NULL, col.pop=NULL, col.others=NULL, row.marknames=NULL, NA.char="-9", pop=NULL, missing=NA, ask=TRUE, quiet=FALSE){
- if(!file.exists(file)) stop("Specified file does not exist.")
- if(!toupper(.readExt(file)) %in% c("STR","STRU")) stop("File extension .stru expected")
+ ## if(!file.exists(file)) stop("Specified file does not exist.") <- not needed
+ if(!toupper(.readExt(file)) %in% c("STR","STRU")) stop("File extension .stru expected")
- ## set defaults for non optional arguments without default values
- if(!ask){
- if(is.null(col.lab)) col.lab <- as.integer(0)
- if(is.null(col.pop)) col.pop <- as.integer(0)
- if(is.null(row.marknames)) row.marknames <- as.integer(0)
- }
+ ## set defaults for non optional arguments without default values
+ if(!ask){
+ if(is.null(col.lab)) col.lab <- as.integer(0)
+ if(is.null(col.pop)) col.pop <- as.integer(0)
+ if(is.null(row.marknames)) row.marknames <- as.integer(0)
+ }
- ## required questions
- if(is.null(n.ind)){
- cat("\n How many genotypes are there? ")
- n.ind <- as.integer(readLines(n = 1))
- }
+ ## required questions
+ if(is.null(n.ind)){
+ cat("\n How many genotypes are there? ")
+ n.ind <- as.integer(readLines(n = 1))
+ }
- if(is.null(n.loc)){
- cat("\n How many markers are there? ")
- n.loc <- as.integer(readLines(n = 1))
- }
+ if(is.null(n.loc)){
+ cat("\n How many markers are there? ")
+ n.loc <- as.integer(readLines(n = 1))
+ }
- if(is.null(col.lab)){
- cat("\n Which column contains labels for genotypes ('0' if absent)? ")
- col.lab <- as.integer(readLines(n = 1))
- }
+ if(is.null(col.lab)){
+ cat("\n Which column contains labels for genotypes ('0' if absent)? ")
+ col.lab <- as.integer(readLines(n = 1))
+ }
- if(is.null(col.pop)){
- cat("\n Which column contains the population factor ('0' if absent)? ")
- col.pop <- as.integer(readLines(n = 1))
- }
+ if(is.null(col.pop)){
+ cat("\n Which column contains the population factor ('0' if absent)? ")
+ col.pop <- as.integer(readLines(n = 1))
+ }
- if(is.null(col.others) & ask){
- cat("\n Which other optional columns should be read (press 'return' when done)? ")
- col.others <- scan(quiet=TRUE)
- if(length(col.others) == 0) col.others <- NULL
- }
+ if(is.null(col.others) & ask){
+ cat("\n Which other optional columns should be read (press 'return' when done)? ")
+ col.others <- scan(quiet=TRUE)
+ if(length(col.others) == 0) col.others <- NULL
+ }
- if(is.null(row.marknames)){
- cat("\n Which row contains the marker names ('0' if absent)? ")
- row.marknames <- as.integer(readLines(n = 1))
- }
+ if(is.null(row.marknames)){
+ cat("\n Which row contains the marker names ('0' if absent)? ")
+ row.marknames <- as.integer(readLines(n = 1))
+ }
- if(is.null(onerowperind)){
- cat("\n Are genotypes coded by a single row (y/n)? ")
- onerowperind <- toupper(readLines(n = 1))
- if(onerowperind == "Y") {
- onerowperind <- TRUE
- } else {
- onerowperind <- FALSE
+ if(is.null(onerowperind)){
+ cat("\n Are genotypes coded by a single row (y/n)? ")
+ onerowperind <- toupper(readLines(n = 1))
+ if(onerowperind == "Y") {
+ onerowperind <- TRUE
+ } else {
+ onerowperind <- FALSE
+ }
}
- }
- if(is.null(NA.char)){
- cat("\n What is the code for missing data (default is '-9')? ")
- NA.char <- as.character(readLines(n = 1))
- }
+ if(is.null(NA.char)){
+ cat("\n What is the code for missing data (default is '-9')? ")
+ NA.char <- as.character(readLines(n = 1))
+ }
- # message to console
- if(!quiet) cat("\n Converting data from a STRUCTURE .stru file to a genind object... \n\n")
+ ## message to console
+ if(!quiet) cat("\n Converting data from a STRUCTURE .stru file to a genind object... \n\n")
- # read the file
- txt <- scan(file,sep="\n",what="character",quiet=TRUE)
+ ## read the file
+ txt <- scan(file,sep="\n",what="character",quiet=TRUE)
- # remove empty lines and spaces/tabs at the end of a line
- temp <- grep("^[[:space:]]*$",txt)
- if(length(temp) > 0) {
- txt <- txt[-temp]
- }
+ ## remove empty lines and spaces/tabs at the end of a line
+ temp <- grep("^[[:space:]]*$",txt)
+ if(length(temp) > 0) {
+ txt <- txt[-temp]
+ }
- txt <- gsub("([[:blank:]]+)$","",txt)
+ txt <- gsub("([[:blank:]]+)$","",txt)
- ## isolate each useful component of the file
- # matrix of data
- if(onerowperind) {
- n <- n.ind
- p <- 2*n.loc
- } else{
- n <- 2*n.ind
- p <- n.loc
- }
+ ## isolate each useful component of the file
+ ## matrix of data
+ if(onerowperind) {
+ n <- n.ind
+ p <- 2*n.loc
+ } else{
+ n <- 2*n.ind
+ p <- n.loc
+ }
- lastline <- length(txt)
- mat <- txt[(lastline-n+1):lastline]
- mat <- t(as.data.frame(strsplit(mat,"[[:blank:]]+")))
- rownames(mat) <- 1:n
- gen <- mat[, (ncol(mat)-p+1):ncol(mat)]
+ lastline <- length(txt)
+ mat <- txt[(lastline-n+1):lastline]
+ mat <- t(as.data.frame(strsplit(mat,"[[:blank:]]+")))
+ rownames(mat) <- 1:n
+ gen <- mat[, (ncol(mat)-p+1):ncol(mat)]
- # markers names
- if(row.marknames != 0) {
- loc.names <- .rmspaces(txt[row.marknames])
- loc.names <- unlist(strsplit(loc.names,"[[:blank:]]+"))
- } else {
- loc.names <- .genlab("L",n.loc)
- }
+ ## markers names
+ if(row.marknames != 0) {
+ loc.names <- .rmspaces(txt[row.marknames])
+ loc.names <- unlist(strsplit(loc.names,"[[:blank:]]+"))
+ } else {
+ loc.names <- .genlab("L",n.loc)
+ }
- # genotypes labels
- if(col.lab !=0) {
- ind.names <- mat[, col.lab]
- } else {
- ind.names <- .genlab("",n.ind)
- }
+ ## genotypes labels
+ if(col.lab !=0) {
+ ind.names <- mat[, col.lab]
+ } else {
+ ind.names <- .genlab("",n.ind)
+ }
- # population factor
- if(col.pop !=0) {
- pop <- factor(mat[, col.pop])
- } else {
- pop <- NULL
- }
+ ## population factor
+ if(col.pop !=0) {
+ pop <- factor(mat[, col.pop])
+ } else {
+ pop <- NULL
+ }
- # other variables
- if(!is.null(col.others)){
- X.other <- mat[, col.others]
- }
+ ## other variables
+ if(!is.null(col.others)){
+ X.other <- mat[,col.others]
+ }
- ## transformations if onerowperind is FALSE
- if(!onerowperind) {
- temp <- seq(1,n,by=2)
- ind.names <- ind.names[temp]
- if(length(ind.names) < n.ind) warning("Duplicated identifier for genotypes")
- pop <- pop[temp]
- if(exists("X.other")) X.other <- X.other[temp]
+ ## transformations if onerowperind is FALSE
+ if(!onerowperind) {
+ temp <- seq(1,n,by=2)
+ ind.names <- ind.names[temp]
+ if(length(ind.names) < n.ind) warning("Duplicated identifier for genotypes")
+ pop <- pop[temp]
+ if(exists("X.other")) X.other <- X.other[temp]
- ## make sur that all strings in gen have the same number of characters
- ncode <- max(nchar(gen))
- keepCheck <- any(nchar(gen) < ncode)
-
- while(keepCheck){
- mat0 <- matrix("", ncol=ncol(gen), nrow=nrow(gen))
- mat0[nchar(gen) < ncode] <- "0"
- gen <- matrix(paste(mat0, gen, sep=""), nrow=nrow(mat0))
+ ## make sur that all strings in gen have the same number of characters
+ ncode <- max(nchar(gen))
keepCheck <- any(nchar(gen) < ncode)
- }
- # reorder matrix of genotypes
- X <- t(sapply(temp, function(i) paste(gen[i,],gen[i+1,],sep="") ))
+ while(keepCheck){
+ mat0 <- matrix("", ncol=ncol(gen), nrow=nrow(gen))
+ mat0[nchar(gen) < ncode] <- "0"
+ gen <- matrix(paste(mat0, gen, sep=""), nrow=nrow(mat0))
+ keepCheck <- any(nchar(gen) < ncode)
+ }
- } else { # else of "if(!onerowperind)"
- temp <- seq(1,p-1,by=2)
- X <- paste(gen[,temp] , gen[,temp+1], sep="")
- X <- matrix(X, nrow=n.ind)
- }
+ ## reorder matrix of genotypes
+ X <- t(sapply(temp, function(i) paste(gen[i,],gen[i+1,],sep="") ))
- # replace missing values by NAs
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/adegenet -r 225
More information about the adegenet-commits
mailing list