[Genabel-commits] r810 - in pkg/GenABEL: . R inst/unitTests man src/GAlib
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Dec 5 13:02:08 CET 2011
Author: yurii
Date: 2011-12-05 13:02:08 +0100 (Mon, 05 Dec 2011)
New Revision: 810
Added:
pkg/GenABEL/R/zUtil.R
pkg/GenABEL/inst/unitTests/runit.merge.R
pkg/GenABEL/inst/unitTests/runit.qtscore.R
Modified:
pkg/GenABEL/CHANGES.LOG
pkg/GenABEL/DESCRIPTION
pkg/GenABEL/R/estlambda.R
pkg/GenABEL/R/export.plink.R
pkg/GenABEL/R/ibs.R
pkg/GenABEL/R/polygenic_hglm.R
pkg/GenABEL/R/qtscore.R
pkg/GenABEL/R/zzz.R
pkg/GenABEL/generate_documentation.R
pkg/GenABEL/inst/unitTests/runit.exports.R
pkg/GenABEL/man/estlambda.Rd
pkg/GenABEL/man/export.plink.Rd
pkg/GenABEL/man/load.gwaa.data.Rd
pkg/GenABEL/man/polygenic.Rd
pkg/GenABEL/man/srdta.Rd
pkg/GenABEL/src/GAlib/export_plink.cpp
pkg/GenABEL/src/GAlib/export_plink.h
pkg/GenABEL/src/GAlib/gwaa.c
Log:
accumulated changes + regression test for the bug [#1641]
Modified: pkg/GenABEL/CHANGES.LOG
===================================================================
--- pkg/GenABEL/CHANGES.LOG 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/CHANGES.LOG 2011-12-05 12:02:08 UTC (rev 810)
@@ -1,12 +1,23 @@
-*** v. 1.7-0 (2011.09.16)
+*** v. 1.7-0 (2011.11.15)
+Added 'KS' method to estimate lambda in 'estlambda'. Generates rather
+good results (under null) and may be considered as default option in the future,
+after testing under the alternative.
+
+Account for situation when HGLM fails to converge (s.e.'s set to NA in
+h2$h2an$se)
+
+added option 'transposed' to 'export.plink' to export TPED files
+
speeding-up 'mmscore' by x1.8 through more efficient vector-matrix
product computations
export.merlin, export.plink now runs much faster (exports on C++ code)
-Deal with exceptional situation in merge.snp.data / monomorphic part;
-added case of no overlap in SNPs (skip monomorphic staff then)
+Bug [#1641] reported by Karl Froner fixed. Now GenABEL
+deals with exceptional situation in merge.snp.data / monomorphic part;
+added case of no overlap in SNPs (skip monomorphic staff then). RUnit
+regression test runit.merge.R/test.merge.bug1641 added.
Modifications in 'estlambda': plot=FALSE by default, added option
to estimate Lambda with median method; added option 'filter' for
Modified: pkg/GenABEL/DESCRIPTION
===================================================================
--- pkg/GenABEL/DESCRIPTION 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/DESCRIPTION 2011-12-05 12:02:08 UTC (rev 810)
@@ -2,7 +2,7 @@
Type: Package
Title: genome-wide SNP association analysis
Version: 1.7-0
-Date: 2011-09-16
+Date: 2011-12-05
Author: GenABEL developers
Maintainer: GenABEL developers <genabel.project at gmail.com>
Depends: R (>= 2.10.0), methods, MASS
Modified: pkg/GenABEL/R/estlambda.R
===================================================================
--- pkg/GenABEL/R/estlambda.R 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/R/estlambda.R 2011-12-05 12:02:08 UTC (rev 810)
@@ -1,7 +1,45 @@
+#' Estimate the inflation factor for a distribution of P-values
+#'
+#' Estimate the inflation factor for a distribution of P-values or 1df
+#' chi-square test. The major use of this procedure is the Genomic Control, but
+#' can also be used to visualise the distribution of P-values coming from other
+#' tests. Methods implemented include 'median' (median(chi2)/0.455...),
+#' regression (of observed onto expected) and 'KS' (optimizing the
+#' chi2.1df distribution fit by use of Kolmogorov-Smirnov test)
+#'
+#'
+#' @param data A vector of reals. If all are <=1, it is assumed that this is a
+#' vector of P-values, else it is treated as a vector of chi-squares with 1
+#' d.f.
+#' @param plot Wether the prot should be presented
+#' @param proportion The proportion of lowest P (Chi2) to be used when
+#' estimating the inflation factor Lambda
+#' @param method "regression", "median", or "KS" method to be used for
+#' Lambda estimation
+#' @param filter if the test statistics with 0-value of chi-square should be
+#' excluded prior to estimation of Lambda
+#' @param ... arguments passed to \code{\link{plot}} function
+#' @return A list with elements \item{estimate}{Estimate of Lambda}
+#' \item{se}{Standard error of the estimate}
+#' @author Yurii Aulchenko
+#' @seealso \code{\link{ccfast}}, \code{\link{qtscore}}
+#' @keywords htest
+#' @examples
+#'
+#' data(srdta)
+#' pex <- summary(gtdata(srdta))[,"Pexact"]
+#' estlambda(pex, plot=TRUE)
+#' estlambda(pex, method="regression", proportion = 0.95)
+#' estlambda(pex, method="median")
+#' estlambda(pex, method="KS")
+#' a <- qtscore(bt,srdta)
+#' lambda(a)
+#'
"estlambda" <-
- function(data,plot = FALSE, proportion=1.0, method="regression", filter=TRUE,...) {
+ function(data,plot = FALSE, proportion=1.0, method="regression", filter=TRUE, ... ) {
data <- data[which(!is.na(data))]
- if (proportion>1.0 || proportion<=0) stop("proportion argument should be greater then zero and less than or equal to one")
+ if (proportion>1.0 || proportion<=0)
+ stop("proportion argument should be greater then zero and less than or equal to one")
ntp <- round(proportion*length(data))
if (ntp<1) stop("no valid measurments")
if (ntp==1) {
@@ -29,13 +67,7 @@
ppoi <- ppoi[1:ntp]
# s <- summary(lm(data~offset(ppoi)))$coeff
# bug fix thanks to Franz Quehenberger
- if (plot) {
- lim <- c(0,max(data,ppoi,na.rm=T))
-# plot(ppoi,data,xlim=lim,ylim=lim,xlab="Expected",ylab="Observed", ...)
- plot(ppoi,data,xlab="Expected",ylab="Observed", ...)
- abline(a=0,b=1)
- abline(a=0,b=(s[1,1]),col="red")
- }
+
out <- list()
if (method=="regression") {
s <- summary(lm(data~0+ppoi))$coeff
@@ -44,8 +76,23 @@
} else if (method=="median") {
out$estimate <- median(data,na.rm=TRUE)/qchisq(0.5,1)
out$se <- NA
+ } else if (method=="KS") {
+ limits <- c(0.5,100)
+ out$estimate <- estLambdaKS(data,limits=limits)
+ if ( abs(out$estimate-limits[1])<1e-4 || abs(out$estimate-limits[2])<1e-4 )
+ warning("using method='KS' lambda too close to limits, use other method")
+ out$se <- NA
} else {
stop("'method' should be either 'regression' or 'median'!")
}
+
+ if (plot) {
+ lim <- c(0,max(data,ppoi,na.rm=T))
+# plot(ppoi,data,xlim=lim,ylim=lim,xlab="Expected",ylab="Observed", ...)
+ plot(ppoi,data,xlab="Expected",ylab="Observed", ...)
+ abline(a=0,b=1)
+ abline(a=0,b=out$estimate,col="red")
+ }
+
out
}
Modified: pkg/GenABEL/R/export.plink.R
===================================================================
--- pkg/GenABEL/R/export.plink.R 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/R/export.plink.R 2011-12-05 12:02:08 UTC (rev 810)
@@ -23,7 +23,9 @@
#' @keywords IO
#'
-"export.plink" <- function(data, filebasename, phenotypes= "all", ...) {
+"export.plink" <- function(data, filebasename="plink", phenotypes= "all",
+ transpose=FALSE, export012na=FALSE, ...)
+{
if (!is.null(phenotypes)) {
phef <- paste(filebasename,".phe",sep="")
@@ -36,10 +38,26 @@
write.table(phed,file=phef,row.names=FALSE,col.names=TRUE,quote=FALSE,sep=" ")
}
- pedf <- paste(filebasename,".ped",sep="")
- mapf <- paste(filebasename,".map",sep="")
+ if (!transpose) {
+ pedf <- paste(filebasename,".ped",sep="")
+ mapf <- paste(filebasename,".map",sep="")
+
+ export.merlin(data,pedfile=pedf,datafile=NULL,
+ mapfile=mapf,format="plink", ... )
+ } else {
+ # export TFAM
+ sx <- male(data)
+ sx[sx==0] <- 2
+ tfam <- data.frame(FID=c(1:nids(data)),IID=idnames(data),father=0,mother=0,
+ sex=sx,trait=-9,stringsAsFactors=FALSE)
+ write.table(tfam,file=paste(filebasename,".tfam",sep=""),row.names=FALSE,
+ col.names=FALSE,quote=FALSE)
+ # export genotypic data
+ pedfilename <- paste(filebasename,".tped",sep="")
+ tmp <- .Call("export_plink_tped",as.character(snpnames(data)), as.character(chromosome(data)),
+ as.double(map(data)),as.raw(gtdata(data)@gtps), as.integer(nsnps(data)),
+ as.integer(nids(data)), as.character(coding(data)), as.character(pedfilename),
+ as.logical(export012na))
+ }
- export.merlin(data,pedfile=pedf,datafile=NULL,
- mapfile=mapf,format="plink", ... )
-
}
\ No newline at end of file
Modified: pkg/GenABEL/R/ibs.R
===================================================================
--- pkg/GenABEL/R/ibs.R 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/R/ibs.R 2011-12-05 12:02:08 UTC (rev 810)
@@ -1,7 +1,7 @@
"ibs" <-
-function (data,snpsubset,idsubset=NULL,cross.idsubset=NULL,weight="no",snpfreq=NULL) {
+ function (data,snpsubset,idsubset=NULL,cross.idsubset=NULL,weight="no",snpfreq=NULL) {
# idsubset, cross.idsubset: should be real names, not indexes!
- if (is(data,"gwaa.data")) data <- data at gtdata
+ if (is(data,"gwaa.data")) data <- gtdata(data)
if (!is(data,"snp.data")) stop("The data argument must be of snp.data-class or gwaa.data-class")
if (!missing(snpsubset)) data <- data[,snpsubset]
if (is.null(idsubset) && !is.null(cross.idsubset)) stop("cross.idsubset arg cannot be used (idsubset missing)",immediate. = TRUE)
@@ -12,10 +12,27 @@
} else {
snpfreq <- summary(data)[,"Q.2"]
}
- if (weight=="no") {
+
+ wargs <- c("no","freq","homo")
+ if (!(match(weight,wargs,nomatch=0)>0)) {
+ out <- paste("weight argument should be one of",wargs,"\n")
+ stop(out)
+ }
+# if (npairs > 2500000) stop("Too many pairs to evaluate... Stopped")
+ if (weight == "no") {
homodiag <- hom(data)[,"Hom"]
+ option = 0
+ } else if (weight == "freq") {
+ homodiag <- 0.5+(hom(data,snpfreq=snpfreq)[,"F"]/2)
+ option = 1
+ } else if (weight == "homo") {
+ smr <- summary(data)
+ if (any(smr[,"P.12"] != 0 )) warning("'weight=homo' used, but data contain heterozygotes")
+ rm(smr);gc();
+ homodiag <- rep(1.0,nids(data))
+ option = 2
} else {
- homodiag <- 0.5+(hom(data,snpfreq=snpfreq)[,"F"]/2)
+ stop("'weight' argument value not recognised")
}
varidiag <- hom(data)[,"Var"]
ibs.C.option <- 0
@@ -49,16 +66,19 @@
gc()
idset1.num <- which(data at idnames %in% idset1)
idset2.num <- which(data at idnames %in% idset2)
- wargs <- c("no","freq")
- if (!(match(weight,wargs,nomatch=0)>0)) {
- out <- paste("weight argument should be one of",wargs,"\n")
- stop(out)
- }
-# if (npairs > 2500000) stop("Too many pairs to evaluate... Stopped")
- if (weight == "no") option = 0
- if (weight == "freq") option = 1
+
if (ibs.C.option==1) {
- sout <- .C("ibspar", as.raw(data at gtps), as.integer(data at nids), as.integer(data at nsnps), as.integer(length(idset1.num)), as.integer(idset1.num-1), as.integer(length(idset2.num)), as.integer(idset2.num-1), as.double(snpfreq), as.integer(option), sout = double(2*length(idset1.num)*length(idset2.num)), PACKAGE="GenABEL")$sout
+ if (option<=1) {
+ sout <- .C("ibspar", as.raw(data at gtps), as.integer(data at nids), as.integer(data at nsnps),
+ as.integer(length(idset1.num)), as.integer(idset1.num-1),
+ as.integer(length(idset2.num)), as.integer(idset2.num-1), as.double(snpfreq),
+ as.integer(option), sout = double(2*length(idset1.num)*length(idset2.num)),
+ PACKAGE="GenABEL")$sout
+ } else if (option==2) {
+ stop("option 2 only availabel in optimized version")
+ } else {
+ stop("unknown 'weight' option")
+ }
out <- list()
out$ibs <- sout[1:(length(idset1.num)*length(idset2.num))]
out$num <- sout[(length(idset1.num)*length(idset2.num)+1):(length(idset1.num)*length(idset2.num)*2)]
@@ -69,7 +89,8 @@
rownames(out$num) <- idset1
colnames(out$num) <- idset2
} else if (ibs.C.option==0) {
- out <- .C("ibsnew", as.raw(data at gtps), as.integer(data at nids), as.integer(data at nsnps), as.double(snpfreq), as.integer(option), sout = double(data at nids*data at nids), PACKAGE="GenABEL")$sout
+ out <- .C("ibsnew", as.raw(data at gtps), as.integer(data at nids), as.integer(data at nsnps),
+ as.double(snpfreq), as.integer(option), sout = double(data at nids*data at nids), PACKAGE="GenABEL")$sout
dim(out) <- c(length(idset2.num),length(idset1.num))
out <- t(out)
diag(out) <- homodiag
Modified: pkg/GenABEL/R/polygenic_hglm.R
===================================================================
--- pkg/GenABEL/R/polygenic_hglm.R 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/R/polygenic_hglm.R 2011-12-05 12:02:08 UTC (rev 810)
@@ -127,9 +127,17 @@
relmat <- relmat*2.0
s <- svd(relmat)
L <- s$u %*% diag(sqrt(s$d))
+ #res_hglm <- try( hglm(y = y, X = desmat, Z = L, family = family, conv = conv, maxit = maxit, ... ) )
res_hglm <- hglm(y = y, X = desmat, Z = L, family = family, conv = conv, maxit = maxit, ... )
#sum_res_hglm <- summary(res_hglm)
+ #if (is(try(tmp <- res_hglm$fixef),"try-error")) {
+ # warning("HGLM failed")
+ # return(res_hglm)
+ #}
+ #if (length(names(res_hglm$fixef))<1)
+ # names(res_hglm$fixef) <- paste("fe",1:length(res_hglm$fixef),sep="")
+
out <- list()
out$measuredIDs <- mids
@@ -140,7 +148,12 @@
out$h2an$estimate <- c(res_hglm$fixef,out$esth2, tVar)
names(out$h2an$estimate)[(length(out$h2an$estimate) - 1):(length(out$h2an$estimate))] <-
c("h2","totalVar")
- out$h2an$se <- c(res_hglm$SeFe, NA, NA)
+ if (is.null(res_hglm$SeFe)) {
+ warning("Convergence failure HGLM!")
+ out$h2an$se <- rep(NA,length(res_hglm$fixef)+2)
+ } else {
+ out$h2an$se <- c(res_hglm$SeFe, NA, NA)
+ }
names(out$h2an$se) <-
c(names(res_hglm$fixef), "h2","totalVar")
out$pgresidualY <- rep(NA, length(mids))
Modified: pkg/GenABEL/R/qtscore.R
===================================================================
--- pkg/GenABEL/R/qtscore.R 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/R/qtscore.R 2011-12-05 12:02:08 UTC (rev 810)
@@ -147,7 +147,7 @@
stop("formula argument should be a formula or a numeric vector")
}
if (!missing(data)) detach(data at phdata)
- if (length(strata)!=data at gtdata@nids) stop("Strata variable and the data do not match in length")
+ if (length(strata)!=nids(data)) stop("Strata variable and the data do not match in length")
if (any(is.na(strata))) stop("Strata variable contains NAs")
if (any(strata!=0)) {
olev <- levels(as.factor(strata))
@@ -169,17 +169,58 @@
# if (trait.type=="binomial" & !is(formula,"formula")) bin<-1 else bin <- 0
if (trait.type=="binomial") bin<-1 else bin<-0
- lenn <- data at gtdata@nsnps;
+ lenn <- nsnps(data);
###out <- list()
if (times>1) {pb <- txtProgressBar(style = 3)}
for (j in c(1:(times+1*(times>1)))) {
if (j>1) resid <- sample(resid,replace=FALSE)
- chi2 <- .C("qtscore_glob",as.raw(data at gtdata@gtps),as.double(resid),as.integer(bin),as.integer(data at gtdata@nids),as.integer(data at gtdata@nsnps), as.integer(nstra), as.integer(strata), chi2 = double(10*data at gtdata@nsnps), PACKAGE="GenABEL")$chi2
- if (any(data at gtdata@chromosome=="X")) {
- ogX <- data at gtdata[,data at gtdata@chromosome=="X"]
- sxstra <- strata; sxstra[ogX at male==1] <- strata[ogX at male==1]+nstra
- chi2X <- .C("qtscore_glob",as.raw(ogX at gtps),as.double(resid),as.integer(bin),as.integer(ogX at nids),as.integer(ogX at nsnps), as.integer(nstra*2), as.integer(sxstra), chi2 = double(10*ogX at nsnps), PACKAGE="GenABEL")$chi2
- revec <- (data at gtdata@chromosome=="X")
+# if (old) {
+ chi2 <- .C("qtscore_glob",as.raw(data at gtdata@gtps),as.double(resid),as.integer(bin),
+ as.integer(data at gtdata@nids),as.integer(data at gtdata@nsnps), as.integer(nstra),
+ as.integer(strata), chi2 = double(10*data at gtdata@nsnps), PACKAGE="GenABEL")$chi2
+# } else {
+# gtNrow <- dim(data)[1]
+# gtNcol <- dim(data)[2]
+# chi2 <- .Call("iteratorGA",
+# data at gtdata@gtps,
+# as.integer(gtNrow), as.integer(gtNcol),
+# as.character("qtscore_glob"), # Function
+# as.character("R"), # Output type
+# as.integer(2), # MAR
+# as.integer(1), # Steps
+# as.integer(5), # nr of extra arguments
+# as.double(resid),
+# as.integer(bin),
+# as.integer(data at gtdata@nids),
+# as.integer(nstra),
+# as.integer(strata),
+# package="GenABEL")
+# }
+ if (any(chromosome(data)=="X")) {
+ ogX <- gtdata(data[,chromosome(data)=="X"])
+ sxstra <- strata; sxstra[male(ogX)==1] <- strata[male(ogX)==1]+nstra
+# if (old) {
+ chi2X <- .C("qtscore_glob",as.raw(ogX at gtps),as.double(resid),as.integer(bin),
+ as.integer(nids(ogX)),as.integer(nsnps(ogX)), as.integer(nstra*2),
+ as.integer(sxstra), chi2 = double(10*nsnps(ogX)), PACKAGE="GenABEL")$chi2
+# } else {
+# chi2X <- .Call("iteratorGA",
+# ogX at gtps,
+# as.integer(ogX at nsnps), # nCol
+# as.integer(ogX at nids), # nRow
+# as.character("qtscore_glob"), # Function
+# as.character("R"), # Output type
+# as.integer(1), # MAR
+# as.integer(1), # Steps
+# as.integer(5), # nr of arguments
+# as.double(resid),
+# as.integer(bin),
+# as.integer(nids),
+# as.integer(nstra*2),
+# as.integer(sxstra),
+# package="GenABEL")
+# }
+ revec <- (chromosome(data)=="X")
revec <- rep(revec,6)
chi2 <- replace(chi2,revec,chi2X)
rm(ogX,chi2X,revec);gc(verbose=FALSE)
Added: pkg/GenABEL/R/zUtil.R
===================================================================
--- pkg/GenABEL/R/zUtil.R (rev 0)
+++ pkg/GenABEL/R/zUtil.R 2011-12-05 12:02:08 UTC (rev 810)
@@ -0,0 +1,10 @@
+# utility functions invisible to user
+lossFunctionLambdaKS <- function(lambda,chi2values, ... ) {
+ ksT <- ks.test(chi2values/lambda, ... )$stat
+ return( ksT )
+}
+estLambdaKS <- function(chi2values,limits=c(0.5,100)) {
+ iniLambda <- 1
+ optRes <- optimize(lossFunctionLambdaKS, interval=limits, chi2values=chi2values, "pchisq", 1)
+ return(optRes$minimum)
+}
Property changes on: pkg/GenABEL/R/zUtil.R
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Modified: pkg/GenABEL/R/zzz.R
===================================================================
--- pkg/GenABEL/R/zzz.R 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/R/zzz.R 2011-12-05 12:02:08 UTC (rev 810)
@@ -1,6 +1,6 @@
.onLoad <- function(lib, pkg) {
GenABEL.version <- "1.7-0"
- cat("GenABEL v.",GenABEL.version,"(September 16, 2011) loaded\n")
+ cat("GenABEL v.",GenABEL.version,"(December 05, 2011) loaded\n")
# check for updates and news
address <- c(
Modified: pkg/GenABEL/generate_documentation.R
===================================================================
--- pkg/GenABEL/generate_documentation.R 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/generate_documentation.R 2011-12-05 12:02:08 UTC (rev 810)
@@ -4,6 +4,7 @@
"arrange_probabel_phe.R",
"blurGenotype.R",
"del.phdata.R",
+ "estlambda.R",
"export.plink.R",
"extract.annotation.impute.R",
"extract.annotation.mach.R",
Modified: pkg/GenABEL/inst/unitTests/runit.exports.R
===================================================================
--- pkg/GenABEL/inst/unitTests/runit.exports.R 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/inst/unitTests/runit.exports.R 2011-12-05 12:02:08 UTC (rev 810)
@@ -25,7 +25,8 @@
data(ge03d2.clean)
nTestIds <- sample(c(10:min(100,nids(ge03d2.clean))),1)
nTestSnps <- sample(c(10:min(1000,nsnps(ge03d2.clean))),1)
- dta <- ge03d2.clean[sample(1:nids(ge03d2.clean),nTestIds),sample(1:nsnps(ge03d2.clean),nTestSnps)]
+ dta <- ge03d2.clean[sort(sample(1:nids(ge03d2.clean),nTestIds)),
+ sort(sample(1:nsnps(ge03d2.clean),nTestSnps))]
export.plink(dta,filebasename="tmpOld",dpieceFun="old")
export.plink(dta,filebasename="tmpNew",dpieceFun="new")
@@ -60,6 +61,22 @@
checkIdentical(xN,xO)
unlink("tmpOld*")
- unlink("tmpNew*")
-
+ unlink("tmpNew*")
+
+ export.plink(dta,filebasename="tmpTrans",transpose=TRUE)
+ convert.snp.tped(tpedfile="tmpTrans.tped",tfamfile="tmpTrans.tfam",out="tmpTrans.raw")
+ xBack <- load.gwaa.data(gen="tmpTrans.raw",phe="tmpTrans.phe",id="IID")
+ strand(xBack) <- strand(dta)
+ phdata(xBack) <- phdata(xBack)[,-1]
+ sameCode <- which(coding(dta) == coding(xBack))
+ checkIdentical(xBack[,sameCode],dta[,sameCode])
+ swappedCode <- which(refallele(dta)==effallele(xBack) & effallele(dta)==refallele(xBack))
+ for (i in swappedCode) {
+ gtDta <- c(2,1,0)[as.vector(as.numeric(dta[,i]))+1]
+ gtBack <- as.vector(as.numeric(xBack[,i]))
+ print(checkIdentical(gtBack,gtDta))
+ }
+
+ unlink("tmpTrans*")
+
}
Added: pkg/GenABEL/inst/unitTests/runit.merge.R
===================================================================
--- pkg/GenABEL/inst/unitTests/runit.merge.R (rev 0)
+++ pkg/GenABEL/inst/unitTests/runit.merge.R 2011-12-05 12:02:08 UTC (rev 810)
@@ -0,0 +1,31 @@
+### --- Test setup ---
+#
+# regression test
+#
+
+if(FALSE) {
+ ## Not really needed, but can be handy when writing tests
+ library(RUnit)
+ library(GenABEL)
+}
+
+### do not run
+#stop("SKIP THIS TEST")
+###
+
+### ---- common functions and data -----
+
+#source(paste("../inst/unitTests/shared_functions.R"))
+#source(paste(path,"/shared_functions.R",sep=""))
+
+### --- Test functions ---
+
+test.merge.bug1641 <- function()
+{
+ library(GenABEL)
+ print( packageVersion("GenABEL") )
+ data(srdta)
+ x1 <- srdta[1:4,1]
+ x2 <- srdta[5:10, 2]
+ xy <- merge(x1, x2, intersected_snps_only=FALSE)
+}
\ No newline at end of file
Property changes on: pkg/GenABEL/inst/unitTests/runit.merge.R
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Added: pkg/GenABEL/inst/unitTests/runit.qtscore.R
===================================================================
--- pkg/GenABEL/inst/unitTests/runit.qtscore.R (rev 0)
+++ pkg/GenABEL/inst/unitTests/runit.qtscore.R 2011-12-05 12:02:08 UTC (rev 810)
@@ -0,0 +1,4 @@
+#library(GenABEL)
+#data(ge03d2.clean)
+#qtsOld <- qtscore(height~sex,data=ge03d2.clean,old=TRUE)
+#qtsNew <- qtscore(height~sex,data=ge03d2.clean,old=FALSE)
Property changes on: pkg/GenABEL/inst/unitTests/runit.qtscore.R
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Modified: pkg/GenABEL/man/estlambda.Rd
===================================================================
--- pkg/GenABEL/man/estlambda.Rd 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/man/estlambda.Rd 2011-12-05 12:02:08 UTC (rev 810)
@@ -1,47 +1,36 @@
\name{estlambda}
\alias{estlambda}
-\title{Estimate the inflation factor for a distribution of P-values}
-\description{
-Estimate the inflation factor for a distribution of P-values or 1df chi-square test.
-The major use of this procedure is the Genomic Control, but can also be used to
-visualise the distribution of P-values coming from other tests.
-}
-\usage{
-estlambda(data, plot = FALSE, proportion = 1.0, method="regression", filter=TRUE, ... )
-}
-\arguments{
- \item{data}{A vector of reals. If all are <=1, it is assumed that this
- is a vector of P-values, else it is treated as a vector
- of chi-squares with 1 d.f.}
- \item{plot}{Wether the prot should be presented}
- \item{proportion}{The proportion of lowest P (Chi2) to be used when
- estimating the inflation factor Lambda}
- \item{method}{Either "regression" or "median", the method
- to be used for Lambda estimation}
- \item{filter}{if the test statistics with 0-value of chi-square
- should be excluded prior to estimation of Lambda}
- \item{...}{arguments passed to \code{\link{plot}} function}
-}
-%\details{
-%}
-\value{
- A list with elements
- \item{estimate}{Estimate of Lambda}
- \item{se}{Standard error of the estimate}
-}
-%\references{}
+\title{Estimate the inflation factor for a distribution of P-values...}
+\usage{estlambda(data, plot=FALSE, proportion=1, method="regression",
+ filter=TRUE, ...)}
+\description{Estimate the inflation factor for a distribution of P-values}
+\details{Estimate the inflation factor for a distribution of P-values or 1df
+chi-square test. The major use of this procedure is the Genomic Control, but
+can also be used to visualise the distribution of P-values coming from other
+tests. Methods implemented include 'median' (median(chi2)/0.455...),
+regression (of observed onto expected) and 'KS' (optimizing the
+chi2.1df distribution fit by use of Kolmogorov-Smirnov test)}
+\value{A list with elements \item{estimate}{Estimate of Lambda}
+\item{se}{Standard error of the estimate}}
\author{Yurii Aulchenko}
-%\note{
-%}
-\seealso{
-\code{\link{ccfast}},
-\code{\link{qtscore}}
-}
-\examples{
-data(srdta)
-pex <- summary(srdta at gtdata)[,"Pexact"]
-estlambda(pex)
-a <- ccfast("bt",srdta)
-lambda(a)
-}
-\keyword{htest}% at least one, from doc/KEYWORDS
+\seealso{\code{\link{ccfast}}, \code{\link{qtscore}}}
+\keyword{htest}
+\arguments{\item{data}{A vector of reals. If all are <=1, it is assumed that this is a
+vector of P-values, else it is treated as a vector of chi-squares with 1
+d.f.}
+\item{plot}{Wether the prot should be presented}
+\item{proportion}{The proportion of lowest P (Chi2) to be used when
+estimating the inflation factor Lambda}
+\item{method}{"regression", "median", or "KS" method to be used for
+Lambda estimation}
+\item{filter}{if the test statistics with 0-value of chi-square should be
+excluded prior to estimation of Lambda}
+\item{...}{arguments passed to \code{\link{plot}} function}}
+\examples{data(srdta)
+pex <- summary(gtdata(srdta))[,"Pexact"]
+estlambda(pex, plot=TRUE)
+estlambda(pex, method="regression", proportion = 0.95)
+estlambda(pex, method="median")
+estlambda(pex, method="KS")
+a <- qtscore(bt,srdta)
+lambda(a)}
Modified: pkg/GenABEL/man/export.plink.Rd
===================================================================
--- pkg/GenABEL/man/export.plink.Rd 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/man/export.plink.Rd 2011-12-05 12:02:08 UTC (rev 810)
@@ -1,7 +1,8 @@
\name{export.plink}
\alias{export.plink}
\title{Export GenABEL data in PLINK format...}
-\usage{export.plink(data, filebasename, phenotypes="all", ...)}
+\usage{export.plink(data, filebasename="plink", phenotypes="all",
+ transpose=FALSE, export012na=FALSE, ...)}
\description{Export GenABEL data in PLINK format}
\details{Export GenABEL data in PLINK format. This function is
a simple wrapper to \code{\link{export.merlin}} function
Modified: pkg/GenABEL/man/load.gwaa.data.Rd
===================================================================
--- pkg/GenABEL/man/load.gwaa.data.Rd 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/man/load.gwaa.data.Rd 2011-12-05 12:02:08 UTC (rev 810)
@@ -9,7 +9,7 @@
force = TRUE, makemap = FALSE, sort = TRUE, id = "id")
}
\arguments{
- \item{phenofile}{data table with pehnotypes}
+ \item{phenofile}{data table with phenotypes}
\item{genofile}{internally formatted genotypic data file (see \code{\link{convert.snp.text}} to
convert data)}
\item{force}{Force loading the data if heterozygous X-chromosome genotypes are found in male}
Modified: pkg/GenABEL/man/polygenic.Rd
===================================================================
--- pkg/GenABEL/man/polygenic.Rd 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/man/polygenic.Rd 2011-12-05 12:02:08 UTC (rev 810)
@@ -125,7 +125,7 @@
from previous regression, these are expected to change little from the
initial estimate. The default value of 1000 proved to work rather well under a
range of conditions.}
-\item{quiet}{If FALSE (default), details of optimisation process are reported.}
+\item{quiet}{If FALSE (default), details of optimisation process are reported}
\item{steptol}{steptal parameter of "nlm"}
\item{gradtol}{gradtol parameter of "nlm"}
\item{optimbou}{fixed effects boundary scale parameter for 'optim'}
Modified: pkg/GenABEL/man/srdta.Rd
===================================================================
--- pkg/GenABEL/man/srdta.Rd 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/man/srdta.Rd 2011-12-05 12:02:08 UTC (rev 810)
@@ -11,7 +11,7 @@
analysis. Run demo(srdta) and check tut-srdta.pdf
to see examples of work with this data set.
Original data files used for this set are located at
- YOUR\_R\_LIB\_LOCATION/exdata/srphenos.dat (pehnotypes),
+ YOUR\_R\_LIB\_LOCATION/exdata/srphenos.dat (phenotypes),
srgenos.dat (human-readable genotypes) and srgenos.raw
(genotypes in internal format)}
\usage{data(srdta)}
Modified: pkg/GenABEL/src/GAlib/export_plink.cpp
===================================================================
--- pkg/GenABEL/src/GAlib/export_plink.cpp 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/src/GAlib/export_plink.cpp 2011-12-05 12:02:08 UTC (rev 810)
@@ -107,7 +107,99 @@
return R_NilValue;
}
+SEXP export_plink_tped(SEXP Snpnames, SEXP Chromosomes, SEXP Map,
+ SEXP Snpdata, SEXP Nsnps, SEXP Nids, SEXP Coding, SEXP Pedfilename,
+ SEXP ExportNumeric)
+{
+ std::vector<std::string> snpName;
+ for(unsigned int i=0;i<((unsigned int) length(Snpnames));i++)
+ snpName.push_back(CHAR(STRING_ELT(Snpnames,i)));
+ std::vector<std::string> coding;
+ for(unsigned int i=0;i<((unsigned int) length(Coding));i++)
+ coding.push_back(CHAR(STRING_ELT(Coding,i)));
+
+ std::vector<std::string> chromosome;
+ for(unsigned int i=0;i<((unsigned int) length(Chromosomes));i++)
+ chromosome.push_back(CHAR(STRING_ELT(Chromosomes,i)));
+
+ std::vector<double> position;
+ for(unsigned int i=0;i<((unsigned int) length(Map));i++)
+ position.push_back(REAL(Map)[i]);
+
+ //Rprintf("0\n");
+ unsigned int nsnps = INTEGER(Nsnps)[0];
+ int nids = INTEGER(Nids)[0];
+ bool exportNumeric = LOGICAL(ExportNumeric)[0];
+ std::string filename = CHAR(STRING_ELT(Pedfilename,0));
+ std::ofstream fileWoA;
+ int gtint[nids];
+ int ieq1 = 1;
+ char * snpdata = (char *) RAW(Snpdata);
+ //Rprintf("nsnps=%d\n",nsnps);
+ //Rprintf("nids=%d\n",nids);
+
+ //Rprintf("1\n");
+ std::string* Genotype;
+ std::string sep=" ";
+ int nbytes;
+
+ //Rprintf("nsnps=%d\n",nsnps);
+ //Rprintf("nids=%d\n",nids);
+
+ if ((nids % 4) == 0) nbytes = nids/4; else nbytes = ceil(1.*nids/4.);
+
+ fileWoA.open(filename.c_str(),std::fstream::trunc);
+
+ //Rprintf("A\n");
+ for (unsigned int csnp=0;csnp<nsnps;csnp++) {
+ // collect SNP data
+ get_snps_many(snpdata+nbytes*csnp, &nids, &ieq1, gtint);
+ Genotype = getGenotype(coding[csnp],sep);
+ fileWoA << chromosome[csnp] << " " << snpName[csnp] << " 0 " << (unsigned long int) position[csnp];
+ if (!exportNumeric) {
+ for (int i=0;i<nids;i++) {
+ fileWoA << " " << Genotype[gtint[i]];
+ }
+ } else {
+ for (int i=0;i<nids;i++) {
+ if (gtint[i]==0)
+ fileWoA << " NA";
+ else
+ fileWoA << " " << (gtint[i]-1);
+ }
+ }
+ fileWoA << "\n";
+ //Rprintf("\n");
+ }
+ //Rprintf("B\n");
+ /**
+ for (int i=0;i<nids;i++) {
+ fileWoA << i+from << " " << ids[i] << " 0 0 " << sex[i];
+ for (int j=0;j<ntraits;j++) fileWoA << " " << 0;
+ // unwrap genotypes
+ for (unsigned int csnp=0;csnp<nsnps;csnp++) {
+ Genotype = getGenotype(coding[csnp],sep);
+ // figure out the coding
+ fileWoA << " " << Genotype[gtMatrix[i][csnp]];
+ //fileWoA << " x" << Letter0 << Letter1 << Genotype[0] << Genotype[1] << Genotype[2] << Genotype[3];
+ }
+ // end unwrap
+ fileWoA << "\n";
+ }
+ **/
+ //Rprintf("C\n");
+ fileWoA.close();
+
+ //Rprintf("oooo!" );
+ //for (int i=0;i<10;i++) Rprintf("%d ",sex[i]);
+ //Rprintf("oooo!\n" );
+
+ delete [] Genotype;
+
+ return R_NilValue;
+}
+
#ifdef __cplusplus
}
#endif
Modified: pkg/GenABEL/src/GAlib/export_plink.h
===================================================================
--- pkg/GenABEL/src/GAlib/export_plink.h 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/src/GAlib/export_plink.h 2011-12-05 12:02:08 UTC (rev 810)
@@ -20,6 +20,10 @@
SEXP export_plink(SEXP idnames, SEXP snpdata, SEXP Nsnps, SEXP NidsTotal, SEXP Coding, SEXP from, SEXP to,
SEXP male, SEXP traits, SEXP pedfilename, SEXP plink, SEXP append);
+SEXP export_plink_tped(SEXP snpnames, SEXP chromosomes, SEXP map,
+ SEXP snpdata, SEXP Nsnps, SEXP Nids, SEXP Coding, SEXP pedfilename,
+ SEXP exportNumeric);
+
#ifdef __cplusplus
}
#endif
Modified: pkg/GenABEL/src/GAlib/gwaa.c
===================================================================
--- pkg/GenABEL/src/GAlib/gwaa.c 2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/src/GAlib/gwaa.c 2011-12-05 12:02:08 UTC (rev 810)
@@ -912,6 +912,8 @@
double Ttotg,dgt,totg[nstra],eG[nstra],ePH[nstra],svec[nids],gtctr[nids],phctr[nids];
double Tsg, sg[nstra],sph[nstra];
double u, v;
+ double temp1,temp2;
+
if ((nids % 4) == 0) nbytes = nids/4; else nbytes = ceil(1.*nids/4.);
for (igt=0;igt<nsnps;igt++) {
@@ -947,6 +949,7 @@
phctr[i] = pheno[i] - ePH[cstr];
}
}
+
/**
for (i=0;i<nids;i++) {
svec[i] = 0.;
@@ -958,7 +961,6 @@
**/
for (i=0;i<nids;i++) svec[i] = 0.;
- double temp1,temp2;
for (i=0;i<nids;i++) {
int offS = nids*i;
temp1 = gtctr[i];
@@ -1499,7 +1501,7 @@
}
}
noninf=0;
- if (option > 0) {
+ if (option == 1) {
// count[0]=count[1]=count[2]=count[3]=sumgt=0;
// for (i=0;i<nids;i++) count[gt[i]]++;
// sumgt = count[1]+count[2]+count[3];
More information about the Genabel-commits
mailing list