[Genabel-commits] r810 - in pkg/GenABEL: . R inst/unitTests man src/GAlib

Mon Dec 5 13:02:08 CET 2011

Author: yurii
Date: 2011-12-05 13:02:08 +0100 (Mon, 05 Dec 2011)
New Revision: 810

Added:
   pkg/GenABEL/R/zUtil.R
   pkg/GenABEL/inst/unitTests/runit.merge.R
   pkg/GenABEL/inst/unitTests/runit.qtscore.R
Modified:
   pkg/GenABEL/CHANGES.LOG
   pkg/GenABEL/DESCRIPTION
   pkg/GenABEL/R/estlambda.R
   pkg/GenABEL/R/export.plink.R
   pkg/GenABEL/R/ibs.R
   pkg/GenABEL/R/polygenic_hglm.R
   pkg/GenABEL/R/qtscore.R
   pkg/GenABEL/R/zzz.R
   pkg/GenABEL/generate_documentation.R
   pkg/GenABEL/inst/unitTests/runit.exports.R
   pkg/GenABEL/man/estlambda.Rd
   pkg/GenABEL/man/export.plink.Rd
   pkg/GenABEL/man/load.gwaa.data.Rd
   pkg/GenABEL/man/polygenic.Rd
   pkg/GenABEL/man/srdta.Rd
   pkg/GenABEL/src/GAlib/export_plink.cpp
   pkg/GenABEL/src/GAlib/export_plink.h
   pkg/GenABEL/src/GAlib/gwaa.c
Log:
accumulated changes + regression test for the bug [#1641]

Modified: pkg/GenABEL/CHANGES.LOG
===================================================================

--- pkg/GenABEL/CHANGES.LOG	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/CHANGES.LOG	2011-12-05 12:02:08 UTC (rev 810)
@@ -1,12 +1,23 @@
-*** v. 1.7-0 (2011.09.16)
+*** v. 1.7-0 (2011.11.15)
 
+Added 'KS' method to estimate lambda in 'estlambda'. Generates rather 
+good results (under null) and may be considered as default option in the future, 
+after testing under the alternative. 
+
+Account for situation when HGLM fails to converge (s.e.'s set to NA in 
+h2$h2an$se) 
+
+added option 'transposed' to 'export.plink' to export TPED files
+
 speeding-up 'mmscore' by x1.8 through more efficient vector-matrix 
 product computations 
 
 export.merlin, export.plink now runs much faster (exports on C++ code)
 
-Deal with exceptional situation in merge.snp.data / monomorphic part; 
-added case of no overlap in SNPs (skip monomorphic staff then) 
+Bug [#1641] reported by Karl Froner fixed. Now GenABEL 
+deals with exceptional situation in merge.snp.data / monomorphic part; 
+added case of no overlap in SNPs (skip monomorphic staff then). RUnit 
+regression test runit.merge.R/test.merge.bug1641 added.   
 
 Modifications in 'estlambda': plot=FALSE by default, added option 
 to estimate Lambda with median method; added option 'filter' for 

Modified: pkg/GenABEL/DESCRIPTION
===================================================================
--- pkg/GenABEL/DESCRIPTION	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/DESCRIPTION	2011-12-05 12:02:08 UTC (rev 810)
@@ -2,7 +2,7 @@
 Type: Package
 Title: genome-wide SNP association analysis
 Version: 1.7-0
-Date: 2011-09-16
+Date: 2011-12-05
 Author: GenABEL developers
 Maintainer: GenABEL developers <genabel.project at gmail.com>
 Depends: R (>= 2.10.0), methods, MASS

Modified: pkg/GenABEL/R/estlambda.R
===================================================================
--- pkg/GenABEL/R/estlambda.R	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/R/estlambda.R	2011-12-05 12:02:08 UTC (rev 810)
@@ -1,7 +1,45 @@
+#' Estimate the inflation factor for a distribution of P-values
+#' 
+#' Estimate the inflation factor for a distribution of P-values or 1df
+#' chi-square test. The major use of this procedure is the Genomic Control, but
+#' can also be used to visualise the distribution of P-values coming from other
+#' tests. Methods implemented include 'median' (median(chi2)/0.455...), 
+#' regression (of observed onto expected) and 'KS' (optimizing the 
+#' chi2.1df distribution fit by use of Kolmogorov-Smirnov test)
+#' 
+#' 
+#' @param data A vector of reals. If all are <=1, it is assumed that this is a
+#'   vector of P-values, else it is treated as a vector of chi-squares with 1
+#'   d.f.
+#' @param plot Wether the prot should be presented
+#' @param proportion The proportion of lowest P (Chi2) to be used when
+#'   estimating the inflation factor Lambda
+#' @param method "regression", "median", or "KS" method to be used for
+#'   Lambda estimation
+#' @param filter if the test statistics with 0-value of chi-square should be
+#'   excluded prior to estimation of Lambda
+#' @param ... arguments passed to \code{\link{plot}} function
+#' @return A list with elements \item{estimate}{Estimate of Lambda}
+#'   \item{se}{Standard error of the estimate}
+#' @author Yurii Aulchenko
+#' @seealso \code{\link{ccfast}}, \code{\link{qtscore}}
+#' @keywords htest
+#' @examples
+#' 
+#' data(srdta)
+#' pex <- summary(gtdata(srdta))[,"Pexact"]
+#' estlambda(pex, plot=TRUE)
+#' estlambda(pex, method="regression", proportion = 0.95)
+#' estlambda(pex, method="median")
+#' estlambda(pex, method="KS")
+#' a <- qtscore(bt,srdta)
+#' lambda(a)
+#' 
 "estlambda" <-
-		function(data,plot = FALSE, proportion=1.0, method="regression", filter=TRUE,...) {
+		function(data,plot = FALSE, proportion=1.0, method="regression", filter=TRUE, ... ) {
 	data <- data[which(!is.na(data))]
-	if (proportion>1.0 || proportion<=0) stop("proportion argument should be greater then zero and less than or equal to one")
+	if (proportion>1.0 || proportion<=0) 
+		stop("proportion argument should be greater then zero and less than or equal to one")
 	ntp <- round(proportion*length(data))
 	if (ntp<1) stop("no valid measurments")
 	if (ntp==1) {
@@ -29,13 +67,7 @@
 	ppoi <- ppoi[1:ntp]
 #	s <- summary(lm(data~offset(ppoi)))$coeff
 # 	bug fix thanks to Franz Quehenberger
-	if (plot) {
-		lim <- c(0,max(data,ppoi,na.rm=T))
-#		plot(ppoi,data,xlim=lim,ylim=lim,xlab="Expected",ylab="Observed", ...)
-		plot(ppoi,data,xlab="Expected",ylab="Observed", ...)
-		abline(a=0,b=1)
-		abline(a=0,b=(s[1,1]),col="red")
-	}
+	
 	out <- list()
 	if (method=="regression") {
 		s <- summary(lm(data~0+ppoi))$coeff
@@ -44,8 +76,23 @@
 	} else if (method=="median") {
 		out$estimate <- median(data,na.rm=TRUE)/qchisq(0.5,1)
 		out$se <- NA
+	} else if (method=="KS") {
+		limits <- c(0.5,100)
+		out$estimate <- estLambdaKS(data,limits=limits)
+		if ( abs(out$estimate-limits[1])<1e-4 || abs(out$estimate-limits[2])<1e-4 ) 
+			warning("using method='KS' lambda too close to limits, use other method")
+		out$se <- NA
 	} else {
 		stop("'method' should be either 'regression' or 'median'!")
 	}
+	
+	if (plot) {
+		lim <- c(0,max(data,ppoi,na.rm=T))
+#		plot(ppoi,data,xlim=lim,ylim=lim,xlab="Expected",ylab="Observed", ...)
+		plot(ppoi,data,xlab="Expected",ylab="Observed", ...)
+		abline(a=0,b=1)
+		abline(a=0,b=out$estimate,col="red")
+	}
+	
 	out
 }

Modified: pkg/GenABEL/R/export.plink.R
===================================================================
--- pkg/GenABEL/R/export.plink.R	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/R/export.plink.R	2011-12-05 12:02:08 UTC (rev 810)
@@ -23,7 +23,9 @@
 #' @keywords IO
 #' 
 
-"export.plink" <- function(data, filebasename, phenotypes= "all", ...) {
+"export.plink" <- function(data, filebasename="plink", phenotypes= "all", 
+		transpose=FALSE, export012na=FALSE, ...) 
+{
 	
 	if (!is.null(phenotypes)) {
 		phef <- paste(filebasename,".phe",sep="")
@@ -36,10 +38,26 @@
 		write.table(phed,file=phef,row.names=FALSE,col.names=TRUE,quote=FALSE,sep=" ")
 	}
 	
-	pedf <- paste(filebasename,".ped",sep="")
-	mapf <- paste(filebasename,".map",sep="")
+	if (!transpose) {
+		pedf <- paste(filebasename,".ped",sep="")
+		mapf <- paste(filebasename,".map",sep="")
+		
+		export.merlin(data,pedfile=pedf,datafile=NULL,
+				mapfile=mapf,format="plink", ... )
+	} else {
+		# export TFAM
+		sx <- male(data)
+		sx[sx==0] <- 2
+		tfam <- data.frame(FID=c(1:nids(data)),IID=idnames(data),father=0,mother=0,
+				sex=sx,trait=-9,stringsAsFactors=FALSE)
+		write.table(tfam,file=paste(filebasename,".tfam",sep=""),row.names=FALSE,
+				col.names=FALSE,quote=FALSE)
+		# export genotypic data
+		pedfilename <- paste(filebasename,".tped",sep="")
+		tmp <- .Call("export_plink_tped",as.character(snpnames(data)), as.character(chromosome(data)), 
+				as.double(map(data)),as.raw(gtdata(data)@gtps), as.integer(nsnps(data)), 
+				as.integer(nids(data)), as.character(coding(data)), as.character(pedfilename), 
+				as.logical(export012na))
+	}
 	
-	export.merlin(data,pedfile=pedf,datafile=NULL,
-			mapfile=mapf,format="plink", ... )
-	
 }
\ No newline at end of file

Modified: pkg/GenABEL/R/ibs.R
===================================================================
--- pkg/GenABEL/R/ibs.R	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/R/ibs.R	2011-12-05 12:02:08 UTC (rev 810)
@@ -1,7 +1,7 @@
 "ibs" <- 
-function (data,snpsubset,idsubset=NULL,cross.idsubset=NULL,weight="no",snpfreq=NULL) {
+		function (data,snpsubset,idsubset=NULL,cross.idsubset=NULL,weight="no",snpfreq=NULL) {
 # idsubset, cross.idsubset: should be real names, not indexes!
-	if (is(data,"gwaa.data")) data <- data at gtdata
+	if (is(data,"gwaa.data")) data <- gtdata(data)
 	if (!is(data,"snp.data")) stop("The data argument must be of snp.data-class or gwaa.data-class")
 	if (!missing(snpsubset)) data <- data[,snpsubset]
 	if (is.null(idsubset) && !is.null(cross.idsubset)) stop("cross.idsubset arg cannot be used (idsubset missing)",immediate. = TRUE)
@@ -12,10 +12,27 @@
 	} else {
 		snpfreq <- summary(data)[,"Q.2"]
 	}
-	if (weight=="no") {
+	
+	wargs <- c("no","freq","homo")
+	if (!(match(weight,wargs,nomatch=0)>0)) {
+		out <- paste("weight argument should be one of",wargs,"\n")
+		stop(out)
+	}
+#	if (npairs > 2500000) stop("Too many pairs to evaluate... Stopped")
+	if (weight == "no") {
 		homodiag <- hom(data)[,"Hom"]
+		option = 0
+	} else if (weight == "freq") {
+		homodiag <- 0.5+(hom(data,snpfreq=snpfreq)[,"F"]/2)
+		option = 1
+	} else if (weight == "homo") {
+		smr <- summary(data)
+		if (any(smr[,"P.12"] != 0 )) warning("'weight=homo' used, but data contain heterozygotes")
+		rm(smr);gc();
+		homodiag <- rep(1.0,nids(data))
+		option = 2
 	} else {
-		homodiag <- 0.5+(hom(data,snpfreq=snpfreq)[,"F"]/2)
+		stop("'weight' argument value not recognised")
 	}
 	varidiag <- hom(data)[,"Var"]
 	ibs.C.option <- 0
@@ -49,16 +66,19 @@
 	gc()
 	idset1.num <- which(data at idnames %in% idset1)
 	idset2.num <- which(data at idnames %in% idset2)
-	wargs <- c("no","freq")
-	if (!(match(weight,wargs,nomatch=0)>0)) {
-		out <- paste("weight argument should be one of",wargs,"\n")
-		stop(out)
-	}
-#	if (npairs > 2500000) stop("Too many pairs to evaluate... Stopped")
-	if (weight == "no") option = 0
-	if (weight == "freq") option = 1
+	
 	if (ibs.C.option==1) {
-		sout <- .C("ibspar", as.raw(data at gtps), as.integer(data at nids), as.integer(data at nsnps), as.integer(length(idset1.num)), as.integer(idset1.num-1), as.integer(length(idset2.num)), as.integer(idset2.num-1), as.double(snpfreq), as.integer(option), sout = double(2*length(idset1.num)*length(idset2.num)), PACKAGE="GenABEL")$sout
+		if (option<=1) {
+			sout <- .C("ibspar", as.raw(data at gtps), as.integer(data at nids), as.integer(data at nsnps), 
+					as.integer(length(idset1.num)), as.integer(idset1.num-1), 
+					as.integer(length(idset2.num)), as.integer(idset2.num-1), as.double(snpfreq), 
+					as.integer(option), sout = double(2*length(idset1.num)*length(idset2.num)), 
+					PACKAGE="GenABEL")$sout
+		} else if (option==2) {
+			stop("option 2 only availabel in optimized version")
+		} else {
+			stop("unknown 'weight' option")
+		}
 		out <- list()
 		out$ibs <- sout[1:(length(idset1.num)*length(idset2.num))]
 		out$num <- sout[(length(idset1.num)*length(idset2.num)+1):(length(idset1.num)*length(idset2.num)*2)]
@@ -69,7 +89,8 @@
 		rownames(out$num) <- idset1
 		colnames(out$num) <- idset2
 	} else if (ibs.C.option==0) {
-		out <- .C("ibsnew", as.raw(data at gtps), as.integer(data at nids), as.integer(data at nsnps), as.double(snpfreq), as.integer(option), sout = double(data at nids*data at nids), PACKAGE="GenABEL")$sout
+		out <- .C("ibsnew", as.raw(data at gtps), as.integer(data at nids), as.integer(data at nsnps), 
+				as.double(snpfreq), as.integer(option), sout = double(data at nids*data at nids), PACKAGE="GenABEL")$sout
 		dim(out) <- c(length(idset2.num),length(idset1.num))
 		out <- t(out)
 		diag(out) <- homodiag

Modified: pkg/GenABEL/R/polygenic_hglm.R
===================================================================
--- pkg/GenABEL/R/polygenic_hglm.R	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/R/polygenic_hglm.R	2011-12-05 12:02:08 UTC (rev 810)
@@ -127,9 +127,17 @@
 	relmat <- relmat*2.0
 	s <- svd(relmat)
 	L <- s$u %*% diag(sqrt(s$d))
+	#res_hglm <- try( hglm(y = y, X = desmat, Z = L, family = family, conv = conv, maxit = maxit, ... ) )
 	res_hglm <- hglm(y = y, X = desmat, Z = L, family = family, conv = conv, maxit = maxit, ... )
 	#sum_res_hglm <- summary(res_hglm)
 	
+	#if (is(try(tmp <- res_hglm$fixef),"try-error")) {
+	#	warning("HGLM failed")
+	#	return(res_hglm)
+	#}
+	#if (length(names(res_hglm$fixef))<1) 
+	#	names(res_hglm$fixef) <- paste("fe",1:length(res_hglm$fixef),sep="")
+	
 	out <- list()
 	
 	out$measuredIDs <- mids
@@ -140,7 +148,12 @@
 	out$h2an$estimate <- c(res_hglm$fixef,out$esth2, tVar)
 	names(out$h2an$estimate)[(length(out$h2an$estimate) - 1):(length(out$h2an$estimate))] <- 
 			c("h2","totalVar")
-	out$h2an$se <- c(res_hglm$SeFe, NA, NA)
+	if (is.null(res_hglm$SeFe)) {
+		warning("Convergence failure HGLM!")
+		out$h2an$se <- rep(NA,length(res_hglm$fixef)+2)
+	} else {
+		out$h2an$se <- c(res_hglm$SeFe, NA, NA)
+	}
 	names(out$h2an$se) <- 
 			c(names(res_hglm$fixef), "h2","totalVar")
 	out$pgresidualY <- rep(NA, length(mids))

Modified: pkg/GenABEL/R/qtscore.R
===================================================================
--- pkg/GenABEL/R/qtscore.R	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/R/qtscore.R	2011-12-05 12:02:08 UTC (rev 810)
@@ -147,7 +147,7 @@
 		stop("formula argument should be a formula or a numeric vector")
 	}
 	if (!missing(data)) detach(data at phdata)
-	if (length(strata)!=data at gtdata@nids) stop("Strata variable and the data do not match in length")
+	if (length(strata)!=nids(data)) stop("Strata variable and the data do not match in length")
 	if (any(is.na(strata))) stop("Strata variable contains NAs")
 	if (any(strata!=0)) {
 		olev <- levels(as.factor(strata))
@@ -169,17 +169,58 @@
 	
 #	if (trait.type=="binomial" & !is(formula,"formula")) bin<-1 else bin <- 0
 	if (trait.type=="binomial") bin<-1 else bin<-0
-	lenn <- data at gtdata@nsnps;
+	lenn <- nsnps(data);
 	###out <- list()
 	if (times>1) {pb <- txtProgressBar(style = 3)}
 	for (j in c(1:(times+1*(times>1)))) {
 		if (j>1) resid <- sample(resid,replace=FALSE)
-		chi2 <- .C("qtscore_glob",as.raw(data at gtdata@gtps),as.double(resid),as.integer(bin),as.integer(data at gtdata@nids),as.integer(data at gtdata@nsnps), as.integer(nstra), as.integer(strata), chi2 = double(10*data at gtdata@nsnps), PACKAGE="GenABEL")$chi2
-		if (any(data at gtdata@chromosome=="X")) {
-			ogX <- data at gtdata[,data at gtdata@chromosome=="X"]
-			sxstra <- strata; sxstra[ogX at male==1] <- strata[ogX at male==1]+nstra
-			chi2X <- .C("qtscore_glob",as.raw(ogX at gtps),as.double(resid),as.integer(bin),as.integer(ogX at nids),as.integer(ogX at nsnps), as.integer(nstra*2), as.integer(sxstra), chi2 = double(10*ogX at nsnps), PACKAGE="GenABEL")$chi2
-			revec <- (data at gtdata@chromosome=="X")
+#		if (old) {
+			chi2 <- .C("qtscore_glob",as.raw(data at gtdata@gtps),as.double(resid),as.integer(bin),
+					as.integer(data at gtdata@nids),as.integer(data at gtdata@nsnps), as.integer(nstra), 
+					as.integer(strata), chi2 = double(10*data at gtdata@nsnps), PACKAGE="GenABEL")$chi2
+#		} else {
+#			gtNrow <- dim(data)[1]
+#			gtNcol <- dim(data)[2]
+#			chi2 <- .Call("iteratorGA", 
+#					data at gtdata@gtps, 
+#					as.integer(gtNrow), as.integer(gtNcol),
+#					as.character("qtscore_glob"),   # Function
+#					as.character("R"),  			# Output type
+#					as.integer(2),      			# MAR
+#					as.integer(1),					# Steps
+#					as.integer(5),      			# nr of extra arguments
+#					as.double(resid),
+#					as.integer(bin),
+#					as.integer(data at gtdata@nids),
+#					as.integer(nstra), 
+#					as.integer(strata), 
+#					package="GenABEL")
+#		}
+		if (any(chromosome(data)=="X")) {
+			ogX <- gtdata(data[,chromosome(data)=="X"])
+			sxstra <- strata; sxstra[male(ogX)==1] <- strata[male(ogX)==1]+nstra
+#			if (old) {
+				chi2X <- .C("qtscore_glob",as.raw(ogX at gtps),as.double(resid),as.integer(bin),
+						as.integer(nids(ogX)),as.integer(nsnps(ogX)), as.integer(nstra*2), 
+						as.integer(sxstra), chi2 = double(10*nsnps(ogX)), PACKAGE="GenABEL")$chi2
+#			} else {
+#				chi2X <- .Call("iteratorGA", 
+#						ogX at gtps, 
+#						as.integer(ogX at nsnps), 	# nCol
+#						as.integer(ogX at nids), 	# nRow
+#						as.character("qtscore_glob"), # Function
+#						as.character("R"),  	# Output type
+#						as.integer(1),      	# MAR
+#						as.integer(1),			# Steps
+#						as.integer(5),      	# nr of arguments
+#						as.double(resid),
+#						as.integer(bin),
+#						as.integer(nids),
+#						as.integer(nstra*2), 
+#						as.integer(sxstra), 
+#						package="GenABEL")
+#			}
+			revec <- (chromosome(data)=="X")
 			revec <- rep(revec,6)
 			chi2 <- replace(chi2,revec,chi2X)
 			rm(ogX,chi2X,revec);gc(verbose=FALSE)

Added: pkg/GenABEL/R/zUtil.R
===================================================================
--- pkg/GenABEL/R/zUtil.R	                        (rev 0)
+++ pkg/GenABEL/R/zUtil.R	2011-12-05 12:02:08 UTC (rev 810)
@@ -0,0 +1,10 @@
+# utility functions invisible to user
+lossFunctionLambdaKS <- function(lambda,chi2values, ... ) {
+	ksT <- ks.test(chi2values/lambda, ... )$stat
+	return( ksT )
+}
+estLambdaKS <- function(chi2values,limits=c(0.5,100)) {
+	iniLambda <- 1
+	optRes <- optimize(lossFunctionLambdaKS, interval=limits, chi2values=chi2values, "pchisq", 1)
+	return(optRes$minimum)
+}


Property changes on: pkg/GenABEL/R/zUtil.R
___________________________________________________________________
Added: svn:mime-type
   + text/plain

Modified: pkg/GenABEL/R/zzz.R
===================================================================
--- pkg/GenABEL/R/zzz.R	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/R/zzz.R	2011-12-05 12:02:08 UTC (rev 810)
@@ -1,6 +1,6 @@
 .onLoad <- function(lib, pkg) {
 	GenABEL.version <- "1.7-0"
-	cat("GenABEL v.",GenABEL.version,"(September 16, 2011) loaded\n")
+	cat("GenABEL v.",GenABEL.version,"(December 05, 2011) loaded\n")
 	
 	# check for updates and news
 	address <- c(

Modified: pkg/GenABEL/generate_documentation.R
===================================================================
--- pkg/GenABEL/generate_documentation.R	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/generate_documentation.R	2011-12-05 12:02:08 UTC (rev 810)
@@ -4,6 +4,7 @@
 		"arrange_probabel_phe.R",
 		"blurGenotype.R",
 		"del.phdata.R",
+		"estlambda.R",
 		"export.plink.R",
 		"extract.annotation.impute.R",
 		"extract.annotation.mach.R",

Modified: pkg/GenABEL/inst/unitTests/runit.exports.R
===================================================================
--- pkg/GenABEL/inst/unitTests/runit.exports.R	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/inst/unitTests/runit.exports.R	2011-12-05 12:02:08 UTC (rev 810)
@@ -25,7 +25,8 @@
 	data(ge03d2.clean)
 	nTestIds <- sample(c(10:min(100,nids(ge03d2.clean))),1)
 	nTestSnps <- sample(c(10:min(1000,nsnps(ge03d2.clean))),1)
-	dta <- ge03d2.clean[sample(1:nids(ge03d2.clean),nTestIds),sample(1:nsnps(ge03d2.clean),nTestSnps)]
+	dta <- ge03d2.clean[sort(sample(1:nids(ge03d2.clean),nTestIds)),
+			sort(sample(1:nsnps(ge03d2.clean),nTestSnps))]
 
 	export.plink(dta,filebasename="tmpOld",dpieceFun="old")
 	export.plink(dta,filebasename="tmpNew",dpieceFun="new")
@@ -60,6 +61,22 @@
 	checkIdentical(xN,xO)
 	
 	unlink("tmpOld*")
-	unlink("tmpNew*")
-
+	unlink("tmpNew*")	
+	
+	export.plink(dta,filebasename="tmpTrans",transpose=TRUE)
+	convert.snp.tped(tpedfile="tmpTrans.tped",tfamfile="tmpTrans.tfam",out="tmpTrans.raw")
+	xBack <- load.gwaa.data(gen="tmpTrans.raw",phe="tmpTrans.phe",id="IID")
+	strand(xBack) <- strand(dta)
+	phdata(xBack) <- phdata(xBack)[,-1]
+	sameCode <- which(coding(dta) == coding(xBack))
+	checkIdentical(xBack[,sameCode],dta[,sameCode])
+	swappedCode <- which(refallele(dta)==effallele(xBack) & effallele(dta)==refallele(xBack))
+	for (i in swappedCode) {
+		gtDta <- c(2,1,0)[as.vector(as.numeric(dta[,i]))+1]
+		gtBack <- as.vector(as.numeric(xBack[,i]))
+		print(checkIdentical(gtBack,gtDta))
+	}
+	
+	unlink("tmpTrans*")
+	
 }

Added: pkg/GenABEL/inst/unitTests/runit.merge.R
===================================================================
--- pkg/GenABEL/inst/unitTests/runit.merge.R	                        (rev 0)
+++ pkg/GenABEL/inst/unitTests/runit.merge.R	2011-12-05 12:02:08 UTC (rev 810)
@@ -0,0 +1,31 @@
+### --- Test setup ---
+#
+# regression test
+#
+
+if(FALSE) {
+	## Not really needed, but can be handy when writing tests
+	library(RUnit)
+	library(GenABEL)
+}
+
+### do not run
+#stop("SKIP THIS TEST")
+###
+
+### ---- common functions and data -----
+
+#source(paste("../inst/unitTests/shared_functions.R"))
+#source(paste(path,"/shared_functions.R",sep=""))
+
+### --- Test functions ---
+
+test.merge.bug1641 <- function()
+{
+	library(GenABEL)
+	print(  packageVersion("GenABEL") )
+	data(srdta)
+	x1 <- srdta[1:4,1]
+	x2 <- srdta[5:10, 2]
+	xy <- merge(x1, x2, intersected_snps_only=FALSE)
+}
\ No newline at end of file


Property changes on: pkg/GenABEL/inst/unitTests/runit.merge.R
___________________________________________________________________
Added: svn:mime-type
   + text/plain

Added: pkg/GenABEL/inst/unitTests/runit.qtscore.R
===================================================================
--- pkg/GenABEL/inst/unitTests/runit.qtscore.R	                        (rev 0)
+++ pkg/GenABEL/inst/unitTests/runit.qtscore.R	2011-12-05 12:02:08 UTC (rev 810)
@@ -0,0 +1,4 @@
+#library(GenABEL)
+#data(ge03d2.clean)
+#qtsOld <- qtscore(height~sex,data=ge03d2.clean,old=TRUE)
+#qtsNew <- qtscore(height~sex,data=ge03d2.clean,old=FALSE)


Property changes on: pkg/GenABEL/inst/unitTests/runit.qtscore.R
___________________________________________________________________
Added: svn:mime-type
   + text/plain

Modified: pkg/GenABEL/man/estlambda.Rd
===================================================================
--- pkg/GenABEL/man/estlambda.Rd	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/man/estlambda.Rd	2011-12-05 12:02:08 UTC (rev 810)
@@ -1,47 +1,36 @@
 \name{estlambda}
 \alias{estlambda}
-\title{Estimate the inflation factor for a distribution of P-values}
-\description{
-Estimate the inflation factor for a distribution of P-values or 1df chi-square test.
-The major use of this procedure is the Genomic Control, but can also be used to 
-visualise the distribution of P-values coming from other tests.
-}
-\usage{
-estlambda(data, plot = FALSE, proportion = 1.0, method="regression", filter=TRUE, ... )
-}
-\arguments{
-  \item{data}{A vector of reals. If all are <=1, it is assumed that this 
-		is a vector of P-values, else it is treated as a vector 
-		of chi-squares with 1 d.f.}
-  \item{plot}{Wether the prot should be presented}
-  \item{proportion}{The proportion of lowest P (Chi2) to be used when 
-		estimating the inflation factor Lambda}
-	\item{method}{Either "regression" or "median", the method 
-		to be used for Lambda estimation}
-	\item{filter}{if the test statistics with 0-value of chi-square 
-		should be excluded prior to estimation of Lambda}
- \item{...}{arguments passed to \code{\link{plot}} function}
-}
-%\details{
-%}
-\value{
-	A list with elements
-	\item{estimate}{Estimate of Lambda}
-	\item{se}{Standard error of the estimate}
-}
-%\references{}
+\title{Estimate the inflation factor for a distribution of P-values...}
+\usage{estlambda(data, plot=FALSE, proportion=1, method="regression",
+    filter=TRUE, ...)}
+\description{Estimate the inflation factor for a distribution of P-values}
+\details{Estimate the inflation factor for a distribution of P-values or 1df
+chi-square test. The major use of this procedure is the Genomic Control, but
+can also be used to visualise the distribution of P-values coming from other
+tests. Methods implemented include 'median' (median(chi2)/0.455...), 
+regression (of observed onto expected) and 'KS' (optimizing the 
+chi2.1df distribution fit by use of Kolmogorov-Smirnov test)}
+\value{A list with elements \item{estimate}{Estimate of Lambda}
+\item{se}{Standard error of the estimate}}
 \author{Yurii Aulchenko}
-%\note{
-%}
-\seealso{
-\code{\link{ccfast}},
-\code{\link{qtscore}}
-}
-\examples{
-data(srdta)
-pex <- summary(srdta at gtdata)[,"Pexact"]
-estlambda(pex)
-a <- ccfast("bt",srdta)
-lambda(a)
-}
-\keyword{htest}% at least one, from doc/KEYWORDS
+\seealso{\code{\link{ccfast}}, \code{\link{qtscore}}}
+\keyword{htest}
+\arguments{\item{data}{A vector of reals. If all are <=1, it is assumed that this is a
+vector of P-values, else it is treated as a vector of chi-squares with 1
+d.f.}
+\item{plot}{Wether the prot should be presented}
+\item{proportion}{The proportion of lowest P (Chi2) to be used when
+estimating the inflation factor Lambda}
+\item{method}{"regression", "median", or "KS" method to be used for
+Lambda estimation}
+\item{filter}{if the test statistics with 0-value of chi-square should be
+excluded prior to estimation of Lambda}
+\item{...}{arguments passed to \code{\link{plot}} function}}
+\examples{data(srdta)
+pex <- summary(gtdata(srdta))[,"Pexact"]
+estlambda(pex, plot=TRUE)
+estlambda(pex, method="regression", proportion = 0.95)
+estlambda(pex, method="median")
+estlambda(pex, method="KS")
+a <- qtscore(bt,srdta)
+lambda(a)}

Modified: pkg/GenABEL/man/export.plink.Rd
===================================================================
--- pkg/GenABEL/man/export.plink.Rd	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/man/export.plink.Rd	2011-12-05 12:02:08 UTC (rev 810)
@@ -1,7 +1,8 @@
 \name{export.plink}
 \alias{export.plink}
 \title{Export GenABEL data in PLINK format...}
-\usage{export.plink(data, filebasename, phenotypes="all", ...)}
+\usage{export.plink(data, filebasename="plink", phenotypes="all",
+    transpose=FALSE, export012na=FALSE, ...)}
 \description{Export GenABEL data in PLINK format}
 \details{Export GenABEL data in PLINK format. This function is 
 a simple wrapper to \code{\link{export.merlin}} function 

Modified: pkg/GenABEL/man/load.gwaa.data.Rd
===================================================================
--- pkg/GenABEL/man/load.gwaa.data.Rd	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/man/load.gwaa.data.Rd	2011-12-05 12:02:08 UTC (rev 810)
@@ -9,7 +9,7 @@
 		force = TRUE, makemap = FALSE, sort = TRUE, id = "id")
 }
 \arguments{
-  \item{phenofile}{data table with pehnotypes}
+  \item{phenofile}{data table with phenotypes}
   \item{genofile}{internally formatted genotypic data file (see \code{\link{convert.snp.text}} to 
 			convert data)}
   \item{force}{Force loading the data if heterozygous X-chromosome genotypes are found in male}

Modified: pkg/GenABEL/man/polygenic.Rd
===================================================================
--- pkg/GenABEL/man/polygenic.Rd	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/man/polygenic.Rd	2011-12-05 12:02:08 UTC (rev 810)
@@ -125,7 +125,7 @@
 from previous regression, these are expected to change little from the 
 initial estimate. The default value of 1000 proved to work rather well under a 
 range of conditions.}
-\item{quiet}{If FALSE (default), details of optimisation process are reported.}
+\item{quiet}{If FALSE (default), details of optimisation process are reported}
 \item{steptol}{steptal parameter of "nlm"}
 \item{gradtol}{gradtol parameter of "nlm"}
 \item{optimbou}{fixed effects boundary scale parameter for 'optim'}

Modified: pkg/GenABEL/man/srdta.Rd
===================================================================
--- pkg/GenABEL/man/srdta.Rd	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/man/srdta.Rd	2011-12-05 12:02:08 UTC (rev 810)
@@ -11,7 +11,7 @@
 	analysis. Run demo(srdta) and check tut-srdta.pdf 
 	to see examples of work with this data set. 
 	Original data files used for this set are located at 
-	YOUR\_R\_LIB\_LOCATION/exdata/srphenos.dat (pehnotypes), 
+	YOUR\_R\_LIB\_LOCATION/exdata/srphenos.dat (phenotypes), 
 	srgenos.dat (human-readable genotypes) and srgenos.raw 
 	(genotypes in internal format)}
 \usage{data(srdta)}

Modified: pkg/GenABEL/src/GAlib/export_plink.cpp
===================================================================
--- pkg/GenABEL/src/GAlib/export_plink.cpp	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/src/GAlib/export_plink.cpp	2011-12-05 12:02:08 UTC (rev 810)
@@ -107,7 +107,99 @@
 	return R_NilValue;
 }
 
+SEXP export_plink_tped(SEXP Snpnames, SEXP Chromosomes, SEXP Map,
+		SEXP Snpdata, SEXP Nsnps, SEXP Nids, SEXP Coding, SEXP Pedfilename,
+		SEXP ExportNumeric)
+{
+	std::vector<std::string> snpName;
+	for(unsigned int i=0;i<((unsigned int) length(Snpnames));i++)
+		snpName.push_back(CHAR(STRING_ELT(Snpnames,i)));
 
+	std::vector<std::string> coding;
+	for(unsigned int i=0;i<((unsigned int) length(Coding));i++)
+		coding.push_back(CHAR(STRING_ELT(Coding,i)));
+
+	std::vector<std::string> chromosome;
+	for(unsigned int i=0;i<((unsigned int) length(Chromosomes));i++)
+		chromosome.push_back(CHAR(STRING_ELT(Chromosomes,i)));
+
+	std::vector<double> position;
+	for(unsigned int i=0;i<((unsigned int) length(Map));i++)
+		position.push_back(REAL(Map)[i]);
+
+	//Rprintf("0\n");
+	unsigned int nsnps = INTEGER(Nsnps)[0];
+	int nids = INTEGER(Nids)[0];
+	bool exportNumeric = LOGICAL(ExportNumeric)[0];
+	std::string filename = CHAR(STRING_ELT(Pedfilename,0));
+	std::ofstream fileWoA;
+	int gtint[nids];
+	int ieq1 = 1;
+	char * snpdata = (char *) RAW(Snpdata);
+	//Rprintf("nsnps=%d\n",nsnps);
+	//Rprintf("nids=%d\n",nids);
+
+	//Rprintf("1\n");
+	std::string* Genotype;
+	std::string sep=" ";
+	int nbytes;
+
+	//Rprintf("nsnps=%d\n",nsnps);
+	//Rprintf("nids=%d\n",nids);
+
+	if ((nids % 4) == 0) nbytes = nids/4; else nbytes = ceil(1.*nids/4.);
+
+	fileWoA.open(filename.c_str(),std::fstream::trunc);
+
+	//Rprintf("A\n");
+	for (unsigned int csnp=0;csnp<nsnps;csnp++) {
+		// collect SNP data
+		get_snps_many(snpdata+nbytes*csnp, &nids, &ieq1, gtint);
+		Genotype = getGenotype(coding[csnp],sep);
+		fileWoA << chromosome[csnp] << " " << snpName[csnp] << " 0 " << (unsigned long int) position[csnp];
+		if (!exportNumeric) {
+			for (int i=0;i<nids;i++) {
+				fileWoA << " " << Genotype[gtint[i]];
+			}
+		} else {
+			for (int i=0;i<nids;i++) {
+				if (gtint[i]==0)
+					fileWoA << " NA";
+				else
+					fileWoA << " " << (gtint[i]-1);
+			}
+		}
+		fileWoA << "\n";
+		//Rprintf("\n");
+	}
+	//Rprintf("B\n");
+	/**
+	for (int i=0;i<nids;i++) {
+		fileWoA << i+from << " " << ids[i] << " 0 0 " << sex[i];
+		for (int j=0;j<ntraits;j++) fileWoA << " " << 0;
+		// unwrap genotypes
+		for (unsigned int csnp=0;csnp<nsnps;csnp++) {
+			Genotype = getGenotype(coding[csnp],sep);
+			// figure out the coding
+			fileWoA << " " << Genotype[gtMatrix[i][csnp]];
+			//fileWoA << " x" << Letter0 << Letter1 << Genotype[0] << Genotype[1] << Genotype[2] << Genotype[3];
+		}
+		// end unwrap
+		fileWoA << "\n";
+	}
+	 **/
+	//Rprintf("C\n");
+	fileWoA.close();
+
+	//Rprintf("oooo!" );
+	//for (int i=0;i<10;i++) Rprintf("%d ",sex[i]);
+	//Rprintf("oooo!\n" );
+
+	delete [] Genotype;
+
+	return R_NilValue;
+}
+
 #ifdef __cplusplus
 }
 #endif

Modified: pkg/GenABEL/src/GAlib/export_plink.h
===================================================================
--- pkg/GenABEL/src/GAlib/export_plink.h	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/src/GAlib/export_plink.h	2011-12-05 12:02:08 UTC (rev 810)
@@ -20,6 +20,10 @@
 SEXP export_plink(SEXP idnames, SEXP snpdata, SEXP Nsnps, SEXP NidsTotal, SEXP Coding, SEXP from, SEXP to,
 		SEXP male, SEXP traits, SEXP pedfilename, SEXP plink, SEXP append);
 
+SEXP export_plink_tped(SEXP snpnames, SEXP chromosomes, SEXP map,
+		SEXP snpdata, SEXP Nsnps, SEXP Nids, SEXP Coding, SEXP pedfilename,
+		SEXP exportNumeric);
+
 #ifdef __cplusplus
 }
 #endif

Modified: pkg/GenABEL/src/GAlib/gwaa.c
===================================================================
--- pkg/GenABEL/src/GAlib/gwaa.c	2011-12-05 11:29:59 UTC (rev 809)
+++ pkg/GenABEL/src/GAlib/gwaa.c	2011-12-05 12:02:08 UTC (rev 810)
@@ -912,6 +912,8 @@
 	double Ttotg,dgt,totg[nstra],eG[nstra],ePH[nstra],svec[nids],gtctr[nids],phctr[nids];
 	double Tsg, sg[nstra],sph[nstra];
 	double u, v;
+	double temp1,temp2;
+
 	if ((nids % 4) == 0) nbytes = nids/4; else nbytes = ceil(1.*nids/4.);
 
 	for (igt=0;igt<nsnps;igt++) {
@@ -947,6 +949,7 @@
 				phctr[i] = pheno[i] - ePH[cstr];
 			}
 		}
+
 		/**
 		for (i=0;i<nids;i++) {
 			svec[i] = 0.;
@@ -958,7 +961,6 @@
 		**/
 		for (i=0;i<nids;i++) svec[i] = 0.;
 
-		double temp1,temp2;
 		for (i=0;i<nids;i++) {
 			int offS = nids*i;
 			temp1 = gtctr[i];
@@ -1499,7 +1501,7 @@
 			}
 		}
 		noninf=0;
-		if (option > 0) {
+		if (option == 1) {
 			//			count[0]=count[1]=count[2]=count[3]=sumgt=0;
 			//			for (i=0;i<nids;i++) count[gt[i]]++;
 			//			sumgt = count[1]+count[2]+count[3];