[Genabel-commits] r784 - in pkg/GenABEL: . R inst/unitTests man src/GAlib

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Sep 16 13:30:28 CEST 2011


Author: yurii
Date: 2011-09-16 13:30:28 +0200 (Fri, 16 Sep 2011)
New Revision: 784

Added:
   pkg/GenABEL/inst/unitTests/runit.exports.R
   pkg/GenABEL/inst/unitTests/runit.mmscore.R
   pkg/GenABEL/src/GAlib/export_plink.cpp
   pkg/GenABEL/src/GAlib/export_plink.h
Modified:
   pkg/GenABEL/CHANGES.LOG
   pkg/GenABEL/DESCRIPTION
   pkg/GenABEL/R/export.merlin.R
   pkg/GenABEL/R/mmscore.R
   pkg/GenABEL/R/zzz.R
   pkg/GenABEL/man/export.merlin.Rd
   pkg/GenABEL/man/mmscore.Rd
   pkg/GenABEL/src/GAlib/gwaa.c
Log:
speeding-up 'mmscore' by x1.8 through more efficient vector-matrix 
product computations 

export.merlin, export.plink now runs much faster (exports on C++ code)

Modified: pkg/GenABEL/CHANGES.LOG
===================================================================
--- pkg/GenABEL/CHANGES.LOG	2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/CHANGES.LOG	2011-09-16 11:30:28 UTC (rev 784)
@@ -1,5 +1,10 @@
-*** v. 1.7-0 (2011.09.10)
+*** v. 1.7-0 (2011.09.16)
 
+speeding-up 'mmscore' by x1.8 through more efficient vector-matrix 
+product computations 
+
+export.merlin, export.plink now runs much faster (exports on C++ code)
+
 Deal with exceptional situation in merge.snp.data / monomorphic part; 
 added case of no overlap in SNPs (skip monomorphic staff then) 
 

Modified: pkg/GenABEL/DESCRIPTION
===================================================================
--- pkg/GenABEL/DESCRIPTION	2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/DESCRIPTION	2011-09-16 11:30:28 UTC (rev 784)
@@ -2,7 +2,7 @@
 Type: Package
 Title: genome-wide SNP association analysis
 Version: 1.7-0
-Date: 2011-09-10
+Date: 2011-09-16
 Author: GenABEL developers
 Maintainer: GenABEL developers <genabel.project at gmail.com>
 Depends: R (>= 2.10.0), methods, MASS

Modified: pkg/GenABEL/R/export.merlin.R
===================================================================
--- pkg/GenABEL/R/export.merlin.R	2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/R/export.merlin.R	2011-09-16 11:30:28 UTC (rev 784)
@@ -1,8 +1,18 @@
 "export.merlin" <- 
-function(data,pedfile="merlin.ped",datafile="merlin.dat",
-		mapfile="merlin.map",format="merlin",fixstrand="no",
-		extendedmap=TRUE,traits=1, order = TRUE, stepids = 100) {
+		function(data,pedfile="merlin.ped",datafile="merlin.dat",
+				mapfile="merlin.map",format="merlin",fixstrand="no",
+				extendedmap=TRUE,traits=1, order = TRUE, stepids = 100, dpieceFun="new") {
 	if (!is(data,"gwaa.data")) stop("Data argument should be of gwaa.data-class")
+	dpieceFunOptions <- c("old","new")
+	dpieceFunInt <- match(dpieceFun,dpieceFunOptions,nomatch=0)
+	if (dpieceFunInt<=0) {
+		out <- paste("dpieceFun argument should be one of",dpieceFunOptions,"\n")
+		stop(out)
+	} else {
+		if (dpieceFunInt==1) dump.piece=dump.piece
+		else if (dpieceFunInt==2) dump.piece=dump.piece.New
+		else stop("weird staff!")
+	}
 	formats <- c("merlin","plink")
 	if (!(match(format,formats,nomatch=0)>0)) {
 		out <- paste("format argument should be one of",formats,"\n")
@@ -15,27 +25,27 @@
 	}
 	if (order) {
 		ord <- sortmap.internal(chromosome(data),map(data))
-		data <- data[,ord$ix]
+		if (any(ord$ix != c(1:length(ord$ix)))) {data <- data[,ord$ix]; gc()}
 	}
 	if (fixstrand != "no") {
-###### as.raw(1) == "+"
-###### as.raw(2) == "-"
+		###### as.raw(1) == "+"
+		###### as.raw(2) == "-"
 		if (fixstrand == "-") {tos <- as.raw(1);fos<-as.raw(2);} 
-			else if (fixstrand == "+") {tos <- as.raw(2);fos <- as.raw(1)}
-			else {stop("strange strand...")}
+		else if (fixstrand == "+") {tos <- as.raw(2);fos <- as.raw(1)}
+		else {stop("strange strand...")}
 		stf <- which(as.raw(data at gtdata@strand) == tos) 
 		if (length(stf)>0) {
-		  revs <- alleleID.revstrand()
-  		  savnam <- names(data at gtdata@strand)
+			revs <- alleleID.revstrand()
+			savnam <- names(data at gtdata@strand)
 #
 #		data at gtdata@coding[stf] <- new("snp.coding",as.raw(revs[as.character(data at gtdata@coding[stf])]))
 #
 #		ugly fix -- 
 #
-		  data at gtdata@coding[stf] <- new("snp.coding",as.raw(revs[as.character(as.raw(data at gtdata@coding[stf]))]))
-		  names(data at gtdata@coding) <- savnam
-		  data at gtdata@strand[stf] <- new("snp.strand",rep(fos,length(stf)))
-		  names(data at gtdata@strand) <- savnam
+			data at gtdata@coding[stf] <- new("snp.coding",as.raw(revs[as.character(as.raw(data at gtdata@coding[stf]))]))
+			names(data at gtdata@coding) <- savnam
+			data at gtdata@strand[stf] <- new("snp.strand",rep(fos,length(stf)))
+			names(data at gtdata@strand) <- savnam
 		}
 	}
 	bstp <- stepids #100
@@ -43,10 +53,12 @@
 		steps <- seq(from=0,to=data at gtdata@nids,by=bstp)
 		if (data at gtdata@nids != steps[length(steps)]) steps[length(steps)+1] <- data at gtdata@nids
 		dump.piece(data=data,from=1,to=steps[2],traits=traits,
-				pedfile=pedfile,append=F,format=format)
+				pedfile=pedfile,append=FALSE,format=format)
+		cat("dumped ids",1,"to",steps[2],"\n")
 		for (jjj in c(2:(length(steps)-1))) {
 			dump.piece(data=data,from=(steps[jjj]+1),to=(steps[jjj+1]),traits=traits,
-					pedfile=pedfile,append=T,format=format)
+					pedfile=pedfile,append=TRUE,format=format)
+			cat("dumped ids",steps[jjj]+1,"to",steps[jjj+1],"\n")
 		}
 	} else {
 		dump.piece(data=data,from=1,to=data at gtdata@nids,traits=traits,
@@ -98,3 +110,18 @@
 	}
 	write.table(x,file=pedfile,col.n=FALSE,row.n=FALSE,quote=FALSE,append=append,sep=" ")
 }
+
+dump.piece.New <- function(data,fromid,toid,traits,pedfile,append,format="merlin") {
+	if (toid < fromid) stop("toid<fromid")
+	# ugly line -- should be in the C++ code
+	if (!append) unlink(pedfile);
+	if (format=="plink") {plink <- TRUE;} else {plink <- FALSE;}
+	#data <- data[c(fromid:toid),]
+	result <- .Call("export_plink",as.character(idnames(data)[c(fromid:toid)]),
+			as.raw(gtdata(data)@gtps), as.integer(nsnps(data)), as.integer(nids(data)),
+			as.character(coding(data)), as.integer(fromid), as.integer(toid),
+			as.integer(male(data)), as.integer(traits), as.character(pedfile), 
+			as.logical(plink), as.logical(append))
+	#
+	return(1)
+}

Modified: pkg/GenABEL/R/mmscore.R
===================================================================
--- pkg/GenABEL/R/mmscore.R	2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/R/mmscore.R	2011-09-16 11:30:28 UTC (rev 784)
@@ -1,22 +1,30 @@
 "mmscore" <-
-function(h2object,data,snpsubset,idsubset,strata,times=1,quiet=FALSE,
-		bcast=10,clambda=TRUE,propPs=1.0) {
-  	if (is(data,"gwaa.data")) 
+		function(h2object,data,snpsubset,idsubset,strata,times=1,quiet=FALSE,
+				bcast=10,clambda=TRUE,propPs=1.0,cppFun="new") {
+	if (is(data,"gwaa.data")) 
 	{
 		checkphengen(data)
 		data <- data at gtdata
 	}
-  	if (!is(data,"snp.data")) {
+	if (!is(data,"snp.data")) {
 		stop("wrong data class: should be gwaa.data or snp.data")
-  	}
+	}
 	if (class(h2object) != "polygenic") stop("h2object must be of polygenic-class")
 	if (!missing(snpsubset)) data <- data[,snpsubset]
 	if (!missing(idsubset)) data <- data[idsubset,]
 	if (missing(strata)) {nstra=1; strata <- rep(0,data at nids)}
-
+	
 	if (length(strata)!=data at nids) stop("Strata variable and the data do not match in length")
 	if (any(is.na(strata))) stop("Strata variable contains NAs")
-
+	
+	cppFunOptions <- c("old","new")
+	fMatch <- match(cppFun,cppFunOptions,nomatch=0)
+	if (fMatch==0) {
+		stop(paste("cppFun must be one of",cppFunOptions))
+	} else {
+		calledFun = c("mmscore_20090127","mmscore_20110916")[fMatch]
+	}
+	
 	tmeas <- h2object$measuredIDs
 	resid <- h2object$residualY
 	if (any(tmeas == FALSE)) {
@@ -35,20 +43,20 @@
 		rm(tstr)
 	}
 	nstra <- length(levels(as.factor(strata)))
-
+	
 	lenn <- data at nsnps;
 	out <- list()
 	for (j in c(1:(times+1*(times>1)))) {
 		if (j>1) resid <- sample(resid,replace=FALSE)
-		chi2 <- .C("mmscore_20090127",as.raw(data at gtps),as.double(resid),as.double(h2object$InvSigma),as.integer(data at nids),as.integer(data at nsnps), as.integer(nstra), as.integer(strata), chi2 = double(7*data at nsnps), PACKAGE="GenABEL")$chi2
+		chi2 <- .C(calledFun,as.raw(data at gtps),as.double(resid),as.double(h2object$InvSigma),as.integer(data at nids),as.integer(data at nsnps), as.integer(nstra), as.integer(strata), chi2 = double(7*data at nsnps), PACKAGE="GenABEL")$chi2
 		if (any(data at chromosome=="X")) {
-		  ogX <- data[,data at chromosome=="X"]
-		  sxstra <- strata; sxstra[ogX at male==1] <- strata[ogX at male==1]+nstra
-		  chi2X <- .C("mmscore_20090127",as.raw(ogX at gtps),as.double(resid),as.double(h2object$InvSigma),as.integer(ogX at nids),as.integer(ogX at nsnps), as.integer(nstra*2), as.integer(sxstra), chi2 = double(7*ogX at nsnps), PACKAGE="GenABEL")$chi2
-		  revec <- (data at chromosome=="X")
-		  revec <- rep(revec,6)
-		  chi2 <- replace(chi2,revec,chi2X)
-		  rm(ogX,chi2X,revec);gc(verbose=FALSE)
+			ogX <- data[,data at chromosome=="X"]
+			sxstra <- strata; sxstra[ogX at male==1] <- strata[ogX at male==1]+nstra
+			chi2X <- .C(calledFun,as.raw(ogX at gtps),as.double(resid),as.double(h2object$InvSigma),as.integer(ogX at nids),as.integer(ogX at nsnps), as.integer(nstra*2), as.integer(sxstra), chi2 = double(7*ogX at nsnps), PACKAGE="GenABEL")$chi2
+			revec <- (data at chromosome=="X")
+			revec <- rep(revec,6)
+			chi2 <- replace(chi2,revec,chi2X)
+			rm(ogX,chi2X,revec);gc(verbose=FALSE)
 		}
 		if (j == 1) {
 			chi2.1df <- chi2[1:lenn];
@@ -105,7 +113,7 @@
 		}
 	}
 	if (times > bcast) cat("\n")
-
+	
 	if (times>1) {
 		P1df <- pr.1df/times
 		P1df <- replace(P1df,(P1df==0),1/(1+times))

Modified: pkg/GenABEL/R/zzz.R
===================================================================
--- pkg/GenABEL/R/zzz.R	2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/R/zzz.R	2011-09-16 11:30:28 UTC (rev 784)
@@ -1,6 +1,6 @@
 .onLoad <- function(lib, pkg) {
 	GenABEL.version <- "1.7-0"
-	cat("GenABEL v.",GenABEL.version,"(September 10, 2011) loaded\n")
+	cat("GenABEL v.",GenABEL.version,"(September 16, 2011) loaded\n")
 	
 	# check for updates and news
 	address <- c(

Added: pkg/GenABEL/inst/unitTests/runit.exports.R
===================================================================
--- pkg/GenABEL/inst/unitTests/runit.exports.R	                        (rev 0)
+++ pkg/GenABEL/inst/unitTests/runit.exports.R	2011-09-16 11:30:28 UTC (rev 784)
@@ -0,0 +1,65 @@
+### --- Test setup ---
+#
+# regression tests
+#
+
+if(FALSE) {
+	## Not really needed, but can be handy when writing tests
+	library(RUnit)
+	library(GenABEL)
+}
+
+### do not run
+#stop("SKIP THIS TEST")
+###
+
+### ---- common functions and data -----
+
+#source(paste("../inst/unitTests/shared_functions.R"))
+#source(paste(path,"/shared_functions.R",sep=""))
+
+### --- Test functions ---
+
+test.exports <- function()
+{
+	data(ge03d2.clean)
+	nTestIds <- sample(c(10:min(100,nids(ge03d2.clean))),1)
+	nTestSnps <- sample(c(10:min(1000,nsnps(ge03d2.clean))),1)
+	dta <- ge03d2.clean[sample(1:nids(ge03d2.clean),nTestIds),sample(1:nsnps(ge03d2.clean),nTestSnps)]
+
+	export.plink(dta,filebasename="tmpOld",dpieceFun="old")
+	export.plink(dta,filebasename="tmpNew",dpieceFun="new")
+	xO <- read.table(file="tmpOld.ped",head=FALSE,strings=FALSE)
+	xN <- read.table(file="tmpNew.ped",head=FALSE,strings=FALSE)
+	checkIdentical(xN,xO)
+	xO <- read.table(file="tmpOld.map",head=FALSE,strings=FALSE)
+	xN <- read.table(file="tmpNew.map",head=FALSE,strings=FALSE)
+	checkIdentical(xN,xO)
+	xO <- read.table(file="tmpOld.map.ext",head=FALSE,strings=FALSE)
+	xN <- read.table(file="tmpNew.map.ext",head=FALSE,strings=FALSE)
+	checkIdentical(xN,xO)
+	
+	unlink("tmpOld*")
+	unlink("tmpNew*")
+	
+	export.merlin(dta,pedfile="tmpOld.ped",datafile="tmpOld.dat",
+			mapfile="tmpOld.map",dpieceFun="old")
+	export.merlin(dta,pedfile="tmpNew.ped",datafile="tmpNew.dat",
+			mapfile="tmpNew.map",dpieceFun="new")
+	xO <- read.table(file="tmpOld.ped",head=FALSE,strings=FALSE)
+	xN <- read.table(file="tmpNew.ped",head=FALSE,strings=FALSE)
+	checkIdentical(xN,xO)
+	xO <- read.table(file="tmpOld.map",head=FALSE,strings=FALSE)
+	xN <- read.table(file="tmpNew.map",head=FALSE,strings=FALSE)
+	checkIdentical(xN,xO)
+	xO <- read.table(file="tmpOld.map.ext",head=FALSE,strings=FALSE)
+	xN <- read.table(file="tmpNew.map.ext",head=FALSE,strings=FALSE)
+	checkIdentical(xN,xO)
+	xO <- read.table(file="tmpOld.dat",head=FALSE,strings=FALSE)
+	xN <- read.table(file="tmpNew.dat",head=FALSE,strings=FALSE)
+	checkIdentical(xN,xO)
+	
+	unlink("tmpOld*")
+	unlink("tmpNew*")
+
+}


Property changes on: pkg/GenABEL/inst/unitTests/runit.exports.R
___________________________________________________________________
Added: svn:mime-type
   + text/plain

Added: pkg/GenABEL/inst/unitTests/runit.mmscore.R
===================================================================
--- pkg/GenABEL/inst/unitTests/runit.mmscore.R	                        (rev 0)
+++ pkg/GenABEL/inst/unitTests/runit.mmscore.R	2011-09-16 11:30:28 UTC (rev 784)
@@ -0,0 +1,32 @@
+### --- Test setup ---
+#
+# regression tests
+#
+
+if(FALSE) {
+	## Not really needed, but can be handy when writing tests
+	library(RUnit)
+	library(GenABEL)
+}
+
+### do not run
+#stop("SKIP THIS TEST")
+###
+
+### ---- common functions and data -----
+
+#source(paste("../inst/unitTests/shared_functions.R"))
+#source(paste(path,"/shared_functions.R",sep=""))
+
+### --- Test functions ---
+
+test.exports <- function()
+{
+	data(ge03d2.clean)
+	dta <- ge03d2.clean[1:200,autosomal(ge03d2.clean)[1:3000]]
+	gkin <- ibs(dta[,1:2000],w="freq")
+	h2 <- polygenic(height,data=dta,kin=gkin)
+	xOld <- mmscore(h2,dta,cppFun="old")
+	xNew <- mmscore(h2,dta,cppFun="new")
+	checkEquals(results(xNew),results(xOld))
+}


Property changes on: pkg/GenABEL/inst/unitTests/runit.mmscore.R
___________________________________________________________________
Added: svn:mime-type
   + text/plain

Modified: pkg/GenABEL/man/export.merlin.Rd
===================================================================
--- pkg/GenABEL/man/export.merlin.Rd	2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/man/export.merlin.Rd	2011-09-16 11:30:28 UTC (rev 784)
@@ -6,7 +6,8 @@
 }
 \usage{
 	export.merlin(data, pedfile = "merlin.ped", datafile = "merlin.dat", mapfile = "merlin.map", 
-	format = "merlin", fixstrand = "no", extendedmap = TRUE, traits = 1, order = TRUE, stepids = 100)
+	format = "merlin", fixstrand = "no", extendedmap = TRUE, traits = 1, order = TRUE, 
+	stepids = 100, dpieceFun = "new")
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
@@ -22,6 +23,7 @@
   \item{traits}{How many fake traits to insert before first column of marker data}
   \item{order}{Should output be ordered by chromosome and position}
   \item{stepids}{make this larger for faster processing and smaller for lower RAM use}
+  \item{dpieceFun}{function used to dump data. Do use default.}
 }
 \details{
 	The use is straightforward, with only the "fixstrand" option requiring some 

Modified: pkg/GenABEL/man/mmscore.Rd
===================================================================
--- pkg/GenABEL/man/mmscore.Rd	2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/man/mmscore.Rd	2011-09-16 11:30:28 UTC (rev 784)
@@ -6,7 +6,7 @@
 in samples of related individuals
 }
 \usage{
-mmscore(h2object,data,snpsubset,idsubset,strata,times=1,quiet=FALSE,bcast=10,clambda=TRUE,propPs=1.0)
+mmscore(h2object,data,snpsubset,idsubset,strata,times=1,quiet=FALSE,bcast=10,clambda=TRUE,propPs=1.0,cppFun="new")
 }
 \arguments{
   \item{h2object}{An object returned by \code{\link{polygenic}} polygenic mixed model analysis 
@@ -36,6 +36,7 @@
 		If a numeric value is provided, it is used as a correction factor.}
   \item{propPs}{proportion of non-corrected P-values used to estimate the inflation factor Lambda,
 		passed directly to the \code{\link{estlambda}}}
+  \item{cppFun}{Serves for testing purposes; do use default.}
 }
 \details{
 	Score test is performed using the formula

Added: pkg/GenABEL/src/GAlib/export_plink.cpp
===================================================================
--- pkg/GenABEL/src/GAlib/export_plink.cpp	                        (rev 0)
+++ pkg/GenABEL/src/GAlib/export_plink.cpp	2011-09-16 11:30:28 UTC (rev 784)
@@ -0,0 +1,113 @@
+#include "export_plink.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+std::string* getGenotype(std::string coding,std::string sep)
+{
+	std::string* Genotype = new (std::nothrow) std::string [4];
+	std::string Letter0 = coding.substr(0,1);
+	std::string Letter1 = coding.substr(1,1);
+	Genotype[0] = "0"+sep+"0";
+	Genotype[1] = Letter0+sep+Letter0;
+	Genotype[2] = Letter0+sep+Letter1;
+	Genotype[3] = Letter1+sep+Letter1;
+	return Genotype;
+}
+
+SEXP export_plink(SEXP Ids, SEXP Snpdata, SEXP Nsnps, SEXP NidsTotal, SEXP Coding, SEXP From, SEXP To,
+		SEXP Male, SEXP Traits, SEXP Pedfilename, SEXP Plink, SEXP Append)
+{
+	vector<unsigned short int> sex;
+	unsigned short int sx;
+	for(unsigned int i=0;i<((unsigned int) length(Male));i++) {
+		sx = INTEGER(Male)[i];
+		if (sx==0) sx=2;
+		sex.push_back(sx);
+	}
+
+	vector<std::string> ids;
+	for(unsigned int i=0;i<((unsigned int) length(Ids));i++)
+		ids.push_back(CHAR(STRING_ELT(Ids,i)));
+
+	vector<std::string> coding;
+	for(unsigned int i=0;i<((unsigned int) length(Coding));i++)
+		coding.push_back(CHAR(STRING_ELT(Coding,i)));
+
+	//Rprintf("0\n");
+	unsigned int nsnps = INTEGER(Nsnps)[0];
+	int from = INTEGER(From)[0];
+	int to = INTEGER(To)[0];
+	int nids = to - from + 1;
+	int nidsTotal = INTEGER(NidsTotal)[0];
+	int ntraits = INTEGER(Traits)[0];
+	bool append = LOGICAL(Append)[0];
+	bool plink = LOGICAL(Plink)[0];
+	std::string filename = CHAR(STRING_ELT(Pedfilename,0));
+	std::ofstream fileWoA;
+	int gtint[nidsTotal];
+	int ieq1 = 1;
+	char * snpdata = (char *) RAW(Snpdata);
+	//Rprintf("nsnps=%d\n",nsnps);
+	//Rprintf("nids=%d\n",nids);
+
+	char gtMatrix[nids][nsnps];
+	//Rprintf("1\n");
+	std::string* Genotype;
+	std::string sep="/";
+	int nbytes;
+
+	//Rprintf("nsnps=%d\n",nsnps);
+	//Rprintf("nids=%d\n",nids);
+
+	if ((nids % 4) == 0) nbytes = nidsTotal/4; else nbytes = ceil(1.*nidsTotal/4.);
+
+	if (plink) sep=" ";
+
+	if (append)
+		fileWoA.open(filename.c_str(),std::fstream::app);
+	else
+		fileWoA.open(filename.c_str(),std::fstream::trunc);
+
+	//Rprintf("A\n");
+	for (unsigned int csnp=0;csnp<nsnps;csnp++) {
+		// collect SNP data
+		get_snps_many(snpdata+nbytes*csnp, &nidsTotal, &ieq1, gtint);
+		for (int iii=from-1;iii<to;iii++) {
+			//Rprintf(" %d",gtint[iii]);
+			gtMatrix[iii-from+1][csnp]=gtint[iii];
+		}
+		//Rprintf("\n");
+	}
+	//Rprintf("B\n");
+	for (int i=0;i<nids;i++) {
+		fileWoA << i+from << " " << ids[i] << " 0 0 " << sex[i];
+		for (int j=0;j<ntraits;j++) fileWoA << " " << 0;
+		// unwrap genotypes
+		for (unsigned int csnp=0;csnp<nsnps;csnp++) {
+			Genotype = getGenotype(coding[csnp],sep);
+			// figure out the coding
+			fileWoA << " " << Genotype[gtMatrix[i][csnp]];
+			//fileWoA << " x" << Letter0 << Letter1 << Genotype[0] << Genotype[1] << Genotype[2] << Genotype[3];
+		}
+		// end unwrap
+		fileWoA << endl;
+	}
+	//Rprintf("C\n");
+	fileWoA.close();
+
+	//Rprintf("oooo!" );
+	//for (int i=0;i<10;i++) Rprintf("%d ",sex[i]);
+	//Rprintf("oooo!\n" );
+
+	delete [] Genotype;
+
+	return R_NilValue;
+}
+
+
+#ifdef __cplusplus
+}
+#endif


Property changes on: pkg/GenABEL/src/GAlib/export_plink.cpp
___________________________________________________________________
Added: svn:mime-type
   + text/plain

Added: pkg/GenABEL/src/GAlib/export_plink.h
===================================================================
--- pkg/GenABEL/src/GAlib/export_plink.h	                        (rev 0)
+++ pkg/GenABEL/src/GAlib/export_plink.h	2011-09-16 11:30:28 UTC (rev 784)
@@ -0,0 +1,27 @@
+#ifndef __export_plink_H__
+#define __export_plink_H__
+
+#include <vector.h>
+#include <iostream>
+#include <fstream>
+#include <math.h>
+#include <Rdefines.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void get_snps_many(char *a, int *Nsnps, int *Nrows, int *b);
+
+std::string* getGenotype(std::string coding, std::string sep);
+
+SEXP export_plink(SEXP idnames, SEXP snpdata, SEXP Nsnps, SEXP NidsTotal, SEXP Coding, SEXP from, SEXP to,
+		SEXP male, SEXP traits, SEXP pedfilename, SEXP plink, SEXP append);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
+


Property changes on: pkg/GenABEL/src/GAlib/export_plink.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain

Modified: pkg/GenABEL/src/GAlib/gwaa.c
===================================================================
--- pkg/GenABEL/src/GAlib/gwaa.c	2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/src/GAlib/gwaa.c	2011-09-16 11:30:28 UTC (rev 784)
@@ -895,11 +895,13 @@
 	}
 }
 
+
 //
-//new new MMSCORE (2011.07.15)
+// new MMSCORE (2011.09.16)
+// efficient vector-matrix multiplication
 //
 
-void mmscore_20110715(char *gdata, double *pheno, double *invS, int *Nids, int *Nsnps, int *Nstra, int *stra, double *chi2)
+void mmscore_20110916(char *gdata, double *pheno, double *invS, int *Nids, int *Nsnps, int *Nstra, int *stra, double *chi2)
 {
 	int nsnps = (*Nsnps);
 	int nstra = (*Nstra);
@@ -945,10 +947,29 @@
 				phctr[i] = pheno[i] - ePH[cstr];
 			}
 		}
+		/**
 		for (i=0;i<nids;i++) {
 			svec[i] = 0.;
-			for (j=0;j<nids;j++) svec[i] += gtctr[j]*invS[nids*i+j];
+			int offS = nids*i;
+			for (j=0;j<nids;j++) {
+				svec[i] += gtctr[j]*invS[offS+j];
+			}
 		}
+		**/
+		for (i=0;i<nids;i++) svec[i] = 0.;
+
+		double temp1,temp2;
+		for (i=0;i<nids;i++) {
+			int offS = nids*i;
+			temp1 = gtctr[i];
+			temp2 = 0.;
+			for (j=(i+1);j<nids;j++) {
+				svec[j] += temp1*invS[offS+j];
+				temp2 += invS[offS+j]*gtctr[j];
+			}
+			svec[i] += temp2 + invS[offS+i]*gtctr[i];
+		}
+
 		if (Ttotg == 0) {
 			chi2[igt] = 0.;
 			chi2[igt+nsnps] = 0.;
@@ -975,6 +996,235 @@
 	}
 }
 
+
+//
+// DOES NOT WORK!!!
+//
+
+void mmscore_20110915(char *gdata, double *pheno, double *invS, int *Nids, int *Nsnps, int *Nstra, int *stra, double *chi2)
+{
+	int nsnps = (*Nsnps);
+	int nstra = (*Nstra);
+	int nids = (*Nids);
+	int gt[nids];
+	int i, j, cstr, igt, i1=1;
+	int nbytes;
+	double Ttotg,dgt,totg[nstra],eG[nstra],ePH[nstra],svec[nids],gtctr[nids];
+	double Tsg, sg[nstra],sph[nstra],nIndividuals[nstra];
+	double u, v;
+	if ((nids % 4) == 0) nbytes = nids/4; else nbytes = ceil(1.*nids/4.);
+
+	// compute strata-specific means for y
+	// compute SUMi Yi SUMj OMEGAij and SUMij OMEGAij
+	for (j=0;j<nstra;j++) {
+		sph[j] = 0.;
+		nIndividuals[j] = 0.;
+		ePH[j] = 0.;
+	}
+	double sumOmegaJ[nids];
+	double YiSumOmegaij[nids];
+	double sumOmega = 0.;
+	double sumYiSumOmegaij = 0.;
+	for (i=0;i<nids;i++) {
+		sumOmegaJ[i] = 0.;
+		YiSumOmegaij[i] = 0.;
+		for (j=0;j<nids;j++) {
+			cstr = stra[j];
+			sph[cstr] += pheno[j];
+			nIndividuals[cstr]++;
+			sumOmegaJ[i] += invS[nids*i+j];
+		}
+		sumOmega += sumOmegaJ[i];
+		YiSumOmegaij[j] = pheno[i]*sumOmegaJ[i];
+		sumYiSumOmegaij += YiSumOmegaij[j];
+		ePH[j] = sph[j]/nIndividuals[j];
+	}
+
+	for (igt=0;igt<nsnps;igt++) {
+		get_snps_many(gdata+nbytes*igt,Nids,&i1,gt);
+		for (j=0;j<nstra;j++) {
+			totg[j] = 0.;
+			sg[j] = 0.;
+		}
+		Ttotg=Tsg=0.;
+		for (i=0;i<nids;i++)
+			if (gt[i] != 0) {
+				cstr = stra[i];
+				dgt = 1.*gt[i] - 1.0;
+				totg[cstr]+=1.0;
+				Ttotg += 1.0;
+				sg[cstr] += dgt;
+				Tsg += dgt;
+			}
+		chi2[igt+6*nsnps]=Ttotg;
+		for (j=0;j<nstra;j++) {
+			eG[j] = sg[j]/totg[j];
+		}
+		for (i=0;i<nids;i++) {
+			cstr = stra[i];
+			if (gt[i] == 0) {
+				gtctr[i] = eG[cstr];
+			} else {
+				gtctr[i] = 1.*gt[i] - 1.0;
+			}
+		}
+		double sumYSumGOmega = 0.;
+		double sumGSumGOmega = 0.;
+		double sumGSumOmega = 0.;
+		double SumSumGOmega = 0.;
+		double summand2y, summand3y, summand2g, summand3g, summand4;
+		summand2y = summand3y = summand2g = summand3g = summand4 = 0.;
+		for (i=0;i<nids;i++) {
+			cstr = stra[i];
+			svec[i] = 0.;
+			for (j=0;j<nids;j++) {
+				svec[i] += gtctr[j]*invS[nids*i+j];
+			}
+			sumYSumGOmega += pheno[i]*svec[i];
+			sumGSumGOmega += gtctr[i]*svec[i];
+			sumGSumOmega += gtctr[i]*sumOmegaJ[i];
+			SumSumGOmega += svec[i];
+			summand2y += eG[cstr]*YiSumOmegaij[i];
+			summand2g += eG[cstr]*gtctr[i]*sumOmegaJ[i];
+			summand3y += ePH[cstr]*svec[i];
+			summand3g += eG[cstr]*svec[i];
+			summand4 += ePH[cstr]*eG[cstr]*sumOmega;
+		}
+		if (Ttotg == 0) {
+			chi2[igt] = 0.;
+			chi2[igt+nsnps] = 0.;
+			chi2[igt+2*nsnps] = 0.0001;
+			chi2[igt+3*nsnps] = 0.;
+			chi2[igt+4*nsnps] = 0.;
+			chi2[igt+5*nsnps] = 0.;
+		} else {
+			u = v = 0.;
+			u = sumYSumGOmega - summand2y - summand3y + summand4;
+			v = sumGSumGOmega - summand2g - summand3g + summand4;
+			if (v<1.e-16) {
+				chi2[igt]=0.;
+				chi2[igt+3*nsnps]=0.;
+			} else {
+				chi2[igt]=u*u/v;
+				chi2[igt+3*nsnps]=u/v;
+			}
+		}
+	}
+}
+
+//
+// mmscore for the case of no stratification
+// DOES NOT WORK (AND NO POINT)
+//
+
+void mmscore_20110915_nostrat(char *gdata, double *pheno, double *invS, int *Nids, int *Nsnps, int *Nstra, int *stra, double *chi2)
+{
+	int nsnps = (*Nsnps);
+	int nstra = (*Nstra);
+	if (nstra!=1) return 100;
+	int nids = (*Nids);
+	int gt[nids];
+	int i, j, igt, i1=1;
+	int nbytes;
+	double Ttotg,dgt,svec[nids],gtctr[nids];
+	double Tsg;
+	double u, v;
+	if ((nids % 4) == 0) nbytes = nids/4; else nbytes = ceil(1.*nids/4.);
+
+	// compute strata-specific means for y
+	// compute SUMi Yi SUMj OMEGAij and SUMij OMEGAij
+	double OmegaX[nids*nids*4];
+	double sumOmegaJ[nids];
+	double YiSumOmegaij[nids];
+	double sumOmega = 0.;
+	double sumYiSumOmegaij = 0.;
+	double sumY = 0.;
+	for (i=0;i<nids;i++) {
+		sumOmegaJ[i] = 0.;
+		YiSumOmegaij[i] = 0.;
+		for (j=0;j<nids;j++) {
+			sumOmegaJ[i] += invS[nids*i+j];
+			for (int omegaMult=0;omegaMult<4;omegaMult++) {
+				OmegaX[nids*i+j+omegaMult*nids*nids] = invS[nids*i+j] * (1. * (omegaMult-1));
+			}
+		}
+		sumOmega += sumOmegaJ[i];
+		YiSumOmegaij[j] = pheno[i]*sumOmegaJ[i];
+		sumYiSumOmegaij += YiSumOmegaij[j];
+		sumY += pheno[j];
+	}
+	double meanY = sumY/(1.*nids);
+
+	for (igt=0;igt<nsnps;igt++) {
+		get_snps_many(gdata+nbytes*igt,Nids,&i1,gt);
+		Ttotg=Tsg=0.;
+		for (i=0;i<nids;i++)
+			if (gt[i] != 0) {
+				gtctr[i] = 1.*gt[i] - 1.0;
+				Ttotg += 1.0;
+				Tsg += gtctr[i];
+			}
+		double meanG = Tsg/Ttotg;
+
+		for (i=0;i<nids;i++) for (j=0;j<nids;j++) OmegaX[nids*i+j]=meanG;//*invS[nids*i+j];
+
+		chi2[igt+6*nsnps]= Ttotg;
+		for (i=0;i<nids;i++) {
+			if (gt[i] == 0) {
+				gtctr[i] = meanG;
+			}
+		}
+		double sumYSumGOmega = 0.;
+		double sumGSumGOmega = 0.;
+		double sumGSumOmega = 0.;
+		double SumSumGOmega = 0.;
+		double summand2y, summand3y, summand2g, summand3g, summand4;
+		summand2y = summand3y = summand2g = summand3g = summand4 = 0.;
+		int gtOffs[4];
+		gtOffs[0]=0;gtOffs[1]=nids*nids;gtOffs[2]=nids*nids*2;gtOffs[3]=nids*nids*3;
+		for (i=0;i<nids;i++) {
+			svec[i] = 0.;
+//			double * offS = &invS[nids*i];
+			int offS = nids*i;
+			for (j=0;j<nids;j++) {
+//				svec[i] += gtctr[j]*offS[j];
+				svec[i] += gtctr[j]*invS[offS+j];
+//				svec[i] += gtctr[j]*invS[nids*i+j];
+//				svec[i] += OmegaX[offS+j+gtOffs[gt[j]]];
+			}
+			sumYSumGOmega += pheno[i]*svec[i];
+			sumGSumGOmega += gtctr[i]*svec[i];
+			sumGSumOmega += gtctr[i]*sumOmegaJ[i];
+			SumSumGOmega += svec[i];
+		}
+		summand2g = meanG*sumGSumOmega;
+		summand2y = meanG*sumYiSumOmegaij;
+		summand3y = meanY*SumSumGOmega;
+		summand3g = meanG*SumSumGOmega;
+		summand4 = meanY*meanG*sumOmega;
+		if (Ttotg == 0) {
+			chi2[igt] = 0.;
+			chi2[igt+nsnps] = 0.;
+			chi2[igt+2*nsnps] = 0.0001;
+			chi2[igt+3*nsnps] = 0.;
+			chi2[igt+4*nsnps] = 0.;
+			chi2[igt+5*nsnps] = 0.;
+		} else {
+			u = v = 0.;
+			u = sumYSumGOmega - summand2y - summand3y + summand4;
+			v = sumGSumGOmega - summand2g - summand3g + summand4;
+			if (v<1.e-16) {
+				chi2[igt]=0.;
+				chi2[igt+3*nsnps]=0.;
+			} else {
+				chi2[igt]=u*u/v;
+				chi2[igt+3*nsnps]=u/v;
+			}
+		}
+	}
+}
+
+
 void grammar(char *gdata, double *pheno, double *invS, int *Nids, int *Nsnps, int *Nstra, int *stra, double *chi2) 
 {
 	int nsnps = (*Nsnps);



More information about the Genabel-commits mailing list