[Genabel-commits] r784 - in pkg/GenABEL: . R inst/unitTests man src/GAlib
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Sep 16 13:30:28 CEST 2011
Author: yurii
Date: 2011-09-16 13:30:28 +0200 (Fri, 16 Sep 2011)
New Revision: 784
Added:
pkg/GenABEL/inst/unitTests/runit.exports.R
pkg/GenABEL/inst/unitTests/runit.mmscore.R
pkg/GenABEL/src/GAlib/export_plink.cpp
pkg/GenABEL/src/GAlib/export_plink.h
Modified:
pkg/GenABEL/CHANGES.LOG
pkg/GenABEL/DESCRIPTION
pkg/GenABEL/R/export.merlin.R
pkg/GenABEL/R/mmscore.R
pkg/GenABEL/R/zzz.R
pkg/GenABEL/man/export.merlin.Rd
pkg/GenABEL/man/mmscore.Rd
pkg/GenABEL/src/GAlib/gwaa.c
Log:
speeding-up 'mmscore' by x1.8 through more efficient vector-matrix
product computations
export.merlin, export.plink now runs much faster (exports on C++ code)
Modified: pkg/GenABEL/CHANGES.LOG
===================================================================
--- pkg/GenABEL/CHANGES.LOG 2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/CHANGES.LOG 2011-09-16 11:30:28 UTC (rev 784)
@@ -1,5 +1,10 @@
-*** v. 1.7-0 (2011.09.10)
+*** v. 1.7-0 (2011.09.16)
+speeding-up 'mmscore' by x1.8 through more efficient vector-matrix
+product computations
+
+export.merlin, export.plink now runs much faster (exports on C++ code)
+
Deal with exceptional situation in merge.snp.data / monomorphic part;
added case of no overlap in SNPs (skip monomorphic staff then)
Modified: pkg/GenABEL/DESCRIPTION
===================================================================
--- pkg/GenABEL/DESCRIPTION 2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/DESCRIPTION 2011-09-16 11:30:28 UTC (rev 784)
@@ -2,7 +2,7 @@
Type: Package
Title: genome-wide SNP association analysis
Version: 1.7-0
-Date: 2011-09-10
+Date: 2011-09-16
Author: GenABEL developers
Maintainer: GenABEL developers <genabel.project at gmail.com>
Depends: R (>= 2.10.0), methods, MASS
Modified: pkg/GenABEL/R/export.merlin.R
===================================================================
--- pkg/GenABEL/R/export.merlin.R 2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/R/export.merlin.R 2011-09-16 11:30:28 UTC (rev 784)
@@ -1,8 +1,18 @@
"export.merlin" <-
-function(data,pedfile="merlin.ped",datafile="merlin.dat",
- mapfile="merlin.map",format="merlin",fixstrand="no",
- extendedmap=TRUE,traits=1, order = TRUE, stepids = 100) {
+ function(data,pedfile="merlin.ped",datafile="merlin.dat",
+ mapfile="merlin.map",format="merlin",fixstrand="no",
+ extendedmap=TRUE,traits=1, order = TRUE, stepids = 100, dpieceFun="new") {
if (!is(data,"gwaa.data")) stop("Data argument should be of gwaa.data-class")
+ dpieceFunOptions <- c("old","new")
+ dpieceFunInt <- match(dpieceFun,dpieceFunOptions,nomatch=0)
+ if (dpieceFunInt<=0) {
+ out <- paste("dpieceFun argument should be one of",dpieceFunOptions,"\n")
+ stop(out)
+ } else {
+ if (dpieceFunInt==1) dump.piece=dump.piece
+ else if (dpieceFunInt==2) dump.piece=dump.piece.New
+ else stop("weird staff!")
+ }
formats <- c("merlin","plink")
if (!(match(format,formats,nomatch=0)>0)) {
out <- paste("format argument should be one of",formats,"\n")
@@ -15,27 +25,27 @@
}
if (order) {
ord <- sortmap.internal(chromosome(data),map(data))
- data <- data[,ord$ix]
+ if (any(ord$ix != c(1:length(ord$ix)))) {data <- data[,ord$ix]; gc()}
}
if (fixstrand != "no") {
-###### as.raw(1) == "+"
-###### as.raw(2) == "-"
+ ###### as.raw(1) == "+"
+ ###### as.raw(2) == "-"
if (fixstrand == "-") {tos <- as.raw(1);fos<-as.raw(2);}
- else if (fixstrand == "+") {tos <- as.raw(2);fos <- as.raw(1)}
- else {stop("strange strand...")}
+ else if (fixstrand == "+") {tos <- as.raw(2);fos <- as.raw(1)}
+ else {stop("strange strand...")}
stf <- which(as.raw(data at gtdata@strand) == tos)
if (length(stf)>0) {
- revs <- alleleID.revstrand()
- savnam <- names(data at gtdata@strand)
+ revs <- alleleID.revstrand()
+ savnam <- names(data at gtdata@strand)
#
# data at gtdata@coding[stf] <- new("snp.coding",as.raw(revs[as.character(data at gtdata@coding[stf])]))
#
# ugly fix --
#
- data at gtdata@coding[stf] <- new("snp.coding",as.raw(revs[as.character(as.raw(data at gtdata@coding[stf]))]))
- names(data at gtdata@coding) <- savnam
- data at gtdata@strand[stf] <- new("snp.strand",rep(fos,length(stf)))
- names(data at gtdata@strand) <- savnam
+ data at gtdata@coding[stf] <- new("snp.coding",as.raw(revs[as.character(as.raw(data at gtdata@coding[stf]))]))
+ names(data at gtdata@coding) <- savnam
+ data at gtdata@strand[stf] <- new("snp.strand",rep(fos,length(stf)))
+ names(data at gtdata@strand) <- savnam
}
}
bstp <- stepids #100
@@ -43,10 +53,12 @@
steps <- seq(from=0,to=data at gtdata@nids,by=bstp)
if (data at gtdata@nids != steps[length(steps)]) steps[length(steps)+1] <- data at gtdata@nids
dump.piece(data=data,from=1,to=steps[2],traits=traits,
- pedfile=pedfile,append=F,format=format)
+ pedfile=pedfile,append=FALSE,format=format)
+ cat("dumped ids",1,"to",steps[2],"\n")
for (jjj in c(2:(length(steps)-1))) {
dump.piece(data=data,from=(steps[jjj]+1),to=(steps[jjj+1]),traits=traits,
- pedfile=pedfile,append=T,format=format)
+ pedfile=pedfile,append=TRUE,format=format)
+ cat("dumped ids",steps[jjj]+1,"to",steps[jjj+1],"\n")
}
} else {
dump.piece(data=data,from=1,to=data at gtdata@nids,traits=traits,
@@ -98,3 +110,18 @@
}
write.table(x,file=pedfile,col.n=FALSE,row.n=FALSE,quote=FALSE,append=append,sep=" ")
}
+
+dump.piece.New <- function(data,fromid,toid,traits,pedfile,append,format="merlin") {
+ if (toid < fromid) stop("toid<fromid")
+ # ugly line -- should be in the C++ code
+ if (!append) unlink(pedfile);
+ if (format=="plink") {plink <- TRUE;} else {plink <- FALSE;}
+ #data <- data[c(fromid:toid),]
+ result <- .Call("export_plink",as.character(idnames(data)[c(fromid:toid)]),
+ as.raw(gtdata(data)@gtps), as.integer(nsnps(data)), as.integer(nids(data)),
+ as.character(coding(data)), as.integer(fromid), as.integer(toid),
+ as.integer(male(data)), as.integer(traits), as.character(pedfile),
+ as.logical(plink), as.logical(append))
+ #
+ return(1)
+}
Modified: pkg/GenABEL/R/mmscore.R
===================================================================
--- pkg/GenABEL/R/mmscore.R 2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/R/mmscore.R 2011-09-16 11:30:28 UTC (rev 784)
@@ -1,22 +1,30 @@
"mmscore" <-
-function(h2object,data,snpsubset,idsubset,strata,times=1,quiet=FALSE,
- bcast=10,clambda=TRUE,propPs=1.0) {
- if (is(data,"gwaa.data"))
+ function(h2object,data,snpsubset,idsubset,strata,times=1,quiet=FALSE,
+ bcast=10,clambda=TRUE,propPs=1.0,cppFun="new") {
+ if (is(data,"gwaa.data"))
{
checkphengen(data)
data <- data at gtdata
}
- if (!is(data,"snp.data")) {
+ if (!is(data,"snp.data")) {
stop("wrong data class: should be gwaa.data or snp.data")
- }
+ }
if (class(h2object) != "polygenic") stop("h2object must be of polygenic-class")
if (!missing(snpsubset)) data <- data[,snpsubset]
if (!missing(idsubset)) data <- data[idsubset,]
if (missing(strata)) {nstra=1; strata <- rep(0,data at nids)}
-
+
if (length(strata)!=data at nids) stop("Strata variable and the data do not match in length")
if (any(is.na(strata))) stop("Strata variable contains NAs")
-
+
+ cppFunOptions <- c("old","new")
+ fMatch <- match(cppFun,cppFunOptions,nomatch=0)
+ if (fMatch==0) {
+ stop(paste("cppFun must be one of",cppFunOptions))
+ } else {
+ calledFun = c("mmscore_20090127","mmscore_20110916")[fMatch]
+ }
+
tmeas <- h2object$measuredIDs
resid <- h2object$residualY
if (any(tmeas == FALSE)) {
@@ -35,20 +43,20 @@
rm(tstr)
}
nstra <- length(levels(as.factor(strata)))
-
+
lenn <- data at nsnps;
out <- list()
for (j in c(1:(times+1*(times>1)))) {
if (j>1) resid <- sample(resid,replace=FALSE)
- chi2 <- .C("mmscore_20090127",as.raw(data at gtps),as.double(resid),as.double(h2object$InvSigma),as.integer(data at nids),as.integer(data at nsnps), as.integer(nstra), as.integer(strata), chi2 = double(7*data at nsnps), PACKAGE="GenABEL")$chi2
+ chi2 <- .C(calledFun,as.raw(data at gtps),as.double(resid),as.double(h2object$InvSigma),as.integer(data at nids),as.integer(data at nsnps), as.integer(nstra), as.integer(strata), chi2 = double(7*data at nsnps), PACKAGE="GenABEL")$chi2
if (any(data at chromosome=="X")) {
- ogX <- data[,data at chromosome=="X"]
- sxstra <- strata; sxstra[ogX at male==1] <- strata[ogX at male==1]+nstra
- chi2X <- .C("mmscore_20090127",as.raw(ogX at gtps),as.double(resid),as.double(h2object$InvSigma),as.integer(ogX at nids),as.integer(ogX at nsnps), as.integer(nstra*2), as.integer(sxstra), chi2 = double(7*ogX at nsnps), PACKAGE="GenABEL")$chi2
- revec <- (data at chromosome=="X")
- revec <- rep(revec,6)
- chi2 <- replace(chi2,revec,chi2X)
- rm(ogX,chi2X,revec);gc(verbose=FALSE)
+ ogX <- data[,data at chromosome=="X"]
+ sxstra <- strata; sxstra[ogX at male==1] <- strata[ogX at male==1]+nstra
+ chi2X <- .C(calledFun,as.raw(ogX at gtps),as.double(resid),as.double(h2object$InvSigma),as.integer(ogX at nids),as.integer(ogX at nsnps), as.integer(nstra*2), as.integer(sxstra), chi2 = double(7*ogX at nsnps), PACKAGE="GenABEL")$chi2
+ revec <- (data at chromosome=="X")
+ revec <- rep(revec,6)
+ chi2 <- replace(chi2,revec,chi2X)
+ rm(ogX,chi2X,revec);gc(verbose=FALSE)
}
if (j == 1) {
chi2.1df <- chi2[1:lenn];
@@ -105,7 +113,7 @@
}
}
if (times > bcast) cat("\n")
-
+
if (times>1) {
P1df <- pr.1df/times
P1df <- replace(P1df,(P1df==0),1/(1+times))
Modified: pkg/GenABEL/R/zzz.R
===================================================================
--- pkg/GenABEL/R/zzz.R 2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/R/zzz.R 2011-09-16 11:30:28 UTC (rev 784)
@@ -1,6 +1,6 @@
.onLoad <- function(lib, pkg) {
GenABEL.version <- "1.7-0"
- cat("GenABEL v.",GenABEL.version,"(September 10, 2011) loaded\n")
+ cat("GenABEL v.",GenABEL.version,"(September 16, 2011) loaded\n")
# check for updates and news
address <- c(
Added: pkg/GenABEL/inst/unitTests/runit.exports.R
===================================================================
--- pkg/GenABEL/inst/unitTests/runit.exports.R (rev 0)
+++ pkg/GenABEL/inst/unitTests/runit.exports.R 2011-09-16 11:30:28 UTC (rev 784)
@@ -0,0 +1,65 @@
+### --- Test setup ---
+#
+# regression tests
+#
+
+if(FALSE) {
+ ## Not really needed, but can be handy when writing tests
+ library(RUnit)
+ library(GenABEL)
+}
+
+### do not run
+#stop("SKIP THIS TEST")
+###
+
+### ---- common functions and data -----
+
+#source(paste("../inst/unitTests/shared_functions.R"))
+#source(paste(path,"/shared_functions.R",sep=""))
+
+### --- Test functions ---
+
+test.exports <- function()
+{
+ data(ge03d2.clean)
+ nTestIds <- sample(c(10:min(100,nids(ge03d2.clean))),1)
+ nTestSnps <- sample(c(10:min(1000,nsnps(ge03d2.clean))),1)
+ dta <- ge03d2.clean[sample(1:nids(ge03d2.clean),nTestIds),sample(1:nsnps(ge03d2.clean),nTestSnps)]
+
+ export.plink(dta,filebasename="tmpOld",dpieceFun="old")
+ export.plink(dta,filebasename="tmpNew",dpieceFun="new")
+ xO <- read.table(file="tmpOld.ped",head=FALSE,strings=FALSE)
+ xN <- read.table(file="tmpNew.ped",head=FALSE,strings=FALSE)
+ checkIdentical(xN,xO)
+ xO <- read.table(file="tmpOld.map",head=FALSE,strings=FALSE)
+ xN <- read.table(file="tmpNew.map",head=FALSE,strings=FALSE)
+ checkIdentical(xN,xO)
+ xO <- read.table(file="tmpOld.map.ext",head=FALSE,strings=FALSE)
+ xN <- read.table(file="tmpNew.map.ext",head=FALSE,strings=FALSE)
+ checkIdentical(xN,xO)
+
+ unlink("tmpOld*")
+ unlink("tmpNew*")
+
+ export.merlin(dta,pedfile="tmpOld.ped",datafile="tmpOld.dat",
+ mapfile="tmpOld.map",dpieceFun="old")
+ export.merlin(dta,pedfile="tmpNew.ped",datafile="tmpNew.dat",
+ mapfile="tmpNew.map",dpieceFun="new")
+ xO <- read.table(file="tmpOld.ped",head=FALSE,strings=FALSE)
+ xN <- read.table(file="tmpNew.ped",head=FALSE,strings=FALSE)
+ checkIdentical(xN,xO)
+ xO <- read.table(file="tmpOld.map",head=FALSE,strings=FALSE)
+ xN <- read.table(file="tmpNew.map",head=FALSE,strings=FALSE)
+ checkIdentical(xN,xO)
+ xO <- read.table(file="tmpOld.map.ext",head=FALSE,strings=FALSE)
+ xN <- read.table(file="tmpNew.map.ext",head=FALSE,strings=FALSE)
+ checkIdentical(xN,xO)
+ xO <- read.table(file="tmpOld.dat",head=FALSE,strings=FALSE)
+ xN <- read.table(file="tmpNew.dat",head=FALSE,strings=FALSE)
+ checkIdentical(xN,xO)
+
+ unlink("tmpOld*")
+ unlink("tmpNew*")
+
+}
Property changes on: pkg/GenABEL/inst/unitTests/runit.exports.R
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Added: pkg/GenABEL/inst/unitTests/runit.mmscore.R
===================================================================
--- pkg/GenABEL/inst/unitTests/runit.mmscore.R (rev 0)
+++ pkg/GenABEL/inst/unitTests/runit.mmscore.R 2011-09-16 11:30:28 UTC (rev 784)
@@ -0,0 +1,32 @@
+### --- Test setup ---
+#
+# regression tests
+#
+
+if(FALSE) {
+ ## Not really needed, but can be handy when writing tests
+ library(RUnit)
+ library(GenABEL)
+}
+
+### do not run
+#stop("SKIP THIS TEST")
+###
+
+### ---- common functions and data -----
+
+#source(paste("../inst/unitTests/shared_functions.R"))
+#source(paste(path,"/shared_functions.R",sep=""))
+
+### --- Test functions ---
+
+test.exports <- function()
+{
+ data(ge03d2.clean)
+ dta <- ge03d2.clean[1:200,autosomal(ge03d2.clean)[1:3000]]
+ gkin <- ibs(dta[,1:2000],w="freq")
+ h2 <- polygenic(height,data=dta,kin=gkin)
+ xOld <- mmscore(h2,dta,cppFun="old")
+ xNew <- mmscore(h2,dta,cppFun="new")
+ checkEquals(results(xNew),results(xOld))
+}
Property changes on: pkg/GenABEL/inst/unitTests/runit.mmscore.R
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Modified: pkg/GenABEL/man/export.merlin.Rd
===================================================================
--- pkg/GenABEL/man/export.merlin.Rd 2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/man/export.merlin.Rd 2011-09-16 11:30:28 UTC (rev 784)
@@ -6,7 +6,8 @@
}
\usage{
export.merlin(data, pedfile = "merlin.ped", datafile = "merlin.dat", mapfile = "merlin.map",
- format = "merlin", fixstrand = "no", extendedmap = TRUE, traits = 1, order = TRUE, stepids = 100)
+ format = "merlin", fixstrand = "no", extendedmap = TRUE, traits = 1, order = TRUE,
+ stepids = 100, dpieceFun = "new")
}
%- maybe also 'usage' for other objects documented here.
\arguments{
@@ -22,6 +23,7 @@
\item{traits}{How many fake traits to insert before first column of marker data}
\item{order}{Should output be ordered by chromosome and position}
\item{stepids}{make this larger for faster processing and smaller for lower RAM use}
+ \item{dpieceFun}{function used to dump data. Do use default.}
}
\details{
The use is straightforward, with only the "fixstrand" option requiring some
Modified: pkg/GenABEL/man/mmscore.Rd
===================================================================
--- pkg/GenABEL/man/mmscore.Rd 2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/man/mmscore.Rd 2011-09-16 11:30:28 UTC (rev 784)
@@ -6,7 +6,7 @@
in samples of related individuals
}
\usage{
-mmscore(h2object,data,snpsubset,idsubset,strata,times=1,quiet=FALSE,bcast=10,clambda=TRUE,propPs=1.0)
+mmscore(h2object,data,snpsubset,idsubset,strata,times=1,quiet=FALSE,bcast=10,clambda=TRUE,propPs=1.0,cppFun="new")
}
\arguments{
\item{h2object}{An object returned by \code{\link{polygenic}} polygenic mixed model analysis
@@ -36,6 +36,7 @@
If a numeric value is provided, it is used as a correction factor.}
\item{propPs}{proportion of non-corrected P-values used to estimate the inflation factor Lambda,
passed directly to the \code{\link{estlambda}}}
+ \item{cppFun}{Serves for testing purposes; do use default.}
}
\details{
Score test is performed using the formula
Added: pkg/GenABEL/src/GAlib/export_plink.cpp
===================================================================
--- pkg/GenABEL/src/GAlib/export_plink.cpp (rev 0)
+++ pkg/GenABEL/src/GAlib/export_plink.cpp 2011-09-16 11:30:28 UTC (rev 784)
@@ -0,0 +1,113 @@
+#include "export_plink.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+std::string* getGenotype(std::string coding,std::string sep)
+{
+ std::string* Genotype = new (std::nothrow) std::string [4];
+ std::string Letter0 = coding.substr(0,1);
+ std::string Letter1 = coding.substr(1,1);
+ Genotype[0] = "0"+sep+"0";
+ Genotype[1] = Letter0+sep+Letter0;
+ Genotype[2] = Letter0+sep+Letter1;
+ Genotype[3] = Letter1+sep+Letter1;
+ return Genotype;
+}
+
+SEXP export_plink(SEXP Ids, SEXP Snpdata, SEXP Nsnps, SEXP NidsTotal, SEXP Coding, SEXP From, SEXP To,
+ SEXP Male, SEXP Traits, SEXP Pedfilename, SEXP Plink, SEXP Append)
+{
+ vector<unsigned short int> sex;
+ unsigned short int sx;
+ for(unsigned int i=0;i<((unsigned int) length(Male));i++) {
+ sx = INTEGER(Male)[i];
+ if (sx==0) sx=2;
+ sex.push_back(sx);
+ }
+
+ vector<std::string> ids;
+ for(unsigned int i=0;i<((unsigned int) length(Ids));i++)
+ ids.push_back(CHAR(STRING_ELT(Ids,i)));
+
+ vector<std::string> coding;
+ for(unsigned int i=0;i<((unsigned int) length(Coding));i++)
+ coding.push_back(CHAR(STRING_ELT(Coding,i)));
+
+ //Rprintf("0\n");
+ unsigned int nsnps = INTEGER(Nsnps)[0];
+ int from = INTEGER(From)[0];
+ int to = INTEGER(To)[0];
+ int nids = to - from + 1;
+ int nidsTotal = INTEGER(NidsTotal)[0];
+ int ntraits = INTEGER(Traits)[0];
+ bool append = LOGICAL(Append)[0];
+ bool plink = LOGICAL(Plink)[0];
+ std::string filename = CHAR(STRING_ELT(Pedfilename,0));
+ std::ofstream fileWoA;
+ int gtint[nidsTotal];
+ int ieq1 = 1;
+ char * snpdata = (char *) RAW(Snpdata);
+ //Rprintf("nsnps=%d\n",nsnps);
+ //Rprintf("nids=%d\n",nids);
+
+ char gtMatrix[nids][nsnps];
+ //Rprintf("1\n");
+ std::string* Genotype;
+ std::string sep="/";
+ int nbytes;
+
+ //Rprintf("nsnps=%d\n",nsnps);
+ //Rprintf("nids=%d\n",nids);
+
+ if ((nids % 4) == 0) nbytes = nidsTotal/4; else nbytes = ceil(1.*nidsTotal/4.);
+
+ if (plink) sep=" ";
+
+ if (append)
+ fileWoA.open(filename.c_str(),std::fstream::app);
+ else
+ fileWoA.open(filename.c_str(),std::fstream::trunc);
+
+ //Rprintf("A\n");
+ for (unsigned int csnp=0;csnp<nsnps;csnp++) {
+ // collect SNP data
+ get_snps_many(snpdata+nbytes*csnp, &nidsTotal, &ieq1, gtint);
+ for (int iii=from-1;iii<to;iii++) {
+ //Rprintf(" %d",gtint[iii]);
+ gtMatrix[iii-from+1][csnp]=gtint[iii];
+ }
+ //Rprintf("\n");
+ }
+ //Rprintf("B\n");
+ for (int i=0;i<nids;i++) {
+ fileWoA << i+from << " " << ids[i] << " 0 0 " << sex[i];
+ for (int j=0;j<ntraits;j++) fileWoA << " " << 0;
+ // unwrap genotypes
+ for (unsigned int csnp=0;csnp<nsnps;csnp++) {
+ Genotype = getGenotype(coding[csnp],sep);
+ // figure out the coding
+ fileWoA << " " << Genotype[gtMatrix[i][csnp]];
+ //fileWoA << " x" << Letter0 << Letter1 << Genotype[0] << Genotype[1] << Genotype[2] << Genotype[3];
+ }
+ // end unwrap
+ fileWoA << endl;
+ }
+ //Rprintf("C\n");
+ fileWoA.close();
+
+ //Rprintf("oooo!" );
+ //for (int i=0;i<10;i++) Rprintf("%d ",sex[i]);
+ //Rprintf("oooo!\n" );
+
+ delete [] Genotype;
+
+ return R_NilValue;
+}
+
+
+#ifdef __cplusplus
+}
+#endif
Property changes on: pkg/GenABEL/src/GAlib/export_plink.cpp
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Added: pkg/GenABEL/src/GAlib/export_plink.h
===================================================================
--- pkg/GenABEL/src/GAlib/export_plink.h (rev 0)
+++ pkg/GenABEL/src/GAlib/export_plink.h 2011-09-16 11:30:28 UTC (rev 784)
@@ -0,0 +1,27 @@
+#ifndef __export_plink_H__
+#define __export_plink_H__
+
+#include <vector.h>
+#include <iostream>
+#include <fstream>
+#include <math.h>
+#include <Rdefines.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void get_snps_many(char *a, int *Nsnps, int *Nrows, int *b);
+
+std::string* getGenotype(std::string coding, std::string sep);
+
+SEXP export_plink(SEXP idnames, SEXP snpdata, SEXP Nsnps, SEXP NidsTotal, SEXP Coding, SEXP from, SEXP to,
+ SEXP male, SEXP traits, SEXP pedfilename, SEXP plink, SEXP append);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
+
Property changes on: pkg/GenABEL/src/GAlib/export_plink.h
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Modified: pkg/GenABEL/src/GAlib/gwaa.c
===================================================================
--- pkg/GenABEL/src/GAlib/gwaa.c 2011-09-14 18:37:31 UTC (rev 783)
+++ pkg/GenABEL/src/GAlib/gwaa.c 2011-09-16 11:30:28 UTC (rev 784)
@@ -895,11 +895,13 @@
}
}
+
//
-//new new MMSCORE (2011.07.15)
+// new MMSCORE (2011.09.16)
+// efficient vector-matrix multiplication
//
-void mmscore_20110715(char *gdata, double *pheno, double *invS, int *Nids, int *Nsnps, int *Nstra, int *stra, double *chi2)
+void mmscore_20110916(char *gdata, double *pheno, double *invS, int *Nids, int *Nsnps, int *Nstra, int *stra, double *chi2)
{
int nsnps = (*Nsnps);
int nstra = (*Nstra);
@@ -945,10 +947,29 @@
phctr[i] = pheno[i] - ePH[cstr];
}
}
+ /**
for (i=0;i<nids;i++) {
svec[i] = 0.;
- for (j=0;j<nids;j++) svec[i] += gtctr[j]*invS[nids*i+j];
+ int offS = nids*i;
+ for (j=0;j<nids;j++) {
+ svec[i] += gtctr[j]*invS[offS+j];
+ }
}
+ **/
+ for (i=0;i<nids;i++) svec[i] = 0.;
+
+ double temp1,temp2;
+ for (i=0;i<nids;i++) {
+ int offS = nids*i;
+ temp1 = gtctr[i];
+ temp2 = 0.;
+ for (j=(i+1);j<nids;j++) {
+ svec[j] += temp1*invS[offS+j];
+ temp2 += invS[offS+j]*gtctr[j];
+ }
+ svec[i] += temp2 + invS[offS+i]*gtctr[i];
+ }
+
if (Ttotg == 0) {
chi2[igt] = 0.;
chi2[igt+nsnps] = 0.;
@@ -975,6 +996,235 @@
}
}
+
+//
+// DOES NOT WORK!!!
+//
+
+void mmscore_20110915(char *gdata, double *pheno, double *invS, int *Nids, int *Nsnps, int *Nstra, int *stra, double *chi2)
+{
+ int nsnps = (*Nsnps);
+ int nstra = (*Nstra);
+ int nids = (*Nids);
+ int gt[nids];
+ int i, j, cstr, igt, i1=1;
+ int nbytes;
+ double Ttotg,dgt,totg[nstra],eG[nstra],ePH[nstra],svec[nids],gtctr[nids];
+ double Tsg, sg[nstra],sph[nstra],nIndividuals[nstra];
+ double u, v;
+ if ((nids % 4) == 0) nbytes = nids/4; else nbytes = ceil(1.*nids/4.);
+
+ // compute strata-specific means for y
+ // compute SUMi Yi SUMj OMEGAij and SUMij OMEGAij
+ for (j=0;j<nstra;j++) {
+ sph[j] = 0.;
+ nIndividuals[j] = 0.;
+ ePH[j] = 0.;
+ }
+ double sumOmegaJ[nids];
+ double YiSumOmegaij[nids];
+ double sumOmega = 0.;
+ double sumYiSumOmegaij = 0.;
+ for (i=0;i<nids;i++) {
+ sumOmegaJ[i] = 0.;
+ YiSumOmegaij[i] = 0.;
+ for (j=0;j<nids;j++) {
+ cstr = stra[j];
+ sph[cstr] += pheno[j];
+ nIndividuals[cstr]++;
+ sumOmegaJ[i] += invS[nids*i+j];
+ }
+ sumOmega += sumOmegaJ[i];
+ YiSumOmegaij[j] = pheno[i]*sumOmegaJ[i];
+ sumYiSumOmegaij += YiSumOmegaij[j];
+ ePH[j] = sph[j]/nIndividuals[j];
+ }
+
+ for (igt=0;igt<nsnps;igt++) {
+ get_snps_many(gdata+nbytes*igt,Nids,&i1,gt);
+ for (j=0;j<nstra;j++) {
+ totg[j] = 0.;
+ sg[j] = 0.;
+ }
+ Ttotg=Tsg=0.;
+ for (i=0;i<nids;i++)
+ if (gt[i] != 0) {
+ cstr = stra[i];
+ dgt = 1.*gt[i] - 1.0;
+ totg[cstr]+=1.0;
+ Ttotg += 1.0;
+ sg[cstr] += dgt;
+ Tsg += dgt;
+ }
+ chi2[igt+6*nsnps]=Ttotg;
+ for (j=0;j<nstra;j++) {
+ eG[j] = sg[j]/totg[j];
+ }
+ for (i=0;i<nids;i++) {
+ cstr = stra[i];
+ if (gt[i] == 0) {
+ gtctr[i] = eG[cstr];
+ } else {
+ gtctr[i] = 1.*gt[i] - 1.0;
+ }
+ }
+ double sumYSumGOmega = 0.;
+ double sumGSumGOmega = 0.;
+ double sumGSumOmega = 0.;
+ double SumSumGOmega = 0.;
+ double summand2y, summand3y, summand2g, summand3g, summand4;
+ summand2y = summand3y = summand2g = summand3g = summand4 = 0.;
+ for (i=0;i<nids;i++) {
+ cstr = stra[i];
+ svec[i] = 0.;
+ for (j=0;j<nids;j++) {
+ svec[i] += gtctr[j]*invS[nids*i+j];
+ }
+ sumYSumGOmega += pheno[i]*svec[i];
+ sumGSumGOmega += gtctr[i]*svec[i];
+ sumGSumOmega += gtctr[i]*sumOmegaJ[i];
+ SumSumGOmega += svec[i];
+ summand2y += eG[cstr]*YiSumOmegaij[i];
+ summand2g += eG[cstr]*gtctr[i]*sumOmegaJ[i];
+ summand3y += ePH[cstr]*svec[i];
+ summand3g += eG[cstr]*svec[i];
+ summand4 += ePH[cstr]*eG[cstr]*sumOmega;
+ }
+ if (Ttotg == 0) {
+ chi2[igt] = 0.;
+ chi2[igt+nsnps] = 0.;
+ chi2[igt+2*nsnps] = 0.0001;
+ chi2[igt+3*nsnps] = 0.;
+ chi2[igt+4*nsnps] = 0.;
+ chi2[igt+5*nsnps] = 0.;
+ } else {
+ u = v = 0.;
+ u = sumYSumGOmega - summand2y - summand3y + summand4;
+ v = sumGSumGOmega - summand2g - summand3g + summand4;
+ if (v<1.e-16) {
+ chi2[igt]=0.;
+ chi2[igt+3*nsnps]=0.;
+ } else {
+ chi2[igt]=u*u/v;
+ chi2[igt+3*nsnps]=u/v;
+ }
+ }
+ }
+}
+
+//
+// mmscore for the case of no stratification
+// DOES NOT WORK (AND NO POINT)
+//
+
+void mmscore_20110915_nostrat(char *gdata, double *pheno, double *invS, int *Nids, int *Nsnps, int *Nstra, int *stra, double *chi2)
+{
+ int nsnps = (*Nsnps);
+ int nstra = (*Nstra);
+ if (nstra!=1) return 100;
+ int nids = (*Nids);
+ int gt[nids];
+ int i, j, igt, i1=1;
+ int nbytes;
+ double Ttotg,dgt,svec[nids],gtctr[nids];
+ double Tsg;
+ double u, v;
+ if ((nids % 4) == 0) nbytes = nids/4; else nbytes = ceil(1.*nids/4.);
+
+ // compute strata-specific means for y
+ // compute SUMi Yi SUMj OMEGAij and SUMij OMEGAij
+ double OmegaX[nids*nids*4];
+ double sumOmegaJ[nids];
+ double YiSumOmegaij[nids];
+ double sumOmega = 0.;
+ double sumYiSumOmegaij = 0.;
+ double sumY = 0.;
+ for (i=0;i<nids;i++) {
+ sumOmegaJ[i] = 0.;
+ YiSumOmegaij[i] = 0.;
+ for (j=0;j<nids;j++) {
+ sumOmegaJ[i] += invS[nids*i+j];
+ for (int omegaMult=0;omegaMult<4;omegaMult++) {
+ OmegaX[nids*i+j+omegaMult*nids*nids] = invS[nids*i+j] * (1. * (omegaMult-1));
+ }
+ }
+ sumOmega += sumOmegaJ[i];
+ YiSumOmegaij[j] = pheno[i]*sumOmegaJ[i];
+ sumYiSumOmegaij += YiSumOmegaij[j];
+ sumY += pheno[j];
+ }
+ double meanY = sumY/(1.*nids);
+
+ for (igt=0;igt<nsnps;igt++) {
+ get_snps_many(gdata+nbytes*igt,Nids,&i1,gt);
+ Ttotg=Tsg=0.;
+ for (i=0;i<nids;i++)
+ if (gt[i] != 0) {
+ gtctr[i] = 1.*gt[i] - 1.0;
+ Ttotg += 1.0;
+ Tsg += gtctr[i];
+ }
+ double meanG = Tsg/Ttotg;
+
+ for (i=0;i<nids;i++) for (j=0;j<nids;j++) OmegaX[nids*i+j]=meanG;//*invS[nids*i+j];
+
+ chi2[igt+6*nsnps]= Ttotg;
+ for (i=0;i<nids;i++) {
+ if (gt[i] == 0) {
+ gtctr[i] = meanG;
+ }
+ }
+ double sumYSumGOmega = 0.;
+ double sumGSumGOmega = 0.;
+ double sumGSumOmega = 0.;
+ double SumSumGOmega = 0.;
+ double summand2y, summand3y, summand2g, summand3g, summand4;
+ summand2y = summand3y = summand2g = summand3g = summand4 = 0.;
+ int gtOffs[4];
+ gtOffs[0]=0;gtOffs[1]=nids*nids;gtOffs[2]=nids*nids*2;gtOffs[3]=nids*nids*3;
+ for (i=0;i<nids;i++) {
+ svec[i] = 0.;
+// double * offS = &invS[nids*i];
+ int offS = nids*i;
+ for (j=0;j<nids;j++) {
+// svec[i] += gtctr[j]*offS[j];
+ svec[i] += gtctr[j]*invS[offS+j];
+// svec[i] += gtctr[j]*invS[nids*i+j];
+// svec[i] += OmegaX[offS+j+gtOffs[gt[j]]];
+ }
+ sumYSumGOmega += pheno[i]*svec[i];
+ sumGSumGOmega += gtctr[i]*svec[i];
+ sumGSumOmega += gtctr[i]*sumOmegaJ[i];
+ SumSumGOmega += svec[i];
+ }
+ summand2g = meanG*sumGSumOmega;
+ summand2y = meanG*sumYiSumOmegaij;
+ summand3y = meanY*SumSumGOmega;
+ summand3g = meanG*SumSumGOmega;
+ summand4 = meanY*meanG*sumOmega;
+ if (Ttotg == 0) {
+ chi2[igt] = 0.;
+ chi2[igt+nsnps] = 0.;
+ chi2[igt+2*nsnps] = 0.0001;
+ chi2[igt+3*nsnps] = 0.;
+ chi2[igt+4*nsnps] = 0.;
+ chi2[igt+5*nsnps] = 0.;
+ } else {
+ u = v = 0.;
+ u = sumYSumGOmega - summand2y - summand3y + summand4;
+ v = sumGSumGOmega - summand2g - summand3g + summand4;
+ if (v<1.e-16) {
+ chi2[igt]=0.;
+ chi2[igt+3*nsnps]=0.;
+ } else {
+ chi2[igt]=u*u/v;
+ chi2[igt+3*nsnps]=u/v;
+ }
+ }
+ }
+}
+
+
void grammar(char *gdata, double *pheno, double *invS, int *Nids, int *Nsnps, int *Nstra, int *stra, double *chi2)
{
int nsnps = (*Nsnps);
More information about the Genabel-commits
mailing list