[Genabel-commits] r1817 - in pkg/OmicABELnoMM: . OmicOUT doc examples src tests
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Sep 4 14:16:09 CEST 2014
Author: afrank
Date: 2014-09-04 14:16:09 +0200 (Thu, 04 Sep 2014)
New Revision: 1817
Added:
pkg/OmicABELnoMM/OmicOUT/
pkg/OmicABELnoMM/OmicOUT/main.cpp
Modified:
pkg/OmicABELnoMM/AUTHORS
pkg/OmicABELnoMM/ChangeLog
pkg/OmicABELnoMM/configure.ac
pkg/OmicABELnoMM/doc/howtocompile.txt
pkg/OmicABELnoMM/examples/CreateData.R
pkg/OmicABELnoMM/examples/XL.fvd
pkg/OmicABELnoMM/examples/XL.fvi
pkg/OmicABELnoMM/examples/XR.fvd
pkg/OmicABELnoMM/examples/XR.fvi
pkg/OmicABELnoMM/examples/Y.fvd
pkg/OmicABELnoMM/examples/Y.fvi
pkg/OmicABELnoMM/src/AIOwrapper.cpp
pkg/OmicABELnoMM/src/AIOwrapper.h
pkg/OmicABELnoMM/src/Algorithm.cpp
pkg/OmicABELnoMM/src/Algorithm.h
pkg/OmicABELnoMM/src/Definitions.h
pkg/OmicABELnoMM/src/Utility.cpp
pkg/OmicABELnoMM/src/Utility.h
pkg/OmicABELnoMM/src/main.cpp
pkg/OmicABELnoMM/tests/test.cpp
Log:
First user usable version. Several Bug-fixes. Added parameters to control thresholds of significance. Storing output as readable .txt and binary data. Both .txt and binary have different parametrization thresholds. Text-files should always store less results.Thresholds for text-files cannot include more results than the ones for binary files. Added bigger test files. Added program to transform binary result files to text files with parametrized thresholds. Added HPC functions for sum of squares calculations.
Modified: pkg/OmicABELnoMM/AUTHORS
===================================================================
--- pkg/OmicABELnoMM/AUTHORS 2014-08-28 05:49:17 UTC (rev 1816)
+++ pkg/OmicABELnoMM/AUTHORS 2014-09-04 12:16:09 UTC (rev 1817)
@@ -1,2 +1,2 @@
-Alvaro Jesus Frank: Implementation
+Alvaro Frank: Implementation
Lennart C. Karssen: Minor updates, autotools integration
Modified: pkg/OmicABELnoMM/ChangeLog
===================================================================
--- pkg/OmicABELnoMM/ChangeLog 2014-08-28 05:49:17 UTC (rev 1816)
+++ pkg/OmicABELnoMM/ChangeLog 2014-09-04 12:16:09 UTC (rev 1817)
@@ -0,0 +1,31 @@
+
+TODO
+---------
+Features:
+-Add exclusion lists for single sets of elements of phenotypes
+-Add exclusion lists for single sets of elements of genotypes
+-Compare ID lists of all dvi files to assure correct ordering
+-Allow for runtime dosage models
+
+Optimizations:
+-Remove individuals with covariates missing
+-Reduce memcpy overhead of XR and XR XL factors
+-Reduce computation time of XR and XR XL factors (do GEMMS)
+
+
+
+
+Changes
+-------------
+4 -9 - 2014
+
+First user usable version.
+Several Bug-fixes.
+Added parameters to control thresholds of significance.
+Storing output as readable .txt and binary data.
+Both .txt and binary have different parametrization thresholds.
+Text-files should always store less results.
+Thresholds for text-files cannot include more results than the ones for binary files.
+Added bigger test files.
+Added program to transform binary result files to text files with parametrized thresholds.
+Added HPC functions for sum of squares calculations.
\ No newline at end of file
Added: pkg/OmicABELnoMM/OmicOUT/main.cpp
===================================================================
--- pkg/OmicABELnoMM/OmicOUT/main.cpp (rev 0)
+++ pkg/OmicABELnoMM/OmicOUT/main.cpp 2014-09-04 12:16:09 UTC (rev 1817)
@@ -0,0 +1,483 @@
+#include <boost/math/special_functions/erf.hpp>
+
+#include <fstream>
+#include <stdint.h>
+#include <unistd.h>
+#include <limits.h>
+#include <queue>
+#include <list>
+#include <iostream>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <cstring>
+#include <string>
+#include <math.h>
+#include <getopt.h>
+
+
+using namespace std;
+
+struct result
+{
+ float T;
+ long double P;
+ float SE;
+ float B;
+ int AL_name_idx;
+ int AR_name_idx;
+};
+
+class resultH
+{
+ public:
+ int Y_name_idx;
+ int nUsed;
+ float nUsedPct;
+ float R2;
+ list< result > res_p;
+};
+
+void float32(float* out, const uint16_t in)
+{
+ uint32_t t1 = in;
+ t1 <<= 16;//convert to float again
+ t1 &= 0xffff0000;//keep sign and exponent only
+ *((uint32_t*)out) = t1;
+}
+
+void statistics(float b, float SE, int AL_name_idx, int AR_name_idx, resultH &res, double min_p_disp)
+{
+ long double ptemp;
+
+ long double t;
+ long double one_oversqrt2 = 0.7071067811865475244008443621048490392848359376884740;
+ float t1;
+
+ t=fabs(b/SE);
+
+ if( t < 1.6)
+ {
+ t1=4.4-t;
+ ptemp=0.5-0.1*t1*t;
+ }
+ else
+ {
+ ptemp=erfc(t*one_oversqrt2);
+ }
+
+ result rdata;
+
+ rdata.B = b;
+ rdata.T = t;
+ rdata.P = ptemp;
+ rdata.SE = SE;
+
+ rdata.AR_name_idx = AR_name_idx;
+ rdata.AL_name_idx = AL_name_idx;
+
+ if((double)ptemp < min_p_disp)
+ {
+ res.res_p.push_back(rdata);
+
+ }
+
+}
+
+
+void parse_params(int argc, char *argv[], string &source, string &dest, bool &forceAllRes, bool &includeCov, float &minP, float &minR2 )
+{
+ int c;
+
+ unsigned pos;
+
+ bool out = false;
+ bool bin = false;
+ bool pval = false;
+
+
+
+ while (1)
+ {
+ static struct option long_options[] =
+ { //-c XL --snp XR -p Y -b B -r 2 -t 2
+ // These options set a flag.
+ //{"", no_argument, &verbose_flag, 1},
+ //{"", no_argument, &verbose_flag, 0},
+ // These options don't set a flag.
+ //We distinguish them by their indices.
+ {"binsource", required_argument, 0, 'b'},
+ {"out", required_argument, 0, 'o'},
+
+ {"pdisp", required_argument, 0, 'd'},//
+ {"rdisp", required_argument, 0, 'r'},//
+ {"fdcov", no_argument, 0, 'i'},//
+ {"fdgen", no_argument, 0, 'f'},//
+ {0, 0, 0, 0}
+ };
+
+ // getopt_long stores the option index here.
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "b:o:d:r:fi", long_options, &option_index);
+
+
+ // Detect the end of the options.
+ if (c == -1)
+ break;
+
+ if (!optarg && (c != 'f' && c != 'i'))
+ {
+ cout << "\nerror with argument parameter " << (char)c << endl;
+ exit(1);
+ }
+
+ switch (c)
+ {
+ case 0:
+ /* If this option set a flag, do nothing else now. */
+ if (long_options[option_index].flag != 0)
+ break;
+ printf("Long option %s", long_options[option_index].name);
+ if (optarg)
+ printf (" with arg %s", optarg);
+ printf("\n");
+ break;
+
+
+ case 'o':
+ out = !out;
+ dest = string(optarg);
+
+// dest = string(optarg).find(".");
+// if (pos != string::npos)
+// dest = string(optarg).substr(0, pos);
+
+ cout << "-o Writing output text file to " << dest << endl;
+ break;
+
+ case 'b':
+ bin = !bin;
+ source = string(optarg);
+
+ pos = string(optarg).find(".");
+ if (pos != string::npos)
+ source = string(optarg).substr(0, pos);
+
+ cout << "-o Reading input from file " << source << endl;
+ break;
+
+
+ case 'd':
+ minP = fabs(atof(optarg));
+ pval = true;
+
+ cout << "-d Significance to display in .txt will be P-val's below " << minP << endl;
+ break;
+
+ case 'r':
+ minR2 = (atof(optarg));
+
+ cout << "-r Minimum R2 to display in .txt will be above " << minR2 << endl;
+ break;
+
+ case 'i':
+ includeCov = true;
+
+ cout << "-i Covariate results will be included in results" << endl;
+ break;
+
+ case 'f':
+ forceAllRes = true;
+
+ cout << "-f Forcing all includec results to be considered independently of max P-val or min R2. (Overrides -t and -d, Big file!)"<< endl;
+ break;
+
+ case '?':
+ /* getopt_long already printed an error message. */
+ break;
+
+ default:
+ abort();
+ }
+ }
+
+ /* Print any remaining command line arguments (not options). */
+ if (optind < argc)
+ {
+ printf("non-option ARGV-elements: ");
+ while (optind < argc)
+ printf("%s ", argv[optind++]);
+
+ putchar('\n');
+ }
+
+ //required arguments
+ if (!bin || !out || !pval)
+ {
+ cout << "Missing arguments!:"
+ << " o:"<< out << " b:" << bin << " d:" << pval
+ << endl;
+ exit(1);
+ }
+}
+
+int main(int argc, char *argv[] )
+{
+ string source;
+ string dest;
+ bool forceAllRes = false;
+ bool includeCov = false;
+ float minP;
+ float minR2 = -100.0;
+
+ parse_params(argc,argv,source,dest,forceAllRes,includeCov,minP,minR2);
+
+ ifstream fp_allResults;
+ fp_allResults.open((source + ".dbin").c_str(),ios::in | ios::binary );
+ if(fp_allResults == 0)
+ {
+ cout << "Error reading File "<< (source + ".dbin") << endl;
+ exit(1);
+ }
+ ifstream fp_InfoResults;
+ fp_InfoResults.open((source + ".ibin").c_str(),ios::in | ios::binary );
+ if(fp_InfoResults == 0)
+ {
+ cout << "Error Creating File " << (source + ".ibin") << endl;
+ exit(1);
+ }
+ ofstream fp_sigResults;
+ fp_sigResults.open((dest + ".txt").c_str(),ios::out | ios::trunc);
+ if(fp_sigResults == 0)
+ {
+ cout << "Error Creating File "<< (dest + ".txt") << endl;
+ exit(1);
+ }
+
+
+ int n,l,r,p,t,m,namelength;
+ bool disp_cov, storePInd;
+
+ bool new_disp_cov = includeCov;
+
+ double min_p_disp = minP;
+
+ if(forceAllRes)
+ {
+ min_p_disp = 2.0;
+ minR2 = -100.0;
+ }
+
+ bool only_y_selected = false;
+ list < string > y_selected;//TODO a list of selceted Y to display, onyl these values are meant to be displayed
+
+ char* ALnames;
+ char* ARnames;
+ char* Ynames;
+
+ //info
+ {
+
+ fp_InfoResults.read( (char*)&n,sizeof(int));
+ fp_InfoResults.read( (char*)&l,sizeof(int));
+ fp_InfoResults.read( (char*)&r,sizeof(int));
+ fp_InfoResults.read( (char*)&t,sizeof(int));
+ fp_InfoResults.read( (char*)&m,sizeof(int));
+ fp_InfoResults.read( (char*)&namelength,sizeof(int));
+ fp_InfoResults.read( (char*)&disp_cov,sizeof(bool));
+ fp_InfoResults.read( (char*)&storePInd,sizeof(bool));
+
+ new_disp_cov = new_disp_cov && disp_cov;
+
+ if(!(n >0 && l > 1 && r > 0 && m > 0 && t > 0 && namelength > 0))
+ {
+ cout << "Input data had invalid info data!" << endl;
+ exit(1);
+ }
+
+ if(l>1)
+ {
+ ALnames = new char[namelength*(l-1)];
+ fp_InfoResults.read( ALnames,namelength*(l-1));
+ }
+
+ ARnames = new char[namelength*(r*m)];
+ Ynames = new char[namelength*(t)];
+
+
+
+ fp_InfoResults.read( ARnames,namelength*r*m);
+ fp_InfoResults.read( Ynames,namelength*t);
+
+ }
+
+ p=l+r;
+
+
+
+ //if(storePInd)
+ {
+ int h_max = r;
+ if(disp_cov)
+ h_max+=l-1;
+
+ uint16_t n_Used;
+ float R2;
+ uint16_t R2loaded;
+
+ float B;
+ float SE;
+
+
+ uint8_t AL_name_idx;
+ uint16_t AR_name_idx;
+ int Y_name_idx;
+
+ int size_block = m;
+
+ fp_allResults.seekg (0, fp_allResults.end);
+ unsigned long int max_to_read = fp_allResults.tellg();
+ fp_allResults.seekg (0, fp_allResults.beg);
+
+
+
+ unsigned long int read=0;
+
+ for(int j=0; j < t && read < max_to_read; j++)
+ {
+ Y_name_idx = j;
+
+ int ARoffset=0;
+
+ if(!storePInd)
+ {
+ fp_allResults.read((char*)&Y_name_idx,sizeof(int));
+ fp_allResults.read((char*)&size_block,sizeof(int));
+ fp_allResults.read((char*)&ARoffset,sizeof(int));
+ read+=sizeof(int)*3;
+ }
+
+ if(only_y_selected)
+ {
+ list < string >::iterator iter = find( y_selected.begin(),y_selected.end() , string(&Ynames[Y_name_idx]));
+ if( iter == y_selected.end() )
+ {
+ size_block = 0;//this means that the entire list of X for this y wont be even considered int he for loop below
+ }
+ }
+
+ for(int i=0; i < size_block; i++)
+ {
+ fp_allResults.read((char*)&n_Used,sizeof(uint16_t));
+ fp_allResults.read((char*)&R2loaded,sizeof(uint16_t));
+ read+=sizeof(uint16_t)*2;
+
+ float32(&R2, R2loaded);
+
+ uint8_t size_p = (uint8_t)h_max;
+
+ if(!storePInd)
+ {
+ fp_allResults.read((char*)&size_p,sizeof(uint8_t));
+ read+=sizeof(uint8_t);
+ }
+
+ resultH res;
+ res.R2 = R2;
+ res.nUsed = n_Used;
+ res.Y_name_idx = Y_name_idx;
+ res.nUsedPct = (float)n_Used/(float)n;
+ int p_max = (int) size_p;
+
+
+ for(int h=0; h < p_max; h++)//intercept was not stored so size_p will be at most p-1
+ {
+
+ if(disp_cov && !storePInd)
+ {
+ fp_allResults.read((char*) &AL_name_idx,sizeof(uint8_t));
+ read+=sizeof(uint8_t);
+ }
+ if(!storePInd)
+ {
+ fp_allResults.read((char*) &AR_name_idx,sizeof(uint16_t));
+ read+=sizeof(uint16_t);
+ AR_name_idx += ARoffset;
+ }
+ else
+ {
+ AR_name_idx = i*r+h;
+ if(disp_cov)
+ {
+ AR_name_idx-=l-1;
+ if(h < (l-1))
+ {
+ AR_name_idx = i*r;
+ AL_name_idx = h;
+ }
+ else
+ {
+ AL_name_idx = -1;//255 in ubyte form
+ }
+
+ }
+
+
+ }
+
+ fp_allResults.read((char*)&B,sizeof(float));
+ fp_allResults.read((char*)&SE,sizeof(float));
+
+ read+=sizeof(float)*2;
+
+ bool skip_cov = (!new_disp_cov && AL_name_idx != 255);
+
+ if( !disp_cov || !skip_cov )//-1;//255 in ubyte form
+ statistics(B,SE,AL_name_idx,AR_name_idx,res, min_p_disp);
+ }
+
+
+ for (list< result >::iterator it2=res.res_p.begin(); it2 != res.res_p.end(); ++it2)
+ {
+ if(res.R2 > minR2)
+ {
+ result res_p = *it2;
+
+ string Aname = " ";
+
+ if(new_disp_cov && disp_cov && res_p.AL_name_idx != 255 )
+ {
+ Aname = string(&ALnames[namelength*(res_p.AL_name_idx)]) + ":";
+ }
+
+ Aname += string(&ARnames[namelength*res_p.AR_name_idx]);
+
+
+ fp_sigResults << string(&Ynames[namelength*res.Y_name_idx]) << "\t" << Aname << "\t"
+ << std::fixed << std::setprecision(2) << res.nUsed << "\t" << res.nUsedPct << "\t"
+ << std::setprecision(std :: numeric_limits<float> :: digits10 + 1) << res_p.B << "\t" << std::fixed << std::setprecision(4) << res.R2
+ << "\t" << std::setprecision(std :: numeric_limits<float> :: digits10 + 1) << res_p.SE << "\t" << res_p.T
+ << "\t" << std::scientific << res_p.P << std::fixed << std::setprecision(std :: numeric_limits<float> :: digits10 + 1) << endl;
+ }
+ }
+
+
+
+
+ }
+ }
+ }
+
+
+
+
+
+
+
+
+
+
+
+
+}
Modified: pkg/OmicABELnoMM/configure.ac
===================================================================
--- pkg/OmicABELnoMM/configure.ac 2014-08-28 05:49:17 UTC (rev 1816)
+++ pkg/OmicABELnoMM/configure.ac 2014-09-04 12:16:09 UTC (rev 1817)
@@ -18,7 +18,8 @@
# Set some default compile flags
if test -z "$CXXFLAGS"; then
# User did not set CXXFLAGS, so we can put in our own defaults
- CXXFLAGS=""
+ CXXFLAGS=" -O3"
+ #CXXFLAGS="-g -ggdb"
fi
if test -z "$CPPFLAGS"; then
# User did not set CPPFLAGS, so we can put in our own defaults
@@ -36,8 +37,8 @@
AC_OPENMP
AC_SUBST(AM_CXXFLAGS, "$OPENMP_CFLAGS")
-AM_CXXFLAGS="-O3 -I../libs/include/ -I./libs/include/ $AM_CXXFLAGS"
-
+AM_CXXFLAGS="-static -O3 -I../libs/include/ -I./libs/include/ $AM_CXXFLAGS"
+#AM_CXXFLAGS="-static -I../libs/include/ -I./libs/include/ $AM_CXXFLAGS"
# Checks for libraries.
# pthread library
AC_SEARCH_LIBS([pthread_mutex_init], [pthread], [], [
Modified: pkg/OmicABELnoMM/doc/howtocompile.txt
===================================================================
--- pkg/OmicABELnoMM/doc/howtocompile.txt 2014-08-28 05:49:17 UTC (rev 1816)
+++ pkg/OmicABELnoMM/doc/howtocompile.txt 2014-09-04 12:16:09 UTC (rev 1817)
@@ -71,7 +71,7 @@
-o --out filename file where betas will be written in fv (no need for the .fvd/.fvi, both should have the same name!)
Optional:
--n --ngred if the file in snp has a different model like direct data genotipic (2 columns per snp) specify -n 2, default is 1
+-n --ngpred if the file in snp has a different model like direct data genotipic (2 columns per snp) specify -n 2, default is 1
-t --thr amount of threads to use, use = as the amount of physical processors in the system (max)
@@ -98,8 +98,23 @@
sudo apt-get install liblapacke-dev
+---------------------------Sample Usage-----------------------------
+./omicabelnomm -c examples/XL --geno examples/XR -p examples/Y -o examples/B -n 2 -t 2 -b -i -d 0.09 -r -10 -e -10 -s 0.2
+-c Reading covariates from file examples/XL
+-g Reading with genotype data from file examples/XR
+-p Reading phenotypes from file examples/Y
+-o Writing output files to examples/B
+-n Using columns per snp as 2
+-t Using 2 CPU threads for parallelism
+-b Results will be stored in binary format too
+-i Covariate results will be included in results
+-d Significance to display in .txt will be P-val's below 0.09
+-r Minimum R2 to display in .txt will be above -10
+-e Minimum R2 to store in .bin will be above -10
+-s Significance to store in .bin will be P-val's below 0.2
+
Modified: pkg/OmicABELnoMM/examples/CreateData.R
===================================================================
--- pkg/OmicABELnoMM/examples/CreateData.R 2014-08-28 05:49:17 UTC (rev 1816)
+++ pkg/OmicABELnoMM/examples/CreateData.R 2014-09-04 12:16:09 UTC (rev 1817)
@@ -1,35 +1,73 @@
+rm(list = setdiff(ls(), lsf.str()))
+
library(DatABEL)
-n = 10 # number of individuals
-l = 2 # number of covariates
-m = 10 # number of snps
-t = 5 # number of traits
+n = 1000 # number of individuals
+l = 4 # number of covariates+1 for intercept
+r = 2
+m = r*1000 # number of snps
+t = 1000 # number of traits
+
set.seed(1001)
runif(3)
XL <- matrix(rnorm((l+1)*n),ncol=(l+1)) # first column should be ones (intercept)
-for(i in 1:n*l){ if(sample(1:10,1) > 5) XL[i]=0/0}
+for(i in 1:(n*(l+1))){ if(sample(1:100,1) > 95){XL[i]=0/0} }
+for(i in 1:n){ XL[i]=1}
colnames(XL) <- c("intercept", paste("cov",1:l,sep=""))
rownames(XL) <- paste("ind",1:n,sep="")
XL_db <- matrix2databel(XL,filename="XL",type="FLOAT")
#XL[1:n,1:(l+1)]
-XL
+#XL
+Y <- matrix(rnorm(t*n),ncol=t)
+for(i in 1:(n*t)){ if(sample(1:100,1) > 85) {Y[i]=0/0} }
+colnames(Y) <- paste("ph",1:t,sep="")
+rownames(Y) <- paste("ind",1:n,sep="")
+Y_db <- matrix2databel(Y,filename="Y",type="FLOAT")
+
+#Y[1:n,1:t]
+#Y
+
#prob= TRUE or FALSE
prob=FALSE
if (!prob){
XR <- matrix(rnorm(m*n),ncol=m)
- for(i in 1:n*m){ if(sample(1:10,1) > 5) XR[i]=0/0}
+
+ for(i in 1 + r*(0:((m-2)/r)) )
+ {
+ yIdx=sample(1:t, 1)
+ #print(i)
+ #print(yIdx)
+ for(j in 1:n)
+ {
+ XR[j,i]=Y[j,yIdx]
+ for(k in 1:l)
+ {
+ XR[j,i]=XR[j,i]-XL[j,k]*0.01
+ }
+ for(k in 1:(r-1))
+ {
+ XR[j,i]=XR[j,i]-XR[j,i+k]*0.01
+ }
+ #XR[j,i]=XR[j,i]/2.8888
+ }
+ }
+
+ for(i in 1:(n*m)){ if(sample(1:100,1) > 85) XR[i]=0/0}
+
+
+
colnames(XR) <- paste("snp",1:m,sep="")
rownames(XR) <- paste("ind",1:n,sep="")
XR_db <- matrix2databel(XR,filename="XR",type="FLOAT")
} else{
XR <- matrix(runif(2*m*n),ncol=2*m)
- for(i in 1:n*m){ if(sample(1:10,1) > 5) XR[i]=0/0}
+ for(i in 1:(n*m)){ if(sample(1:100,1) > 85) {XR[i]=0/0}}
colnames(XR) <- paste("snp",(1:(2*m)),sep="")
colnames(XR)[seq(from=1,to=2*m,by=2)] <- paste("snp",1:m,"_11",sep="")
colnames(XR)[seq(from=2,to=2*m,by=2)] <- paste("snp",1:m,"_01",sep="")
@@ -43,13 +81,58 @@
}
#XR[1:n,1:m]
-XR
+#XR
-Y <- matrix(rnorm(t*n),ncol=t)
-for(i in 1:n*t){ if(sample(1:10,1) > 5) Y[i]=0/0}
-colnames(Y) <- paste("ph",1:t,sep="")
-rownames(Y) <- paste("ind",1:n,sep="")
-Y_db <- matrix2databel(Y,filename="Y",type="FLOAT")
+XR[is.nan(XR)] <- 0
+for(i in 1:r )
+{
+ Avg=sum(XR[,i])/(n-sum(0==XR[,i]))
+ XR[,i][ XR[,i] == 0 ] <- Avg
+}
-#Y[1:n,1:t]
-Y
+
+b=c()
+for(k in 1:t )
+{
+ for(i in 1 + r*(0:((m-2)/r)) )
+ {
+ Xtemp=cbind(XL,XR[,i])
+ for(j in 1:(r-1))
+ {
+ Xtemp=cbind(Xtemp,XR[,i+j])
+ }
+
+ to_remove=c()
+ idx=1
+ for(j in 1:n)
+ {
+ if(sum(is.nan(XL[j,])) > 0 || is.nan(Y[j,k]))
+ {
+ to_remove[idx]=j
+ idx=idx+1
+ }
+ }
+ X=Xtemp[-c(to_remove),]
+ y=Y[-c(to_remove),k]
+
+ S=base::t(X)%*%X
+ Xy = base::t(X)%*%y
+
+ btemp=solve(S,Xy)
+ #print(y)
+ #print(X)
+ #print(btemp)
+ b=c(b,btemp)
+
+
+
+
+ }
+}
+
+to.write = file("bpre.fvd", "wb")
+writeBin(b, to.write,size=4)
+close(to.write)
+
+
+
Modified: pkg/OmicABELnoMM/examples/XL.fvd
===================================================================
(Binary files differ)
Modified: pkg/OmicABELnoMM/examples/XL.fvi
===================================================================
(Binary files differ)
Modified: pkg/OmicABELnoMM/examples/XR.fvd
===================================================================
(Binary files differ)
Modified: pkg/OmicABELnoMM/examples/XR.fvi
===================================================================
(Binary files differ)
Modified: pkg/OmicABELnoMM/examples/Y.fvd
===================================================================
(Binary files differ)
Modified: pkg/OmicABELnoMM/examples/Y.fvi
===================================================================
(Binary files differ)
Modified: pkg/OmicABELnoMM/src/AIOwrapper.cpp
===================================================================
--- pkg/OmicABELnoMM/src/AIOwrapper.cpp 2014-08-28 05:49:17 UTC (rev 1816)
+++ pkg/OmicABELnoMM/src/AIOwrapper.cpp 2014-09-04 12:16:09 UTC (rev 1817)
@@ -31,32 +31,79 @@
Fhandler->fakefiles = params.use_fake_files;
-
+ Fhandler->not_done = true;
-
if(!Fhandler->fakefiles)
{
Fhandler->fnameAL = params.fnameAL;
Fhandler->fnameAR = params.fnameAR;
Fhandler->fnameY = params.fnameY;
Fhandler->fnameOutFiles = params.fnameOutFiles;
+
+ Fhandler->storeBin = params.storeBin;
+
+ Fhandler->disp_cov = params.disp_cov;
+ Fhandler->storePInd = params.storePInd;
+
+ Fhandler->min_p_disp = params.minPdisp;
+ Fhandler->min_R2_disp = params.minR2disp;
-
Yfvi = load_databel_fvi( (Fhandler->fnameY+".fvi").c_str() );
ALfvi = load_databel_fvi( (Fhandler->fnameAL+".fvi").c_str() );
ARfvi = load_databel_fvi( (Fhandler->fnameAR+".fvi").c_str() );
params.n = ALfvi->fvi_header.numObservations;
params.m = ARfvi->fvi_header.numVariables/params.r;
params.t = Yfvi->fvi_header.numVariables;
- params.l = ALfvi->fvi_header.numVariables;
+ params.l = ALfvi->fvi_header.numVariables;
+
+ int yname_idx=0;//starting idx for names on ALfvi->data
+ for(int i = 0; i < params.n; i++)
+ {
+ //Nnames.push_back(string(&(Yfvi->fvi_data[yname_idx])));
+ yname_idx += Yfvi->fvi_header.namelength;
+
+ //cout << i << ": " << string(&(Yfvi->fvi_data[yname_idx])) << "\t";
+ //cout << i << ": " << Ynames[i] << "\t";
+ }
+
+
+ for(int i = 0; i < params.t; i++)
+ {
+ Fhandler->Ynames.push_back(string(&(Yfvi->fvi_data[yname_idx])));
+ yname_idx += Yfvi->fvi_header.namelength;
+ }
+
+
+
+ int Aname_idx=params.n*ARfvi->fvi_header.namelength;//skip the names of the rows
+ for(int i = 0; i < params.m*params.r; i++)
+ {
+ Fhandler->ARnames.push_back(string(&(ARfvi->fvi_data[Aname_idx])));
+ Aname_idx += ARfvi->fvi_header.namelength;
+ }
+
+ Aname_idx=params.n*ALfvi->fvi_header.namelength;
+ for(int i = 0; i < params.l; i++)
+ {
+ Fhandler->ALnames.push_back(string(&(ALfvi->fvi_data[Aname_idx])));
+ Aname_idx += ALfvi->fvi_header.namelength;
+
+// cout << i << ": " << string(&(ARfvi->fvi_data[Aname_idx])) << "\t";
+// cout << i << ": " << ALnames[i] << "\t";
+ }
+
+
int opt_tb = 1000;
int opt_mb = 1000;
params.mb = min(params.m, opt_tb);
- params.tb = min(params.t, opt_mb);
+ params.tb = min(params.t, opt_mb);
+
+
+
}
else
{
@@ -78,9 +125,52 @@
params.n -= excl_count;
- params.p = params.l + params.r;
+ params.p = params.l + params.r;
+
+ if(!Fhandler->fakefiles)
+ {
+
+ if(params.l > 255 || params.p > 255 || params.n > 65535)//can remove if fixed for output files
+ {
+ cout << "Warning, output binary format does not yet support current problem sizes for the provided p, l, r and n." << endl;
+ cout << "Omitting outputfile." << endl;
+ }
+
+
+ //!write info file for results
+ ofstream fp_InfoResults;
+
+ fp_InfoResults.open((Fhandler->fnameOutFiles + "_sto.ibin").c_str(),ios::out | ios::binary | ios::trunc);
+ if(fp_InfoResults == 0)
+ {
+ cout << "Error Creating File InfoResults.bin" << endl;
+ exit(1);
+ }
+ fp_InfoResults.write( (char*)¶ms.n,sizeof(int));
+ fp_InfoResults.write( (char*)¶ms.l,sizeof(int));
+ fp_InfoResults.write( (char*)¶ms.r,sizeof(int));
+ fp_InfoResults.write( (char*)¶ms.t,sizeof(int));
+ fp_InfoResults.write( (char*)¶ms.m,sizeof(int));
+ fp_InfoResults.write( (char*)&(ARfvi->fvi_header.namelength),sizeof(int));
+ fp_InfoResults.write( (char*)¶ms.disp_cov,sizeof(bool));
+ fp_InfoResults.write( (char*)¶ms.storePInd,sizeof(bool));
+
+
+ int Aname_idx=(params.n+1)*ALfvi->fvi_header.namelength;//skip the names of the rows + intercept
+ fp_InfoResults.write( (char*)&ALfvi->fvi_data[Aname_idx],ALfvi->fvi_header.namelength*(params.l-1)*sizeof(char));
+
+ Aname_idx=params.n*ARfvi->fvi_header.namelength;//skip the names of the rows
+ fp_InfoResults.write( (char*)&ARfvi->fvi_data[Aname_idx],ARfvi->fvi_header.namelength*params.r*params.m*sizeof(char));
+
+ int Yname_idx=params.n*Yfvi->fvi_header.namelength;//skip the names of the rows
+ fp_InfoResults.write( (char*)&Yfvi->fvi_data[Yname_idx],Yfvi->fvi_header.namelength*params.t*sizeof(char));
+
+
+ }
+
+
//block size to keep mem under 1 gigabyte
// int opt_block = params.n/(4*1000^3)*(1/(2*params.r));
// int opt_tb = max(4*2000,opt_block);
@@ -103,8 +193,6 @@
void AIOwrapper::finalize()
{
- //cout << "f";
- //void *status;
Fhandler->not_done = false;
pthread_mutex_lock(&(Fhandler->m_more));
@@ -128,10 +216,13 @@
pthread_mutex_destroy(&(Fhandler->m_read));
pthread_cond_destroy(&(Fhandler->condition_read));
- delete Fhandler->excl_List;
+ delete Fhandler->excl_List;
+
+
}
+
@@ -149,11 +240,10 @@
struct timespec timeToWait;
FILE* fp_Y;
- FILE* fp_B;
- FILE* fp_R;
- FILE* fp_SD2;
- FILE* fp_P;
- FILE* fp_Ar;
+ FILE* fp_Ar;
+ ofstream fp_sigResults;
+ ofstream fp_allResults;
+
if(!Fhandler->fakefiles)
{
fp_Y = fopen((Fhandler->fnameY+".fvd").c_str(), "rb");
@@ -169,31 +259,33 @@
cout << "Error Reading File Xr " << Fhandler->fnameAR << endl;
exit(1);
}
-
- fp_B = fopen((Fhandler->fnameOutFiles+"_B.fvd").c_str(), "w+b");
- if(fp_B == 0)
+
+
+ fp_sigResults.open((Fhandler->fnameOutFiles + "_dis.txt").c_str(),ios::out | ios::trunc);
+ if(fp_sigResults == 0)
{
- cout << "Error Opening File B " << Fhandler->fnameOutFiles << "_B" << endl;
+ cout << "Error Creating File " << (Fhandler->fnameOutFiles + "_dis.txt") << endl;
exit(1);
}
- fp_R = fopen((Fhandler->fnameOutFiles+"_R.fvd").c_str(), "w+b");
- if(fp_R == 0)
- {
- cout << "Error Opening File R " << Fhandler->fnameOutFiles << "_R" << endl;
- exit(1);
+
+ fp_sigResults << "Phe\tSNP\tn\tnPct\tB\tR2\tSE\tT\tP" << endl;
+
+ //!*************************************************
+
+ if(Fhandler->storeBin)
+ {
+
+ fp_allResults.open((Fhandler->fnameOutFiles + "_sto.dbin").c_str(),ios::out | ios::binary | ios::trunc);
+ if(fp_allResults == 0)
+ {
+ cout << "Error Creating File "<< (Fhandler->fnameOutFiles + "_sto.bin") << endl;
+ exit(1);
+ }
+
}
- fp_SD2 = fopen((Fhandler->fnameOutFiles+"_SD2.fvd").c_str(), "w+b");
- if(fp_SD2 == 0)
- {
- cout << "Error Opening File SD2 " << Fhandler->fnameOutFiles << "_SD2" << endl;
- exit(1);
- }
- fp_P = fopen((Fhandler->fnameOutFiles+"_P.fvd").c_str(), "w+b");
- if(fp_P == 0)
- {
- cout << "Error Opening File P " << Fhandler->fnameOutFiles << "_P" << endl;
- exit(1);
- }
+
+
+
}
else
{
@@ -221,30 +313,7 @@
//fclose(fp_Ar);
delete []tempbuff2;
- fp_B = fopen("tempB.bin", "w+b");
- if(fp_B == 0)
- {
- cout << "Error setting up temp File B " << endl;
- exit(1);
- }
- fp_R = fopen("tempR.bin", "w+b");
- if(fp_R == 0)
- {
- cout << "Error setting up temp File R " << endl;
- exit(1);
- }
- fp_SD2 = fopen("tempSD2.bin", "w+b");
- if(fp_SD2 == 0)
- {
- cout << "Error setting up temp File SD2 " << endl;
- exit(1);
- }
- fp_P = fopen("tempP.bin", "w+b");
- if(fp_P == 0)
- {
- cout << "Error setting up temp File P " << endl;
- exit(1);
- }
+
//cout << "\nEnd preping files\n" << flush;
}
@@ -252,14 +321,14 @@
//pthread_barrier_wait(&(Fhandler->finalize_barrier));//for testing only
- Fhandler->not_done = true;
+ bool Local_not_done = true;
Fhandler->reset_wait = false;
- while(Fhandler->not_done)
+ while(Local_not_done)
{
- while(!Fhandler->empty_buffers.empty() && Fhandler->y_to_readSize)
+ while(!Fhandler->empty_buffers.empty() && Fhandler->y_to_readSize && Fhandler->not_done)
{
tmp_y_blockSize = Fhandler->y_blockSize;
@@ -334,7 +403,7 @@
}
- while(!Fhandler->ar_empty_buffers.empty() && Fhandler->Ar_to_readSize )
+ while(!Fhandler->ar_empty_buffers.empty() && Fhandler->Ar_to_readSize && Fhandler->not_done)
{
@@ -407,41 +476,129 @@
}
//B write
- while(!Fhandler->write_full_buffers.empty())
- {
+ while(!Fhandler->write_full_buffers.empty() && Local_not_done)
+ {
-
pthread_mutex_lock(&(Fhandler->m_buff_upd));
- type_buffElement* tobeWritten = Fhandler->write_full_buffers.front();
+ list < resultH >* tobeWritten = Fhandler->write_full_buffers.front();
Fhandler->write_full_buffers.pop();
- int size = Fhandler->p*Fhandler->b_blockSize;
+
if(Fhandler->fakefiles)
{
- fseek ( fp_B , 0 , SEEK_SET );
- fseek ( fp_R , 0 , SEEK_SET );
- fseek ( fp_SD2 , 0 , SEEK_SET );
- fseek ( fp_P , 0 , SEEK_SET );
+
+ }
+
+ if(!Fhandler->fakefiles && !tobeWritten->empty())
+ {
+
+ if(!Fhandler->storePInd && Fhandler->storeBin)
+ {
+ resultH first = tobeWritten->front();
+ fp_allResults.write( (char*)&first.Y_name_idx,sizeof(int));
+
+ int size_block = tobeWritten->size();
+ fp_allResults.write( (char*)&size_block,sizeof(int));
+ fp_allResults.write( (char*)&first.ARoffset,sizeof(int));
+ }
+
+
+ for (list< resultH >::iterator it=tobeWritten->begin(); it != tobeWritten->end(); ++it)
+ {
+ resultH current = *it;
+ uint8_t size_p = current.res_p.size();
+
+ uint16_t R2=-166;
+ float16(R2, current.R2);
+
+ //cout << current.R2 << ":";
+// current.R2=-1.0;
+//
+// float32(&(current.R2),R2);
+ //cout << current.R2 << "\t";
+
+
+ if(Fhandler->storeBin)
+ {
+ fp_allResults.write( (char*)¤t.nUsed,sizeof(uint16_t));
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/genabel -r 1817
More information about the Genabel-commits
mailing list