[Genabel-commits] r1114 - in pkg/OmicABEL/src: . reshuffle
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Mar 5 12:52:44 CET 2013
Author: sharapovsodbo
Date: 2013-03-05 12:52:43 +0100 (Tue, 05 Mar 2013)
New Revision: 1114
Added:
pkg/OmicABEL/src/reshuffle/
pkg/OmicABEL/src/reshuffle/Parameters.cpp
pkg/OmicABEL/src/reshuffle/Parameters.h
pkg/OmicABEL/src/reshuffle/README.txt
pkg/OmicABEL/src/reshuffle/iout_file.cpp
pkg/OmicABEL/src/reshuffle/iout_file.h
pkg/OmicABEL/src/reshuffle/main.cpp
pkg/OmicABEL/src/reshuffle/reshuffle.cpp
pkg/OmicABEL/src/reshuffle/reshuffle.h
Log:
ver 0.00
without tests
Added: pkg/OmicABEL/src/reshuffle/Parameters.cpp
===================================================================
--- pkg/OmicABEL/src/reshuffle/Parameters.cpp (rev 0)
+++ pkg/OmicABEL/src/reshuffle/Parameters.cpp 2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,117 @@
+/*
+ * Parameters.cpp
+ *
+ * Created on: 28.02.2013
+ * Author: lima
+ */
+#include "Parameters.h"
+#include <stdlib.h>
+#include <string>
+using namespace std;
+
+string sep = "--"; // Set separator for cmdline
+
+// Convert cmdline to string
+string Parameters::get_cmd_line(int argc, char* argv[]) {
+ string cmd_line = "";
+ for (int i = 1; i < argc; i++) {
+ cmd_line += argv[i];
+ }
+ cmd_line += sep; // Need for cmdline parsing
+ return cmd_line;
+}
+//Default constructor
+Parameter::Parameter() {
+ name = "None";
+ use = false;
+ value = "all";
+}
+
+ostream &operator <<(ostream &os, Parameter par) {
+ os << "PARAMETR" << "\t" << par.name << "\t" << "USE" << "\t" << par.use
+ << "\t" << "VALUE" << "\t" << par.value;
+ cout<<"\tValue set ";
+ for (set<int>::iterator it= par.valueset.begin();it!=par.valueset.end();it++)
+ os <<*it<<" ";
+ os<<endl;
+ return os;
+}
+
+ostream &operator <<(ostream &os, Parameters par) {
+ os << "IOUT file is "<<par.iout_fname<<endl;
+ os << "OUT file is "<<par.out_fname<<endl;
+ os << par.datadims;
+ os << par.snpnames;
+ os << par.traitnames;
+ os << par.traits;
+ os << par.snps;
+ os << par.heritabilities;
+ os << par.chi;
+ os << par.dataslim;
+ return os;
+}
+
+//Constructor-parser from command line
+Parameter::Parameter(string cmdline, string paramname) {
+
+ name = paramname;
+ int parpos = cmdline.find(name); // position of substring with param's name
+ if (parpos != string::npos) { // check [FOUND OR NOT]
+ string val = "";
+ unsigned int iter = parpos + name.size() + 1; // iterator, which run from "=" to sep
+ unsigned int seppos = cmdline.find(sep, parpos); // separator position
+ while (iter < seppos) {
+ val += cmdline.at(iter); // build value of parameter
+ iter++;
+ }
+ use = true;
+ if (val.size() != 0) {
+ value = val;
+ string val_d=val+",";
+ string str_tmp="";
+ for(unsigned int i=0;i<val_d.size();i++){
+ if(val_d[i]!=','){
+ str_tmp+=val_d[i];
+ }else {
+ string str2="";
+ int postire=str_tmp.find("-");
+ if(postire!=string::npos){
+ string start="";
+ string end="";
+ for(int i=0;i<postire;i++){
+ start+=str_tmp[i];
+ }
+ for(int i=(postire+1);i<str_tmp.size();i++){
+ end+=str_tmp[i];
+ }
+ for(int i=atoi(start.c_str())-1;i<atoi(end.c_str());i++){
+ valueset.insert(i);
+ }
+ }else valueset.insert(atoi(str_tmp.c_str())-1);
+ str_tmp="";
+ }
+ }
+ } else
+ value = "all"; // default value for parameters is "all"
+ } else { // If parameter aren't in cmdline
+ use = false;
+ value = "None";
+ }
+}
+
+// Constructor, which gets info about all parameters from cmdline
+Parameters::Parameters(int argc,char* argv[]) {
+ string cmdline=get_cmd_line(argc,argv);
+ unsigned int seppos = cmdline.find(sep); // first separator's position
+ string filename = cmdline.substr(0, seppos);
+ iout_fname = filename + ".iout";
+ out_fname = filename + ".out";
+ datadims = Parameter(cmdline, "datadims");
+ snpnames = Parameter(cmdline, "snpnames");
+ traitnames = Parameter(cmdline, "traitnames");
+ traits = Parameter(cmdline, "traits");
+ snps = Parameter(cmdline, "snps");
+ heritabilities = Parameter(cmdline, "heritabilities");
+ chi = Parameter(cmdline, "chi");
+ dataslim= Parameter(cmdline, "dataslim");
+}
Added: pkg/OmicABEL/src/reshuffle/Parameters.h
===================================================================
--- pkg/OmicABEL/src/reshuffle/Parameters.h (rev 0)
+++ pkg/OmicABEL/src/reshuffle/Parameters.h 2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,48 @@
+/*
+ * Parameters.h
+ *
+ * Created on: 28.02.2013
+ * Author: lima
+ */
+
+#ifndef PARAMETERS_H_
+#define PARAMETERS_H_
+
+#include <iostream>
+#include <string>
+#include <set>
+using namespace std;
+
+class Parameter {
+
+public:
+ string name; //name of parametr
+ bool use; // Parameter use or not (Is paameter in command line?)
+ string value; // value of parametr,chars after "=" symbol
+ Parameter(string,string); //constructor
+ Parameter(); //default constructor
+ set<int> valueset;
+};
+
+ostream &operator <<(ostream &, Parameter);
+
+class Parameters {
+public:
+ string iout_fname;
+ string out_fname;
+ Parameter datadims;
+ Parameter snpnames;
+ Parameter traitnames;
+ Parameter traits;
+ Parameter snps;
+ Parameter heritabilities;
+ Parameter chi;
+ Parameter dataslim;
+ Parameters(int, char*[]); // Constructor from cmdline
+ string get_cmd_line(int,char*[]);
+
+};
+
+ostream &operator <<(ostream &, Parameters);
+
+#endif /* PARAMETERS_H_ */
Added: pkg/OmicABEL/src/reshuffle/README.txt
===================================================================
--- pkg/OmicABEL/src/reshuffle/README.txt (rev 0)
+++ pkg/OmicABEL/src/reshuffle/README.txt 2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,60 @@
+Reshuffle ver 0.000
+
+Parameters:
+
+For the first it should be file name withiut iout and out extensions:
+ (--B_eigen_1000)
+ if your files are B_eigen.iout and B_eigen.out
+
+Data dimensions
+ (--datadims) Gives back t, m, p
+output: "datadims//datadims.txt"
+
+SNP names
+ default: (--snpnames) : all
+ by index (--snpnames=27) : name of snp#27
+ by index range, combination (--snpnames=27,2-12,4-20) : name of snp #2-20,27
+output: "snpnames//snpnames.txt"
+
+Trait names
+ default (--traitnames): all
+ by index (--traitnames=27) : name of trait #27
+ by index range, combination (--traitnames=27,2-12,15)
+output: "traitnames//traitnames.txt"
+
+Heritabilities, sigma, res_sigma, estimates
+ Default: (--heritabilities) : all traits
+ Reange,indexes (--heritabilities=1-10,4,5-12) : for traits #1-12
+output: "estimates//estimates.txt"
+Results - association
+ by SNP
+ Dafault (--snp) : for all snp for all traits
+ indexes, ranges (--snp=12,100-1000,500-10000,22000) : for all traits
+ output: "data//[trait].txt"
+ by trait
+ Dafault (--trait) : for all snp for all traits
+ indexes,range (--trait=1-10,12) : for snp #12 for all traits
+ output: "data//[trait].txt"
+ Chi2 more than some threshold
+ (--chi=20) for snps,which chi>20 for all traits
+ output: "chi_data//[trait].txt"
+ All combinations of traits,snps, Chi2
+ (--traits=1-2--snps=1-1000--chi=15) for traits #1-2 for snps#1-1000, which chi>15
+ output: "chi_data//[trait].txt"
+
+Just write data with chi
+ (--chi) : write data(with chi column) for all snps for all traits
+ all combinations are supported
+ output: "chi_data//[trait].txt"
+Dataslim
+ Creating a sub-matrix by Chi2>X (--chi=X--dataslim)
+ output: "slim_data//slim_[trait].txt"
+
+Examples:
+Reshuffle.exe B_1112_NA_clear_RNA_nocovar --traitnames=1-4,1--snpnames=1-10,20-30,46--traits=1--snps=1-100
+
+outputs from B_1112_NA_clear_RNA_nocovar.iout and B_1112_NA_clear_RNA_nocovar.out:
+
+ traitnames//traitnames.txt : with trait's names hgta,tga,tca,ldla
+ snpnames//snpnames.txt Names of snps #1-10,20-30,46
+ data//trait_hgta.txt : result of association for trait "hgta" for snps #1-100
\ No newline at end of file
Added: pkg/OmicABEL/src/reshuffle/iout_file.cpp
===================================================================
--- pkg/OmicABEL/src/reshuffle/iout_file.cpp (rev 0)
+++ pkg/OmicABEL/src/reshuffle/iout_file.cpp 2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,93 @@
+/*
+ * iout_file.cpp
+ *
+ * Created on: 28.02.2013
+ * Author: lima
+ */
+#include "iout_file.h"
+
+iout_file::iout_file(Parameters& Params){
+ //Fill iout_header
+ ifstream iout_f(Params.iout_fname.c_str(), ios::binary | ios::in);
+ if (!iout_f) {
+ cout << "Error opening " << Params.iout_fname << " file" << endl;
+ cout << "Maybe, file doesn't exist" << endl;
+ exit(1);
+ }
+ iout_f.read((char *)&header.type, sizeof(int));
+ iout_f.read((char *)&header.nbytes, sizeof(int));
+ iout_f.read((char *)&header.p, sizeof(int));
+ iout_f.read((char *)&header.m, sizeof(int));
+ iout_f.read((char *)&header.t, sizeof(int));
+ iout_f.read((char *)&header.tile_m, sizeof(int));
+ iout_f.read((char *)&header.tile_t, sizeof(int));
+ iout_f.read((char *)&header.namelength, sizeof(int));
+
+ //Fill labels_data
+ iout_f.seekg(sizeof(header), ios_base::beg); //Change position of read after iout_header
+ labels.number =(header.p * 2 + header.m + header.t + header.p * (header.p - 1) / 2);
+ char *labels_char = new char[labels.number * header.namelength];
+ iout_f.read(labels_char, labels.number * header.namelength);
+ int beta_end = header.p;
+ int se_end = beta_end+header.p;
+ int cov_end = se_end+header.p * (header.p - 1) / 2;
+ int snp_end = cov_end+header.m;
+ int traits_end = snp_end+header.t;
+ int i=0;
+
+ labels.beta=new vector<string>;
+ labels.se=new vector<string>;
+ labels.cov=new vector<string>;
+ labels.snp_names=new vector<string>;
+ labels.trait_names=new vector<string>;
+
+ for (; i < beta_end; i++)
+ (*labels.beta).push_back(string(labels_char + i * header.namelength));
+ for (; i < se_end; i++)
+ (*labels.se).push_back(string(labels_char + i * header.namelength));
+ for (; i < cov_end;i++)
+ (*labels.cov).push_back(string(labels_char + i * header.namelength));
+ for (; i < snp_end; i++)
+ (*labels.snp_names).push_back(string(labels_char + i * header.namelength));
+ for (; i< traits_end; i++)
+ (*labels.trait_names).push_back(string(labels_char + i * header.namelength));
+
+ delete labels_char;
+ iout_f.close();
+}
+
+//Overloading operator cout for iout_header
+ostream &operator <<(ostream &os,iout_header header) {
+ os << "IOUT_HEADER [ type: " << header.type << "]" << endl;
+ os << "IOUT_HEADER [ nbytes: " << header.nbytes << "]" << endl;
+ os << "IOUT_HEADER [ p: " << header.p << "]" << endl;
+ os << "IOUT_HEADER [ m: " << header.m << "]" << endl;
+ os << "IOUT_HEADER [ t: " << header.t << "]" << endl;
+ os << "IOUT_HEADER [ tile_m: " << header.tile_m << "]" << endl;
+ os << "IOUT_HEADER [ tile_t: " << header.tile_t << "]" << endl;
+ os << "IOUT_HEADER [ namelength: " << header.namelength << "]" << endl;
+
+ return os;
+}
+
+ostream &operator <<(ostream &os,labels_data labels) {
+ os <<"NUMBER OF BETAS\t"<<(*labels.beta).size()<<endl;
+ os <<"NUMBER OF SE\t"<<(*labels.se).size()<<endl;
+ os <<"NUMBER OF COVS\t"<<(*labels.cov).size()<<endl;
+ os <<"NUMBER OF SNPNAMES\t"<<(*labels.snp_names).size()<<endl;
+ os <<"NUMBER OF TRAITNAMES\t"<<(*labels.trait_names).size()<<endl;
+}
+
+int iout_file::tilecoordinates(int traitNo, int snpNo) {
+ int tileCoor = 0;
+ int t_tile = traitNo / header.tile_t;
+ int t_off = traitNo % header.tile_t;
+ int m_tile = snpNo / header.tile_m;
+ int m_off = snpNo % header.tile_m;
+ int per_trait_per_snp = header.p + header.p * (header.p + 1) / 2;
+ tileCoor = (t_tile * (header.m * header.tile_t)
+ + m_tile* (header.tile_m* min(header.tile_t,header.t - header.tile_t * t_tile))
+ + t_off * (min(header.tile_m, header.m - header.tile_m * m_tile))
+ + m_off) * per_trait_per_snp * sizeof(double);
+ return tileCoor;
+}
Added: pkg/OmicABEL/src/reshuffle/iout_file.h
===================================================================
--- pkg/OmicABEL/src/reshuffle/iout_file.h (rev 0)
+++ pkg/OmicABEL/src/reshuffle/iout_file.h 2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,58 @@
+/*
+ * iout_file.h
+ *
+ * Created on: 28.02.2013
+ * Author: lima
+ */
+
+#ifndef IOUT_FILE_H_
+#define IOUT_FILE_H_
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+#include "Parameters.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+
+using namespace std;
+
+//Contain meta data
+class iout_header {
+public:
+ int type; // Double -> 6 as in Databel
+ int nbytes; // Double -> 8
+ int p; // # of covariates + 2 (intercept and SNP)
+ int m; // # of SNPs
+ int t; // # of traits
+ int tile_m; // More on these two later
+ int tile_t;
+ int namelength; // 32 as in Databel
+
+};
+
+//Overloading "cout" operator
+ostream &operator <<(ostream &,iout_header);
+
+//Contain labels
+class labels_data {
+public:
+ int number; //number of labels
+ vector<string>* beta;
+ vector<string>* se;
+ vector<string>* cov;
+ vector<string>* snp_names;
+ vector<string>* trait_names;
+};
+
+ostream &operator <<(ostream &,labels_data);
+
+class iout_file{
+public:
+ iout_header header;
+ labels_data labels;
+ iout_file(Parameters &);
+ int tilecoordinates(int, int);
+};
+#endif /* IOUT_FILE_H_ */
Added: pkg/OmicABEL/src/reshuffle/main.cpp
===================================================================
--- pkg/OmicABEL/src/reshuffle/main.cpp (rev 0)
+++ pkg/OmicABEL/src/reshuffle/main.cpp 2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,29 @@
+/*
+ * main.cpp
+ *
+ * Created on: 31.01.2013
+ * Author: Ñîäáî
+ */
+#include <iostream>
+#include <ctime>
+#include "iout_file.h"
+#include "Parameters.h"
+#include "Reshuffle.h"
+#include <iterator>
+
+
+using namespace std;
+
+int main(int argc, char* argv[]) {
+ cout << "Every day I'm [re]shuffling!" << endl;
+ Parameters Params(argc, argv);
+ cout << Params;
+ iout_file iout_F(Params);
+ cout << "finish iout_file read " << double(clock()) / CLOCKS_PER_SEC << endl;
+ cout<<iout_F.header;
+ cout<<iout_F.labels;
+ Reshuffle reshh(iout_F,Params);
+ reshh.run();
+ cout << "finish_reshuffling " << double(clock()) / CLOCKS_PER_SEC << endl;
+ return 0;
+}
Added: pkg/OmicABEL/src/reshuffle/reshuffle.cpp
===================================================================
--- pkg/OmicABEL/src/reshuffle/reshuffle.cpp (rev 0)
+++ pkg/OmicABEL/src/reshuffle/reshuffle.cpp 2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,349 @@
+/*
+ * reshuffle.cpp
+ *
+ * Created on: 01.03.2013
+ * Author: lima
+ */
+#include "reshuffle.h"
+#include <dir.h>
+#include <ctime>
+#include <math.h>
+#include <iterator>
+#include <list>
+
+using namespace std;
+
+#define PRECISION_DOUBLE 15//Precision of double
+
+Reshuffle::Reshuffle(iout_file &iout,Parameters &Params){
+ p_iout_file = &iout;
+ p_Parameters = &Params;
+ per_trait_per_snp = (*p_iout_file).header.p + (*p_iout_file).header.p * ((*p_iout_file).header.p + 1) / 2;
+ herest_startpos = (p_iout_file->header.m * p_iout_file->header.t*
+ (p_iout_file->header.p + p_iout_file->header.p * (p_iout_file->header.p + 1) / 2))
+ * sizeof(double);
+}
+
+string Reshuffle::create_filename(string prename, string name) {
+ string path = prename + "_"+name+".txt";
+ return path;
+}
+
+void Reshuffle::write_datadims(ofstream& txt_datadims){
+
+ txt_datadims << "Number of traits\t" << (*p_iout_file).header.t << endl;
+ txt_datadims << "Number of SNP\t" << (*p_iout_file).header.m << endl;
+ txt_datadims << "Number of covariates\t" << ((*p_iout_file).header.p - 2);
+}
+
+void Reshuffle::write_snpnames(ofstream& txt_snpnames){
+
+ if ((*p_Parameters).snpnames.value == "all"){
+ for (int i=0;i<(*(*p_iout_file).labels.snp_names).size();i++)
+ (*p_Parameters).snpnames.valueset.insert(i);
+ cout<<"SNPNAMES VALUE SET CHANGED TO ALL"<<endl;
+ }
+ for(set<int>::iterator it= (*p_Parameters).snpnames.valueset.begin();it!=(*p_Parameters).snpnames.valueset.end();++it)
+ txt_snpnames << "SNP #"<<(*it+1)<<"\t"<<(*(*p_iout_file).labels.snp_names)[*it]<<endl;
+ cout<<"END WRITE SNPNAMES"<<endl;
+}
+
+void Reshuffle::write_traitnames(ofstream& txt_traitnames){
+
+ if ((*p_Parameters).traitnames.value == "all"){
+ for (int i=0;i<(*(*p_iout_file).labels.trait_names).size();i++)
+ (*p_Parameters).traitnames.valueset.insert(i);
+ cout<<"TRAITNAMES VALUE SET CHANGED TO ALL"<<endl;
+ }
+ for(std::set<int>::iterator it= (*p_Parameters).traitnames.valueset.begin();it!=p_Parameters->traitnames.valueset.end();++it)
+ txt_traitnames<<"TRAIT #"<<(*it+1)<<"\t"<<(*(*p_iout_file).labels.trait_names)[*it]<<endl;
+ cout<<"END WRITE TRAITNAMES"<<endl;
+}
+
+void Reshuffle::write_data(ifstream& out_file){
+ out_file.seekg(0, ios_base::beg);
+ cout << "startwritetxt=" << double(clock()) / CLOCKS_PER_SEC << endl;
+ for (set<int>::iterator trait= (*p_Parameters).traits.valueset.begin();trait!=(*p_Parameters).traits.valueset.end();trait++) {
+ ofstream txt_trait(create_filename("data//trait", (*(*p_iout_file).labels.trait_names)[*trait]).c_str());
+ //Set precision of double
+ cout<<(*(*p_iout_file).labels.trait_names)[*trait]<<endl;
+ txt_trait.precision(PRECISION_DOUBLE);
+ txt_trait << "\t";
+ for (int beta = 0; beta < (*(*p_iout_file).labels.beta).size(); beta++)
+ txt_trait << (*(*p_iout_file).labels.beta)[beta] << "\t";
+ for (int se = 0;se < (*(*p_iout_file).labels.se).size(); se++)
+ txt_trait << (*(*p_iout_file).labels.se)[se] << "\t";
+ for (int cov = 0;cov < (*(*p_iout_file).labels.cov).size(); cov++)
+ txt_trait << (*(*p_iout_file).labels.cov)[cov] << "\t";
+ txt_trait << endl;
+ cout << "endwritetrait_colnames" << *trait << clock() << endl;
+ double* buf = new double[per_trait_per_snp];
+ int oldPos = 0;
+ char s[30];
+ for (set<int>::iterator snp= (*p_Parameters).snps.valueset.begin();snp!=(*p_Parameters).snps.valueset.end();snp++) {
+ txt_trait << (*(*p_iout_file).labels.snp_names)[*snp] << "\t";
+ int pos = (*p_iout_file).tilecoordinates(*trait, *snp);
+ //cout << oldPos << "-" << pos << endl;
+ if(pos != oldPos)
+ {
+ out_file.seekg(pos,ios_base::beg);
+ }
+ oldPos=pos+sizeof(double)*per_trait_per_snp;
+ out_file.read((char *)buf, sizeof(double)*per_trait_per_snp);
+ for (int i = 0; i < per_trait_per_snp; i++) {
+ sprintf(s, "%.15g", buf[i]);
+ txt_trait << s << "\t";
+ }
+ txt_trait << endl;
+ }
+ delete buf;
+ txt_trait.close();
+ cout << "endwritetrait " << (*(*p_iout_file).labels.trait_names)[*trait] << " "<< double(clock()) / CLOCKS_PER_SEC << endl;
+ }
+ cout << "finishwritetxt=" << double(clock()) / CLOCKS_PER_SEC << endl;
+}
+
+void Reshuffle::write_data_chi(ifstream& out_file){
+ out_file.seekg(0, ios_base::beg);
+ double chi = 0;
+ double CheckChi = (*p_Parameters).chi.value == "all" ? -1.0 : atof((*p_Parameters).chi.value.c_str());
+ cout << "startwritetxt=" << double(clock()) / CLOCKS_PER_SEC << endl;
+ for (set<int>::iterator trait= (*p_Parameters).traits.valueset.begin();trait!=(*p_Parameters).traits.valueset.end();trait++) {
+ ofstream txt_chi(create_filename("chi_data//chi", (*(*p_iout_file).labels.trait_names)[*trait]).c_str());
+ //Set precision of double
+ txt_chi.precision(PRECISION_DOUBLE);
+ txt_chi << "\t";
+ for (int beta = 0; beta < (*(*p_iout_file).labels.beta).size(); beta++)
+ txt_chi << (*(*p_iout_file).labels.beta)[beta] << "\t";
+ for (int se = 0;se < (*(*p_iout_file).labels.se).size(); se++)
+ txt_chi << (*(*p_iout_file).labels.se)[se] << "\t";
+ for (int cov = 0;cov < (*(*p_iout_file).labels.cov).size(); cov++)
+ txt_chi << (*(*p_iout_file).labels.cov)[cov] << "\t";
+ txt_chi << "Chi2" << endl;
+ double* buf = new double[per_trait_per_snp];
+ int oldPos = 0;
+ char s[30];
+ for (set<int>::iterator snp= (*p_Parameters).snps.valueset.begin();snp!=(*p_Parameters).snps.valueset.end();snp++) {
+ int pos = (*p_iout_file).tilecoordinates(*trait, *snp);
+ //cout << oldPos << "-" << pos << endl;
+ if(pos != oldPos)
+ {
+ out_file.seekg(pos,ios_base::beg);
+ }
+ oldPos=pos+sizeof(double)*per_trait_per_snp;
+ out_file.read((char *)buf, sizeof(double)*per_trait_per_snp);
+ chi=pow((buf[(*(*p_iout_file).labels.beta).size()-1]/buf[(*(*p_iout_file).labels.beta).size()+(*(*p_iout_file).labels.se).size()-1]),2);
+ if(chi>CheckChi){
+ txt_chi << (*(*p_iout_file).labels.snp_names)[*snp] << "\t";
+ for (int i = 0; i < per_trait_per_snp; i++) {
+ sprintf(s, "%.15g", buf[i]);
+ txt_chi << s << "\t";
+ }
+ txt_chi << chi << endl;
+ }
+ }
+ delete buf;
+ txt_chi.close();
+ cout << "endwritechitrait " << (*(*p_iout_file).labels.trait_names)[*trait] << " "<< double(clock()) / CLOCKS_PER_SEC << endl;
+ }
+ cout << "finishwritechitxt=" << double(clock()) / CLOCKS_PER_SEC << endl;
+}
+
+void Reshuffle::write_slim_data(ifstream& out_file){
+ out_file.seekg(0, ios_base::beg);
+ set<int> goodtraits;
+ set<int> goodsnps;
+ double chi = 0;
+ if((*p_Parameters).chi.value=="all"||(*p_Parameters).chi.value=="None"){
+ cout << "ERROR" << "Chi value doesn't set"<<endl;
+ cout << "Please, set Chi value to get slim data" << endl;
+ exit(1);
+ }
+ double CheckChi = atof((*p_Parameters).chi.value.c_str());
+ for (set<int>::iterator trait= (*p_Parameters).traits.valueset.begin();trait!=(*p_Parameters).traits.valueset.end();trait++) {
+ double* buf = new double[per_trait_per_snp];
+ int oldPos = 0;
+ char s[30];
+ for (set<int>::iterator snp= (*p_Parameters).snps.valueset.begin();snp!=(*p_Parameters).snps.valueset.end();snp++) {
+ int pos = (*p_iout_file).tilecoordinates(*trait, *snp);
+ //cout << oldPos << "-" << pos << endl;
+ if(pos != oldPos)
+ {
+ out_file.seekg(pos,ios_base::beg);
+ }
+ oldPos=pos+sizeof(double)*per_trait_per_snp;
+ out_file.read((char *)buf, sizeof(double)*per_trait_per_snp);
+ chi=pow((buf[(*(*p_iout_file).labels.beta).size()-1]/buf[(*(*p_iout_file).labels.beta).size()+(*(*p_iout_file).labels.se).size()-1]),2);
+ if(chi>CheckChi){
+ goodtraits.insert(*trait);
+ goodsnps.insert(*snp);
+ }
+ }
+ delete buf;
+ }
+ out_file.seekg(0, ios_base::beg);
+ for (set<int>::iterator trait= goodtraits.begin();trait!=goodtraits.end();trait++) {
+ ofstream txt_slim(create_filename("slimdata//slim", (*(*p_iout_file).labels.trait_names)[*trait]).c_str());
+ //Set precision of double
+ txt_slim.precision(PRECISION_DOUBLE);
+ txt_slim << "\t";
+ for (int beta = 0; beta < (*(*p_iout_file).labels.beta).size(); beta++)
+ txt_slim << (*(*p_iout_file).labels.beta)[beta] << "\t";
+ for (int se = 0;se < (*(*p_iout_file).labels.se).size(); se++)
+ txt_slim << (*(*p_iout_file).labels.se)[se] << "\t";
+ for (int cov = 0;cov < (*(*p_iout_file).labels.cov).size(); cov++)
+ txt_slim << (*(*p_iout_file).labels.cov)[cov] << "\t";
+ txt_slim << "Chi2" << endl;
+ double* buf = new double[per_trait_per_snp];
+ int oldPos = 0;
+ char s[30];
+ for (set<int>::iterator snp= goodsnps.begin();snp!=goodsnps.end();snp++) {
+ txt_slim << (*(*p_iout_file).labels.snp_names)[*snp] << "\t";
+ int pos = (*p_iout_file).tilecoordinates(*trait, *snp);
+ //cout << oldPos << "-" << pos << endl;
+ if(pos != oldPos)
+ {
+ out_file.seekg(pos,ios_base::beg);
+ }
+ oldPos=pos+sizeof(double)*per_trait_per_snp;
+ out_file.read((char *)buf, sizeof(double)*per_trait_per_snp);
+ chi=pow((buf[(*(*p_iout_file).labels.beta).size()-1]/buf[(*(*p_iout_file).labels.beta).size()+(*(*p_iout_file).labels.se).size()-1]),2);
+ for (int i = 0; i < per_trait_per_snp; i++) {
+ sprintf(s, "%.15g", buf[i]);
+ txt_slim << s << "\t";
+ }
+ txt_slim << chi << endl;
+ }
+ delete buf;
+ txt_slim.close();
+ }
+}
+
+int Reshuffle::est_shift(int counter){
+ int shift = ((p_iout_file->header.m * p_iout_file->header.t* (p_iout_file->header.p +
+ p_iout_file->header.p * (p_iout_file->header.p + 1) / 2))
+ +counter*p_iout_file->header.t)* sizeof(double);
+ return shift;
+}
+
+int Reshuffle::est_beta_shift(int counter){
+ int shift = est_shift(3)+counter*(p_iout_file->header.p-1)*sizeof(double);
+ return shift;
+}
+
+void Reshuffle::write_herest(ifstream& out_file){
+ ofstream txt_est("estimates//estimates.txt");
+ out_file.seekg(herest_startpos, ios_base::beg);
+ if (p_Parameters->heritabilities.value == "all")
+ for(int i=0;i<(*(p_iout_file->labels.trait_names)).size();i++)
+ p_Parameters->heritabilities.valueset.insert(i);
+ txt_est.precision(PRECISION_DOUBLE);
+ txt_est<<"\t";
+ for (set<int>::iterator trait= p_Parameters->heritabilities.valueset.begin();trait!=p_Parameters->heritabilities.valueset.end();trait++)
+ txt_est << (*(p_iout_file->labels.trait_names))[*trait] << "\t";
+ txt_est << endl;
+ list<string> est_names;
+ est_names.insert(est_names.end(), "heritabilities");
+ est_names.insert(est_names.end(), "sigma");
+ est_names.insert(est_names.end(), "res_sigma");
+ double tmp_number = 0;
+ int counter=0;
+ for (list<string>::iterator name = est_names.begin();name != est_names.end(); ++name) {
+ txt_est << *name << "\t";
+ for (std::set<int>::iterator trait= p_Parameters->heritabilities.valueset.begin();trait!=p_Parameters->heritabilities.valueset.end();++trait) {
+ out_file.seekg(*trait*sizeof(double),ios_base::cur);
+ out_file.read((char *) &tmp_number, sizeof(double));
+ txt_est << tmp_number << "\t";
+ out_file.seekg(est_shift(counter), ios_base::beg);
+ }
+ counter++;
+ txt_est << "\n";
+ out_file.seekg(est_shift(counter), ios_base::beg);
+ }
+ out_file.seekg(est_shift(3), ios_base::beg);
+ counter=0;
+ for (int beta=0;beta<(*(p_iout_file->labels.beta)).size();beta++) {
+ beta++;
+ if (beta != (*(p_iout_file->labels.beta)).size()) {
+ beta--;
+ txt_est << (*(p_iout_file->labels).beta)[beta] << "\t";
+ for (std::set<int>::iterator trait= p_Parameters->heritabilities.valueset.begin();trait!=p_Parameters->heritabilities.valueset.end();++trait) {
+ out_file.seekg(*trait*sizeof(double),ios_base::cur);
+ out_file.read((char *) &tmp_number, sizeof(double));
+ txt_est << tmp_number << "\t";
+ out_file.seekg(est_beta_shift(counter), ios_base::beg);
+ }
+ counter++;
+ out_file.seekg(est_beta_shift(counter), ios_base::beg);
+ txt_est << "\n";
+ beta++;
+ }
+ }
+}
+
+void Reshuffle::run(){
+ if((*p_Parameters).datadims.use){
+ mkdir("datadims");
+ ofstream datadims("datadims//datadims.txt");
+ write_datadims(datadims);
+ }
+ if((*p_Parameters).snpnames.use){
+ mkdir("snpnames");
+ ofstream snpnames("snpnames//snpnames.txt");
+ write_snpnames(snpnames);
+ }
+
+ if((*p_Parameters).traitnames.use){
+ mkdir("traitnames");
+ ofstream* traitnames = new ofstream("traitnames//traitnames.txt");
+ write_traitnames(*traitnames);
+ delete traitnames;
+ }
+
+ //Open *.out to read data
+ ifstream out_file((*p_Parameters).out_fname.c_str(), ios::binary | ios::in);
+ if (!out_file) {
+ cout << "Error open " << (*p_Parameters).out_fname << " file";
+ cout << "Maybe, file doesn't exist" << endl;
+ exit(1);
+ }
+
+ //If any of parameters traits||snps||chi use, this block fill traits.valueset and snps.valueset
+ //(if their values are default all)
+ if((*p_Parameters).traits.use||(*p_Parameters).snps.use||(*p_Parameters).chi.use){
+
+ if((*p_Parameters).traits.value=="all"||(*p_Parameters).traits.value=="None"){
+ for(int i=0;i<(*(*p_iout_file).labels.trait_names).size();i++)
+ (*p_Parameters).traits.valueset.insert(i);
+ cout<<"TRAITS VALUE SET CHANGED TO ALL"<<endl;
+ }
+
+ if((*p_Parameters).snps.value=="all"||(*p_Parameters).snps.value=="None"){
+ for(int i=0;i<(*(*p_iout_file).labels.snp_names).size();i++)
+ (*p_Parameters).snps.valueset.insert(i);
+ cout<<"SNPS VALUE SET CHANGED TO ALL"<<endl;
+ }
+ }
+
+ if(((*p_Parameters).traits.use||(*p_Parameters).snps.use)&&!(*p_Parameters).chi.use){
+ mkdir("data");
+ write_data(out_file);
+ }
+
+ if((*p_Parameters).chi.use&&!(*p_Parameters).dataslim.use){
+ mkdir("chi_data");
+ write_data_chi(out_file);
+
+ }
+
+ if((*p_Parameters).dataslim.use){
+ mkdir("slimdata");
+ write_slim_data(out_file);
+
+ }
+
+ if((*p_Parameters).heritabilities.use){
+ mkdir("estimates");
+ write_herest(out_file);
+ }
+}
Added: pkg/OmicABEL/src/reshuffle/reshuffle.h
===================================================================
--- pkg/OmicABEL/src/reshuffle/reshuffle.h (rev 0)
+++ pkg/OmicABEL/src/reshuffle/reshuffle.h 2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,37 @@
+/*
+ * reshuffle.h
+ *
+ * Created on: 01.03.2013
+ * Author: lima
+ */
+
+#ifndef RESHUFFLE_H_
+#define RESHUFFLE_H_
+#include "Parameters.h"
+#include "iout_file.h"
+
+using namespace std;
+
+class Reshuffle{
+public:
+ iout_file * p_iout_file;
+ Parameters * p_Parameters;
+ int per_trait_per_snp;
+ set<int> traits2write;
+ set<int> snps2write;
+ string create_filename(string, string);
+ void write_datadims(ofstream&);
+ void write_snpnames(ofstream&);
+ void write_traitnames(ofstream&);
+ void write_data(ifstream&);
+ void write_data_chi(ifstream&);
+ void write_slim_data(ifstream&);
+ void write_herest(ifstream&);
+ int herest_startpos;
+ int est_shift(int);
+ int est_beta_shift(int);
+ void run(Parameters);
+ Reshuffle(iout_file &,Parameters &);
+ void run();
+};
+#endif /* RESHUFFLE_H_ */
More information about the Genabel-commits
mailing list