[Genabel-commits] r1114 - in pkg/OmicABEL/src: . reshuffle

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Mar 5 12:52:44 CET 2013


Author: sharapovsodbo
Date: 2013-03-05 12:52:43 +0100 (Tue, 05 Mar 2013)
New Revision: 1114

Added:
   pkg/OmicABEL/src/reshuffle/
   pkg/OmicABEL/src/reshuffle/Parameters.cpp
   pkg/OmicABEL/src/reshuffle/Parameters.h
   pkg/OmicABEL/src/reshuffle/README.txt
   pkg/OmicABEL/src/reshuffle/iout_file.cpp
   pkg/OmicABEL/src/reshuffle/iout_file.h
   pkg/OmicABEL/src/reshuffle/main.cpp
   pkg/OmicABEL/src/reshuffle/reshuffle.cpp
   pkg/OmicABEL/src/reshuffle/reshuffle.h
Log:
ver 0.00 
without tests

Added: pkg/OmicABEL/src/reshuffle/Parameters.cpp
===================================================================
--- pkg/OmicABEL/src/reshuffle/Parameters.cpp	                        (rev 0)
+++ pkg/OmicABEL/src/reshuffle/Parameters.cpp	2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,117 @@
+/*
+ * Parameters.cpp
+ *
+ *  Created on: 28.02.2013
+ *      Author: lima
+ */
+#include "Parameters.h"
+#include <stdlib.h>
+#include <string>
+using namespace std;
+
+string sep = "--"; // Set separator for cmdline
+
+//	Convert cmdline to string
+string Parameters::get_cmd_line(int argc, char* argv[]) {
+	string cmd_line = "";
+	for (int i = 1; i < argc; i++) {
+		cmd_line += argv[i];
+	}
+	cmd_line += sep; // Need for cmdline parsing
+	return cmd_line;
+}
+//Default constructor
+Parameter::Parameter() {
+	name = "None";
+	use = false;
+	value = "all";
+}
+
+ostream &operator <<(ostream &os, Parameter par) {
+	os << "PARAMETR" << "\t" << par.name << "\t" << "USE" << "\t" << par.use
+			<< "\t" << "VALUE" << "\t" << par.value;
+	cout<<"\tValue set ";
+	for (set<int>::iterator it= par.valueset.begin();it!=par.valueset.end();it++)
+		os <<*it<<" ";
+	os<<endl;
+	return os;
+}
+
+ostream &operator <<(ostream &os, Parameters par) {
+	os << "IOUT file is "<<par.iout_fname<<endl;
+	os << "OUT file is "<<par.out_fname<<endl;
+	os << par.datadims;
+	os << par.snpnames;
+	os << par.traitnames;
+	os << par.traits;
+	os << par.snps;
+	os << par.heritabilities;
+	os << par.chi;
+	os << par.dataslim;
+	return os;
+}
+
+//Constructor-parser from command line
+Parameter::Parameter(string cmdline, string paramname) {
+
+	name = paramname;
+	int parpos = cmdline.find(name); //	position of substring with param's name
+	if (parpos != string::npos) { //	check  [FOUND OR NOT]
+		string val = "";
+		unsigned int iter = parpos + name.size() + 1; //	iterator, which run from "=" to sep
+		unsigned int seppos = cmdline.find(sep, parpos); // 	separator position
+		while (iter < seppos) {
+			val += cmdline.at(iter); // build value of parameter
+			iter++;
+		}
+		use = true;
+		if (val.size() != 0) {
+			value = val;
+			string val_d=val+",";
+			string str_tmp="";
+			for(unsigned int i=0;i<val_d.size();i++){
+				if(val_d[i]!=','){
+				str_tmp+=val_d[i];
+				}else {
+					string str2="";
+					int postire=str_tmp.find("-");
+					if(postire!=string::npos){
+						string start="";
+						string end="";
+						for(int i=0;i<postire;i++){
+							start+=str_tmp[i];
+						}
+						for(int i=(postire+1);i<str_tmp.size();i++){
+							end+=str_tmp[i];
+						}
+						for(int i=atoi(start.c_str())-1;i<atoi(end.c_str());i++){
+							valueset.insert(i);
+						}
+					}else valueset.insert(atoi(str_tmp.c_str())-1);
+					str_tmp="";
+				}
+			}
+		} else
+			value = "all"; // default value for parameters is "all"
+	} else { // If parameter aren't in cmdline
+		use = false;
+		value = "None";
+	}
+}
+
+//	Constructor, which gets info about all parameters from cmdline
+Parameters::Parameters(int argc,char* argv[]) {
+	string cmdline=get_cmd_line(argc,argv);
+	unsigned int seppos = cmdline.find(sep); // 	first separator's position
+	string filename = cmdline.substr(0, seppos);
+	iout_fname = filename + ".iout";
+	out_fname = filename + ".out";
+	datadims = Parameter(cmdline, "datadims");
+	snpnames = Parameter(cmdline, "snpnames");
+	traitnames = Parameter(cmdline, "traitnames");
+	traits = Parameter(cmdline, "traits");
+	snps = Parameter(cmdline, "snps");
+	heritabilities = Parameter(cmdline, "heritabilities");
+	chi = Parameter(cmdline, "chi");
+	dataslim= Parameter(cmdline, "dataslim");
+}

Added: pkg/OmicABEL/src/reshuffle/Parameters.h
===================================================================
--- pkg/OmicABEL/src/reshuffle/Parameters.h	                        (rev 0)
+++ pkg/OmicABEL/src/reshuffle/Parameters.h	2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,48 @@
+/*
+ * Parameters.h
+ *
+ *  Created on: 28.02.2013
+ *      Author: lima
+ */
+
+#ifndef PARAMETERS_H_
+#define PARAMETERS_H_
+
+#include <iostream>
+#include <string>
+#include <set>
+using namespace std;
+
+class Parameter {
+
+public:
+	string name; 	//name of parametr
+	bool use; 	// Parameter use or not (Is paameter in command line?)
+	string value; 	// value of parametr,chars after "=" symbol
+	Parameter(string,string); 	//constructor
+	Parameter();		//default constructor
+	set<int> valueset;
+};
+
+ostream &operator <<(ostream &, Parameter);
+
+class Parameters {
+public:
+	string iout_fname;
+	string out_fname;
+	Parameter datadims;
+	Parameter snpnames;
+	Parameter traitnames;
+	Parameter traits;
+	Parameter snps;
+	Parameter heritabilities;
+	Parameter chi;
+	Parameter dataslim;
+	Parameters(int, char*[]);		//	Constructor from cmdline
+	string get_cmd_line(int,char*[]);
+
+};
+
+ostream &operator <<(ostream &, Parameters);
+
+#endif /* PARAMETERS_H_ */

Added: pkg/OmicABEL/src/reshuffle/README.txt
===================================================================
--- pkg/OmicABEL/src/reshuffle/README.txt	                        (rev 0)
+++ pkg/OmicABEL/src/reshuffle/README.txt	2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,60 @@
+Reshuffle ver 0.000
+
+Parameters:
+
+For the first it should be file name withiut iout and out extensions:
+	(--B_eigen_1000) 
+	if your files are B_eigen.iout and B_eigen.out
+
+Data dimensions 
+	(--datadims)  Gives back t, m, p
+output: "datadims//datadims.txt"
+
+SNP names
+	default: (--snpnames) : all
+	by index (--snpnames=27) : name of snp#27
+	by index range, combination (--snpnames=27,2-12,4-20) : name of snp #2-20,27
+output: "snpnames//snpnames.txt"
+	
+Trait names 
+	default (--traitnames): all
+	by index (--traitnames=27) : name of trait #27
+	by index range, combination (--traitnames=27,2-12,15)
+output: "traitnames//traitnames.txt"
+
+Heritabilities, sigma, res_sigma, estimates
+	Default: (--heritabilities) : all traits
+	Reange,indexes (--heritabilities=1-10,4,5-12) : for traits #1-12
+output: "estimates//estimates.txt"	
+Results - association
+	by SNP
+		Dafault (--snp) : for all snp for all traits
+		indexes, ranges (--snp=12,100-1000,500-10000,22000) :  for all traits
+	output: "data//[trait].txt"
+	by trait
+		Dafault (--trait) : for all snp for all traits
+		indexes,range (--trait=1-10,12) : for snp #12 for all traits
+	output: "data//[trait].txt"
+	Chi2 more than some threshold
+		(--chi=20) for snps,which chi>20 for all traits
+	output: "chi_data//[trait].txt"
+	All combinations of traits,snps, Chi2
+		(--traits=1-2--snps=1-1000--chi=15) for traits #1-2 for snps#1-1000, which chi>15
+		output: "chi_data//[trait].txt"
+		
+Just write data with chi
+	(--chi) : write data(with chi column) for all snps for all traits
+	all combinations are supported
+	output: "chi_data//[trait].txt"
+Dataslim
+	Creating a sub-matrix by Chi2>X (--chi=X--dataslim)
+	output: "slim_data//slim_[trait].txt"
+
+Examples:
+Reshuffle.exe B_1112_NA_clear_RNA_nocovar --traitnames=1-4,1--snpnames=1-10,20-30,46--traits=1--snps=1-100
+
+outputs from B_1112_NA_clear_RNA_nocovar.iout and B_1112_NA_clear_RNA_nocovar.out:
+
+	traitnames//traitnames.txt : with trait's names hgta,tga,tca,ldla
+	snpnames//snpnames.txt Names of snps #1-10,20-30,46
+	data//trait_hgta.txt : result of association for trait "hgta" for snps #1-100
\ No newline at end of file

Added: pkg/OmicABEL/src/reshuffle/iout_file.cpp
===================================================================
--- pkg/OmicABEL/src/reshuffle/iout_file.cpp	                        (rev 0)
+++ pkg/OmicABEL/src/reshuffle/iout_file.cpp	2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,93 @@
+/*
+ * iout_file.cpp
+ *
+ *  Created on: 28.02.2013
+ *      Author: lima
+ */
+#include "iout_file.h"
+
+iout_file::iout_file(Parameters& Params){
+	//Fill iout_header
+	ifstream iout_f(Params.iout_fname.c_str(), ios::binary | ios::in);
+	if (!iout_f) {
+		cout << "Error opening " << Params.iout_fname << " file" << endl;
+		cout << "Maybe, file doesn't exist" << endl;
+		exit(1);
+	}
+	iout_f.read((char *)&header.type, sizeof(int));
+	iout_f.read((char *)&header.nbytes, sizeof(int));
+	iout_f.read((char *)&header.p, sizeof(int));
+	iout_f.read((char *)&header.m, sizeof(int));
+	iout_f.read((char *)&header.t, sizeof(int));
+	iout_f.read((char *)&header.tile_m, sizeof(int));
+	iout_f.read((char *)&header.tile_t, sizeof(int));
+	iout_f.read((char *)&header.namelength, sizeof(int));
+
+	//Fill labels_data
+	iout_f.seekg(sizeof(header), ios_base::beg); //Change position of read after iout_header
+	labels.number =(header.p * 2 + header.m + header.t + header.p * (header.p - 1) / 2);
+	char *labels_char = new char[labels.number * header.namelength];
+	iout_f.read(labels_char, labels.number * header.namelength);
+	int beta_end = header.p;
+	int se_end = beta_end+header.p;
+	int cov_end = se_end+header.p * (header.p - 1) / 2;
+	int snp_end = cov_end+header.m;
+	int traits_end = snp_end+header.t;
+	int i=0;
+
+	labels.beta=new vector<string>;
+	labels.se=new vector<string>;
+	labels.cov=new vector<string>;
+	labels.snp_names=new vector<string>;
+	labels.trait_names=new vector<string>;
+
+	for (; i < beta_end; i++)
+		(*labels.beta).push_back(string(labels_char + i * header.namelength));
+	for (; i < se_end; i++)
+		(*labels.se).push_back(string(labels_char + i * header.namelength));
+	for (; i < cov_end;i++)
+		(*labels.cov).push_back(string(labels_char + i * header.namelength));
+	for (; i < snp_end; i++)
+		(*labels.snp_names).push_back(string(labels_char + i * header.namelength));
+	for (; i< traits_end; i++)
+		(*labels.trait_names).push_back(string(labels_char + i * header.namelength));
+
+	delete labels_char;
+	iout_f.close();
+}
+
+//Overloading operator cout for iout_header
+ostream &operator <<(ostream &os,iout_header header) {
+	os << "IOUT_HEADER [ type: " << header.type << "]" << endl;
+	os << "IOUT_HEADER [ nbytes: " << header.nbytes << "]" << endl;
+	os << "IOUT_HEADER [ p: " << header.p << "]" << endl;
+	os << "IOUT_HEADER [ m: " << header.m << "]" << endl;
+	os << "IOUT_HEADER [ t: " << header.t << "]" << endl;
+	os << "IOUT_HEADER [ tile_m: " << header.tile_m << "]" << endl;
+	os << "IOUT_HEADER [ tile_t: " << header.tile_t << "]" << endl;
+	os << "IOUT_HEADER [ namelength: " << header.namelength << "]" << endl;
+
+	return os;
+}
+
+ostream &operator <<(ostream &os,labels_data labels) {
+	os <<"NUMBER OF BETAS\t"<<(*labels.beta).size()<<endl;
+	os <<"NUMBER OF SE\t"<<(*labels.se).size()<<endl;
+	os <<"NUMBER OF COVS\t"<<(*labels.cov).size()<<endl;
+	os <<"NUMBER OF SNPNAMES\t"<<(*labels.snp_names).size()<<endl;
+	os <<"NUMBER OF TRAITNAMES\t"<<(*labels.trait_names).size()<<endl;
+}
+
+int iout_file::tilecoordinates(int traitNo, int snpNo) {
+	int tileCoor = 0;
+	int t_tile = traitNo / header.tile_t;
+	int t_off = traitNo % header.tile_t;
+	int m_tile = snpNo / header.tile_m;
+	int m_off = snpNo % header.tile_m;
+	int per_trait_per_snp = header.p + header.p * (header.p + 1) / 2;
+	tileCoor = (t_tile * (header.m * header.tile_t)
+			+ m_tile* (header.tile_m* min(header.tile_t,header.t - header.tile_t * t_tile))
+			+ t_off * (min(header.tile_m, header.m - header.tile_m * m_tile))
+			+ m_off) * per_trait_per_snp * sizeof(double);
+	return tileCoor;
+}

Added: pkg/OmicABEL/src/reshuffle/iout_file.h
===================================================================
--- pkg/OmicABEL/src/reshuffle/iout_file.h	                        (rev 0)
+++ pkg/OmicABEL/src/reshuffle/iout_file.h	2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,58 @@
+/*
+ * iout_file.h
+ *
+ *  Created on: 28.02.2013
+ *      Author: lima
+ */
+
+#ifndef IOUT_FILE_H_
+#define IOUT_FILE_H_
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+#include "Parameters.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+
+using namespace std;
+
+//Contain meta data
+class iout_header {
+public:
+	int type; // Double -> 6 as in Databel
+	int nbytes; // Double -> 8
+	int p; // # of covariates + 2 (intercept and SNP)
+	int m; // # of SNPs
+	int t; // # of traits
+	int tile_m; // More on these two later
+	int tile_t;
+	int namelength; // 32 as in Databel
+
+};
+
+//Overloading "cout" operator
+ostream &operator <<(ostream &,iout_header);
+
+//Contain labels
+class labels_data {
+public:
+	int number; //number of labels
+	vector<string>* beta;
+	vector<string>* se;
+	vector<string>* cov;
+	vector<string>* snp_names;
+	vector<string>* trait_names;
+};
+
+ostream &operator <<(ostream &,labels_data);
+
+class iout_file{
+public:
+	iout_header header;
+	labels_data labels;
+	iout_file(Parameters &);
+	int tilecoordinates(int, int);
+};
+#endif /* IOUT_FILE_H_ */

Added: pkg/OmicABEL/src/reshuffle/main.cpp
===================================================================
--- pkg/OmicABEL/src/reshuffle/main.cpp	                        (rev 0)
+++ pkg/OmicABEL/src/reshuffle/main.cpp	2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,29 @@
+/*
+ * main.cpp
+ *
+ *  Created on: 31.01.2013
+ *      Author: Ñîäáî
+ */
+#include <iostream>
+#include <ctime>
+#include "iout_file.h"
+#include "Parameters.h"
+#include "Reshuffle.h"
+#include <iterator>
+
+
+using namespace std;
+
+int main(int argc, char* argv[]) {
+	cout << "Every day I'm [re]shuffling!" << endl;
+	Parameters Params(argc, argv);
+	cout << Params;
+	iout_file iout_F(Params);
+	cout << "finish iout_file read " << double(clock()) / CLOCKS_PER_SEC << endl;
+	cout<<iout_F.header;
+	cout<<iout_F.labels;
+	Reshuffle reshh(iout_F,Params);
+	reshh.run();
+	cout << "finish_reshuffling " << double(clock()) / CLOCKS_PER_SEC << endl;
+	return 0;
+}

Added: pkg/OmicABEL/src/reshuffle/reshuffle.cpp
===================================================================
--- pkg/OmicABEL/src/reshuffle/reshuffle.cpp	                        (rev 0)
+++ pkg/OmicABEL/src/reshuffle/reshuffle.cpp	2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,349 @@
+/*
+ * reshuffle.cpp
+ *
+ *  Created on: 01.03.2013
+ *      Author: lima
+ */
+#include "reshuffle.h"
+#include <dir.h>
+#include <ctime>
+#include <math.h>
+#include <iterator>
+#include <list>
+
+using namespace std;
+
+#define PRECISION_DOUBLE 15//Precision of double
+
+Reshuffle::Reshuffle(iout_file &iout,Parameters &Params){
+	p_iout_file = &iout;
+	p_Parameters = &Params;
+	per_trait_per_snp = (*p_iout_file).header.p + (*p_iout_file).header.p * ((*p_iout_file).header.p + 1) / 2;
+	herest_startpos = (p_iout_file->header.m * p_iout_file->header.t*
+			(p_iout_file->header.p + p_iout_file->header.p * (p_iout_file->header.p + 1) / 2))
+					* sizeof(double);
+}
+
+string Reshuffle::create_filename(string prename, string name) {
+	string path = prename + "_"+name+".txt";
+	return path;
+}
+
+void Reshuffle::write_datadims(ofstream& txt_datadims){
+
+	txt_datadims << "Number of traits\t" << (*p_iout_file).header.t << endl;
+	txt_datadims << "Number of SNP\t" << (*p_iout_file).header.m << endl;
+	txt_datadims << "Number of covariates\t" << ((*p_iout_file).header.p - 2);
+}
+
+void Reshuffle::write_snpnames(ofstream& txt_snpnames){
+
+	if ((*p_Parameters).snpnames.value == "all"){
+		for (int i=0;i<(*(*p_iout_file).labels.snp_names).size();i++)
+			(*p_Parameters).snpnames.valueset.insert(i);
+		cout<<"SNPNAMES VALUE SET CHANGED TO ALL"<<endl;
+	}
+	for(set<int>::iterator it= (*p_Parameters).snpnames.valueset.begin();it!=(*p_Parameters).snpnames.valueset.end();++it)
+		txt_snpnames << "SNP #"<<(*it+1)<<"\t"<<(*(*p_iout_file).labels.snp_names)[*it]<<endl;
+	cout<<"END WRITE SNPNAMES"<<endl;
+}
+
+void Reshuffle::write_traitnames(ofstream& txt_traitnames){
+
+	if ((*p_Parameters).traitnames.value == "all"){
+		for (int i=0;i<(*(*p_iout_file).labels.trait_names).size();i++)
+			(*p_Parameters).traitnames.valueset.insert(i);
+		cout<<"TRAITNAMES VALUE SET CHANGED TO ALL"<<endl;
+	}
+	for(std::set<int>::iterator it= (*p_Parameters).traitnames.valueset.begin();it!=p_Parameters->traitnames.valueset.end();++it)
+		txt_traitnames<<"TRAIT #"<<(*it+1)<<"\t"<<(*(*p_iout_file).labels.trait_names)[*it]<<endl;
+	cout<<"END WRITE TRAITNAMES"<<endl;
+}
+
+void Reshuffle::write_data(ifstream& out_file){
+	out_file.seekg(0, ios_base::beg);
+	cout << "startwritetxt=" << double(clock()) / CLOCKS_PER_SEC << endl;
+	for (set<int>::iterator trait= (*p_Parameters).traits.valueset.begin();trait!=(*p_Parameters).traits.valueset.end();trait++) {
+		ofstream txt_trait(create_filename("data//trait", (*(*p_iout_file).labels.trait_names)[*trait]).c_str());
+		//Set precision of double
+		cout<<(*(*p_iout_file).labels.trait_names)[*trait]<<endl;
+		txt_trait.precision(PRECISION_DOUBLE);
+		txt_trait << "\t";
+		for (int beta = 0;	beta < (*(*p_iout_file).labels.beta).size(); beta++)
+			txt_trait << (*(*p_iout_file).labels.beta)[beta] << "\t";
+		for (int se = 0;se < (*(*p_iout_file).labels.se).size(); se++)
+			txt_trait << (*(*p_iout_file).labels.se)[se] << "\t";
+		for (int cov = 0;cov < (*(*p_iout_file).labels.cov).size(); cov++)
+			txt_trait << (*(*p_iout_file).labels.cov)[cov] << "\t";
+		txt_trait << endl;
+		cout << "endwritetrait_colnames" << *trait << clock() << endl;
+		double* buf = new double[per_trait_per_snp];
+		int oldPos = 0;
+		char s[30];
+		for (set<int>::iterator snp= (*p_Parameters).snps.valueset.begin();snp!=(*p_Parameters).snps.valueset.end();snp++) {
+			txt_trait << (*(*p_iout_file).labels.snp_names)[*snp] << "\t";
+			int pos = (*p_iout_file).tilecoordinates(*trait, *snp);
+			//cout << oldPos << "-" << pos << endl;
+			if(pos != oldPos)
+			{
+				out_file.seekg(pos,ios_base::beg);
+			}
+			oldPos=pos+sizeof(double)*per_trait_per_snp;
+			out_file.read((char *)buf, sizeof(double)*per_trait_per_snp);
+			for (int i = 0; i < per_trait_per_snp; i++) {
+				sprintf(s, "%.15g", buf[i]);
+				txt_trait << s << "\t";
+			}
+			txt_trait << endl;
+		}
+		delete buf;
+		txt_trait.close();
+		cout << "endwritetrait " << (*(*p_iout_file).labels.trait_names)[*trait] << " "<< double(clock()) / CLOCKS_PER_SEC << endl;
+	}
+	cout << "finishwritetxt=" << double(clock()) / CLOCKS_PER_SEC << endl;
+}
+
+void Reshuffle::write_data_chi(ifstream& out_file){
+	out_file.seekg(0, ios_base::beg);
+	double chi = 0;
+	double CheckChi = (*p_Parameters).chi.value == "all" ? -1.0 : atof((*p_Parameters).chi.value.c_str());
+	cout << "startwritetxt=" << double(clock()) / CLOCKS_PER_SEC << endl;
+	for (set<int>::iterator trait= (*p_Parameters).traits.valueset.begin();trait!=(*p_Parameters).traits.valueset.end();trait++) {
+		ofstream txt_chi(create_filename("chi_data//chi", (*(*p_iout_file).labels.trait_names)[*trait]).c_str());
+		//Set precision of double
+		txt_chi.precision(PRECISION_DOUBLE);
+		txt_chi << "\t";
+		for (int beta = 0;	beta < (*(*p_iout_file).labels.beta).size(); beta++)
+			txt_chi << (*(*p_iout_file).labels.beta)[beta] << "\t";
+		for (int se = 0;se < (*(*p_iout_file).labels.se).size(); se++)
+			txt_chi << (*(*p_iout_file).labels.se)[se] << "\t";
+		for (int cov = 0;cov < (*(*p_iout_file).labels.cov).size(); cov++)
+			txt_chi << (*(*p_iout_file).labels.cov)[cov] << "\t";
+		txt_chi << "Chi2" << endl;
+		double* buf = new double[per_trait_per_snp];
+		int oldPos = 0;
+		char s[30];
+		for (set<int>::iterator snp= (*p_Parameters).snps.valueset.begin();snp!=(*p_Parameters).snps.valueset.end();snp++) {
+			int pos = (*p_iout_file).tilecoordinates(*trait, *snp);
+			//cout << oldPos << "-" << pos << endl;
+			if(pos != oldPos)
+			{
+				out_file.seekg(pos,ios_base::beg);
+			}
+			oldPos=pos+sizeof(double)*per_trait_per_snp;
+			out_file.read((char *)buf, sizeof(double)*per_trait_per_snp);
+			chi=pow((buf[(*(*p_iout_file).labels.beta).size()-1]/buf[(*(*p_iout_file).labels.beta).size()+(*(*p_iout_file).labels.se).size()-1]),2);
+			if(chi>CheckChi){
+				txt_chi << (*(*p_iout_file).labels.snp_names)[*snp] << "\t";
+				for (int i = 0; i < per_trait_per_snp; i++) {
+					sprintf(s, "%.15g", buf[i]);
+					txt_chi << s << "\t";
+				}
+				txt_chi << chi << endl;
+			}
+		}
+		delete buf;
+		txt_chi.close();
+		cout << "endwritechitrait " << (*(*p_iout_file).labels.trait_names)[*trait] << " "<< double(clock()) / CLOCKS_PER_SEC << endl;
+	}
+	cout << "finishwritechitxt=" << double(clock()) / CLOCKS_PER_SEC << endl;
+}
+
+void Reshuffle::write_slim_data(ifstream& out_file){
+	out_file.seekg(0, ios_base::beg);
+	set<int> goodtraits;
+	set<int> goodsnps;
+	double chi = 0;
+	if((*p_Parameters).chi.value=="all"||(*p_Parameters).chi.value=="None"){
+		cout << "ERROR" << "Chi value doesn't set"<<endl;
+		cout << "Please, set Chi value to get slim data" << endl;
+		exit(1);
+	}
+	double CheckChi = atof((*p_Parameters).chi.value.c_str());
+	for (set<int>::iterator trait= (*p_Parameters).traits.valueset.begin();trait!=(*p_Parameters).traits.valueset.end();trait++) {
+		double* buf = new double[per_trait_per_snp];
+		int oldPos = 0;
+		char s[30];
+		for (set<int>::iterator snp= (*p_Parameters).snps.valueset.begin();snp!=(*p_Parameters).snps.valueset.end();snp++) {
+			int pos = (*p_iout_file).tilecoordinates(*trait, *snp);
+			//cout << oldPos << "-" << pos << endl;
+			if(pos != oldPos)
+			{
+				out_file.seekg(pos,ios_base::beg);
+			}
+			oldPos=pos+sizeof(double)*per_trait_per_snp;
+			out_file.read((char *)buf, sizeof(double)*per_trait_per_snp);
+			chi=pow((buf[(*(*p_iout_file).labels.beta).size()-1]/buf[(*(*p_iout_file).labels.beta).size()+(*(*p_iout_file).labels.se).size()-1]),2);
+			if(chi>CheckChi){
+				goodtraits.insert(*trait);
+				goodsnps.insert(*snp);
+			}
+		}
+		delete buf;
+	}
+	out_file.seekg(0, ios_base::beg);
+	for (set<int>::iterator trait= goodtraits.begin();trait!=goodtraits.end();trait++) {
+		ofstream txt_slim(create_filename("slimdata//slim", (*(*p_iout_file).labels.trait_names)[*trait]).c_str());
+		//Set precision of double
+		txt_slim.precision(PRECISION_DOUBLE);
+		txt_slim << "\t";
+		for (int beta = 0;	beta < (*(*p_iout_file).labels.beta).size(); beta++)
+			txt_slim << (*(*p_iout_file).labels.beta)[beta] << "\t";
+		for (int se = 0;se < (*(*p_iout_file).labels.se).size(); se++)
+			txt_slim << (*(*p_iout_file).labels.se)[se] << "\t";
+		for (int cov = 0;cov < (*(*p_iout_file).labels.cov).size(); cov++)
+			txt_slim << (*(*p_iout_file).labels.cov)[cov] << "\t";
+		txt_slim << "Chi2" << endl;
+		double* buf = new double[per_trait_per_snp];
+		int oldPos = 0;
+		char s[30];
+		for (set<int>::iterator snp= goodsnps.begin();snp!=goodsnps.end();snp++) {
+			txt_slim << (*(*p_iout_file).labels.snp_names)[*snp] << "\t";
+			int pos = (*p_iout_file).tilecoordinates(*trait, *snp);
+			//cout << oldPos << "-" << pos << endl;
+			if(pos != oldPos)
+			{
+				out_file.seekg(pos,ios_base::beg);
+			}
+			oldPos=pos+sizeof(double)*per_trait_per_snp;
+			out_file.read((char *)buf, sizeof(double)*per_trait_per_snp);
+			chi=pow((buf[(*(*p_iout_file).labels.beta).size()-1]/buf[(*(*p_iout_file).labels.beta).size()+(*(*p_iout_file).labels.se).size()-1]),2);
+			for (int i = 0; i < per_trait_per_snp; i++) {
+				sprintf(s, "%.15g", buf[i]);
+				txt_slim << s << "\t";
+			}
+			txt_slim << chi << endl;
+		}
+		delete buf;
+		txt_slim.close();
+	}
+}
+
+int Reshuffle::est_shift(int counter){
+	int shift = ((p_iout_file->header.m * p_iout_file->header.t* (p_iout_file->header.p +
+			p_iout_file->header.p * (p_iout_file->header.p + 1) / 2))
+			+counter*p_iout_file->header.t)* sizeof(double);
+	return shift;
+}
+
+int Reshuffle::est_beta_shift(int counter){
+	int shift = est_shift(3)+counter*(p_iout_file->header.p-1)*sizeof(double);
+	return shift;
+}
+
+void Reshuffle::write_herest(ifstream& out_file){
+	ofstream txt_est("estimates//estimates.txt");
+	out_file.seekg(herest_startpos, ios_base::beg);
+	if (p_Parameters->heritabilities.value == "all")
+		for(int i=0;i<(*(p_iout_file->labels.trait_names)).size();i++)
+			p_Parameters->heritabilities.valueset.insert(i);
+	txt_est.precision(PRECISION_DOUBLE);
+	txt_est<<"\t";
+	for (set<int>::iterator trait= p_Parameters->heritabilities.valueset.begin();trait!=p_Parameters->heritabilities.valueset.end();trait++)
+		txt_est << (*(p_iout_file->labels.trait_names))[*trait] << "\t";
+	txt_est << endl;
+	list<string> est_names;
+	est_names.insert(est_names.end(), "heritabilities");
+	est_names.insert(est_names.end(), "sigma");
+	est_names.insert(est_names.end(), "res_sigma");
+	double tmp_number = 0;
+	int counter=0;
+	for (list<string>::iterator name = est_names.begin();name != est_names.end(); ++name) {
+		txt_est << *name << "\t";
+		for (std::set<int>::iterator trait= p_Parameters->heritabilities.valueset.begin();trait!=p_Parameters->heritabilities.valueset.end();++trait) {
+			out_file.seekg(*trait*sizeof(double),ios_base::cur);
+			out_file.read((char *) &tmp_number, sizeof(double));
+			txt_est << tmp_number << "\t";
+			out_file.seekg(est_shift(counter), ios_base::beg);
+		}
+		counter++;
+		txt_est << "\n";
+		out_file.seekg(est_shift(counter), ios_base::beg);
+	}
+	out_file.seekg(est_shift(3), ios_base::beg);
+	counter=0;
+	for (int beta=0;beta<(*(p_iout_file->labels.beta)).size();beta++) {
+		beta++;
+		if (beta != (*(p_iout_file->labels.beta)).size()) {
+			beta--;
+			txt_est << (*(p_iout_file->labels).beta)[beta] << "\t";
+			for (std::set<int>::iterator trait= p_Parameters->heritabilities.valueset.begin();trait!=p_Parameters->heritabilities.valueset.end();++trait) {
+				out_file.seekg(*trait*sizeof(double),ios_base::cur);
+				out_file.read((char *) &tmp_number, sizeof(double));
+				txt_est << tmp_number << "\t";
+				out_file.seekg(est_beta_shift(counter), ios_base::beg);
+			}
+			counter++;
+			out_file.seekg(est_beta_shift(counter), ios_base::beg);
+			txt_est << "\n";
+			beta++;
+		}
+	}
+}
+
+void Reshuffle::run(){
+	if((*p_Parameters).datadims.use){
+		mkdir("datadims");
+		ofstream datadims("datadims//datadims.txt");
+		write_datadims(datadims);
+	}
+	if((*p_Parameters).snpnames.use){
+		mkdir("snpnames");
+		ofstream snpnames("snpnames//snpnames.txt");
+		write_snpnames(snpnames);
+	}
+
+	if((*p_Parameters).traitnames.use){
+		mkdir("traitnames");
+		ofstream* traitnames = new ofstream("traitnames//traitnames.txt");
+		write_traitnames(*traitnames);
+		delete traitnames;
+	}
+
+	//Open *.out to read data
+	ifstream out_file((*p_Parameters).out_fname.c_str(), ios::binary | ios::in);
+	if (!out_file) {
+		cout << "Error open " << (*p_Parameters).out_fname << " file";
+		cout << "Maybe, file doesn't exist" << endl;
+		exit(1);
+	}
+
+	//If any of parameters traits||snps||chi use, this block fill traits.valueset and snps.valueset
+	//(if their values are default all)
+	if((*p_Parameters).traits.use||(*p_Parameters).snps.use||(*p_Parameters).chi.use){
+
+			if((*p_Parameters).traits.value=="all"||(*p_Parameters).traits.value=="None"){
+				for(int i=0;i<(*(*p_iout_file).labels.trait_names).size();i++)
+				(*p_Parameters).traits.valueset.insert(i);
+			cout<<"TRAITS VALUE SET CHANGED TO ALL"<<endl;
+		}
+
+		if((*p_Parameters).snps.value=="all"||(*p_Parameters).snps.value=="None"){
+			for(int i=0;i<(*(*p_iout_file).labels.snp_names).size();i++)
+				(*p_Parameters).snps.valueset.insert(i);
+			cout<<"SNPS VALUE SET CHANGED TO ALL"<<endl;
+		}
+	}
+
+	if(((*p_Parameters).traits.use||(*p_Parameters).snps.use)&&!(*p_Parameters).chi.use){
+		mkdir("data");
+		write_data(out_file);
+	}
+
+	if((*p_Parameters).chi.use&&!(*p_Parameters).dataslim.use){
+		mkdir("chi_data");
+		write_data_chi(out_file);
+
+	}
+
+	if((*p_Parameters).dataslim.use){
+		mkdir("slimdata");
+		write_slim_data(out_file);
+
+	}
+
+	if((*p_Parameters).heritabilities.use){
+		mkdir("estimates");
+		write_herest(out_file);
+	}
+}

Added: pkg/OmicABEL/src/reshuffle/reshuffle.h
===================================================================
--- pkg/OmicABEL/src/reshuffle/reshuffle.h	                        (rev 0)
+++ pkg/OmicABEL/src/reshuffle/reshuffle.h	2013-03-05 11:52:43 UTC (rev 1114)
@@ -0,0 +1,37 @@
+/*
+ * reshuffle.h
+ *
+ *  Created on: 01.03.2013
+ *      Author: lima
+ */
+
+#ifndef RESHUFFLE_H_
+#define RESHUFFLE_H_
+#include "Parameters.h"
+#include "iout_file.h"
+
+using namespace std;
+
+class Reshuffle{
+public:
+	iout_file * p_iout_file;
+	Parameters * p_Parameters;
+	int per_trait_per_snp;
+	set<int> traits2write;
+	set<int> snps2write;
+	string create_filename(string, string);
+	void write_datadims(ofstream&);
+	void write_snpnames(ofstream&);
+	void write_traitnames(ofstream&);
+	void write_data(ifstream&);
+	void write_data_chi(ifstream&);
+	void write_slim_data(ifstream&);
+	void write_herest(ifstream&);
+	int herest_startpos;
+	int est_shift(int);
+	int est_beta_shift(int);
+	void run(Parameters);
+	Reshuffle(iout_file &,Parameters &);
+	void run();
+};
+#endif /* RESHUFFLE_H_ */



More information about the Genabel-commits mailing list