[Genabel-commits] r1128 - pkg/OmicABEL/src/reshuffle

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Mar 11 16:25:47 CET 2013


Author: sharapovsodbo
Date: 2013-03-11 16:25:47 +0100 (Mon, 11 Mar 2013)
New Revision: 1128

Modified:
   pkg/OmicABEL/src/reshuffle/DONE.txt
   pkg/OmicABEL/src/reshuffle/Parameters.cpp
   pkg/OmicABEL/src/reshuffle/Parameters.h
   pkg/OmicABEL/src/reshuffle/README.txt
   pkg/OmicABEL/src/reshuffle/main.cpp
   pkg/OmicABEL/src/reshuffle/reshuffle.cpp
   pkg/OmicABEL/src/reshuffle/test.cpp
   pkg/OmicABEL/src/reshuffle/test.h
Log:
ver 0.003
all parameters from specification complete (instead of DatABEL output)
README.txt updated 

Modified: pkg/OmicABEL/src/reshuffle/DONE.txt
===================================================================
--- pkg/OmicABEL/src/reshuffle/DONE.txt	2013-03-10 21:21:22 UTC (rev 1127)
+++ pkg/OmicABEL/src/reshuffle/DONE.txt	2013-03-11 15:25:47 UTC (rev 1128)
@@ -14,4 +14,15 @@
 		heritabilities->estimates.txt
 		dataslim,chi->slim_data.txt
 		
+ver 0.003
+	bug in traitsbynames test fixed
+	Write data by trait's names
+	Write data by snp's names
+	Write estimates by trait's names
+	All combinations of these parameters
+	Write data by snp's name and snps around it 
+	Write data by regexp at the beginning of trait's names
+	
+	
+		
 		
\ No newline at end of file

Modified: pkg/OmicABEL/src/reshuffle/Parameters.cpp
===================================================================
--- pkg/OmicABEL/src/reshuffle/Parameters.cpp	2013-03-10 21:21:22 UTC (rev 1127)
+++ pkg/OmicABEL/src/reshuffle/Parameters.cpp	2013-03-11 15:25:47 UTC (rev 1128)
@@ -31,9 +31,12 @@
 ostream &operator <<(ostream &os, Parameter par) {
 	os << "PARAMETR" << "\t" << par.name << "\t" << "USE" << "\t" << par.use
 			<< "\t" << "VALUE" << "\t" << par.value;
-	cout<<"\tValue set ";
-	for (set<int>::iterator it= par.valueset.begin();it!=par.valueset.end();it++)
+	cout<<"\tNumbers set ";
+	for (set<int>::iterator it= par.numbersset.begin();it!=par.numbersset.end();it++)
 		os <<*it<<" ";
+	cout<<"\tNames set ";
+	for (set<string>::iterator it= par.namesset.begin();it!=par.namesset.end();it++)
+		os <<*it<<" ";
 	os<<endl;
 	return os;
 }
@@ -90,10 +93,13 @@
 							end+=str_tmp[i];
 						}
 						for(int i=atoi(start.c_str())-1;i<atoi(end.c_str());i++){
-							valueset.insert(i);
+							numbersset.insert(i);
 						}
-					}else if(atoi(str_tmp.c_str())!=0)
-						valueset.insert(atoi(str_tmp.c_str())-1);
+					}else if(atoi(str_tmp.c_str())!=0){
+						numbersset.insert(atoi(str_tmp.c_str())-1);
+					}else{
+						namesset.insert(str_tmp);
+					}
 
 					str_tmp="";
 				}
@@ -145,3 +151,66 @@
 	test= Parameter(cmdline, "test");
 	cmdline=test.delfromcmdline(cmdline);
 }
+
+void Parameter::setbynames(vector<string> names){
+	int find=0;
+	int before=0;
+	int after=0;
+	string regexp="";
+	if(name=="snps"){
+		for (set<string>::iterator nameit=namesset.begin();nameit!=namesset.end();++nameit){
+			string tmp=*nameit;
+			if(tmp.find("before")!=string::npos){
+				tmp.erase(0,7);
+				before = atoi(tmp.c_str());
+			}
+			if(tmp.find("after")!=string::npos){
+				tmp.erase(0,6);
+				after = atoi(tmp.c_str());
+			}
+			if(((*nameit).find("before")==string::npos)&&((*nameit).find("after")==string::npos)){
+				for(int i=0;i<names.size();i++){
+					if(names[i]==*nameit){
+						for(int k=(i-before);k<(i+after+1);k++){
+							numbersset.insert(k);
+						}
+						find=1;
+						break;
+					}
+				}
+				if(find==1)
+					continue;
+			}
+		}
+	}
+	if(name=="traits"||name=="heritabilities"){
+		for (set<string>::iterator nameit=namesset.begin();nameit!=namesset.end();++nameit){
+			string tmp=*nameit;
+			if(tmp.find("regexp")!=string::npos){
+				tmp.erase(0,7);
+				regexp = tmp;
+				cout<<"REGEXP="<<regexp<<endl;
+			}
+		}
+			for (set<string>::iterator nameit=namesset.begin();nameit!=namesset.end();++nameit){
+				if((*nameit).find("regexp")==string::npos){
+					for(int i=0;i<names.size();i++){
+						if(names[i]==*nameit){
+							numbersset.insert(i);
+							find=1;
+							break;
+						}
+					}
+					if(find==1)
+						continue;
+				}else{
+					for(int i=0;i<names.size();i++){
+						if((names[i]).find(regexp)==0){
+							numbersset.insert(i);
+							find=1;
+						}
+					}
+				}
+			}
+		}
+}

Modified: pkg/OmicABEL/src/reshuffle/Parameters.h
===================================================================
--- pkg/OmicABEL/src/reshuffle/Parameters.h	2013-03-10 21:21:22 UTC (rev 1127)
+++ pkg/OmicABEL/src/reshuffle/Parameters.h	2013-03-11 15:25:47 UTC (rev 1128)
@@ -11,6 +11,7 @@
 #include <iostream>
 #include <string>
 #include <set>
+#include <vector>
 using namespace std;
 
 class Parameter {
@@ -21,8 +22,10 @@
 	string value; 	// value of parametr,chars after "=" symbol
 	Parameter(string,string); 	//constructor
 	Parameter();		//default constructor
-	set<int> valueset;
+	set<int> numbersset;
+	set<string> namesset;
 	string delfromcmdline(string);
+	void setbynames(vector<string>);
 };
 
 ostream &operator <<(ostream &, Parameter);

Modified: pkg/OmicABEL/src/reshuffle/README.txt
===================================================================
--- pkg/OmicABEL/src/reshuffle/README.txt	2013-03-10 21:21:22 UTC (rev 1127)
+++ pkg/OmicABEL/src/reshuffle/README.txt	2013-03-11 15:25:47 UTC (rev 1128)
@@ -8,57 +8,72 @@
 
 Data dimensions 
 	(--datadims)  Gives back t, m, p
-output: "datadims//datadims.txt"
+output: "datadims.txt"
 
 SNP names
 	default: (--snpnames) : all
 	by index (--snpnames=27) : name of snp#27
 	by index range, combination (--snpnames=27,2-12,4-20) : name of snp #2-20,27
-output: "snpnames//snpnames.txt"
+output: "snpnames.txt"
 	
 Trait names 
 	default (--traitnames): all
 	by index (--traitnames=27) : name of trait #27
 	by index range, combination (--traitnames=27,2-12,15)
-output: "traitnames//traitnames.txt"
+output: "traitnames.txt"
 
 Heritabilities, sigma, res_sigma, estimates
 	Default: (--heritabilities) : all traits
 	Reange,indexes (--heritabilities=1-10,4,5-12) : for traits #1-12
-output: "estimates//estimates.txt"	
+	trait's names (--heritabilities=hgta,hdla) : for traits hgta,hdla
+	regexp in trait's names (--heritabilities=regexp=t) : for traits whose names start with "t" : tga,tca (in data_4test)
+	all combinations (--heritabilities=1-10,4,5-12,hgta,lipid_tca,regexp=lipid_)
+output: "estimates.txt"	
 Results - association
 	by SNP
 		Dafault (--snp) : for all snp for all traits
 		indexes, ranges (--snp=12,100-1000,500-10000,22000) :  for all traits
-	output: "data//[trait].txt"
+		names (--snp=rs3121561,rs6687776) : for snps rs3121561,rs6687776
+		names+around (--snp=rs3121561,rs6687776,before=10,after=15)
+			for  (-10,-9,...,-1,rs3121561,+1,+2,...,+15) + (-10,-9,...,-1,rs6687776,+1,+2,...,+15)
+		indexes+names+focus (--snp=1-20,50,rs3121561,rs6687776,before=10,after=15)
+			for snps : #1-20,50 + (-10,-9,...,-1,rs3121561,+1,+2,...,+15) + (-10,-9,...,-1,rs6687776,+1,+2,...,+15)
+			NB!: before/after (focus) applies only to snps given by name, not to snps given by index
+	output: "data.txt"
 	by trait
 		Dafault (--trait) : for all snp for all traits
 		indexes,range (--trait=1-10,12) : for snp #12 for all traits
-	output: "data//[trait].txt"
+		by names (--traits=tca,hdla) : for traits tca,hdla
+		regexp in trait's names (--traits=regexp=lipids_) : for all traits whose names begin with "lipids_"
+		all combinations
+	output: "data.txt"
 	Chi2 more than some threshold
 		(--chi=20) for snps,which chi>20 for all traits
-	output: "chi_data//[trait].txt"
+	output: "chi_data.txt"
 	All combinations of traits,snps, Chi2
 		(--traits=1-2--snps=1-1000--chi=15) for traits #1-2 for snps#1-1000, which chi>15
-		output: "chi_data//[trait].txt"
+		output: "chi_data.txt"
 		
 Just write data with chi
 	(--chi) : write data(with chi column) for all snps for all traits
 	all combinations are supported
-	output: "chi_data//[trait].txt"
+	output: "chi_data.txt"
 Dataslim
 	Creating a sub-matrix by Chi2>X (--chi=X--dataslim)
-	output: "slim_data//slim_[trait].txt"
+	output: "slim_data.txt"
 	
 Test
 	(--test) run all tests
 	output: test.txt
+	
+Info 
+	(--info) write some information about the program's run
 
 Examples:
 Reshuffle.exe B_1112_NA_clear_RNA_nocovar --traitnames=1-4,1--snpnames=1-10,20-30,46--traits=1--snps=1-100
 
 outputs from B_1112_NA_clear_RNA_nocovar.iout and B_1112_NA_clear_RNA_nocovar.out:
 
-	traitnames//traitnames.txt : with trait's names hgta,tga,tca,ldla
-	snpnames//snpnames.txt Names of snps #1-10,20-30,46
-	data//trait_hgta.txt : result of association for trait "hgta" for snps #1-100
\ No newline at end of file
+	traitnames.txt : with trait's names hgta,tga,tca,ldla
+	snpnames.txt Names of snps #1-10,20-30,46
+	trait_hgta.txt : result of association for trait "hgta" for snps #1-100
\ No newline at end of file

Modified: pkg/OmicABEL/src/reshuffle/main.cpp
===================================================================
--- pkg/OmicABEL/src/reshuffle/main.cpp	2013-03-10 21:21:22 UTC (rev 1127)
+++ pkg/OmicABEL/src/reshuffle/main.cpp	2013-03-11 15:25:47 UTC (rev 1128)
@@ -105,6 +105,12 @@
 		cout<<iout_F.header;
 		cout<<iout_F.labels;
 	}
+	if(Params.traits.use)
+		Params.traits.setbynames(*(iout_F.labels.trait_names));
+	if(Params.snps.use)
+		Params.snps.setbynames(*(iout_F.labels.snp_names));
+	if(Params.heritabilities.use)
+		Params.heritabilities.setbynames(*(iout_F.labels.trait_names));
 	Reshuffle reshh(iout_F,Params);
 	reshh.run();
 	cout << "finish_reshuffling " << double(clock()) / CLOCKS_PER_SEC << endl;

Modified: pkg/OmicABEL/src/reshuffle/reshuffle.cpp
===================================================================
--- pkg/OmicABEL/src/reshuffle/reshuffle.cpp	2013-03-10 21:21:22 UTC (rev 1127)
+++ pkg/OmicABEL/src/reshuffle/reshuffle.cpp	2013-03-11 15:25:47 UTC (rev 1128)
@@ -45,10 +45,10 @@
 
 	if ((*p_Parameters).snpnames.value == "all"){
 		for (int i=0;i<(*(*p_iout_file).labels.snp_names).size();i++)
-			(*p_Parameters).snpnames.valueset.insert(i);
+			(*p_Parameters).snpnames.numbersset.insert(i);
 		cout<<"SNPNAMES VALUE SET CHANGED TO ALL"<<endl;
 	}
-	for(set<int>::iterator it= (*p_Parameters).snpnames.valueset.begin();it!=(*p_Parameters).snpnames.valueset.end();++it)
+	for(set<int>::iterator it= (*p_Parameters).snpnames.numbersset.begin();it!=(*p_Parameters).snpnames.numbersset.end();++it)
 		txt_snpnames << "SNP #"<<(*it+1)<<"\t"<<(*(*p_iout_file).labels.snp_names)[*it]<<endl;
 	cout<<"END WRITE SNPNAMES"<<endl;
 }
@@ -57,10 +57,10 @@
 
 	if ((*p_Parameters).traitnames.value == "all"){
 		for (int i=0;i<(*(*p_iout_file).labels.trait_names).size();i++)
-			(*p_Parameters).traitnames.valueset.insert(i);
+			(*p_Parameters).traitnames.numbersset.insert(i);
 		cout<<"TRAITNAMES VALUE SET CHANGED TO ALL"<<endl;
 	}
-	for(std::set<int>::iterator it= (*p_Parameters).traitnames.valueset.begin();it!=p_Parameters->traitnames.valueset.end();++it)
+	for(std::set<int>::iterator it= (*p_Parameters).traitnames.numbersset.begin();it!=p_Parameters->traitnames.numbersset.end();++it)
 		txt_traitnames<<"TRAIT #"<<(*it+1)<<"\t"<<(*(*p_iout_file).labels.trait_names)[*it]<<endl;
 	cout<<"END WRITE TRAITNAMES"<<endl;
 }
@@ -69,7 +69,7 @@
 	out_file.seekg(0, ios_base::beg);
 	cout << "startwritetxt=" << double(clock()) / CLOCKS_PER_SEC << endl;
 	ofstream txt_trait(create_filename("data").c_str());
-	for (set<int>::iterator trait= (*p_Parameters).traits.valueset.begin();trait!=(*p_Parameters).traits.valueset.end();trait++) {
+	for (set<int>::iterator trait= (*p_Parameters).traits.numbersset.begin();trait!=(*p_Parameters).traits.numbersset.end();trait++) {
 
 		//Set precision of double
 		cout<<(*(*p_iout_file).labels.trait_names)[*trait]<<endl;
@@ -86,7 +86,7 @@
 		double* buf = new double[per_trait_per_snp];
 		int oldPos = 0;
 		char s[30];
-		for (set<int>::iterator snp= (*p_Parameters).snps.valueset.begin();snp!=(*p_Parameters).snps.valueset.end();snp++) {
+		for (set<int>::iterator snp= (*p_Parameters).snps.numbersset.begin();snp!=(*p_Parameters).snps.numbersset.end();snp++) {
 			txt_trait << (*(*p_iout_file).labels.snp_names)[*snp] << "\t";
 			int pos = (*p_iout_file).tilecoordinates(*trait, *snp);
 			//cout << oldPos << "-" << pos << endl;
@@ -115,7 +115,7 @@
 	double CheckChi = (*p_Parameters).chi.value == "all" ? -1.0 : atof((*p_Parameters).chi.value.c_str());
 	cout << "startwritetxt=" << double(clock()) / CLOCKS_PER_SEC << endl;
 	ofstream txt_chi(create_filename("chi_data").c_str());
-	for (set<int>::iterator trait= (*p_Parameters).traits.valueset.begin();trait!=(*p_Parameters).traits.valueset.end();trait++) {
+	for (set<int>::iterator trait= (*p_Parameters).traits.numbersset.begin();trait!=(*p_Parameters).traits.numbersset.end();trait++) {
 		//ofstream txt_chi(create_filename("chi_data//chi", (*(*p_iout_file).labels.trait_names)[*trait]).c_str());
 		//Set precision of double
 		txt_chi.precision(PRECISION_DOUBLE);
@@ -130,7 +130,7 @@
 		double* buf = new double[per_trait_per_snp];
 		int oldPos = 0;
 		char s[30];
-		for (set<int>::iterator snp= (*p_Parameters).snps.valueset.begin();snp!=(*p_Parameters).snps.valueset.end();snp++) {
+		for (set<int>::iterator snp= (*p_Parameters).snps.numbersset.begin();snp!=(*p_Parameters).snps.numbersset.end();snp++) {
 			int pos = (*p_iout_file).tilecoordinates(*trait, *snp);
 			//cout << oldPos << "-" << pos << endl;
 			if(pos != oldPos)
@@ -167,11 +167,11 @@
 		exit(1);
 	}
 	double CheckChi = atof((*p_Parameters).chi.value.c_str());
-	for (set<int>::iterator trait= (*p_Parameters).traits.valueset.begin();trait!=(*p_Parameters).traits.valueset.end();trait++) {
+	for (set<int>::iterator trait= (*p_Parameters).traits.numbersset.begin();trait!=(*p_Parameters).traits.numbersset.end();trait++) {
 		double* buf = new double[per_trait_per_snp];
 		int oldPos = 0;
 		char s[30];
-		for (set<int>::iterator snp= (*p_Parameters).snps.valueset.begin();snp!=(*p_Parameters).snps.valueset.end();snp++) {
+		for (set<int>::iterator snp= (*p_Parameters).snps.numbersset.begin();snp!=(*p_Parameters).snps.numbersset.end();snp++) {
 			int pos = (*p_iout_file).tilecoordinates(*trait, *snp);
 			//cout << oldPos << "-" << pos << endl;
 			if(pos != oldPos)
@@ -244,10 +244,10 @@
 	out_file.seekg(herest_startpos, ios_base::beg);
 	if (p_Parameters->heritabilities.value == "all")
 		for(int i=0;i<(*(p_iout_file->labels.trait_names)).size();i++)
-			p_Parameters->heritabilities.valueset.insert(i);
+			p_Parameters->heritabilities.numbersset.insert(i);
 	txt_est.precision(PRECISION_DOUBLE);
 	txt_est<<"\t";
-	for (set<int>::iterator trait= p_Parameters->heritabilities.valueset.begin();trait!=p_Parameters->heritabilities.valueset.end();trait++)
+	for (set<int>::iterator trait= p_Parameters->heritabilities.numbersset.begin();trait!=p_Parameters->heritabilities.numbersset.end();trait++)
 		txt_est << (*(p_iout_file->labels.trait_names))[*trait] << "\t";
 	txt_est << endl;
 	list<string> est_names;
@@ -258,7 +258,7 @@
 	int counter=0;
 	for (list<string>::iterator name = est_names.begin();name != est_names.end(); ++name) {
 		txt_est << *name << "\t";
-		for (std::set<int>::iterator trait= p_Parameters->heritabilities.valueset.begin();trait!=p_Parameters->heritabilities.valueset.end();++trait) {
+		for (std::set<int>::iterator trait= p_Parameters->heritabilities.numbersset.begin();trait!=p_Parameters->heritabilities.numbersset.end();++trait) {
 			out_file.seekg(*trait*sizeof(double),ios_base::cur);
 			out_file.read((char *) &tmp_number, sizeof(double));
 			txt_est << tmp_number << "\t";
@@ -275,7 +275,7 @@
 		if (beta != (*(p_iout_file->labels.beta)).size()) {
 			beta--;
 			txt_est << (*(p_iout_file->labels).beta)[beta] << "\t";
-			for (std::set<int>::iterator trait= p_Parameters->heritabilities.valueset.begin();trait!=p_Parameters->heritabilities.valueset.end();++trait) {
+			for (std::set<int>::iterator trait= p_Parameters->heritabilities.numbersset.begin();trait!=p_Parameters->heritabilities.numbersset.end();++trait) {
 				out_file.seekg(*trait*sizeof(double),ios_base::cur);
 				out_file.read((char *) &tmp_number, sizeof(double));
 				txt_est << tmp_number << "\t";
@@ -316,19 +316,19 @@
 		exit(1);
 	}
 
-	//If any of parameters traits||snps||chi use, this block fill traits.valueset and snps.valueset
+	//If any of parameters traits||snps||chi use, this block fill traits.numbersset and snps.numbersset
 	//(if their values are default all)
 	if((*p_Parameters).traits.use||(*p_Parameters).snps.use||(*p_Parameters).chi.use){
 
 			if((*p_Parameters).traits.value=="all"||(*p_Parameters).traits.value=="None"){
 				for(int i=0;i<(*(*p_iout_file).labels.trait_names).size();i++)
-				(*p_Parameters).traits.valueset.insert(i);
+				(*p_Parameters).traits.numbersset.insert(i);
 			cout<<"TRAITS VALUE SET CHANGED TO ALL"<<endl;
 		}
 
 		if((*p_Parameters).snps.value=="all"||(*p_Parameters).snps.value=="None"){
 			for(int i=0;i<(*(*p_iout_file).labels.snp_names).size();i++)
-				(*p_Parameters).snps.valueset.insert(i);
+				(*p_Parameters).snps.numbersset.insert(i);
 			cout<<"SNPS VALUE SET CHANGED TO ALL"<<endl;
 		}
 	}

Modified: pkg/OmicABEL/src/reshuffle/test.cpp
===================================================================
--- pkg/OmicABEL/src/reshuffle/test.cpp	2013-03-10 21:21:22 UTC (rev 1127)
+++ pkg/OmicABEL/src/reshuffle/test.cpp	2013-03-11 15:25:47 UTC (rev 1128)
@@ -52,6 +52,12 @@
 
 	test_txt<<"START TEST "<<name<<"\t";
 	iout_file iout_F(*Params_test);
+	if(Params_test->traits.use)
+		Params_test->traits.setbynames(*(iout_F.labels.trait_names));
+	if(Params_test->snps.use)
+		Params_test->snps.setbynames(*(iout_F.labels.snp_names));
+	if(Params_test->heritabilities.use)
+		Params_test->heritabilities.setbynames(*(iout_F.labels.trait_names));
 	Reshuffle reshh(iout_F,*Params_test);
 	reshh.run();
 	//result="datadims/datadims.txt";
@@ -61,11 +67,13 @@
 	string str_res="";
 	string str_che="";
 	int checker=0;
-	while (getline(result_f,str_res)){
-		getline(check_f,str_che);
-		if(strcmp(str_che.c_str(),str_res.c_str()))
+	while (getline(check_f,str_che)){
+		getline(result_f,str_res);
+		if(strcmp(str_che.c_str(),str_res.c_str())!=0)
 			checker++;
 	}
+	if(getline(result_f,str_res))
+		checker++;
 	if(checker!=0){
 		test_txt<<"Test "<<name<<" FAILED!!!"<<endl;
 	}else

Modified: pkg/OmicABEL/src/reshuffle/test.h
===================================================================
--- pkg/OmicABEL/src/reshuffle/test.h	2013-03-10 21:21:22 UTC (rev 1127)
+++ pkg/OmicABEL/src/reshuffle/test.h	2013-03-11 15:25:47 UTC (rev 1128)
@@ -19,7 +19,6 @@
 	string result;
 	string check;
 	Parameters Params;
-
 	Parameters *Params_test=&Params;
 	test(string,string,string,string);
 	void run();



More information about the Genabel-commits mailing list