[Genabel-commits] r670 - in pkg/VariABEL/src: . VARlib

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Mar 7 11:42:29 CET 2011


Author: maksim
Date: 2011-03-07 11:42:29 +0100 (Mon, 07 Mar 2011)
New Revision: 670

Added:
   pkg/VariABEL/src/VARlib/
   pkg/VariABEL/src/VARlib/constants.h
   pkg/VariABEL/src/VARlib/gtps_container.cpp
   pkg/VariABEL/src/VARlib/gtps_container.h
   pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.cpp
   pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.h
   pkg/VariABEL/src/VARlib/linear_regression.cpp
   pkg/VariABEL/src/VARlib/linear_regression.h
   pkg/VariABEL/src/VARlib/supplementary_functions.cpp
   pkg/VariABEL/src/VARlib/supplementary_functions.h
   pkg/VariABEL/src/VARlib/tags
   pkg/VariABEL/src/VARlib/var_homogeneity_test_C.cpp
   pkg/VariABEL/src/VARlib/var_homogeneity_tests.cpp
   pkg/VariABEL/src/VARlib/var_homogeneity_tests.h
   pkg/VariABEL/src/VARlib/var_meta_gwaa_C.cpp
Log:
dirs added

Added: pkg/VariABEL/src/VARlib/constants.h
===================================================================
--- pkg/VariABEL/src/VARlib/constants.h	                        (rev 0)
+++ pkg/VariABEL/src/VARlib/constants.h	2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,11 @@
+
+#ifndef SMV_CONSTANTS_H
+#define SMV_CONSTANTS_H
+const VARIABLE_TYPE NA_value = -999.999; // which numeric value is ised as NA
+const int NA_value_int = -999; // which numeric value is ised as NA
+const unsigned precision_output=4;
+const double median_table_chi2_df2 = 1.386294;
+
+
+
+#endif

Added: pkg/VariABEL/src/VARlib/gtps_container.cpp
===================================================================
--- pkg/VariABEL/src/VARlib/gtps_container.cpp	                        (rev 0)
+++ pkg/VariABEL/src/VARlib/gtps_container.cpp	2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,213 @@
+//#=====================================================================================
+//#
+//#       Filename:  gtps_container.cpp
+//#
+//#    Description:  Container for storaging genotype data. Use in function for merging of two snp.data class objects.
+//#
+//#        Version:  1.0
+//#        Created:  18-March-2008
+//#       Revision:  none
+//#       
+//#
+//#         Author:  Maksim V. Struchalin, Yurii S. Aulchenko
+//#        Company:  ErasmusMC, Epidemiology & Biostatistics Department, The Netherlands.
+//#          Email:  m.struchalin@@erasmusmc.nl, i.aoultchenko at erasmusmc.nl
+//#
+//#=====================================================================================
+
+#include "gtps_container.h"
+#include <cstdlib>  
+#include <math.h>
+
+
+
+
+gtps_container::gtps_container(char * gtps_array_, char * strand_array_, char * coding_array_, unsigned id_numbers_, unsigned snp_numbers_)
+{
+do_we_have_strand_and_codding_arrays = true; //of course
+rearrangement_array = new unsigned[4];
+rearrangement_array[0] = 6;
+rearrangement_array[1] = 4;
+rearrangement_array[2] = 2;  
+rearrangement_array[3] = 0;
+
+
+
+
+
+gtps_array = gtps_array_;	
+strand_array = strand_array_;
+coding_array = coding_array_;
+id_numbers = id_numbers_;
+snp_numbers = snp_numbers_;
+nbytes_for_one_snp = unsigned(ceil(double(id_numbers)/4.) + 0.5);
+
+//std::cout<<"gtps_container::gtps_container: nbytes_for_one_snp="<<nbytes_for_one_snp<<"\n";
+
+}
+//------------------------------------------------------------------
+
+
+
+//------------------------------------------------------------------
+gtps_container::gtps_container(char * gtps_array_, unsigned id_numbers_, unsigned snp_numbers_)
+{
+do_we_have_strand_and_codding_arrays = true; //of course
+rearrangement_array = new unsigned[4];
+rearrangement_array[0] = 6;
+rearrangement_array[1] = 4;
+rearrangement_array[2] = 2;  
+rearrangement_array[3] = 0;
+
+
+
+
+
+gtps_array = gtps_array_;	
+id_numbers = id_numbers_;
+snp_numbers = snp_numbers_;
+nbytes_for_one_snp = unsigned(ceil(double(id_numbers)/4.) + 0.5);
+}
+//------------------------------------------------------------------
+
+
+
+
+
+
+gtps_container::~gtps_container(void)
+{
+delete[] rearrangement_array;
+}
+
+
+
+
+//------------------------------------------------------------------
+void gtps_container::get_our_byte_number_and_local_person_number(unsigned id_position, unsigned snp_position)
+{
+our_byte_number = int(ceil(id_position/4.)+0.5) + (snp_position-1)*nbytes_for_one_snp;   //What byte is our id in? 
+
+local_person_number = id_position - ((our_byte_number-(snp_position-1)*nbytes_for_one_snp-1)*4); //What position of our id is in byte?
+}
+//------------------------------------------------------------------
+
+
+
+
+
+
+
+
+
+//------------------------------------------------------------------
+char gtps_container::get(unsigned id_position, unsigned snp_position)
+{
+get_our_byte_number_and_local_person_number(id_position, snp_position); //calculate our_byte_number value
+
+//char our_byte_vallue = gtps_array[our_byte_number-1];
+
+
+
+return char((gtps_array[our_byte_number-1] >> rearrangement_array[local_person_number-1]) & 3); 
+}
+//------------------------------------------------------------------
+
+
+
+
+//------------------------------------------------------------------
+//Attention! This class does not carry about allocated in this function memory.
+//In order to avoid memory leak "delete" must be performed for every array created in this function.
+char* gtps_container::get_gtps_array_for_snp(unsigned snp_position)
+{
+char* gtps_for_one_snp = new char(nbytes_for_one_snp);
+get_our_byte_number_and_local_person_number(1, snp_position); //calculate our_byte_number value
+
+std::cout<<"gtps_container::get_gtps_array_for_snp:  our_byte_number="<<our_byte_number<<"\n";
+std::cout<<"gtps_container::get_gtps_array_for_snp:  nbytes_for_one_snp="<<nbytes_for_one_snp<<"\n";
+	
+std::cout<<"gtps_array[0]="<<int(gtps_array[0])<<"\n";
+
+for(unsigned int i=0 ; i<nbytes_for_one_snp; i++)
+	{
+	gtps_for_one_snp[i]=gtps_array[our_byte_number-1+i];
+	}
+
+
+return gtps_for_one_snp;
+}
+//------------------------------------------------------------------
+
+
+
+
+
+
+
+
+
+char gtps_container::get_strand(unsigned snp_position)
+{
+if(do_we_have_strand_and_codding_arrays) return strand_array[snp_position-1]; 
+else std::cout<<"gtps_container::get_strand: You can not get strand since you create object with constructor gtps_container(char * gtps_array_raw, unsigned id_numbers, unsigned snp_numbers)\n";
+return char(0);
+}
+
+
+
+char gtps_container::get_coding(unsigned snp_position)
+{
+if(do_we_have_strand_and_codding_arrays) return coding_array[snp_position-1]; 
+else std::cout<<"gtps_container::get_strand: You can not get strand since you create object with constructor gtps_container(char * gtps_array_raw, unsigned id_numbers, unsigned snp_numbers)\n";
+return char(0);
+}
+
+
+//------------------------------------------------------------------
+void gtps_container::set(unsigned id_position, unsigned snp_position, char data)
+{
+//for(int i=0 ; i<4 ; i++)
+//std::cout<<"rearrangement_array["<<i<<"]="<<rearrangement_array[i]<<"\n";
+
+		
+static const char clear_info_for_person[]={63, 207, 243, 252};
+		
+//std::cout<<"set: id_position="<<id_position<<", snp_position="<<snp_position<<", data="<<int(data)<<"\n";
+get_our_byte_number_and_local_person_number(id_position, snp_position);
+
+
+//std::cout<<"set: (gtps_array[our_byte_number]&clear_info_for_person[local_person_number-1])="<<int(gtps_array[our_byte_number]&clear_info_for_person[local_person_number-1])<<"\n";
+
+//char tmp = (gtps_array[our_byte_number]&clear_info_for_person[local_person_number-1]) | data;
+//std::cout<<"set: tmp="<<int(tmp)<<"\n";
+
+
+//std::cout<<"gtps_array["<<our_byte_number-1<<"] = (gtps_array["<<our_byte_number-1<<"]&"<<int(clear_info_for_person[local_person_number-1])<<" |"<< int((data&3)<<rearrangement_array[local_person_number-1])<<"\n";
+
+
+gtps_array[our_byte_number-1] = (gtps_array[our_byte_number-1]&clear_info_for_person[local_person_number-1]) | (data&3)<<rearrangement_array[local_person_number-1];
+//std::cout<<"gtps_array["<<our_byte_number<<"-1]="<<int(gtps_array[our_byte_number-1])<<"\n";
+}
+//------------------------------------------------------------------
+
+
+
+
+/*
+gtps_container::checking(unsigned id_position, unsigned snp_position)
+{
+if (id_position > id_numbers || id_position<0)
+	{
+	std::cerr<<"Bad ID number. ID_number="<<id_position<<". Maximum ID_number can be "<<id_position<<"\n";
+	}
+
+if(snp_position > snp_position || snp_position <0)
+	{
+	std::cerr<<"Bad ID number. ID_number="<<id_position<<". Maximum ID_number can be "<<id_position<<"\n";
+	}
+}
+*/
+
+
+

Added: pkg/VariABEL/src/VARlib/gtps_container.h
===================================================================
--- pkg/VariABEL/src/VARlib/gtps_container.h	                        (rev 0)
+++ pkg/VariABEL/src/VARlib/gtps_container.h	2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,73 @@
+//#=====================================================================================
+//#
+//#       Filename:  gtps_container.h
+//#
+//#    Description:  Set of functions for merging two snp.data class objects.
+//#
+//#        Version:  1.0
+//#        Created:  18-March-2008
+//#       Revision:  none
+//#       
+//#
+//#         Author:  Maksim V. Struchalin, Yurii S. Aulchenko
+//#        Company:  ErasmusMC, Epidemiology & Biostatistics Department, The Netherlands.
+//#          Email:  m.struchalin@@erasmusmc.nl, i.aoultchenko at erasmusmc.nl
+//#
+//#=====================================================================================
+
+#ifndef SMV_GTPS_CONTAINER_H
+#define SMV_GTPS_CONTAINER_H
+
+
+
+#include<iostream>
+#include<vector>
+#include<memory>
+
+
+
+
+
+
+
+
+//==================================================================
+// Class gtps_container - container for gtps data. Class facilitates access to gtps array.
+class gtps_container
+	{
+	public:
+		gtps_container(char * gtps_array_raw, char * strand_array, char *  coding_array, unsigned id_numbers, unsigned snp_numbers);
+		gtps_container(char * gtps_array_raw, unsigned id_numbers, unsigned snp_numbers);
+		~gtps_container(void);
+
+		//Get polymorphism for person who have ID=id_position for SNP=snp_position
+		//Here the first id_position equal zerro, the second - one etc...
+		//id_position=1 - THE FIRST ARGUMENT (id_position!=O NEVER!!!). snp_position - same
+		char get(unsigned id_position, unsigned snp_position);
+		char* get_gtps_array_for_snp(unsigned snp_position);
+		char get_strand(unsigned snp_position);
+		char get_coding(unsigned snp_position);
+		
+		
+		//id_position=1 - THE FIRST ARGUMENT (id_position!=O NEVER!!!). snp_position - same
+		void set(unsigned id_position, unsigned snp_position, char data);
+
+	private:
+		void get_our_byte_number_and_local_person_number(unsigned id_position, unsigned snp_position);
+		bool do_we_have_strand_and_codding_arrays;
+		char *gtps_array; //pointer to array where we stotages our data (passed from R)
+		char * strand_array;
+		char * coding_array;
+		unsigned id_numbers, snp_numbers;
+		unsigned nbytes_for_one_snp;
+
+		unsigned our_byte_number, 
+						 local_person_number; //can have vallues: 1, 2, 3 or 4. Show what is position in the byte.
+//	  const unsigned rearrangement_array[];
+		unsigned *rearrangement_array;
+
+
+	};
+//==================================================================
+
+#endif

Added: pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.cpp
===================================================================
--- pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.cpp	                        (rev 0)
+++ pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.cpp	2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,70 @@
+//#=====================================================================================
+//#
+//#       Filename:  inverse_variance_metaanalysis.cpp
+//#
+//#    Description:  Function for meta analysis with inverse variance method. 
+//#
+//#        Version:  1.0
+//#        Created:  06-July-2009
+//#       Revision:  none
+//#				 Modifed:  26-Apr-2010
+//#
+//#         Author:  Maksim V. Struchalin
+//#        Company:  ErasmusMC, Epidemiology, The Netherlands.
+//#          Email:  m.struchalin at erasmusmc.nl
+//#
+//#=====================================================================================
+
+
+
+
+#include<iostream>
+//#include<math>
+
+#include <Rinternals.h>
+
+#include <R.h>  // to include Rconfig.h 
+
+#ifdef ENABLE_NLS
+#include <libintl.h>
+#define _(String) dgettext ("pkg", String)
+// replace pkg as appropriate 
+#else
+#define _(String) (String)
+#endif
+
+extern "C" {
+#include "inverse_variance_metaanalysis.h"
+
+
+void inverse_variance_metaanalysis(double *beta_set1, double *beta_set2,
+			 				double *sebeta_set1, double *sebeta_set2,
+			 				unsigned *num, //number of betas for current cohort
+							double *mbeta,
+							double *mse,
+							char *testname)
+{
+
+unsigned num_el = *num;
+double se_set1, se_set2, wt2_set1, wt2_set2, invsumwt2;
+
+
+
+	for(unsigned i=0 ; i<num_el ; i++)
+		{
+		se_set1 = sqrt(sebeta_set1[i]*sebeta_set1[i]);
+		se_set2 = sqrt(sebeta_set2[i]*sebeta_set2[i]);
+	
+		wt2_set1 = 1./(sebeta_set1[i]*sebeta_set1[i]);
+		wt2_set2 = 1./(sebeta_set2[i]*sebeta_set2[i]);
+
+		invsumwt2 = 1./(wt2_set1+wt2_set2);
+		mbeta[i] = (beta_set1[i]*wt2_set1 + beta_set2[i]*wt2_set2)*invsumwt2;
+		mse[i] = sqrt(invsumwt2);
+		}
+
+}// end of function 
+
+
+
+}//end of extern "C"

Added: pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.h
===================================================================
--- pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.h	                        (rev 0)
+++ pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.h	2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,33 @@
+//#=====================================================================================
+//#
+//#       Filename:  inverse_variance_metaanalysis.h
+//#
+//#    Description:  Function for meta analysis with inverse variance method
+//#
+//#        Version:  1.0
+//#        Created:  06-July-2009
+//#       Revision:  none
+//#				Modifed:  26-Apr-2010
+//#
+//#         Author:  Maksim V. Struchalin
+//#        Company:  ErasmusMC, Epidemiology, The Netherlands.
+//#          Email:  m.struchalin at erasmusmc.nl
+//#
+//#=====================================================================================
+
+#ifndef SMV_DOMETA_C_H
+#define SMV_DOMETA_C_H
+
+
+extern "C" {
+void inverse_variance_metaanalysis(double *beta_set1, double *beta_set2,
+              double *sebeta_set1, double *sebeta_set2,
+              unsigned *num,
+              double *mbeta,
+              double *mse,
+							char *testname);
+
+
+}
+#endif
+

Added: pkg/VariABEL/src/VARlib/linear_regression.cpp
===================================================================
--- pkg/VariABEL/src/VARlib/linear_regression.cpp	                        (rev 0)
+++ pkg/VariABEL/src/VARlib/linear_regression.cpp	2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,97 @@
+#include "linear_regression.h"
+#include <math.h>
+#include "iostream"
+
+
+//#=====================================================================================
+//#
+//#       Filename:  linear_regression.cpp
+//#
+//#    Description:  The function "linear_regression" computes least squares solutions to the system (1). Based on "dqrfit" and "ch2inv" fortran subroutine. 
+//#									 (1) design_matrix * betas = y
+//#
+//#        Version:  0.1
+//#        Created:  2-Dec-2010
+//#       Revision:  none
+//#				 Modifed:  
+//#
+//#         Author:  Maksim V. Struchalin
+//#        Company:  ErasmusMC, Epidemiology, The Netherlands.
+//#          Email:  m.struchalin at erasmusmc.nl, m.v.struchalin at mail.ru
+//#
+//#=====================================================================================
+
+
+
+//Manual of input parameters:
+// "y" - dependable variable - array with length "num_observations"
+// "design_matrix" - design (module) matrix with dimension ("num_observations" x "p"). The first column is filled with one's (for intercept). The following columns are covariates.
+// "p" - number of covariates plus 1. "p" is number of columns in the design matrix.
+// "num_observations" - number of observation. "num_observations" is length of "y" and number of rows of design matrix.
+// "betas" - solution of equation (1). The array of length "p" which contains effects of intercect and covariates.
+// "se" - solution of equation (1). The array of length "p" which contains standart errors of effects of intercect and covariates.
+// "residuals" - residuals of y ("residuals"="y" - "betas"*"design_matrix"). 
+//
+// auxiliary variables: explanation of "qty", "jpvt", "qraux", "work" can be found in manual for dqrls (http://svn.r-project.org/R/trunk/src/appl/dqrls.f)
+// "qty" - array of length "num_observations"
+// "jpvt" - array of length "p"
+// "qraux" - array of length "p"
+// "work" - array of length "2*p"
+// "v" - array of length "2*p". 
+// "x_for_ch2inv" - array of length "2*p"
+
+
+void linear_regression(
+				/*input variables:*/     double *y, double *design_matrix, int *p_/*cov num + 1*/, long unsigned* num_observations, 
+				/*return variables:*/    double *betas, double *se, double *residuals,
+				/*auxiliary variables:*/ double *qty, int *jpvt, double *qraux, double *work, double *v, double *x_for_ch2inv)
+{
+	int n=*num_observations;
+	int p = *p_;
+//	int i_start = n*(p-1);
+//	int i_stop  = n*p;
+
+	static int ny = 1; //numeber of dependable variables (y)
+	static double tol=1e-07;
+	static int k;
+
+
+
+
+	dqrls_(design_matrix, &n, &p, y, &ny, &tol, betas, residuals, qty, &k, jpvt, qraux, work);
+
+	for(int col=0 ; col<p ; col++)
+		{
+		for(int row=0 ; row<p ; row++)
+			{
+			x_for_ch2inv[row + p*col] = design_matrix[row+n*col];
+			}
+		
+		}
+
+	int info;
+	ch2inv_(x_for_ch2inv, &p, &p, v, &info);
+
+
+	//calculate rss (residuals square sum)
+	double rss=0;
+	for(int i=0 ; i<n ; i++)
+		{
+		rss += residuals[i]*residuals[i];
+		}
+	
+	static double rdf;
+	rdf = n - p;
+	static double resvar;
+ 	resvar = rss/rdf;
+
+
+	for(int i=0 ; i<p ; i++)
+		{
+		se[i] = sqrt(v[i*p+i] * resvar); //diagonal ellements of v multiply by resvar
+		}
+
+
+}
+
+

Added: pkg/VariABEL/src/VARlib/linear_regression.h
===================================================================
--- pkg/VariABEL/src/VARlib/linear_regression.h	                        (rev 0)
+++ pkg/VariABEL/src/VARlib/linear_regression.h	2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,38 @@
+#ifndef SMV_LINEAR_REGRESSION_H
+#define SMV_LINEAR_REGRESSION_H
+
+
+
+//#=====================================================================================
+//#
+//#       Filename:  linear_regression.h
+//#
+//#    Description:  The function "linear_regression" computes least squares solutions to the system (1). Based on "dqrfit" and "ch2inv" fortran subroutine. 
+//#									 (1) design_matrix * betas = y
+//#
+//#        Version:  0.1
+//#        Created:  2-Dec-2010
+//#       Revision:  none
+//#				 Modifed:  
+//#
+//#         Author:  Maksim V. Struchalin
+//#        Company:  ErasmusMC, Epidemiology, The Netherlands.
+//#          Email:  m.struchalin at erasmusmc.nl, m.v.struchalin at mail.ru
+//#
+//#=====================================================================================
+
+
+
+extern "C" {
+
+extern int dqrls_(double *x, int *n, int *p, double *y, int *ny, double *tol, double *b, double *rsd, double *qty, int *k, int *jpvt, double *qraux, double *work);
+extern void ch2inv_(double *x, int *ldx, int *n, double *v, int *info);
+
+}
+
+void linear_regression(
+				        /*input variables:*/     double *y, double *design_matrix, int *p_/*cov num + 1*/, long unsigned* num_observations,
+								/*return variables:*/    double *betas, double *se, double *residuals,
+								/*auxiliary variables:*/ double *qty, int *jpvt, double *qraux, double *work, double *v, double *x_for_ch2inv);
+
+#endif

Added: pkg/VariABEL/src/VARlib/supplementary_functions.cpp
===================================================================
--- pkg/VariABEL/src/VARlib/supplementary_functions.cpp	                        (rev 0)
+++ pkg/VariABEL/src/VARlib/supplementary_functions.cpp	2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,1021 @@
+//#=====================================================================================
+//#
+//#       Filename:  supplementary_functions.cpp
+//#
+//#    Description:  Supplementary functions for variance homogeneity test and for metaanalysis.
+//#
+//#        Version:  1.0
+//#        Created:  28-Apr-2010
+//#       Revision:  none
+//#
+//#
+//#         Author:  Maksim V. Struchalin
+//#        Company:  ErasmusMC, Epidemiology, The Netherlands.
+//#          Email:  m.struchalin at erasmusmc.nl
+//#
+//#=====================================================================================
+
+
+#include "supplementary_functions.h"
+#include "inverse_variance_metaanalysis.h"
+#include <stdlib.h>
+#include <math.h>
+
+
+
+
+
+
+
+//___________________________________________________________
+//Set genotypes in common view GA -> AG
+bool unify_snp(snp_var_data* snp, std::ofstream & warnings_file)
+{
+
+
+static std::string alleles_snp;
+alleles_snp="";
+
+for(unsigned i=0 ; i<GENO_TYPES_NUM ; i++) {alleles_snp += snp->GENO[i];}//collect all alleles 
+
+alleles_snp = get_uniq_symbols(alleles_snp);
+for(unsigned i=0 ; i<alleles_snp.size() ; i++) if(alleles_snp[i] == '/') {alleles_snp.erase(i,1);}
+
+if(alleles_snp.size() > 2 && alleles_snp.size() <=1)
+	{
+	//Rprintf("SNP %s has more than 2 alleles (it has %s). SNP skiped.\n", snp->snpname.c_str(), alleles_snp.c_str());
+	warnings_file<<"SNP "<<snp->snpname<<" has more than 2 alleles (it has "<<alleles_snp<<"). SNP skiped.\n";
+	return false;	
+	}
+
+if(alleles_snp.size() <=1)
+	{
+	//Rprintf("SNP %s has less than 2 alleles (it has %s). SNP skiped.\n", snp->snpname.c_str(), alleles_snp.c_str());
+	warnings_file<<"SNP "<<snp->snpname<<" has less than 2 alleles (it has "<<alleles_snp<<"). SNP skiped.\n";
+	return false;	
+	}
+
+
+if(alleles_snp[0]=='0' && alleles_snp[1]=='0')
+	{
+	//Rprintf("SNP %s has undefind allels. SNP skiped..\n", snp->snpname.c_str());
+	warnings_file<<"SNP "<<snp->snpname<<" has undefind allels. SNP skiped\n";
+	return false;
+	}
+
+
+
+
+//exclude those geno group which has only one id
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+	{
+
+	if(is_na(snp->SD[i]) || is_na(snp->MEAN[i]) || is_na(snp->COUNTS[i]) || snp->COUNTS[i]<=1 )
+		{
+		snp->SD[i] = NA_value;
+		snp->MEAN[i] = NA_value;
+		snp->COUNTS[i] = 0;
+		continue;
+		}
+
+	static VARIABLE_TYPE var;
+	var = snp->SD[i]*snp->SD[i];	
+	if(var <= 1.E-32)
+		{
+		snp->SD[i] = NA_value;
+		snp->MEAN[i] = NA_value;
+		snp->COUNTS[i] = 0;
+		
+		//Rprintf("warning: genotypic group %s in snp %s has too small variance (variance=%f). This genotypic group is excluded from analysis.\n", 
+		warnings_file<<"warning: genotypic group "<<snp->GENO[i]<<" in snp "<<snp->snpname<<" has too small variance (variance="<<var<<"). This genotypic group is excluded from analysis.\n"; 
+		
+		}
+	}
+
+
+
+
+
+
+std::sort(alleles_snp.begin(), alleles_snp.end());
+
+
+if(alleles_snp[0] == '0')
+	{
+	alleles_snp[0] = alleles_snp[1];
+	alleles_snp[1] = '0';
+	}
+
+static std::string geno[3];
+
+
+geno[0].push_back(alleles_snp[0]);
+geno[0].push_back('/');
+geno[0].push_back(alleles_snp[0]);
+		
+geno[1].push_back(alleles_snp[0]);
+geno[1].push_back('/');
+geno[1].push_back(alleles_snp[1]);
+
+geno[2].push_back(alleles_snp[1]);
+geno[2].push_back('/');
+geno[2].push_back(alleles_snp[1]);
+
+
+static std::string geno_hemozyg_switched;
+geno_hemozyg_switched.push_back(geno[1][2]);
+geno_hemozyg_switched.push_back('/');
+geno_hemozyg_switched.push_back(geno[1][0]);
+
+
+if(snp->GENO[0] == geno[0] && snp->GENO[1] == geno[1] && snp->GENO[2] == geno[2])
+	{
+	geno_hemozyg_switched = "";
+	geno[0] = "";
+	geno[1] = "";
+	geno[2] = "";
+	return true; //nothing to change
+	}
+
+
+static snp_var_data snp_new;
+
+//snp_new->snpname = snp->snpname;
+//snp_new->chromosome = snp->chromosome;
+
+
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+	{
+	if(snp->GENO[i] == geno[0])
+		{snp_new.GENO[0] = geno[0]; snp_new.COUNTS[0]=snp->COUNTS[i]; snp_new.MEAN[0]=snp->MEAN[i]; snp_new.SD[0]=snp->SD[i];}
+	if(snp->GENO[i] == geno[1] || snp->GENO[i]==geno_hemozyg_switched) 
+		{snp_new.GENO[1] = geno[1]; snp_new.COUNTS[1]=snp->COUNTS[i]; snp_new.MEAN[1]=snp->MEAN[i]; snp_new.SD[1]=snp->SD[i];}
+	if(snp->GENO[i] == geno[2]) 
+		{snp_new.GENO[2] = geno[2]; snp_new.COUNTS[2]=snp->COUNTS[i]; snp_new.MEAN[2]=snp->MEAN[i]; snp_new.SD[2]=snp->SD[i];}
+	}
+
+geno_hemozyg_switched = "";
+geno[0] = "";
+geno[1] = "";
+geno[2] = "";
+
+
+
+
+
+
+
+
+
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+	{
+	snp->GENO[i] = snp_new.GENO[i];
+	snp->COUNTS[i] = snp_new.COUNTS[i];
+	snp->MEAN[i] = snp_new.MEAN[i];
+	snp->SD[i] = snp_new.SD[i];
+	}
+
+snp_new.reset();
+
+
+
+
+return true;
+}
+//___________________________________________________________
+
+
+
+
+//___________________________________________________________
+//put new snp into the storage. If this snp is there already tham metaanalyse it
+bool include_snp(Snp_store_type * snps_storage, snp_var_data* snp, std::ofstream & warnings_file, char *testname)
+{
+
+
+Snp_store_type::iterator iter_map = snps_storage->find(snp->snpname);
+
+//char delim=' ';
+
+
+
+if(iter_map == snps_storage->end()) 
+	{
+	snps_storage->insert(std::pair<std::string, snp_var_data*>(snp->snpname, snp));
+	}
+else
+	{
+	snp_var_data* snp_current = iter_map->second;
+	static bool is_snp_ok;
+	is_snp_ok = check(snp_current, snp, warnings_file);
+	
+	if(!is_snp_ok) {return false;}
+	
+	(*snps_storage)[snp->snpname.c_str()] = snp_var_meta(iter_map->second, snp, testname);
+	
+	//bless god souls of these objects...
+	delete snp_current;
+	delete snp;
+	}
+
+return true;
+}
+//___________________________________________________________
+
+
+
+//metaanalysis of data from two snps
+//___________________________________________________________
+snp_var_data* snp_var_meta(snp_var_data* snp1, snp_var_data* snp2, char* testname)
+{
+snp_var_data* snp_meta = new snp_var_data;
+
+
+
+snp_meta->chromosome = snp1->chromosome;
+snp_meta->snpname = snp1->snpname;
+
+VARIABLE_TYPE meta_mean_beta[GENO_TYPES_NUM], meta_mean_se[GENO_TYPES_NUM];
+	
+
+static unsigned one = 1;
+
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+	{
+	//metanalysis for variance
+
+	static std::string meta_codding;
+			
+	snp_meta->GENO[i] = snp1->GENO[i];
+	
+	if(snp1->COUNTS[i] != 0	&& snp2->COUNTS[i] != 0 )
+		{
+
+		static VARIABLE_TYPE SD_of_the_mean_snp1, SD_of_the_mean_snp2;
+		
+		SD_of_the_mean_snp1 = snp1->SD[i]/sqrt(double(snp1->COUNTS[i]));
+		SD_of_the_mean_snp2 = snp2->SD[i]/sqrt(double(snp2->COUNTS[i]));
+
+		//metanalysis for mean
+		inverse_variance_metaanalysis(&snp1->MEAN[i], &snp2->MEAN[i],
+						 &SD_of_the_mean_snp1, &SD_of_the_mean_snp2,
+							&one,
+							meta_mean_beta,
+							meta_mean_se, testname);
+	
+		snp_meta->MEAN[i] = meta_mean_beta[0];
+		snp_meta->COUNTS[i] = snp1->COUNTS[i] + snp2->COUNTS[i];
+		snp_meta->SD[i] = meta_mean_se[0]*sqrt(double(snp_meta->COUNTS[i]));
+		}
+	else
+		{
+		if(snp1->COUNTS[i] == 0)
+			{
+			snp_meta->SD[i] = snp2->SD[i];
+			snp_meta->COUNTS[i] = snp2->COUNTS[i];
+			snp_meta->MEAN[i] = snp2->MEAN[i];
+			}
+		else if(snp2->COUNTS[i] == 0)
+			{
+			snp_meta->SD[i] = snp1->SD[i];
+			snp_meta->COUNTS[i] = snp1->COUNTS[i];
+			snp_meta->MEAN[i] = snp1->MEAN[i];
+			}
+		else
+			{
+			error("Upss.. :-) something strange occured... wrong file format probably. Create small example of your data where you have an error and send it to developer.\n");
+			}
+		}
+	}
+return snp_meta;		
+}
+//___________________________________________________________
+
+
+
+
+
+
+
+//___________________________________________________________
+bool is_na(const VARIABLE_TYPE val, const VARIABLE_TYPE na_reference)
+{
+static VARIABLE_TYPE delta = fabs(na_reference/1E6);
+if(val > na_reference-delta && val < na_reference+delta) return true;
+else return false;
+}
+//___________________________________________________________
+
+
+
+
+
+bool is_na(const int val, const int na_reference) //is numerical value recognized as NA 
+{
+if(val == na_reference) return true;
+else return false;
+}
+
+
+
+//save all data into plink like format
+//___________________________________________________________
+void save_snps_data_into_file(Snp_store_type *snps_data, const char *output_filename, char delim)
+{
+//print header
+	
+const unsigned pp_maxsnp=10;
+		
+std::ofstream file;
+	
+
+file.open(output_filename);
+
+
+if(!file.is_open()){error("Can not open file %s\n", output_filename);}
+
+
+
+//file<<setiosflags(std::ios::right);
+
+file.precision(precision_output);
+
+file << std::setw(4) << chromosome_column_name.c_str() << delim
+		 << std::setw(pp_maxsnp) << snpname_column_name.c_str() << delim
+		 << std::setw(6) << value_column_name.c_str() << delim
+		 << std::setw(8) << g11_column_name.c_str() << delim
+		 << std::setw(8) << g12_column_name.c_str() << delim
+		 << std::setw(8) << g22_column_name.c_str() << "\n";
+
+
+		
+static Snp_store_type::iterator iter_map;
+
+for(Snp_store_type::const_iterator i=snps_data->begin() ; i!=snps_data->end() ; ++i)
+	{
+	//geno:
+	file << std::setw(4) << i->second->chromosome << delim
+		   << std::setw(pp_maxsnp) << i->second->snpname << delim 
+			 << std::setw(6) << geno_value_name << delim
+			 << std::setw(8) << i->second->GENO[0] << delim
+			 << std::setw(8) << i->second->GENO[1] << delim
+			 << std::setw(8) << i->second->GENO[2] << "\n";
+
+
+
+	//counts:
+	file << std::setw(4) << i->second->chromosome << delim
+		   << std::setw(pp_maxsnp) << i->second->snpname << delim 
+			 << std::setw(6) << counts_value_name << delim;
+
+		 if(is_na(i->second->COUNTS[0]))
+			 {
+			 file <<  std::setw(8) << "NA" << delim;
+			 }
+			else
+			 {
+			 file <<  std::setw(8) << std::setiosflags(std::ios_base::dec) << i->second->COUNTS[0] << delim;
+			 }
+
+		 if(is_na(i->second->COUNTS[1]))
+			 {
+			 file <<  std::setw(8) << "NA" << delim;
+			 }
+			else
+			 {
+			 file <<  std::setw(8) << std::setiosflags(std::ios_base::dec) << i->second->COUNTS[1] << delim;
+			 }
+
+		 if(is_na(i->second->COUNTS[2]))
+			 {
+			 file <<  std::setw(8) << "NA" << delim;
+			 }
+			else
+			 {
+			 file <<  std::setw(8) << std::setiosflags(std::ios_base::dec) << i->second->COUNTS[2] << '\n';
+			 }
+
+
+
+	//freq:
+	static VARIABLE_TYPE total_id_num;
+ 	total_id_num = i->second->COUNTS[0] + i->second->COUNTS[1] + i->second->COUNTS[2];	
+	
+	file << std::setw(4) << i->second->chromosome << delim
+		   << std::setw(pp_maxsnp) << i->second->snpname << delim 
+			 << std::setw(6) << freq_value_name << delim
+			 << std::setw(8) << (is_na(i->second->COUNTS[0])? "NA":double_2_str(i->second->COUNTS[0]/total_id_num)) << delim
+			 << std::setw(8) << (is_na(i->second->COUNTS[1])? "NA":double_2_str(i->second->COUNTS[1]/total_id_num)) << delim
+			 << std::setw(8) << (is_na(i->second->COUNTS[2])? "NA":double_2_str(i->second->COUNTS[2]/total_id_num)) << "\n";
+
+
+	//mean:
+	file << std::setw(4) << i->second->chromosome << delim
+		   << std::setw(pp_maxsnp) << i->second->snpname << delim 
+			 << std::setw(6) << mean_value_name << delim
+			 << std::setw(8) << (is_na(i->second->MEAN[0])? "NA":double_2_str(i->second->MEAN[0])) << delim
+			 << std::setw(8) << (is_na(i->second->MEAN[1])? "NA":double_2_str(i->second->MEAN[1])) << delim
+			 << std::setw(8) << (is_na(i->second->MEAN[2])? "NA":double_2_str(i->second->MEAN[2])) << "\n";
+
+	//sd:
+	file << std::setw(4) << i->second->chromosome << delim
+		   << std::setw(pp_maxsnp) << i->second->snpname << delim 
+			 << std::setw(6) << sd_value_name << delim
+			 << std::setw(8) << (is_na(i->second->SD[0])? "NA":double_2_str(i->second->SD[0])) << delim
+			 << std::setw(8) << (is_na(i->second->SD[1])? "NA":double_2_str(i->second->SD[1])) << delim
+			 << std::setw(8) << (is_na(i->second->SD[2])? "NA":double_2_str(i->second->SD[2])) << "\n";
+
+
+	}
+
+
+file.close();
+}
+//___________________________________________________________
+
+
+
+
+//___________________________________________________________
+void save_snps_tests_into_file(Snp_store_type *snps_data, const char *output_filename, char delim)
+{
+//print header
+	
+//const unsigned pp_maxsnp=10;
+		
+std::ofstream file;
+	
+
+file.open(output_filename);
+
+
+if(!file.is_open()){error("Can not open file %s\n", output_filename);}
+
+
+
+//file<<setiosflags(std::ios::right);
+
+file.precision(precision_output);
+
+file << chromosome_column_name << snpname_column_name << delim << "Z" << delim << "Z_2df\n";
+
+
+		
+static Snp_store_type::iterator iter_map;
+
+for(Snp_store_type::const_iterator i=snps_data->begin() ; i!=snps_data->end() ; ++i)
+	{
+	//geno:
+	file << i->second->chromosome << delim
+			 << i->second->snpname << delim 
+			 << (is_na(i->second->Z)? "NA":double_2_str(i->second->Z)) << delim
+			 << (is_na(i->second->Z_2df)? "NA":double_2_str(i->second->Z_2df)) << "\n"; 
+	}
+
+
+file.close();
+}
+//___________________________________________________________
+
+
+
+//convert from double to string
+//___________________________________________________________
+std::string double_2_str(double val, const unsigned precision)
+{
+static std::stringstream stream;
+stream.str(""); stream.clear();
+stream.precision(precision);
+stream << val;
+return stream.str();
+}
+//___________________________________________________________
+
+
+
+//___________________________________________________________
+//Check whether snps have same genotypes and in same columns. If one of snp has allele 0 but another have real than replace 0 by real one.
+bool check(snp_var_data* snp2, snp_var_data* snp1, std::ofstream & warnings_file)
+{
+//check snp name
+if(snp1->snpname != snp2->snpname) 
+	{
+	error("snp_var_meta: unexpected error; atempt to pool two different snps");
+	}
+
+//check chromosome name
+if(snp1->chromosome != snp2->chromosome)
+	{
+	//Rprintf("warning: SNP %s has different chromosome number in different files. Previos value is %i, current one is %i. Value %i is used.\n",
+	//			 	snp2->snpname.c_str(), snp2->chromosome, snp1->chromosome, snp2->chromosome);
+	
+	warnings_file<<"warning: SNP "<<snp2->snpname<<" has different chromosome number in different files. Previos value is "<<snp2->chromosome<<
+			", current one is "<<snp1->chromosome<<". Value "<<snp2->chromosome<<" is used.\n";
+	}
+
+
+
+//check coddings
+if(snp1->GENO[1] == snp2->GENO[1])
+	{
+	return true;
+	}
+
+
+
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/genabel -r 670


More information about the Genabel-commits mailing list