[Genabel-commits] r670 - in pkg/VariABEL/src: . VARlib
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Mar 7 11:42:29 CET 2011
Author: maksim
Date: 2011-03-07 11:42:29 +0100 (Mon, 07 Mar 2011)
New Revision: 670
Added:
pkg/VariABEL/src/VARlib/
pkg/VariABEL/src/VARlib/constants.h
pkg/VariABEL/src/VARlib/gtps_container.cpp
pkg/VariABEL/src/VARlib/gtps_container.h
pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.cpp
pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.h
pkg/VariABEL/src/VARlib/linear_regression.cpp
pkg/VariABEL/src/VARlib/linear_regression.h
pkg/VariABEL/src/VARlib/supplementary_functions.cpp
pkg/VariABEL/src/VARlib/supplementary_functions.h
pkg/VariABEL/src/VARlib/tags
pkg/VariABEL/src/VARlib/var_homogeneity_test_C.cpp
pkg/VariABEL/src/VARlib/var_homogeneity_tests.cpp
pkg/VariABEL/src/VARlib/var_homogeneity_tests.h
pkg/VariABEL/src/VARlib/var_meta_gwaa_C.cpp
Log:
dirs added
Added: pkg/VariABEL/src/VARlib/constants.h
===================================================================
--- pkg/VariABEL/src/VARlib/constants.h (rev 0)
+++ pkg/VariABEL/src/VARlib/constants.h 2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,11 @@
+
+#ifndef SMV_CONSTANTS_H
+#define SMV_CONSTANTS_H
+const VARIABLE_TYPE NA_value = -999.999; // which numeric value is ised as NA
+const int NA_value_int = -999; // which numeric value is ised as NA
+const unsigned precision_output=4;
+const double median_table_chi2_df2 = 1.386294;
+
+
+
+#endif
Added: pkg/VariABEL/src/VARlib/gtps_container.cpp
===================================================================
--- pkg/VariABEL/src/VARlib/gtps_container.cpp (rev 0)
+++ pkg/VariABEL/src/VARlib/gtps_container.cpp 2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,213 @@
+//#=====================================================================================
+//#
+//# Filename: gtps_container.cpp
+//#
+//# Description: Container for storaging genotype data. Use in function for merging of two snp.data class objects.
+//#
+//# Version: 1.0
+//# Created: 18-March-2008
+//# Revision: none
+//#
+//#
+//# Author: Maksim V. Struchalin, Yurii S. Aulchenko
+//# Company: ErasmusMC, Epidemiology & Biostatistics Department, The Netherlands.
+//# Email: m.struchalin@@erasmusmc.nl, i.aoultchenko at erasmusmc.nl
+//#
+//#=====================================================================================
+
+#include "gtps_container.h"
+#include <cstdlib>
+#include <math.h>
+
+
+
+
+gtps_container::gtps_container(char * gtps_array_, char * strand_array_, char * coding_array_, unsigned id_numbers_, unsigned snp_numbers_)
+{
+do_we_have_strand_and_codding_arrays = true; //of course
+rearrangement_array = new unsigned[4];
+rearrangement_array[0] = 6;
+rearrangement_array[1] = 4;
+rearrangement_array[2] = 2;
+rearrangement_array[3] = 0;
+
+
+
+
+
+gtps_array = gtps_array_;
+strand_array = strand_array_;
+coding_array = coding_array_;
+id_numbers = id_numbers_;
+snp_numbers = snp_numbers_;
+nbytes_for_one_snp = unsigned(ceil(double(id_numbers)/4.) + 0.5);
+
+//std::cout<<"gtps_container::gtps_container: nbytes_for_one_snp="<<nbytes_for_one_snp<<"\n";
+
+}
+//------------------------------------------------------------------
+
+
+
+//------------------------------------------------------------------
+gtps_container::gtps_container(char * gtps_array_, unsigned id_numbers_, unsigned snp_numbers_)
+{
+do_we_have_strand_and_codding_arrays = true; //of course
+rearrangement_array = new unsigned[4];
+rearrangement_array[0] = 6;
+rearrangement_array[1] = 4;
+rearrangement_array[2] = 2;
+rearrangement_array[3] = 0;
+
+
+
+
+
+gtps_array = gtps_array_;
+id_numbers = id_numbers_;
+snp_numbers = snp_numbers_;
+nbytes_for_one_snp = unsigned(ceil(double(id_numbers)/4.) + 0.5);
+}
+//------------------------------------------------------------------
+
+
+
+
+
+
+gtps_container::~gtps_container(void)
+{
+delete[] rearrangement_array;
+}
+
+
+
+
+//------------------------------------------------------------------
+void gtps_container::get_our_byte_number_and_local_person_number(unsigned id_position, unsigned snp_position)
+{
+our_byte_number = int(ceil(id_position/4.)+0.5) + (snp_position-1)*nbytes_for_one_snp; //What byte is our id in?
+
+local_person_number = id_position - ((our_byte_number-(snp_position-1)*nbytes_for_one_snp-1)*4); //What position of our id is in byte?
+}
+//------------------------------------------------------------------
+
+
+
+
+
+
+
+
+
+//------------------------------------------------------------------
+char gtps_container::get(unsigned id_position, unsigned snp_position)
+{
+get_our_byte_number_and_local_person_number(id_position, snp_position); //calculate our_byte_number value
+
+//char our_byte_vallue = gtps_array[our_byte_number-1];
+
+
+
+return char((gtps_array[our_byte_number-1] >> rearrangement_array[local_person_number-1]) & 3);
+}
+//------------------------------------------------------------------
+
+
+
+
+//------------------------------------------------------------------
+//Attention! This class does not carry about allocated in this function memory.
+//In order to avoid memory leak "delete" must be performed for every array created in this function.
+char* gtps_container::get_gtps_array_for_snp(unsigned snp_position)
+{
+char* gtps_for_one_snp = new char(nbytes_for_one_snp);
+get_our_byte_number_and_local_person_number(1, snp_position); //calculate our_byte_number value
+
+std::cout<<"gtps_container::get_gtps_array_for_snp: our_byte_number="<<our_byte_number<<"\n";
+std::cout<<"gtps_container::get_gtps_array_for_snp: nbytes_for_one_snp="<<nbytes_for_one_snp<<"\n";
+
+std::cout<<"gtps_array[0]="<<int(gtps_array[0])<<"\n";
+
+for(unsigned int i=0 ; i<nbytes_for_one_snp; i++)
+ {
+ gtps_for_one_snp[i]=gtps_array[our_byte_number-1+i];
+ }
+
+
+return gtps_for_one_snp;
+}
+//------------------------------------------------------------------
+
+
+
+
+
+
+
+
+
+char gtps_container::get_strand(unsigned snp_position)
+{
+if(do_we_have_strand_and_codding_arrays) return strand_array[snp_position-1];
+else std::cout<<"gtps_container::get_strand: You can not get strand since you create object with constructor gtps_container(char * gtps_array_raw, unsigned id_numbers, unsigned snp_numbers)\n";
+return char(0);
+}
+
+
+
+char gtps_container::get_coding(unsigned snp_position)
+{
+if(do_we_have_strand_and_codding_arrays) return coding_array[snp_position-1];
+else std::cout<<"gtps_container::get_strand: You can not get strand since you create object with constructor gtps_container(char * gtps_array_raw, unsigned id_numbers, unsigned snp_numbers)\n";
+return char(0);
+}
+
+
+//------------------------------------------------------------------
+void gtps_container::set(unsigned id_position, unsigned snp_position, char data)
+{
+//for(int i=0 ; i<4 ; i++)
+//std::cout<<"rearrangement_array["<<i<<"]="<<rearrangement_array[i]<<"\n";
+
+
+static const char clear_info_for_person[]={63, 207, 243, 252};
+
+//std::cout<<"set: id_position="<<id_position<<", snp_position="<<snp_position<<", data="<<int(data)<<"\n";
+get_our_byte_number_and_local_person_number(id_position, snp_position);
+
+
+//std::cout<<"set: (gtps_array[our_byte_number]&clear_info_for_person[local_person_number-1])="<<int(gtps_array[our_byte_number]&clear_info_for_person[local_person_number-1])<<"\n";
+
+//char tmp = (gtps_array[our_byte_number]&clear_info_for_person[local_person_number-1]) | data;
+//std::cout<<"set: tmp="<<int(tmp)<<"\n";
+
+
+//std::cout<<"gtps_array["<<our_byte_number-1<<"] = (gtps_array["<<our_byte_number-1<<"]&"<<int(clear_info_for_person[local_person_number-1])<<" |"<< int((data&3)<<rearrangement_array[local_person_number-1])<<"\n";
+
+
+gtps_array[our_byte_number-1] = (gtps_array[our_byte_number-1]&clear_info_for_person[local_person_number-1]) | (data&3)<<rearrangement_array[local_person_number-1];
+//std::cout<<"gtps_array["<<our_byte_number<<"-1]="<<int(gtps_array[our_byte_number-1])<<"\n";
+}
+//------------------------------------------------------------------
+
+
+
+
+/*
+gtps_container::checking(unsigned id_position, unsigned snp_position)
+{
+if (id_position > id_numbers || id_position<0)
+ {
+ std::cerr<<"Bad ID number. ID_number="<<id_position<<". Maximum ID_number can be "<<id_position<<"\n";
+ }
+
+if(snp_position > snp_position || snp_position <0)
+ {
+ std::cerr<<"Bad ID number. ID_number="<<id_position<<". Maximum ID_number can be "<<id_position<<"\n";
+ }
+}
+*/
+
+
+
Added: pkg/VariABEL/src/VARlib/gtps_container.h
===================================================================
--- pkg/VariABEL/src/VARlib/gtps_container.h (rev 0)
+++ pkg/VariABEL/src/VARlib/gtps_container.h 2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,73 @@
+//#=====================================================================================
+//#
+//# Filename: gtps_container.h
+//#
+//# Description: Set of functions for merging two snp.data class objects.
+//#
+//# Version: 1.0
+//# Created: 18-March-2008
+//# Revision: none
+//#
+//#
+//# Author: Maksim V. Struchalin, Yurii S. Aulchenko
+//# Company: ErasmusMC, Epidemiology & Biostatistics Department, The Netherlands.
+//# Email: m.struchalin@@erasmusmc.nl, i.aoultchenko at erasmusmc.nl
+//#
+//#=====================================================================================
+
+#ifndef SMV_GTPS_CONTAINER_H
+#define SMV_GTPS_CONTAINER_H
+
+
+
+#include<iostream>
+#include<vector>
+#include<memory>
+
+
+
+
+
+
+
+
+//==================================================================
+// Class gtps_container - container for gtps data. Class facilitates access to gtps array.
+class gtps_container
+ {
+ public:
+ gtps_container(char * gtps_array_raw, char * strand_array, char * coding_array, unsigned id_numbers, unsigned snp_numbers);
+ gtps_container(char * gtps_array_raw, unsigned id_numbers, unsigned snp_numbers);
+ ~gtps_container(void);
+
+ //Get polymorphism for person who have ID=id_position for SNP=snp_position
+ //Here the first id_position equal zerro, the second - one etc...
+ //id_position=1 - THE FIRST ARGUMENT (id_position!=O NEVER!!!). snp_position - same
+ char get(unsigned id_position, unsigned snp_position);
+ char* get_gtps_array_for_snp(unsigned snp_position);
+ char get_strand(unsigned snp_position);
+ char get_coding(unsigned snp_position);
+
+
+ //id_position=1 - THE FIRST ARGUMENT (id_position!=O NEVER!!!). snp_position - same
+ void set(unsigned id_position, unsigned snp_position, char data);
+
+ private:
+ void get_our_byte_number_and_local_person_number(unsigned id_position, unsigned snp_position);
+ bool do_we_have_strand_and_codding_arrays;
+ char *gtps_array; //pointer to array where we stotages our data (passed from R)
+ char * strand_array;
+ char * coding_array;
+ unsigned id_numbers, snp_numbers;
+ unsigned nbytes_for_one_snp;
+
+ unsigned our_byte_number,
+ local_person_number; //can have vallues: 1, 2, 3 or 4. Show what is position in the byte.
+// const unsigned rearrangement_array[];
+ unsigned *rearrangement_array;
+
+
+ };
+//==================================================================
+
+#endif
Added: pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.cpp
===================================================================
--- pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.cpp (rev 0)
+++ pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.cpp 2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,70 @@
+//#=====================================================================================
+//#
+//# Filename: inverse_variance_metaanalysis.cpp
+//#
+//# Description: Function for meta analysis with inverse variance method.
+//#
+//# Version: 1.0
+//# Created: 06-July-2009
+//# Revision: none
+//# Modifed: 26-Apr-2010
+//#
+//# Author: Maksim V. Struchalin
+//# Company: ErasmusMC, Epidemiology, The Netherlands.
+//# Email: m.struchalin at erasmusmc.nl
+//#
+//#=====================================================================================
+
+
+
+
+#include<iostream>
+//#include<math>
+
+#include <Rinternals.h>
+
+#include <R.h> // to include Rconfig.h
+
+#ifdef ENABLE_NLS
+#include <libintl.h>
+#define _(String) dgettext ("pkg", String)
+// replace pkg as appropriate
+#else
+#define _(String) (String)
+#endif
+
+extern "C" {
+#include "inverse_variance_metaanalysis.h"
+
+
+void inverse_variance_metaanalysis(double *beta_set1, double *beta_set2,
+ double *sebeta_set1, double *sebeta_set2,
+ unsigned *num, //number of betas for current cohort
+ double *mbeta,
+ double *mse,
+ char *testname)
+{
+
+unsigned num_el = *num;
+double se_set1, se_set2, wt2_set1, wt2_set2, invsumwt2;
+
+
+
+ for(unsigned i=0 ; i<num_el ; i++)
+ {
+ se_set1 = sqrt(sebeta_set1[i]*sebeta_set1[i]);
+ se_set2 = sqrt(sebeta_set2[i]*sebeta_set2[i]);
+
+ wt2_set1 = 1./(sebeta_set1[i]*sebeta_set1[i]);
+ wt2_set2 = 1./(sebeta_set2[i]*sebeta_set2[i]);
+
+ invsumwt2 = 1./(wt2_set1+wt2_set2);
+ mbeta[i] = (beta_set1[i]*wt2_set1 + beta_set2[i]*wt2_set2)*invsumwt2;
+ mse[i] = sqrt(invsumwt2);
+ }
+
+}// end of function
+
+
+
+}//end of extern "C"
Added: pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.h
===================================================================
--- pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.h (rev 0)
+++ pkg/VariABEL/src/VARlib/inverse_variance_metaanalysis.h 2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,33 @@
+//#=====================================================================================
+//#
+//# Filename: inverse_variance_metaanalysis.h
+//#
+//# Description: Function for meta analysis with inverse variance method
+//#
+//# Version: 1.0
+//# Created: 06-July-2009
+//# Revision: none
+//# Modifed: 26-Apr-2010
+//#
+//# Author: Maksim V. Struchalin
+//# Company: ErasmusMC, Epidemiology, The Netherlands.
+//# Email: m.struchalin at erasmusmc.nl
+//#
+//#=====================================================================================
+
+#ifndef SMV_DOMETA_C_H
+#define SMV_DOMETA_C_H
+
+
+extern "C" {
+void inverse_variance_metaanalysis(double *beta_set1, double *beta_set2,
+ double *sebeta_set1, double *sebeta_set2,
+ unsigned *num,
+ double *mbeta,
+ double *mse,
+ char *testname);
+
+
+}
+#endif
+
Added: pkg/VariABEL/src/VARlib/linear_regression.cpp
===================================================================
--- pkg/VariABEL/src/VARlib/linear_regression.cpp (rev 0)
+++ pkg/VariABEL/src/VARlib/linear_regression.cpp 2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,97 @@
+#include "linear_regression.h"
+#include <math.h>
+#include "iostream"
+
+
+//#=====================================================================================
+//#
+//# Filename: linear_regression.cpp
+//#
+//# Description: The function "linear_regression" computes least squares solutions to the system (1). Based on "dqrfit" and "ch2inv" fortran subroutine.
+//# (1) design_matrix * betas = y
+//#
+//# Version: 0.1
+//# Created: 2-Dec-2010
+//# Revision: none
+//# Modifed:
+//#
+//# Author: Maksim V. Struchalin
+//# Company: ErasmusMC, Epidemiology, The Netherlands.
+//# Email: m.struchalin at erasmusmc.nl, m.v.struchalin at mail.ru
+//#
+//#=====================================================================================
+
+
+
+//Manual of input parameters:
+// "y" - dependable variable - array with length "num_observations"
+// "design_matrix" - design (module) matrix with dimension ("num_observations" x "p"). The first column is filled with one's (for intercept). The following columns are covariates.
+// "p" - number of covariates plus 1. "p" is number of columns in the design matrix.
+// "num_observations" - number of observation. "num_observations" is length of "y" and number of rows of design matrix.
+// "betas" - solution of equation (1). The array of length "p" which contains effects of intercect and covariates.
+// "se" - solution of equation (1). The array of length "p" which contains standart errors of effects of intercect and covariates.
+// "residuals" - residuals of y ("residuals"="y" - "betas"*"design_matrix").
+//
+// auxiliary variables: explanation of "qty", "jpvt", "qraux", "work" can be found in manual for dqrls (http://svn.r-project.org/R/trunk/src/appl/dqrls.f)
+// "qty" - array of length "num_observations"
+// "jpvt" - array of length "p"
+// "qraux" - array of length "p"
+// "work" - array of length "2*p"
+// "v" - array of length "2*p".
+// "x_for_ch2inv" - array of length "2*p"
+
+
+void linear_regression(
+ /*input variables:*/ double *y, double *design_matrix, int *p_/*cov num + 1*/, long unsigned* num_observations,
+ /*return variables:*/ double *betas, double *se, double *residuals,
+ /*auxiliary variables:*/ double *qty, int *jpvt, double *qraux, double *work, double *v, double *x_for_ch2inv)
+{
+ int n=*num_observations;
+ int p = *p_;
+// int i_start = n*(p-1);
+// int i_stop = n*p;
+
+ static int ny = 1; //numeber of dependable variables (y)
+ static double tol=1e-07;
+ static int k;
+
+
+
+
+ dqrls_(design_matrix, &n, &p, y, &ny, &tol, betas, residuals, qty, &k, jpvt, qraux, work);
+
+ for(int col=0 ; col<p ; col++)
+ {
+ for(int row=0 ; row<p ; row++)
+ {
+ x_for_ch2inv[row + p*col] = design_matrix[row+n*col];
+ }
+
+ }
+
+ int info;
+ ch2inv_(x_for_ch2inv, &p, &p, v, &info);
+
+
+ //calculate rss (residuals square sum)
+ double rss=0;
+ for(int i=0 ; i<n ; i++)
+ {
+ rss += residuals[i]*residuals[i];
+ }
+
+ static double rdf;
+ rdf = n - p;
+ static double resvar;
+ resvar = rss/rdf;
+
+
+ for(int i=0 ; i<p ; i++)
+ {
+ se[i] = sqrt(v[i*p+i] * resvar); //diagonal ellements of v multiply by resvar
+ }
+
+
+}
+
+
Added: pkg/VariABEL/src/VARlib/linear_regression.h
===================================================================
--- pkg/VariABEL/src/VARlib/linear_regression.h (rev 0)
+++ pkg/VariABEL/src/VARlib/linear_regression.h 2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,38 @@
+#ifndef SMV_LINEAR_REGRESSION_H
+#define SMV_LINEAR_REGRESSION_H
+
+
+
+//#=====================================================================================
+//#
+//# Filename: linear_regression.h
+//#
+//# Description: The function "linear_regression" computes least squares solutions to the system (1). Based on "dqrfit" and "ch2inv" fortran subroutine.
+//# (1) design_matrix * betas = y
+//#
+//# Version: 0.1
+//# Created: 2-Dec-2010
+//# Revision: none
+//# Modifed:
+//#
+//# Author: Maksim V. Struchalin
+//# Company: ErasmusMC, Epidemiology, The Netherlands.
+//# Email: m.struchalin at erasmusmc.nl, m.v.struchalin at mail.ru
+//#
+//#=====================================================================================
+
+
+
+extern "C" {
+
+extern int dqrls_(double *x, int *n, int *p, double *y, int *ny, double *tol, double *b, double *rsd, double *qty, int *k, int *jpvt, double *qraux, double *work);
+extern void ch2inv_(double *x, int *ldx, int *n, double *v, int *info);
+
+}
+
+void linear_regression(
+ /*input variables:*/ double *y, double *design_matrix, int *p_/*cov num + 1*/, long unsigned* num_observations,
+ /*return variables:*/ double *betas, double *se, double *residuals,
+ /*auxiliary variables:*/ double *qty, int *jpvt, double *qraux, double *work, double *v, double *x_for_ch2inv);
+
+#endif
Added: pkg/VariABEL/src/VARlib/supplementary_functions.cpp
===================================================================
--- pkg/VariABEL/src/VARlib/supplementary_functions.cpp (rev 0)
+++ pkg/VariABEL/src/VARlib/supplementary_functions.cpp 2011-03-07 10:42:29 UTC (rev 670)
@@ -0,0 +1,1021 @@
+//#=====================================================================================
+//#
+//# Filename: supplementary_functions.cpp
+//#
+//# Description: Supplementary functions for variance homogeneity test and for metaanalysis.
+//#
+//# Version: 1.0
+//# Created: 28-Apr-2010
+//# Revision: none
+//#
+//#
+//# Author: Maksim V. Struchalin
+//# Company: ErasmusMC, Epidemiology, The Netherlands.
+//# Email: m.struchalin at erasmusmc.nl
+//#
+//#=====================================================================================
+
+
+#include "supplementary_functions.h"
+#include "inverse_variance_metaanalysis.h"
+#include <stdlib.h>
+#include <math.h>
+
+
+
+
+
+
+
+//___________________________________________________________
+//Set genotypes in common view GA -> AG
+bool unify_snp(snp_var_data* snp, std::ofstream & warnings_file)
+{
+
+
+static std::string alleles_snp;
+alleles_snp="";
+
+for(unsigned i=0 ; i<GENO_TYPES_NUM ; i++) {alleles_snp += snp->GENO[i];}//collect all alleles
+
+alleles_snp = get_uniq_symbols(alleles_snp);
+for(unsigned i=0 ; i<alleles_snp.size() ; i++) if(alleles_snp[i] == '/') {alleles_snp.erase(i,1);}
+
+if(alleles_snp.size() > 2 && alleles_snp.size() <=1)
+ {
+ //Rprintf("SNP %s has more than 2 alleles (it has %s). SNP skiped.\n", snp->snpname.c_str(), alleles_snp.c_str());
+ warnings_file<<"SNP "<<snp->snpname<<" has more than 2 alleles (it has "<<alleles_snp<<"). SNP skiped.\n";
+ return false;
+ }
+
+if(alleles_snp.size() <=1)
+ {
+ //Rprintf("SNP %s has less than 2 alleles (it has %s). SNP skiped.\n", snp->snpname.c_str(), alleles_snp.c_str());
+ warnings_file<<"SNP "<<snp->snpname<<" has less than 2 alleles (it has "<<alleles_snp<<"). SNP skiped.\n";
+ return false;
+ }
+
+
+if(alleles_snp[0]=='0' && alleles_snp[1]=='0')
+ {
+ //Rprintf("SNP %s has undefind allels. SNP skiped..\n", snp->snpname.c_str());
+ warnings_file<<"SNP "<<snp->snpname<<" has undefind allels. SNP skiped\n";
+ return false;
+ }
+
+
+
+
+//exclude those geno group which has only one id
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+ {
+
+ if(is_na(snp->SD[i]) || is_na(snp->MEAN[i]) || is_na(snp->COUNTS[i]) || snp->COUNTS[i]<=1 )
+ {
+ snp->SD[i] = NA_value;
+ snp->MEAN[i] = NA_value;
+ snp->COUNTS[i] = 0;
+ continue;
+ }
+
+ static VARIABLE_TYPE var;
+ var = snp->SD[i]*snp->SD[i];
+ if(var <= 1.E-32)
+ {
+ snp->SD[i] = NA_value;
+ snp->MEAN[i] = NA_value;
+ snp->COUNTS[i] = 0;
+
+ //Rprintf("warning: genotypic group %s in snp %s has too small variance (variance=%f). This genotypic group is excluded from analysis.\n",
+ warnings_file<<"warning: genotypic group "<<snp->GENO[i]<<" in snp "<<snp->snpname<<" has too small variance (variance="<<var<<"). This genotypic group is excluded from analysis.\n";
+
+ }
+ }
+
+
+
+
+
+
+std::sort(alleles_snp.begin(), alleles_snp.end());
+
+
+if(alleles_snp[0] == '0')
+ {
+ alleles_snp[0] = alleles_snp[1];
+ alleles_snp[1] = '0';
+ }
+
+static std::string geno[3];
+
+
+geno[0].push_back(alleles_snp[0]);
+geno[0].push_back('/');
+geno[0].push_back(alleles_snp[0]);
+
+geno[1].push_back(alleles_snp[0]);
+geno[1].push_back('/');
+geno[1].push_back(alleles_snp[1]);
+
+geno[2].push_back(alleles_snp[1]);
+geno[2].push_back('/');
+geno[2].push_back(alleles_snp[1]);
+
+
+static std::string geno_hemozyg_switched;
+geno_hemozyg_switched.push_back(geno[1][2]);
+geno_hemozyg_switched.push_back('/');
+geno_hemozyg_switched.push_back(geno[1][0]);
+
+
+if(snp->GENO[0] == geno[0] && snp->GENO[1] == geno[1] && snp->GENO[2] == geno[2])
+ {
+ geno_hemozyg_switched = "";
+ geno[0] = "";
+ geno[1] = "";
+ geno[2] = "";
+ return true; //nothing to change
+ }
+
+
+static snp_var_data snp_new;
+
+//snp_new->snpname = snp->snpname;
+//snp_new->chromosome = snp->chromosome;
+
+
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+ {
+ if(snp->GENO[i] == geno[0])
+ {snp_new.GENO[0] = geno[0]; snp_new.COUNTS[0]=snp->COUNTS[i]; snp_new.MEAN[0]=snp->MEAN[i]; snp_new.SD[0]=snp->SD[i];}
+ if(snp->GENO[i] == geno[1] || snp->GENO[i]==geno_hemozyg_switched)
+ {snp_new.GENO[1] = geno[1]; snp_new.COUNTS[1]=snp->COUNTS[i]; snp_new.MEAN[1]=snp->MEAN[i]; snp_new.SD[1]=snp->SD[i];}
+ if(snp->GENO[i] == geno[2])
+ {snp_new.GENO[2] = geno[2]; snp_new.COUNTS[2]=snp->COUNTS[i]; snp_new.MEAN[2]=snp->MEAN[i]; snp_new.SD[2]=snp->SD[i];}
+ }
+
+geno_hemozyg_switched = "";
+geno[0] = "";
+geno[1] = "";
+geno[2] = "";
+
+
+
+
+
+
+
+
+
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+ {
+ snp->GENO[i] = snp_new.GENO[i];
+ snp->COUNTS[i] = snp_new.COUNTS[i];
+ snp->MEAN[i] = snp_new.MEAN[i];
+ snp->SD[i] = snp_new.SD[i];
+ }
+
+snp_new.reset();
+
+
+
+
+return true;
+}
+//___________________________________________________________
+
+
+
+
+//___________________________________________________________
+//put new snp into the storage. If this snp is there already tham metaanalyse it
+bool include_snp(Snp_store_type * snps_storage, snp_var_data* snp, std::ofstream & warnings_file, char *testname)
+{
+
+
+Snp_store_type::iterator iter_map = snps_storage->find(snp->snpname);
+
+//char delim=' ';
+
+
+
+if(iter_map == snps_storage->end())
+ {
+ snps_storage->insert(std::pair<std::string, snp_var_data*>(snp->snpname, snp));
+ }
+else
+ {
+ snp_var_data* snp_current = iter_map->second;
+ static bool is_snp_ok;
+ is_snp_ok = check(snp_current, snp, warnings_file);
+
+ if(!is_snp_ok) {return false;}
+
+ (*snps_storage)[snp->snpname.c_str()] = snp_var_meta(iter_map->second, snp, testname);
+
+ //bless god souls of these objects...
+ delete snp_current;
+ delete snp;
+ }
+
+return true;
+}
+//___________________________________________________________
+
+
+
+//metaanalysis of data from two snps
+//___________________________________________________________
+snp_var_data* snp_var_meta(snp_var_data* snp1, snp_var_data* snp2, char* testname)
+{
+snp_var_data* snp_meta = new snp_var_data;
+
+
+
+snp_meta->chromosome = snp1->chromosome;
+snp_meta->snpname = snp1->snpname;
+
+VARIABLE_TYPE meta_mean_beta[GENO_TYPES_NUM], meta_mean_se[GENO_TYPES_NUM];
+
+
+static unsigned one = 1;
+
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+ {
+ //metanalysis for variance
+
+ static std::string meta_codding;
+
+ snp_meta->GENO[i] = snp1->GENO[i];
+
+ if(snp1->COUNTS[i] != 0 && snp2->COUNTS[i] != 0 )
+ {
+
+ static VARIABLE_TYPE SD_of_the_mean_snp1, SD_of_the_mean_snp2;
+
+ SD_of_the_mean_snp1 = snp1->SD[i]/sqrt(double(snp1->COUNTS[i]));
+ SD_of_the_mean_snp2 = snp2->SD[i]/sqrt(double(snp2->COUNTS[i]));
+
+ //metanalysis for mean
+ inverse_variance_metaanalysis(&snp1->MEAN[i], &snp2->MEAN[i],
+ &SD_of_the_mean_snp1, &SD_of_the_mean_snp2,
+ &one,
+ meta_mean_beta,
+ meta_mean_se, testname);
+
+ snp_meta->MEAN[i] = meta_mean_beta[0];
+ snp_meta->COUNTS[i] = snp1->COUNTS[i] + snp2->COUNTS[i];
+ snp_meta->SD[i] = meta_mean_se[0]*sqrt(double(snp_meta->COUNTS[i]));
+ }
+ else
+ {
+ if(snp1->COUNTS[i] == 0)
+ {
+ snp_meta->SD[i] = snp2->SD[i];
+ snp_meta->COUNTS[i] = snp2->COUNTS[i];
+ snp_meta->MEAN[i] = snp2->MEAN[i];
+ }
+ else if(snp2->COUNTS[i] == 0)
+ {
+ snp_meta->SD[i] = snp1->SD[i];
+ snp_meta->COUNTS[i] = snp1->COUNTS[i];
+ snp_meta->MEAN[i] = snp1->MEAN[i];
+ }
+ else
+ {
+ error("Upss.. :-) something strange occured... wrong file format probably. Create small example of your data where you have an error and send it to developer.\n");
+ }
+ }
+ }
+return snp_meta;
+}
+//___________________________________________________________
+
+
+
+
+
+
+
+//___________________________________________________________
+bool is_na(const VARIABLE_TYPE val, const VARIABLE_TYPE na_reference)
+{
+static VARIABLE_TYPE delta = fabs(na_reference/1E6);
+if(val > na_reference-delta && val < na_reference+delta) return true;
+else return false;
+}
+//___________________________________________________________
+
+
+
+
+
+bool is_na(const int val, const int na_reference) //is numerical value recognized as NA
+{
+if(val == na_reference) return true;
+else return false;
+}
+
+
+
+//save all data into plink like format
+//___________________________________________________________
+void save_snps_data_into_file(Snp_store_type *snps_data, const char *output_filename, char delim)
+{
+//print header
+
+const unsigned pp_maxsnp=10;
+
+std::ofstream file;
+
+
+file.open(output_filename);
+
+
+if(!file.is_open()){error("Can not open file %s\n", output_filename);}
+
+
+
+//file<<setiosflags(std::ios::right);
+
+file.precision(precision_output);
+
+file << std::setw(4) << chromosome_column_name.c_str() << delim
+ << std::setw(pp_maxsnp) << snpname_column_name.c_str() << delim
+ << std::setw(6) << value_column_name.c_str() << delim
+ << std::setw(8) << g11_column_name.c_str() << delim
+ << std::setw(8) << g12_column_name.c_str() << delim
+ << std::setw(8) << g22_column_name.c_str() << "\n";
+
+
+
+static Snp_store_type::iterator iter_map;
+
+for(Snp_store_type::const_iterator i=snps_data->begin() ; i!=snps_data->end() ; ++i)
+ {
+ //geno:
+ file << std::setw(4) << i->second->chromosome << delim
+ << std::setw(pp_maxsnp) << i->second->snpname << delim
+ << std::setw(6) << geno_value_name << delim
+ << std::setw(8) << i->second->GENO[0] << delim
+ << std::setw(8) << i->second->GENO[1] << delim
+ << std::setw(8) << i->second->GENO[2] << "\n";
+
+
+
+ //counts:
+ file << std::setw(4) << i->second->chromosome << delim
+ << std::setw(pp_maxsnp) << i->second->snpname << delim
+ << std::setw(6) << counts_value_name << delim;
+
+ if(is_na(i->second->COUNTS[0]))
+ {
+ file << std::setw(8) << "NA" << delim;
+ }
+ else
+ {
+ file << std::setw(8) << std::setiosflags(std::ios_base::dec) << i->second->COUNTS[0] << delim;
+ }
+
+ if(is_na(i->second->COUNTS[1]))
+ {
+ file << std::setw(8) << "NA" << delim;
+ }
+ else
+ {
+ file << std::setw(8) << std::setiosflags(std::ios_base::dec) << i->second->COUNTS[1] << delim;
+ }
+
+ if(is_na(i->second->COUNTS[2]))
+ {
+ file << std::setw(8) << "NA" << delim;
+ }
+ else
+ {
+ file << std::setw(8) << std::setiosflags(std::ios_base::dec) << i->second->COUNTS[2] << '\n';
+ }
+
+
+
+ //freq:
+ static VARIABLE_TYPE total_id_num;
+ total_id_num = i->second->COUNTS[0] + i->second->COUNTS[1] + i->second->COUNTS[2];
+
+ file << std::setw(4) << i->second->chromosome << delim
+ << std::setw(pp_maxsnp) << i->second->snpname << delim
+ << std::setw(6) << freq_value_name << delim
+ << std::setw(8) << (is_na(i->second->COUNTS[0])? "NA":double_2_str(i->second->COUNTS[0]/total_id_num)) << delim
+ << std::setw(8) << (is_na(i->second->COUNTS[1])? "NA":double_2_str(i->second->COUNTS[1]/total_id_num)) << delim
+ << std::setw(8) << (is_na(i->second->COUNTS[2])? "NA":double_2_str(i->second->COUNTS[2]/total_id_num)) << "\n";
+
+
+ //mean:
+ file << std::setw(4) << i->second->chromosome << delim
+ << std::setw(pp_maxsnp) << i->second->snpname << delim
+ << std::setw(6) << mean_value_name << delim
+ << std::setw(8) << (is_na(i->second->MEAN[0])? "NA":double_2_str(i->second->MEAN[0])) << delim
+ << std::setw(8) << (is_na(i->second->MEAN[1])? "NA":double_2_str(i->second->MEAN[1])) << delim
+ << std::setw(8) << (is_na(i->second->MEAN[2])? "NA":double_2_str(i->second->MEAN[2])) << "\n";
+
+ //sd:
+ file << std::setw(4) << i->second->chromosome << delim
+ << std::setw(pp_maxsnp) << i->second->snpname << delim
+ << std::setw(6) << sd_value_name << delim
+ << std::setw(8) << (is_na(i->second->SD[0])? "NA":double_2_str(i->second->SD[0])) << delim
+ << std::setw(8) << (is_na(i->second->SD[1])? "NA":double_2_str(i->second->SD[1])) << delim
+ << std::setw(8) << (is_na(i->second->SD[2])? "NA":double_2_str(i->second->SD[2])) << "\n";
+
+
+ }
+
+
+file.close();
+}
+//___________________________________________________________
+
+
+
+
+//___________________________________________________________
+void save_snps_tests_into_file(Snp_store_type *snps_data, const char *output_filename, char delim)
+{
+//print header
+
+//const unsigned pp_maxsnp=10;
+
+std::ofstream file;
+
+
+file.open(output_filename);
+
+
+if(!file.is_open()){error("Can not open file %s\n", output_filename);}
+
+
+
+//file<<setiosflags(std::ios::right);
+
+file.precision(precision_output);
+
+file << chromosome_column_name << snpname_column_name << delim << "Z" << delim << "Z_2df\n";
+
+
+
+static Snp_store_type::iterator iter_map;
+
+for(Snp_store_type::const_iterator i=snps_data->begin() ; i!=snps_data->end() ; ++i)
+ {
+ //geno:
+ file << i->second->chromosome << delim
+ << i->second->snpname << delim
+ << (is_na(i->second->Z)? "NA":double_2_str(i->second->Z)) << delim
+ << (is_na(i->second->Z_2df)? "NA":double_2_str(i->second->Z_2df)) << "\n";
+ }
+
+
+file.close();
+}
+//___________________________________________________________
+
+
+
+//convert from double to string
+//___________________________________________________________
+std::string double_2_str(double val, const unsigned precision)
+{
+static std::stringstream stream;
+stream.str(""); stream.clear();
+stream.precision(precision);
+stream << val;
+return stream.str();
+}
+//___________________________________________________________
+
+
+
+//___________________________________________________________
+//Check whether snps have same genotypes and in same columns. If one of snp has allele 0 but another have real than replace 0 by real one.
+bool check(snp_var_data* snp2, snp_var_data* snp1, std::ofstream & warnings_file)
+{
+//check snp name
+if(snp1->snpname != snp2->snpname)
+ {
+ error("snp_var_meta: unexpected error; atempt to pool two different snps");
+ }
+
+//check chromosome name
+if(snp1->chromosome != snp2->chromosome)
+ {
+ //Rprintf("warning: SNP %s has different chromosome number in different files. Previos value is %i, current one is %i. Value %i is used.\n",
+ // snp2->snpname.c_str(), snp2->chromosome, snp1->chromosome, snp2->chromosome);
+
+ warnings_file<<"warning: SNP "<<snp2->snpname<<" has different chromosome number in different files. Previos value is "<<snp2->chromosome<<
+ ", current one is "<<snp1->chromosome<<". Value "<<snp2->chromosome<<" is used.\n";
+ }
+
+
+
+//check coddings
+if(snp1->GENO[1] == snp2->GENO[1])
+ {
+ return true;
+ }
+
+
+
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/genabel -r 670
More information about the Genabel-commits
mailing list