[Genabel-commits] r617 - in pkg: . VariABEL VariABEL/R VariABEL/man VariABEL/src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Nov 9 16:28:56 CET 2010


Author: maksim
Date: 2010-11-09 16:28:55 +0100 (Tue, 09 Nov 2010)
New Revision: 617

Added:
   pkg/VariABEL/
   pkg/VariABEL/CHANGES.LOG
   pkg/VariABEL/DESCRIPTION
   pkg/VariABEL/NAMESPACE
   pkg/VariABEL/R/
   pkg/VariABEL/R/tags
   pkg/VariABEL/R/var.meta.gwaa.R
   pkg/VariABEL/R/var.test.gwaa.R
   pkg/VariABEL/data/
   pkg/VariABEL/demo/
   pkg/VariABEL/man/
   pkg/VariABEL/man/var.test.homogeneity.Rd
   pkg/VariABEL/src/
   pkg/VariABEL/src/constants.h
   pkg/VariABEL/src/gtps_container.cpp
   pkg/VariABEL/src/gtps_container.h
   pkg/VariABEL/src/inverse_variance_metaanalysis.cpp
   pkg/VariABEL/src/inverse_variance_metaanalysis.h
   pkg/VariABEL/src/supplementary_functions.cpp
   pkg/VariABEL/src/supplementary_functions.h
   pkg/VariABEL/src/tags
   pkg/VariABEL/src/var_homogeneity_test_C.cpp
   pkg/VariABEL/src/var_homogeneity_tests.cpp
   pkg/VariABEL/src/var_homogeneity_tests.h
   pkg/VariABEL/src/var_meta_gwaa_C.cpp
Log:
submision of VariABEL package for variance heregeneity analysis

Added: pkg/VariABEL/DESCRIPTION
===================================================================
--- pkg/VariABEL/DESCRIPTION	                        (rev 0)
+++ pkg/VariABEL/DESCRIPTION	2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,13 @@
+Package: VarABEL
+Type: Package
+Title: genome-wide SNP association analysis
+Version: 1.5-0
+Date: 2010-02-18
+Author: Yurii Aulchenko, Maksim Struchalin
+Maintainer: Yurii Aulchenko <i.aoultchenko at erasmusmc.nl>
+Depends: R (>= 2.4.0), methods, MASS
+Suggests: qvalue, genetics, haplo.stats, DatABEL
+Description: a package for genome-wide association analysis between 
+             quantitative or binary traits and single-nucleiotide
+             polymorphisms (SNPs). 
+License: GPL (>= 2)

Added: pkg/VariABEL/NAMESPACE
===================================================================
--- pkg/VariABEL/NAMESPACE	                        (rev 0)
+++ pkg/VariABEL/NAMESPACE	2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,8 @@
+useDynLib(VarABEL)
+
+
+export(
+	var.meta.gwaa,
+	var.test.gwaa
+       )
+

Added: pkg/VariABEL/R/tags
===================================================================
--- pkg/VariABEL/R/tags	                        (rev 0)
+++ pkg/VariABEL/R/tags	2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,6 @@
+!_TAG_FILE_FORMAT	2	/extended format; --format=1 will not append ;" to lines/
+!_TAG_FILE_SORTED	1	/0=unsorted, 1=sorted, 2=foldcase/
+!_TAG_PROGRAM_AUTHOR	Darren Hiebert	/dhiebert at users.sourceforge.net/
+!_TAG_PROGRAM_NAME	Exuberant Ctags	//
+!_TAG_PROGRAM_URL	http://ctags.sourceforge.net	/official site/
+!_TAG_PROGRAM_VERSION	5.6	//

Added: pkg/VariABEL/R/var.meta.gwaa.R
===================================================================
--- pkg/VariABEL/R/var.meta.gwaa.R	                        (rev 0)
+++ pkg/VariABEL/R/var.meta.gwaa.R	2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,51 @@
+#=====================================================================================
+#
+#       Filename:  var.meta.gwaa.R
+#
+#    Description:  Function for meta analysis ov variance. Read flat files in plink format and perform metanalysis.
+#
+#        Version:  1.0
+#        Created:  26-Apr-2010
+#       Revision:  none
+#
+#
+#         Author:  Maksim V. Struchalin
+#        Company:  ErasmusMC, Epidemiology, The Netherlands.
+#          Email:  m.struchalin at erasmusmc.nl
+#
+#=====================================================================================
+
+
+
+
+"var.meta.gwaa" <-
+function(input_filenames, 
+				 testname,
+				 output_filename="output.variance.metaanalysis",
+				 exclude_snp_below_threshold=F,
+				 threshold=30,
+				 all_warnings=F,
+				 skip_first_lines_amount=0,
+				 delim=' ') {
+
+
+file_amount <- length(filenames_array)
+
+
+return_val <-  .C("var_meta_gwaa_C", input_filenames, as.integer(file_amount), output_filename, as.integer(skip_first_lines_amount), delim,
+									lambdas = double(file_amount+1),
+									lambdas_NA = integer(file_amount+1),
+									as.logical(exclude_snp_below_threshold),
+									as.integer(threshold),
+									as.logical(all_warnings),
+									as.character(testname))
+			
+
+return_val$lambdas[return_val$lambdas_NA==1] <- NA
+
+lambda_df <- data.frame(filename=filenames_array, lambda=return_val$lambdas[1:file_amount])
+
+lambda_df <- rbind(lambda_df, data.frame(filename=output_filename, lambda=return_val$lambdas[file_amount+1]))
+
+lambda_df
+}

Added: pkg/VariABEL/R/var.test.gwaa.R
===================================================================
--- pkg/VariABEL/R/var.test.gwaa.R	                        (rev 0)
+++ pkg/VariABEL/R/var.test.gwaa.R	2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,113 @@
+#=====================================================================================
+#
+#           Filename:  var.test.gwaa.R
+#
+#        Description:  VarABEL functions: test each SNP in gwaa data for homogeneity of trait's variance among genotypes.
+#
+#            Version:  0.1
+#            Created:  ---
+#           Revision:  none
+#  last modification:  26-Apr-2010
+#
+#             Author:  Maksim V. Struchalin
+#        Modified by:  Maksim V. Struchalin, 11-Jan-2009
+#            Company:  ErasmusMC, Epidemiology Department, The Netherlands.
+#              Email:  m.struchalin at erasmusmc.nl
+#            license:  GPL (>=2)
+#
+#=====================================================================================
+
+
+
+
+
+#_____________________________________________________________________________________________________________________________
+#Available for user function for genome-wide testing variance homogeneity of trait's distribution.
+#
+"var.test.gwaa" <- function(genotype, trait, testname, analysis_type)
+{
+#test statistics for each SNP is stored here. There is column names: "snpname", "chisq_df2", "chisq_df1"
+results <- data.frame()
+
+
+
+		
+# Check wether the class and value of input parameters is appropriate
+#_____________________________________________________________		
+if(!is(genotype, "snp.data") & !is(genotype, "numeric"))
+	{
+	stop("VarABEL, var.test.gwaa: Wrong class of input parameter \"genotype\".")
+	}
+
+if(!is(trait, "numeric"))
+	{
+	stop("VarABEL, var.test.gwaa: Wrong class of input parameter \"trait\".")
+	}
+
+if(!is(testname, "character"))
+	{
+	stop("VarABEL, var.test.gwaa: Wrong class of input parameter \"testname\". It should be \"character\".")
+	}
+
+
+#_____________________________________________________________		
+
+
+
+
+return_val <-  .Call("variance_homogeneity_test_C_old_data_type",
+									as.raw(genotype at gtps),
+									as.double(trait),
+									as.integer(genotype at nids), 
+									as.integer(genotype at nsnps),
+									analysis_type,
+									testname)
+
+
+
+
+result_df <- data.frame(chisq=return_val[[1]], df=return_val[[2]])
+
+
+#output <- .Call("interaction_rare_recesive_allele_C_", as.raw(data at gtps), as.integer(data at nids), as.integer(snp_number),
+#			 																								 as.integer(trait),
+#																											 as.integer(window), 
+#																											 return_all_result,
+#																											 test,
+#																											 as.integer(min_expected_cut_off))
+
+
+}
+##Perform variance analysis
+##_____________________________________________________________		
+#
+#if(is(genotype, "snp.data"))
+#	{
+#	nids  <- genotype at nid
+#	nsnps <- genotype at gtdata@nsnps
+#	MAR <- 1
+#	OUT <- "R"
+#
+#	FUN <- test_C_function_name[testname] #choose name of variance homogeneity test
+#
+#	#Run choosen variance homogeneity test
+#	results_C <- .Call("iterator", as.raw(genotype at gtps),
+#				 												 as.integer(nids), as.integer(nsnps),
+#								            		 as.character(FUN),
+#																 as.character(OUT), 
+#																 as.integer(MAR),
+#																 as.integer(0),
+#														     package="VarABEL")
+#	}
+#
+#
+#
+#return results
+#}
+##End of var.test.gwaa function
+##_____________________________________________________________________________________________________________________________
+
+
+
+
+

Added: pkg/VariABEL/man/var.test.homogeneity.Rd
===================================================================
--- pkg/VariABEL/man/var.test.homogeneity.Rd	                        (rev 0)
+++ pkg/VariABEL/man/var.test.homogeneity.Rd	2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,55 @@
+\name{var.test.homogeneity}
+\alias{var.test.homogeneity}
+\title{function to perfome variance homogeneity test of different genotypic groups}
+\description{
+function to perfome variance homogeneity test of different genotypic groups	
+}
+\usage{
+var.test.homogeneity(trait="", formula="", trait_df, gwa_data, dir, analysis_name_specific, how_many_sigma_drop=NULL, top_snp_num_figure=3, analysis_type = "AAvsABvsBB")
+}
+\arguments{
+  \item{trait}{Name of trait you are analysing (data.frame trait_df has to contain a column with this name). If it absenses or equals to "" than the trait name must be specifed in "formula"}
+  \item{formula}{Object of a class "formula". It is used in case of any adljustment nessacarity. If it absenses or equals to "" than trait must be specifed}
+  \item{trait_df}{Object of class "data.frame". It contains columnes with analysing trait and covariates. Should contain column with id names.}
+  \item{gwa_data}{Object of class "gwaa.data"}
+  \item{dir}{Directory name where all results will be put. It should be created befored analysis starts.}
+  \item{analysis_name_specific}{Any set of characters. It figures as a part of a file names in output.}
+  \item{how_many_sigma_drop}{Cut off point to exclude outliers. Normally distributed trait is assumed.}
+  \item{top_snp_num_figure}{For how many SNPs to make boxplot.}
+  \item{analysis_type}{Which test is performed. Possible values: "AAvsABvsBB" (testing all three genotipc groups),
+		 	"AAvsABandBB" (testing AA against AB and BB together), "ABvsAAandBB", "BBvsAAandAB".}
+}
+\details{
+var.test.homogeneity function perform analisis of variance homogeneity for easch SNP in data "gwa_data". The main result is pval of variance homogeneity test for each SNP.
+Bartlett's test is currently used. This assumes normality in a trait distribution. Even a couple of outliers can spoil a whole pictures. 
+Input parametere "how_many_sigma_drop" should be used to protect overestimation of pval.
+
+}
+
+\value{
+As result the file "lambda.doc" is created which contains inflation factor, file with test statistics (name is qt1__adj__sex_age_test_results.txt 
+				for example in case if you analyse "qt1" with adjustment on "sex" and "age"), file those contains id names which are excluded due to
+	 	normality resoans, manhettan plot, two qqplots (for chi2 amd pval) and boxplots for top SNPs.
+	}
+  
+
+%\references{ ~put references to the literature/web site here ~ }
+\author{Maksim Struchalin}
+%\note{ ~~further notes~~ 
+%}
+\seealso{
+	\code{\link{var.meta}},
+}
+\examples{
+	data(srdta)
+	mytrait_df <- srdta at phdata[,c("id","qt1","sex","age")]
+	mydir <- "mydir_var_analysis"
+	dir.create(mydir)
+	var.test.homogeneity(formula=qt1 ~ sex + age, 
+					trait_df=mytrait_df, gwa_data=srdta, dir=mydir, analysis_name_specific="test", how_many_sigma_drop=3)
+
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{manip}

Added: pkg/VariABEL/src/constants.h
===================================================================
--- pkg/VariABEL/src/constants.h	                        (rev 0)
+++ pkg/VariABEL/src/constants.h	2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,11 @@
+
+#ifndef SMV_CONSTANTS_H
+#define SMV_CONSTANTS_H
+const VARIABLE_TYPE NA_value = -999.999; // which numeric value is ised as NA
+const int NA_value_int = -999; // which numeric value is ised as NA
+const unsigned precision_output=4;
+const double median_table_chi2_df2 = 1.386294;
+
+
+
+#endif

Added: pkg/VariABEL/src/gtps_container.cpp
===================================================================
--- pkg/VariABEL/src/gtps_container.cpp	                        (rev 0)
+++ pkg/VariABEL/src/gtps_container.cpp	2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,211 @@
+//#=====================================================================================
+//#
+//#       Filename:  gtps_container.cpp
+//#
+//#    Description:  Container for storaging genotype data. Use in function for merging of two snp.data class objects.
+//#
+//#        Version:  1.0
+//#        Created:  18-March-2008
+//#       Revision:  none
+//#       
+//#
+//#         Author:  Maksim V. Struchalin, Yurii S. Aulchenko
+//#        Company:  ErasmusMC, Epidemiology & Biostatistics Department, The Netherlands.
+//#          Email:  m.struchalin@@erasmusmc.nl, i.aoultchenko at erasmusmc.nl
+//#
+//#=====================================================================================
+
+#include "gtps_container.h"
+#include <cstdlib>  
+#include <math.h>
+
+
+
+
+gtps_container::gtps_container(char * gtps_array_, char * strand_array_, char * coding_array_, unsigned id_numbers_, unsigned snp_numbers_)
+{
+do_we_have_strand_and_codding_arrays = true; //of course
+rearrangement_array = new unsigned[4];
+rearrangement_array[0] = 6;
+rearrangement_array[1] = 4;
+rearrangement_array[2] = 2;  
+rearrangement_array[3] = 0;
+
+
+
+
+
+gtps_array = gtps_array_;	
+strand_array = strand_array_;
+coding_array = coding_array_;
+id_numbers = id_numbers_;
+snp_numbers = snp_numbers_;
+nbytes_for_one_snp = unsigned(ceil(double(id_numbers)/4.) + 0.5);
+
+//std::cout<<"gtps_container::gtps_container: nbytes_for_one_snp="<<nbytes_for_one_snp<<"\n";
+
+}
+//------------------------------------------------------------------
+
+
+
+//------------------------------------------------------------------
+gtps_container::gtps_container(char * gtps_array_, unsigned id_numbers_, unsigned snp_numbers_)
+{
+do_we_have_strand_and_codding_arrays = true; //of course
+rearrangement_array = new unsigned[4];
+rearrangement_array[0] = 6;
+rearrangement_array[1] = 4;
+rearrangement_array[2] = 2;  
+rearrangement_array[3] = 0;
+
+
+
+
+
+gtps_array = gtps_array_;	
+id_numbers = id_numbers_;
+snp_numbers = snp_numbers_;
+nbytes_for_one_snp = unsigned(ceil(double(id_numbers)/4.) + 0.5);
+}
+//------------------------------------------------------------------
+
+
+
+
+
+
+gtps_container::~gtps_container(void)
+{
+delete[] rearrangement_array;
+}
+
+
+
+
+//------------------------------------------------------------------
+void gtps_container::get_our_byte_number_and_local_person_number(unsigned id_position, unsigned snp_position)
+{
+our_byte_number = int(ceil(id_position/4.)+0.5) + (snp_position-1)*nbytes_for_one_snp;   //What byte is our id in? 
+
+local_person_number = id_position - ((our_byte_number-(snp_position-1)*nbytes_for_one_snp-1)*4); //What position of our id is in byte?
+}
+//------------------------------------------------------------------
+
+
+
+
+
+
+
+
+
+//------------------------------------------------------------------
+char gtps_container::get(unsigned id_position, unsigned snp_position)
+{
+get_our_byte_number_and_local_person_number(id_position, snp_position); //calculate our_byte_number value
+
+char our_byte_vallue = gtps_array[our_byte_number-1];
+
+
+
+return char((gtps_array[our_byte_number-1] >> rearrangement_array[local_person_number-1]) & 3); 
+}
+//------------------------------------------------------------------
+
+
+
+
+//------------------------------------------------------------------
+//Attention! This class does not carry about allocated in this function memory.
+//In order to avoid memory leak "delete" must be performed for every array created in this function.
+char* gtps_container::get_gtps_array_for_snp(unsigned snp_position)
+{
+char* gtps_for_one_snp = new char(nbytes_for_one_snp);
+get_our_byte_number_and_local_person_number(1, snp_position); //calculate our_byte_number value
+
+std::cout<<"gtps_container::get_gtps_array_for_snp:  our_byte_number="<<our_byte_number<<"\n";
+std::cout<<"gtps_container::get_gtps_array_for_snp:  nbytes_for_one_snp="<<nbytes_for_one_snp<<"\n";
+	
+std::cout<<"gtps_array[0]="<<int(gtps_array[0])<<"\n";
+
+for(int i=0 ; i<nbytes_for_one_snp; i++)
+	{
+	gtps_for_one_snp[i]=gtps_array[our_byte_number-1+i];
+	}
+
+
+return gtps_for_one_snp;
+}
+//------------------------------------------------------------------
+
+
+
+
+
+
+
+
+
+char gtps_container::get_strand(unsigned snp_position)
+{
+if(do_we_have_strand_and_codding_arrays) return strand_array[snp_position-1]; 
+else std::cout<<"gtps_container::get_strand: You can not get strand since you create object with constructor gtps_container(char * gtps_array_raw, unsigned id_numbers, unsigned snp_numbers)\n";
+}
+
+
+
+char gtps_container::get_coding(unsigned snp_position)
+{
+if(do_we_have_strand_and_codding_arrays) return coding_array[snp_position-1]; 
+else std::cout<<"gtps_container::get_strand: You can not get strand since you create object with constructor gtps_container(char * gtps_array_raw, unsigned id_numbers, unsigned snp_numbers)\n";
+}
+
+
+//------------------------------------------------------------------
+void gtps_container::set(unsigned id_position, unsigned snp_position, char data)
+{
+//for(int i=0 ; i<4 ; i++)
+//std::cout<<"rearrangement_array["<<i<<"]="<<rearrangement_array[i]<<"\n";
+
+		
+static const char clear_info_for_person[]={63, 207, 243, 252};
+		
+//std::cout<<"set: id_position="<<id_position<<", snp_position="<<snp_position<<", data="<<int(data)<<"\n";
+get_our_byte_number_and_local_person_number(id_position, snp_position);
+
+
+//std::cout<<"set: (gtps_array[our_byte_number]&clear_info_for_person[local_person_number-1])="<<int(gtps_array[our_byte_number]&clear_info_for_person[local_person_number-1])<<"\n";
+
+//char tmp = (gtps_array[our_byte_number]&clear_info_for_person[local_person_number-1]) | data;
+//std::cout<<"set: tmp="<<int(tmp)<<"\n";
+
+
+//std::cout<<"gtps_array["<<our_byte_number-1<<"] = (gtps_array["<<our_byte_number-1<<"]&"<<int(clear_info_for_person[local_person_number-1])<<" |"<< int((data&3)<<rearrangement_array[local_person_number-1])<<"\n";
+
+
+gtps_array[our_byte_number-1] = (gtps_array[our_byte_number-1]&clear_info_for_person[local_person_number-1]) | (data&3)<<rearrangement_array[local_person_number-1];
+//std::cout<<"gtps_array["<<our_byte_number<<"-1]="<<int(gtps_array[our_byte_number-1])<<"\n";
+}
+//------------------------------------------------------------------
+
+
+
+
+/*
+gtps_container::checking(unsigned id_position, unsigned snp_position)
+{
+if (id_position > id_numbers || id_position<0)
+	{
+	std::cerr<<"Bad ID number. ID_number="<<id_position<<". Maximum ID_number can be "<<id_position<<"\n";
+	}
+
+if(snp_position > snp_position || snp_position <0)
+	{
+	std::cerr<<"Bad ID number. ID_number="<<id_position<<". Maximum ID_number can be "<<id_position<<"\n";
+	}
+}
+*/
+
+
+

Added: pkg/VariABEL/src/gtps_container.h
===================================================================
--- pkg/VariABEL/src/gtps_container.h	                        (rev 0)
+++ pkg/VariABEL/src/gtps_container.h	2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,73 @@
+//#=====================================================================================
+//#
+//#       Filename:  gtps_container.h
+//#
+//#    Description:  Set of functions for merging two snp.data class objects.
+//#
+//#        Version:  1.0
+//#        Created:  18-March-2008
+//#       Revision:  none
+//#       
+//#
+//#         Author:  Maksim V. Struchalin, Yurii S. Aulchenko
+//#        Company:  ErasmusMC, Epidemiology & Biostatistics Department, The Netherlands.
+//#          Email:  m.struchalin@@erasmusmc.nl, i.aoultchenko at erasmusmc.nl
+//#
+//#=====================================================================================
+
+#ifndef SMV_GTPS_CONTAINER_H
+#define SMV_GTPS_CONTAINER_H
+
+
+
+#include<iostream>
+#include<vector>
+#include<memory>
+
+
+
+
+
+
+
+
+//==================================================================
+// Class gtps_container - container for gtps data. Class facilitates access to gtps array.
+class gtps_container
+	{
+	public:
+		gtps_container(char * gtps_array_raw, char * strand_array, char *  coding_array, unsigned id_numbers, unsigned snp_numbers);
+		gtps_container(char * gtps_array_raw, unsigned id_numbers, unsigned snp_numbers);
+		~gtps_container(void);
+
+		//Get polymorphism for person who have ID=id_position for SNP=snp_position
+		//Here the first id_position equal zerro, the second - one etc...
+		//id_position=1 - THE FIRST ARGUMENT (id_position!=O NEVER!!!). snp_position - same
+		char get(unsigned id_position, unsigned snp_position);
+		char* get_gtps_array_for_snp(unsigned snp_position);
+		char get_strand(unsigned snp_position);
+		char get_coding(unsigned snp_position);
+		
+		
+		//id_position=1 - THE FIRST ARGUMENT (id_position!=O NEVER!!!). snp_position - same
+		void set(unsigned id_position, unsigned snp_position, char data);
+
+	private:
+		void get_our_byte_number_and_local_person_number(unsigned id_position, unsigned snp_position);
+		bool do_we_have_strand_and_codding_arrays;
+		char *gtps_array; //pointer to array where we stotages our data (passed from R)
+		char * strand_array;
+		char * coding_array;
+		unsigned id_numbers, snp_numbers;
+		unsigned nbytes_for_one_snp;
+
+		unsigned our_byte_number, 
+						 local_person_number; //can have vallues: 1, 2, 3 or 4. Show what is position in the byte.
+//	  const unsigned rearrangement_array[];
+		unsigned *rearrangement_array;
+
+
+	};
+//==================================================================
+
+#endif

Added: pkg/VariABEL/src/inverse_variance_metaanalysis.cpp
===================================================================
--- pkg/VariABEL/src/inverse_variance_metaanalysis.cpp	                        (rev 0)
+++ pkg/VariABEL/src/inverse_variance_metaanalysis.cpp	2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,70 @@
+//#=====================================================================================
+//#
+//#       Filename:  inverse_variance_metaanalysis.cpp
+//#
+//#    Description:  Function for meta analysis with inverse variance method. 
+//#
+//#        Version:  1.0
+//#        Created:  06-July-2009
+//#       Revision:  none
+//#				 Modifed:  26-Apr-2010
+//#
+//#         Author:  Maksim V. Struchalin
+//#        Company:  ErasmusMC, Epidemiology, The Netherlands.
+//#          Email:  m.struchalin at erasmusmc.nl
+//#
+//#=====================================================================================
+
+
+
+
+#include<iostream>
+//#include<math>
+
+#include <Rinternals.h>
+
+#include <R.h>  // to include Rconfig.h 
+
+#ifdef ENABLE_NLS
+#include <libintl.h>
+#define _(String) dgettext ("pkg", String)
+// replace pkg as appropriate 
+#else
+#define _(String) (String)
+#endif
+
+extern "C" {
+#include "inverse_variance_metaanalysis.h"
+
+
+void inverse_variance_metaanalysis(double *beta_set1, double *beta_set2,
+			 				double *sebeta_set1, double *sebeta_set2,
+			 				unsigned *num, //number of betas for current cohort
+							double *mbeta,
+							double *mse,
+							char *testname)
+{
+
+unsigned num_el = *num;
+double se_set1, se_set2, wt2_set1, wt2_set2, invsumwt2;
+
+
+
+	for(unsigned i=0 ; i<num_el ; i++)
+		{
+		se_set1 = sqrt(sebeta_set1[i]*sebeta_set1[i]);
+		se_set2 = sqrt(sebeta_set2[i]*sebeta_set2[i]);
+	
+		wt2_set1 = 1./(sebeta_set1[i]*sebeta_set1[i]);
+		wt2_set2 = 1./(sebeta_set2[i]*sebeta_set2[i]);
+
+		invsumwt2 = 1./(wt2_set1+wt2_set2);
+		mbeta[i] = (beta_set1[i]*wt2_set1 + beta_set2[i]*wt2_set2)*invsumwt2;
+		mse[i] = sqrt(invsumwt2);
+		}
+
+}// end of function 
+
+
+
+}//end of extern "C"

Added: pkg/VariABEL/src/inverse_variance_metaanalysis.h
===================================================================
--- pkg/VariABEL/src/inverse_variance_metaanalysis.h	                        (rev 0)
+++ pkg/VariABEL/src/inverse_variance_metaanalysis.h	2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,33 @@
+//#=====================================================================================
+//#
+//#       Filename:  inverse_variance_metaanalysis.h
+//#
+//#    Description:  Function for meta analysis with inverse variance method
+//#
+//#        Version:  1.0
+//#        Created:  06-July-2009
+//#       Revision:  none
+//#				Modifed:  26-Apr-2010
+//#
+//#         Author:  Maksim V. Struchalin
+//#        Company:  ErasmusMC, Epidemiology, The Netherlands.
+//#          Email:  m.struchalin at erasmusmc.nl
+//#
+//#=====================================================================================
+
+#ifndef SMV_DOMETA_C_H
+#define SMV_DOMETA_C_H
+
+
+extern "C" {
+void inverse_variance_metaanalysis(double *beta_set1, double *beta_set2,
+              double *sebeta_set1, double *sebeta_set2,
+              unsigned *num,
+              double *mbeta,
+              double *mse,
+							char *testname);
+
+
+}
+#endif
+

Added: pkg/VariABEL/src/supplementary_functions.cpp
===================================================================
--- pkg/VariABEL/src/supplementary_functions.cpp	                        (rev 0)
+++ pkg/VariABEL/src/supplementary_functions.cpp	2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,1015 @@
+//#=====================================================================================
+//#
+//#       Filename:  supplementary_functions.cpp
+//#
+//#    Description:  Supplementary functions for variance homogeneity test and for metaanalysis.
+//#
+//#        Version:  1.0
+//#        Created:  28-Apr-2010
+//#       Revision:  none
+//#
+//#
+//#         Author:  Maksim V. Struchalin
+//#        Company:  ErasmusMC, Epidemiology, The Netherlands.
+//#          Email:  m.struchalin at erasmusmc.nl
+//#
+//#=====================================================================================
+
+
+#include "supplementary_functions.h"
+#include "inverse_variance_metaanalysis.h"
+#include <stdlib.h>
+#include <math.h>
+
+
+
+
+
+
+
+//___________________________________________________________
+//Set genotypes in common view GA -> AG
+bool unify_snp(snp_var_data* snp, std::ofstream & warnings_file)
+{
+
+
+static std::string alleles_snp;
+alleles_snp="";
+
+for(unsigned i=0 ; i<GENO_TYPES_NUM ; i++) {alleles_snp += snp->GENO[i];}//collect all alleles 
+
+alleles_snp = get_uniq_symbols(alleles_snp);
+for(unsigned i=0 ; i<alleles_snp.size() ; i++) if(alleles_snp[i] == '/') {alleles_snp.erase(i,1);}
+
+if(alleles_snp.size() > 2 && alleles_snp.size() <=1)
+	{
+	//Rprintf("SNP %s has more than 2 alleles (it has %s). SNP skiped.\n", snp->snpname.c_str(), alleles_snp.c_str());
+	warnings_file<<"SNP "<<snp->snpname<<" has more than 2 alleles (it has "<<alleles_snp<<"). SNP skiped.\n";
+	return false;	
+	}
+
+if(alleles_snp.size() <=1)
+	{
+	//Rprintf("SNP %s has less than 2 alleles (it has %s). SNP skiped.\n", snp->snpname.c_str(), alleles_snp.c_str());
+	warnings_file<<"SNP "<<snp->snpname<<" has less than 2 alleles (it has "<<alleles_snp<<"). SNP skiped.\n";
+	return false;	
+	}
+
+
+if(alleles_snp[0]=='0' && alleles_snp[1]=='0')
+	{
+	//Rprintf("SNP %s has undefind allels. SNP skiped..\n", snp->snpname.c_str());
+	warnings_file<<"SNP "<<snp->snpname<<" has undefind allels. SNP skiped\n";
+	return false;
+	}
+
+
+
+
+//exclude those geno group which has only one id
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+	{
+
+	if(is_na(snp->SD[i]) || is_na(snp->MEAN[i]) || is_na(snp->COUNTS[i]) || snp->COUNTS[i]<=1 )
+		{
+		snp->SD[i] = NA_value;
+		snp->MEAN[i] = NA_value;
+		snp->COUNTS[i] = 0;
+		continue;
+		}
+
+	static VARIABLE_TYPE var;
+	var = snp->SD[i]*snp->SD[i];	
+	if(var <= 1.E-32)
+		{
+		snp->SD[i] = NA_value;
+		snp->MEAN[i] = NA_value;
+		snp->COUNTS[i] = 0;
+		
+		//Rprintf("warning: genotypic group %s in snp %s has too small variance (variance=%f). This genotypic group is excluded from analysis.\n", 
+		warnings_file<<"warning: genotypic group "<<snp->GENO[i]<<" in snp "<<snp->snpname<<" has too small variance (variance="<<var<<"). This genotypic group is excluded from analysis.\n"; 
+		
+		}
+	}
+
+
+
+
+
+
+std::sort(alleles_snp.begin(), alleles_snp.end());
+
+
+if(alleles_snp[0] == '0')
+	{
+	alleles_snp[0] = alleles_snp[1];
+	alleles_snp[1] = '0';
+	}
+
+static std::string geno[3];
+
+
+geno[0].push_back(alleles_snp[0]);
+geno[0].push_back('/');
+geno[0].push_back(alleles_snp[0]);
+		
+geno[1].push_back(alleles_snp[0]);
+geno[1].push_back('/');
+geno[1].push_back(alleles_snp[1]);
+
+geno[2].push_back(alleles_snp[1]);
+geno[2].push_back('/');
+geno[2].push_back(alleles_snp[1]);
+
+
+static std::string geno_hemozyg_switched;
+geno_hemozyg_switched.push_back(geno[1][2]);
+geno_hemozyg_switched.push_back('/');
+geno_hemozyg_switched.push_back(geno[1][0]);
+
+
+if(snp->GENO[0] == geno[0] && snp->GENO[1] == geno[1] && snp->GENO[2] == geno[2])
+	{
+	geno_hemozyg_switched = "";
+	geno[0] = "";
+	geno[1] = "";
+	geno[2] = "";
+	return true; //nothing to change
+	}
+
+
+static snp_var_data snp_new;
+
+//snp_new->snpname = snp->snpname;
+//snp_new->chromosome = snp->chromosome;
+
+
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+	{
+	if(snp->GENO[i] == geno[0])
+		{snp_new.GENO[0] = geno[0]; snp_new.COUNTS[0]=snp->COUNTS[i]; snp_new.MEAN[0]=snp->MEAN[i]; snp_new.SD[0]=snp->SD[i];}
+	if(snp->GENO[i] == geno[1] || snp->GENO[i]==geno_hemozyg_switched) 
+		{snp_new.GENO[1] = geno[1]; snp_new.COUNTS[1]=snp->COUNTS[i]; snp_new.MEAN[1]=snp->MEAN[i]; snp_new.SD[1]=snp->SD[i];}
+	if(snp->GENO[i] == geno[2]) 
+		{snp_new.GENO[2] = geno[2]; snp_new.COUNTS[2]=snp->COUNTS[i]; snp_new.MEAN[2]=snp->MEAN[i]; snp_new.SD[2]=snp->SD[i];}
+	}
+
+geno_hemozyg_switched = "";
+geno[0] = "";
+geno[1] = "";
+geno[2] = "";
+
+
+
+
+
+
+
+
+
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+	{
+	snp->GENO[i] = snp_new.GENO[i];
+	snp->COUNTS[i] = snp_new.COUNTS[i];
+	snp->MEAN[i] = snp_new.MEAN[i];
+	snp->SD[i] = snp_new.SD[i];
+	}
+
+snp_new.reset();
+
+
+
+
+return true;
+}
+//___________________________________________________________
+
+
+
+
+//___________________________________________________________
+//put new snp into the storage. If this snp is there already tham metaanalyse it
+bool include_snp(Snp_store_type * snps_storage, snp_var_data* snp, std::ofstream & warnings_file, char *testname)
+{
+
+
+Snp_store_type::iterator iter_map = snps_storage->find(snp->snpname);
+
+char delim=' ';
+
+
+
+if(iter_map == snps_storage->end()) 
+	{
+	snps_storage->insert(std::pair<std::string, snp_var_data*>(snp->snpname, snp));
+	}
+else
+	{
+	snp_var_data* snp_current = iter_map->second;
+	static bool is_snp_ok;
+	is_snp_ok = check(snp_current, snp, warnings_file);
+	
+	if(!is_snp_ok) {return false;}
+	
+	(*snps_storage)[snp->snpname.c_str()] = snp_var_meta(iter_map->second, snp, testname);
+	
+	//bless god souls of these objects...
+	delete snp_current;
+	delete snp;
+	}
+
+return true;
+}
+//___________________________________________________________
+
+
+
+//metaanalysis of data from two snps
+//___________________________________________________________
+snp_var_data* snp_var_meta(snp_var_data* snp1, snp_var_data* snp2, char* testname)
+{
+snp_var_data* snp_meta = new snp_var_data;
+
+
+
+snp_meta->chromosome = snp1->chromosome;
+snp_meta->snpname = snp1->snpname;
+
+VARIABLE_TYPE meta_mean_beta[GENO_TYPES_NUM], meta_mean_se[GENO_TYPES_NUM];
+	
+
+static unsigned one = 1;
+
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+	{
+	//metanalysis for variance
+
+	static std::string meta_codding;
+			
+	snp_meta->GENO[i] = snp1->GENO[i];
+	
+	if(snp1->COUNTS[i] != 0	&& snp2->COUNTS[i] != 0 )
+		{
+
+		static VARIABLE_TYPE SD_of_the_mean_snp1, SD_of_the_mean_snp2;
+		
+		SD_of_the_mean_snp1 = snp1->SD[i]/sqrt(double(snp1->COUNTS[i]));
+		SD_of_the_mean_snp2 = snp2->SD[i]/sqrt(double(snp2->COUNTS[i]));
+
+		//metanalysis for mean
+		inverse_variance_metaanalysis(&snp1->MEAN[i], &snp2->MEAN[i],
+						 &SD_of_the_mean_snp1, &SD_of_the_mean_snp2,
+							&one,
+							meta_mean_beta,
+							meta_mean_se, testname);
+	
+		snp_meta->MEAN[i] = meta_mean_beta[0];
+		snp_meta->COUNTS[i] = snp1->COUNTS[i] + snp2->COUNTS[i];
+		snp_meta->SD[i] = meta_mean_se[0]*sqrt(double(snp_meta->COUNTS[i]));
+		}
+	else
+		{
+		if(snp1->COUNTS[i] == 0)
+			{
+			snp_meta->SD[i] = snp2->SD[i];
+			snp_meta->COUNTS[i] = snp2->COUNTS[i];
+			snp_meta->MEAN[i] = snp2->MEAN[i];
+			}
+		else if(snp2->COUNTS[i] == 0)
+			{
+			snp_meta->SD[i] = snp1->SD[i];
+			snp_meta->COUNTS[i] = snp1->COUNTS[i];
+			snp_meta->MEAN[i] = snp1->MEAN[i];
+			}
+		else
+			{
+			error("Upss.. :-) something strange occured... wrong file format probably. Create small example of your data where you have an error and send it to developer.\n");
+			}
+		}
+	}
+return snp_meta;		
+}
+//___________________________________________________________
+
+
+
+
+
+
+
+//___________________________________________________________
+bool is_na(const VARIABLE_TYPE val, const VARIABLE_TYPE na_reference)
+{
+static VARIABLE_TYPE delta = fabs(na_reference/1E6);
+if(val > na_reference-delta && val < na_reference+delta) return true;
+else return false;
+}
+//___________________________________________________________
+
+
+
+
+
+bool is_na(const int val, const int na_reference) //is numerical value recognized as NA 
+{
+if(val == na_reference) return true;
+else return false;
+}
+
+
+
+//save all data into plink like format
+//___________________________________________________________
+void save_snps_data_into_file(Snp_store_type *snps_data, const char *output_filename, char delim)
+{
+//print header
+	
+const unsigned pp_maxsnp=10;
+		
+std::ofstream file;
+	
+
+file.open(output_filename);
+
+
+if(!file.is_open()){error("Can not open file %s\n", output_filename);}
+
+
+
+//file<<setiosflags(std::ios::right);
+
+file.precision(precision_output);
+
+file << std::setw(4) << chromosome_column_name.c_str() << delim
+		 << std::setw(pp_maxsnp) << snpname_column_name.c_str() << delim
+		 << std::setw(6) << value_column_name.c_str() << delim
+		 << std::setw(8) << g11_column_name.c_str() << delim
+		 << std::setw(8) << g12_column_name.c_str() << delim
+		 << std::setw(8) << g22_column_name.c_str() << "\n";
+
+
+		
+static Snp_store_type::iterator iter_map;
+
+for(Snp_store_type::const_iterator i=snps_data->begin() ; i!=snps_data->end() ; ++i)
+	{
+	//geno:
+	file << std::setw(4) << i->second->chromosome << delim
+		   << std::setw(pp_maxsnp) << i->second->snpname << delim 
+			 << std::setw(6) << geno_value_name << delim
+			 << std::setw(8) << i->second->GENO[0] << delim
+			 << std::setw(8) << i->second->GENO[1] << delim
+			 << std::setw(8) << i->second->GENO[2] << "\n";
+
+
+
+	//counts:
+	file << std::setw(4) << i->second->chromosome << delim
+		   << std::setw(pp_maxsnp) << i->second->snpname << delim 
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/genabel -r 617


More information about the Genabel-commits mailing list