[Genabel-commits] r617 - in pkg: . VariABEL VariABEL/R VariABEL/man VariABEL/src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Nov 9 16:28:56 CET 2010
Author: maksim
Date: 2010-11-09 16:28:55 +0100 (Tue, 09 Nov 2010)
New Revision: 617
Added:
pkg/VariABEL/
pkg/VariABEL/CHANGES.LOG
pkg/VariABEL/DESCRIPTION
pkg/VariABEL/NAMESPACE
pkg/VariABEL/R/
pkg/VariABEL/R/tags
pkg/VariABEL/R/var.meta.gwaa.R
pkg/VariABEL/R/var.test.gwaa.R
pkg/VariABEL/data/
pkg/VariABEL/demo/
pkg/VariABEL/man/
pkg/VariABEL/man/var.test.homogeneity.Rd
pkg/VariABEL/src/
pkg/VariABEL/src/constants.h
pkg/VariABEL/src/gtps_container.cpp
pkg/VariABEL/src/gtps_container.h
pkg/VariABEL/src/inverse_variance_metaanalysis.cpp
pkg/VariABEL/src/inverse_variance_metaanalysis.h
pkg/VariABEL/src/supplementary_functions.cpp
pkg/VariABEL/src/supplementary_functions.h
pkg/VariABEL/src/tags
pkg/VariABEL/src/var_homogeneity_test_C.cpp
pkg/VariABEL/src/var_homogeneity_tests.cpp
pkg/VariABEL/src/var_homogeneity_tests.h
pkg/VariABEL/src/var_meta_gwaa_C.cpp
Log:
submision of VariABEL package for variance heregeneity analysis
Added: pkg/VariABEL/DESCRIPTION
===================================================================
--- pkg/VariABEL/DESCRIPTION (rev 0)
+++ pkg/VariABEL/DESCRIPTION 2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,13 @@
+Package: VarABEL
+Type: Package
+Title: genome-wide SNP association analysis
+Version: 1.5-0
+Date: 2010-02-18
+Author: Yurii Aulchenko, Maksim Struchalin
+Maintainer: Yurii Aulchenko <i.aoultchenko at erasmusmc.nl>
+Depends: R (>= 2.4.0), methods, MASS
+Suggests: qvalue, genetics, haplo.stats, DatABEL
+Description: a package for genome-wide association analysis between
+ quantitative or binary traits and single-nucleiotide
+ polymorphisms (SNPs).
+License: GPL (>= 2)
Added: pkg/VariABEL/NAMESPACE
===================================================================
--- pkg/VariABEL/NAMESPACE (rev 0)
+++ pkg/VariABEL/NAMESPACE 2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,8 @@
+useDynLib(VarABEL)
+
+
+export(
+ var.meta.gwaa,
+ var.test.gwaa
+ )
+
Added: pkg/VariABEL/R/tags
===================================================================
--- pkg/VariABEL/R/tags (rev 0)
+++ pkg/VariABEL/R/tags 2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,6 @@
+!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/
+!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/
+!_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert at users.sourceforge.net/
+!_TAG_PROGRAM_NAME Exuberant Ctags //
+!_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/
+!_TAG_PROGRAM_VERSION 5.6 //
Added: pkg/VariABEL/R/var.meta.gwaa.R
===================================================================
--- pkg/VariABEL/R/var.meta.gwaa.R (rev 0)
+++ pkg/VariABEL/R/var.meta.gwaa.R 2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,51 @@
+#=====================================================================================
+#
+# Filename: var.meta.gwaa.R
+#
+# Description: Function for meta analysis ov variance. Read flat files in plink format and perform metanalysis.
+#
+# Version: 1.0
+# Created: 26-Apr-2010
+# Revision: none
+#
+#
+# Author: Maksim V. Struchalin
+# Company: ErasmusMC, Epidemiology, The Netherlands.
+# Email: m.struchalin at erasmusmc.nl
+#
+#=====================================================================================
+
+
+
+
+"var.meta.gwaa" <-
+function(input_filenames,
+ testname,
+ output_filename="output.variance.metaanalysis",
+ exclude_snp_below_threshold=F,
+ threshold=30,
+ all_warnings=F,
+ skip_first_lines_amount=0,
+ delim=' ') {
+
+
+file_amount <- length(filenames_array)
+
+
+return_val <- .C("var_meta_gwaa_C", input_filenames, as.integer(file_amount), output_filename, as.integer(skip_first_lines_amount), delim,
+ lambdas = double(file_amount+1),
+ lambdas_NA = integer(file_amount+1),
+ as.logical(exclude_snp_below_threshold),
+ as.integer(threshold),
+ as.logical(all_warnings),
+ as.character(testname))
+
+
+return_val$lambdas[return_val$lambdas_NA==1] <- NA
+
+lambda_df <- data.frame(filename=filenames_array, lambda=return_val$lambdas[1:file_amount])
+
+lambda_df <- rbind(lambda_df, data.frame(filename=output_filename, lambda=return_val$lambdas[file_amount+1]))
+
+lambda_df
+}
Added: pkg/VariABEL/R/var.test.gwaa.R
===================================================================
--- pkg/VariABEL/R/var.test.gwaa.R (rev 0)
+++ pkg/VariABEL/R/var.test.gwaa.R 2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,113 @@
+#=====================================================================================
+#
+# Filename: var.test.gwaa.R
+#
+# Description: VarABEL functions: test each SNP in gwaa data for homogeneity of trait's variance among genotypes.
+#
+# Version: 0.1
+# Created: ---
+# Revision: none
+# last modification: 26-Apr-2010
+#
+# Author: Maksim V. Struchalin
+# Modified by: Maksim V. Struchalin, 11-Jan-2009
+# Company: ErasmusMC, Epidemiology Department, The Netherlands.
+# Email: m.struchalin at erasmusmc.nl
+# license: GPL (>=2)
+#
+#=====================================================================================
+
+
+
+
+
+#_____________________________________________________________________________________________________________________________
+#Available for user function for genome-wide testing variance homogeneity of trait's distribution.
+#
+"var.test.gwaa" <- function(genotype, trait, testname, analysis_type)
+{
+#test statistics for each SNP is stored here. There is column names: "snpname", "chisq_df2", "chisq_df1"
+results <- data.frame()
+
+
+
+
+# Check wether the class and value of input parameters is appropriate
+#_____________________________________________________________
+if(!is(genotype, "snp.data") & !is(genotype, "numeric"))
+ {
+ stop("VarABEL, var.test.gwaa: Wrong class of input parameter \"genotype\".")
+ }
+
+if(!is(trait, "numeric"))
+ {
+ stop("VarABEL, var.test.gwaa: Wrong class of input parameter \"trait\".")
+ }
+
+if(!is(testname, "character"))
+ {
+ stop("VarABEL, var.test.gwaa: Wrong class of input parameter \"testname\". It should be \"character\".")
+ }
+
+
+#_____________________________________________________________
+
+
+
+
+return_val <- .Call("variance_homogeneity_test_C_old_data_type",
+ as.raw(genotype at gtps),
+ as.double(trait),
+ as.integer(genotype at nids),
+ as.integer(genotype at nsnps),
+ analysis_type,
+ testname)
+
+
+
+
+result_df <- data.frame(chisq=return_val[[1]], df=return_val[[2]])
+
+
+#output <- .Call("interaction_rare_recesive_allele_C_", as.raw(data at gtps), as.integer(data at nids), as.integer(snp_number),
+# as.integer(trait),
+# as.integer(window),
+# return_all_result,
+# test,
+# as.integer(min_expected_cut_off))
+
+
+}
+##Perform variance analysis
+##_____________________________________________________________
+#
+#if(is(genotype, "snp.data"))
+# {
+# nids <- genotype at nid
+# nsnps <- genotype at gtdata@nsnps
+# MAR <- 1
+# OUT <- "R"
+#
+# FUN <- test_C_function_name[testname] #choose name of variance homogeneity test
+#
+# #Run choosen variance homogeneity test
+# results_C <- .Call("iterator", as.raw(genotype at gtps),
+# as.integer(nids), as.integer(nsnps),
+# as.character(FUN),
+# as.character(OUT),
+# as.integer(MAR),
+# as.integer(0),
+# package="VarABEL")
+# }
+#
+#
+#
+#return results
+#}
+##End of var.test.gwaa function
+##_____________________________________________________________________________________________________________________________
+
+
+
+
+
Added: pkg/VariABEL/man/var.test.homogeneity.Rd
===================================================================
--- pkg/VariABEL/man/var.test.homogeneity.Rd (rev 0)
+++ pkg/VariABEL/man/var.test.homogeneity.Rd 2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,55 @@
+\name{var.test.homogeneity}
+\alias{var.test.homogeneity}
+\title{function to perfome variance homogeneity test of different genotypic groups}
+\description{
+function to perfome variance homogeneity test of different genotypic groups
+}
+\usage{
+var.test.homogeneity(trait="", formula="", trait_df, gwa_data, dir, analysis_name_specific, how_many_sigma_drop=NULL, top_snp_num_figure=3, analysis_type = "AAvsABvsBB")
+}
+\arguments{
+ \item{trait}{Name of trait you are analysing (data.frame trait_df has to contain a column with this name). If it absenses or equals to "" than the trait name must be specifed in "formula"}
+ \item{formula}{Object of a class "formula". It is used in case of any adljustment nessacarity. If it absenses or equals to "" than trait must be specifed}
+ \item{trait_df}{Object of class "data.frame". It contains columnes with analysing trait and covariates. Should contain column with id names.}
+ \item{gwa_data}{Object of class "gwaa.data"}
+ \item{dir}{Directory name where all results will be put. It should be created befored analysis starts.}
+ \item{analysis_name_specific}{Any set of characters. It figures as a part of a file names in output.}
+ \item{how_many_sigma_drop}{Cut off point to exclude outliers. Normally distributed trait is assumed.}
+ \item{top_snp_num_figure}{For how many SNPs to make boxplot.}
+ \item{analysis_type}{Which test is performed. Possible values: "AAvsABvsBB" (testing all three genotipc groups),
+ "AAvsABandBB" (testing AA against AB and BB together), "ABvsAAandBB", "BBvsAAandAB".}
+}
+\details{
+var.test.homogeneity function perform analisis of variance homogeneity for easch SNP in data "gwa_data". The main result is pval of variance homogeneity test for each SNP.
+Bartlett's test is currently used. This assumes normality in a trait distribution. Even a couple of outliers can spoil a whole pictures.
+Input parametere "how_many_sigma_drop" should be used to protect overestimation of pval.
+
+}
+
+\value{
+As result the file "lambda.doc" is created which contains inflation factor, file with test statistics (name is qt1__adj__sex_age_test_results.txt
+ for example in case if you analyse "qt1" with adjustment on "sex" and "age"), file those contains id names which are excluded due to
+ normality resoans, manhettan plot, two qqplots (for chi2 amd pval) and boxplots for top SNPs.
+ }
+
+
+%\references{ ~put references to the literature/web site here ~ }
+\author{Maksim Struchalin}
+%\note{ ~~further notes~~
+%}
+\seealso{
+ \code{\link{var.meta}},
+}
+\examples{
+ data(srdta)
+ mytrait_df <- srdta at phdata[,c("id","qt1","sex","age")]
+ mydir <- "mydir_var_analysis"
+ dir.create(mydir)
+ var.test.homogeneity(formula=qt1 ~ sex + age,
+ trait_df=mytrait_df, gwa_data=srdta, dir=mydir, analysis_name_specific="test", how_many_sigma_drop=3)
+
+
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{manip}
Added: pkg/VariABEL/src/constants.h
===================================================================
--- pkg/VariABEL/src/constants.h (rev 0)
+++ pkg/VariABEL/src/constants.h 2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,11 @@
+
+#ifndef SMV_CONSTANTS_H
+#define SMV_CONSTANTS_H
+const VARIABLE_TYPE NA_value = -999.999; // which numeric value is ised as NA
+const int NA_value_int = -999; // which numeric value is ised as NA
+const unsigned precision_output=4;
+const double median_table_chi2_df2 = 1.386294;
+
+
+
+#endif
Added: pkg/VariABEL/src/gtps_container.cpp
===================================================================
--- pkg/VariABEL/src/gtps_container.cpp (rev 0)
+++ pkg/VariABEL/src/gtps_container.cpp 2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,211 @@
+//#=====================================================================================
+//#
+//# Filename: gtps_container.cpp
+//#
+//# Description: Container for storaging genotype data. Use in function for merging of two snp.data class objects.
+//#
+//# Version: 1.0
+//# Created: 18-March-2008
+//# Revision: none
+//#
+//#
+//# Author: Maksim V. Struchalin, Yurii S. Aulchenko
+//# Company: ErasmusMC, Epidemiology & Biostatistics Department, The Netherlands.
+//# Email: m.struchalin@@erasmusmc.nl, i.aoultchenko at erasmusmc.nl
+//#
+//#=====================================================================================
+
+#include "gtps_container.h"
+#include <cstdlib>
+#include <math.h>
+
+
+
+
+gtps_container::gtps_container(char * gtps_array_, char * strand_array_, char * coding_array_, unsigned id_numbers_, unsigned snp_numbers_)
+{
+do_we_have_strand_and_codding_arrays = true; //of course
+rearrangement_array = new unsigned[4];
+rearrangement_array[0] = 6;
+rearrangement_array[1] = 4;
+rearrangement_array[2] = 2;
+rearrangement_array[3] = 0;
+
+
+
+
+
+gtps_array = gtps_array_;
+strand_array = strand_array_;
+coding_array = coding_array_;
+id_numbers = id_numbers_;
+snp_numbers = snp_numbers_;
+nbytes_for_one_snp = unsigned(ceil(double(id_numbers)/4.) + 0.5);
+
+//std::cout<<"gtps_container::gtps_container: nbytes_for_one_snp="<<nbytes_for_one_snp<<"\n";
+
+}
+//------------------------------------------------------------------
+
+
+
+//------------------------------------------------------------------
+gtps_container::gtps_container(char * gtps_array_, unsigned id_numbers_, unsigned snp_numbers_)
+{
+do_we_have_strand_and_codding_arrays = true; //of course
+rearrangement_array = new unsigned[4];
+rearrangement_array[0] = 6;
+rearrangement_array[1] = 4;
+rearrangement_array[2] = 2;
+rearrangement_array[3] = 0;
+
+
+
+
+
+gtps_array = gtps_array_;
+id_numbers = id_numbers_;
+snp_numbers = snp_numbers_;
+nbytes_for_one_snp = unsigned(ceil(double(id_numbers)/4.) + 0.5);
+}
+//------------------------------------------------------------------
+
+
+
+
+
+
+gtps_container::~gtps_container(void)
+{
+delete[] rearrangement_array;
+}
+
+
+
+
+//------------------------------------------------------------------
+void gtps_container::get_our_byte_number_and_local_person_number(unsigned id_position, unsigned snp_position)
+{
+our_byte_number = int(ceil(id_position/4.)+0.5) + (snp_position-1)*nbytes_for_one_snp; //What byte is our id in?
+
+local_person_number = id_position - ((our_byte_number-(snp_position-1)*nbytes_for_one_snp-1)*4); //What position of our id is in byte?
+}
+//------------------------------------------------------------------
+
+
+
+
+
+
+
+
+
+//------------------------------------------------------------------
+char gtps_container::get(unsigned id_position, unsigned snp_position)
+{
+get_our_byte_number_and_local_person_number(id_position, snp_position); //calculate our_byte_number value
+
+char our_byte_vallue = gtps_array[our_byte_number-1];
+
+
+
+return char((gtps_array[our_byte_number-1] >> rearrangement_array[local_person_number-1]) & 3);
+}
+//------------------------------------------------------------------
+
+
+
+
+//------------------------------------------------------------------
+//Attention! This class does not carry about allocated in this function memory.
+//In order to avoid memory leak "delete" must be performed for every array created in this function.
+char* gtps_container::get_gtps_array_for_snp(unsigned snp_position)
+{
+char* gtps_for_one_snp = new char(nbytes_for_one_snp);
+get_our_byte_number_and_local_person_number(1, snp_position); //calculate our_byte_number value
+
+std::cout<<"gtps_container::get_gtps_array_for_snp: our_byte_number="<<our_byte_number<<"\n";
+std::cout<<"gtps_container::get_gtps_array_for_snp: nbytes_for_one_snp="<<nbytes_for_one_snp<<"\n";
+
+std::cout<<"gtps_array[0]="<<int(gtps_array[0])<<"\n";
+
+for(int i=0 ; i<nbytes_for_one_snp; i++)
+ {
+ gtps_for_one_snp[i]=gtps_array[our_byte_number-1+i];
+ }
+
+
+return gtps_for_one_snp;
+}
+//------------------------------------------------------------------
+
+
+
+
+
+
+
+
+
+char gtps_container::get_strand(unsigned snp_position)
+{
+if(do_we_have_strand_and_codding_arrays) return strand_array[snp_position-1];
+else std::cout<<"gtps_container::get_strand: You can not get strand since you create object with constructor gtps_container(char * gtps_array_raw, unsigned id_numbers, unsigned snp_numbers)\n";
+}
+
+
+
+char gtps_container::get_coding(unsigned snp_position)
+{
+if(do_we_have_strand_and_codding_arrays) return coding_array[snp_position-1];
+else std::cout<<"gtps_container::get_strand: You can not get strand since you create object with constructor gtps_container(char * gtps_array_raw, unsigned id_numbers, unsigned snp_numbers)\n";
+}
+
+
+//------------------------------------------------------------------
+void gtps_container::set(unsigned id_position, unsigned snp_position, char data)
+{
+//for(int i=0 ; i<4 ; i++)
+//std::cout<<"rearrangement_array["<<i<<"]="<<rearrangement_array[i]<<"\n";
+
+
+static const char clear_info_for_person[]={63, 207, 243, 252};
+
+//std::cout<<"set: id_position="<<id_position<<", snp_position="<<snp_position<<", data="<<int(data)<<"\n";
+get_our_byte_number_and_local_person_number(id_position, snp_position);
+
+
+//std::cout<<"set: (gtps_array[our_byte_number]&clear_info_for_person[local_person_number-1])="<<int(gtps_array[our_byte_number]&clear_info_for_person[local_person_number-1])<<"\n";
+
+//char tmp = (gtps_array[our_byte_number]&clear_info_for_person[local_person_number-1]) | data;
+//std::cout<<"set: tmp="<<int(tmp)<<"\n";
+
+
+//std::cout<<"gtps_array["<<our_byte_number-1<<"] = (gtps_array["<<our_byte_number-1<<"]&"<<int(clear_info_for_person[local_person_number-1])<<" |"<< int((data&3)<<rearrangement_array[local_person_number-1])<<"\n";
+
+
+gtps_array[our_byte_number-1] = (gtps_array[our_byte_number-1]&clear_info_for_person[local_person_number-1]) | (data&3)<<rearrangement_array[local_person_number-1];
+//std::cout<<"gtps_array["<<our_byte_number<<"-1]="<<int(gtps_array[our_byte_number-1])<<"\n";
+}
+//------------------------------------------------------------------
+
+
+
+
+/*
+gtps_container::checking(unsigned id_position, unsigned snp_position)
+{
+if (id_position > id_numbers || id_position<0)
+ {
+ std::cerr<<"Bad ID number. ID_number="<<id_position<<". Maximum ID_number can be "<<id_position<<"\n";
+ }
+
+if(snp_position > snp_position || snp_position <0)
+ {
+ std::cerr<<"Bad ID number. ID_number="<<id_position<<". Maximum ID_number can be "<<id_position<<"\n";
+ }
+}
+*/
+
+
+
Added: pkg/VariABEL/src/gtps_container.h
===================================================================
--- pkg/VariABEL/src/gtps_container.h (rev 0)
+++ pkg/VariABEL/src/gtps_container.h 2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,73 @@
+//#=====================================================================================
+//#
+//# Filename: gtps_container.h
+//#
+//# Description: Set of functions for merging two snp.data class objects.
+//#
+//# Version: 1.0
+//# Created: 18-March-2008
+//# Revision: none
+//#
+//#
+//# Author: Maksim V. Struchalin, Yurii S. Aulchenko
+//# Company: ErasmusMC, Epidemiology & Biostatistics Department, The Netherlands.
+//# Email: m.struchalin@@erasmusmc.nl, i.aoultchenko at erasmusmc.nl
+//#
+//#=====================================================================================
+
+#ifndef SMV_GTPS_CONTAINER_H
+#define SMV_GTPS_CONTAINER_H
+
+
+
+#include<iostream>
+#include<vector>
+#include<memory>
+
+
+
+
+
+
+
+
+//==================================================================
+// Class gtps_container - container for gtps data. Class facilitates access to gtps array.
+class gtps_container
+ {
+ public:
+ gtps_container(char * gtps_array_raw, char * strand_array, char * coding_array, unsigned id_numbers, unsigned snp_numbers);
+ gtps_container(char * gtps_array_raw, unsigned id_numbers, unsigned snp_numbers);
+ ~gtps_container(void);
+
+ //Get polymorphism for person who have ID=id_position for SNP=snp_position
+ //Here the first id_position equal zerro, the second - one etc...
+ //id_position=1 - THE FIRST ARGUMENT (id_position!=O NEVER!!!). snp_position - same
+ char get(unsigned id_position, unsigned snp_position);
+ char* get_gtps_array_for_snp(unsigned snp_position);
+ char get_strand(unsigned snp_position);
+ char get_coding(unsigned snp_position);
+
+
+ //id_position=1 - THE FIRST ARGUMENT (id_position!=O NEVER!!!). snp_position - same
+ void set(unsigned id_position, unsigned snp_position, char data);
+
+ private:
+ void get_our_byte_number_and_local_person_number(unsigned id_position, unsigned snp_position);
+ bool do_we_have_strand_and_codding_arrays;
+ char *gtps_array; //pointer to array where we stotages our data (passed from R)
+ char * strand_array;
+ char * coding_array;
+ unsigned id_numbers, snp_numbers;
+ unsigned nbytes_for_one_snp;
+
+ unsigned our_byte_number,
+ local_person_number; //can have vallues: 1, 2, 3 or 4. Show what is position in the byte.
+// const unsigned rearrangement_array[];
+ unsigned *rearrangement_array;
+
+
+ };
+//==================================================================
+
+#endif
Added: pkg/VariABEL/src/inverse_variance_metaanalysis.cpp
===================================================================
--- pkg/VariABEL/src/inverse_variance_metaanalysis.cpp (rev 0)
+++ pkg/VariABEL/src/inverse_variance_metaanalysis.cpp 2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,70 @@
+//#=====================================================================================
+//#
+//# Filename: inverse_variance_metaanalysis.cpp
+//#
+//# Description: Function for meta analysis with inverse variance method.
+//#
+//# Version: 1.0
+//# Created: 06-July-2009
+//# Revision: none
+//# Modifed: 26-Apr-2010
+//#
+//# Author: Maksim V. Struchalin
+//# Company: ErasmusMC, Epidemiology, The Netherlands.
+//# Email: m.struchalin at erasmusmc.nl
+//#
+//#=====================================================================================
+
+
+
+
+#include<iostream>
+//#include<math>
+
+#include <Rinternals.h>
+
+#include <R.h> // to include Rconfig.h
+
+#ifdef ENABLE_NLS
+#include <libintl.h>
+#define _(String) dgettext ("pkg", String)
+// replace pkg as appropriate
+#else
+#define _(String) (String)
+#endif
+
+extern "C" {
+#include "inverse_variance_metaanalysis.h"
+
+
+void inverse_variance_metaanalysis(double *beta_set1, double *beta_set2,
+ double *sebeta_set1, double *sebeta_set2,
+ unsigned *num, //number of betas for current cohort
+ double *mbeta,
+ double *mse,
+ char *testname)
+{
+
+unsigned num_el = *num;
+double se_set1, se_set2, wt2_set1, wt2_set2, invsumwt2;
+
+
+
+ for(unsigned i=0 ; i<num_el ; i++)
+ {
+ se_set1 = sqrt(sebeta_set1[i]*sebeta_set1[i]);
+ se_set2 = sqrt(sebeta_set2[i]*sebeta_set2[i]);
+
+ wt2_set1 = 1./(sebeta_set1[i]*sebeta_set1[i]);
+ wt2_set2 = 1./(sebeta_set2[i]*sebeta_set2[i]);
+
+ invsumwt2 = 1./(wt2_set1+wt2_set2);
+ mbeta[i] = (beta_set1[i]*wt2_set1 + beta_set2[i]*wt2_set2)*invsumwt2;
+ mse[i] = sqrt(invsumwt2);
+ }
+
+}// end of function
+
+
+
+}//end of extern "C"
Added: pkg/VariABEL/src/inverse_variance_metaanalysis.h
===================================================================
--- pkg/VariABEL/src/inverse_variance_metaanalysis.h (rev 0)
+++ pkg/VariABEL/src/inverse_variance_metaanalysis.h 2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,33 @@
+//#=====================================================================================
+//#
+//# Filename: inverse_variance_metaanalysis.h
+//#
+//# Description: Function for meta analysis with inverse variance method
+//#
+//# Version: 1.0
+//# Created: 06-July-2009
+//# Revision: none
+//# Modifed: 26-Apr-2010
+//#
+//# Author: Maksim V. Struchalin
+//# Company: ErasmusMC, Epidemiology, The Netherlands.
+//# Email: m.struchalin at erasmusmc.nl
+//#
+//#=====================================================================================
+
+#ifndef SMV_DOMETA_C_H
+#define SMV_DOMETA_C_H
+
+
+extern "C" {
+void inverse_variance_metaanalysis(double *beta_set1, double *beta_set2,
+ double *sebeta_set1, double *sebeta_set2,
+ unsigned *num,
+ double *mbeta,
+ double *mse,
+ char *testname);
+
+
+}
+#endif
+
Added: pkg/VariABEL/src/supplementary_functions.cpp
===================================================================
--- pkg/VariABEL/src/supplementary_functions.cpp (rev 0)
+++ pkg/VariABEL/src/supplementary_functions.cpp 2010-11-09 15:28:55 UTC (rev 617)
@@ -0,0 +1,1015 @@
+//#=====================================================================================
+//#
+//# Filename: supplementary_functions.cpp
+//#
+//# Description: Supplementary functions for variance homogeneity test and for metaanalysis.
+//#
+//# Version: 1.0
+//# Created: 28-Apr-2010
+//# Revision: none
+//#
+//#
+//# Author: Maksim V. Struchalin
+//# Company: ErasmusMC, Epidemiology, The Netherlands.
+//# Email: m.struchalin at erasmusmc.nl
+//#
+//#=====================================================================================
+
+
+#include "supplementary_functions.h"
+#include "inverse_variance_metaanalysis.h"
+#include <stdlib.h>
+#include <math.h>
+
+
+
+
+
+
+
+//___________________________________________________________
+//Set genotypes in common view GA -> AG
+bool unify_snp(snp_var_data* snp, std::ofstream & warnings_file)
+{
+
+
+static std::string alleles_snp;
+alleles_snp="";
+
+for(unsigned i=0 ; i<GENO_TYPES_NUM ; i++) {alleles_snp += snp->GENO[i];}//collect all alleles
+
+alleles_snp = get_uniq_symbols(alleles_snp);
+for(unsigned i=0 ; i<alleles_snp.size() ; i++) if(alleles_snp[i] == '/') {alleles_snp.erase(i,1);}
+
+if(alleles_snp.size() > 2 && alleles_snp.size() <=1)
+ {
+ //Rprintf("SNP %s has more than 2 alleles (it has %s). SNP skiped.\n", snp->snpname.c_str(), alleles_snp.c_str());
+ warnings_file<<"SNP "<<snp->snpname<<" has more than 2 alleles (it has "<<alleles_snp<<"). SNP skiped.\n";
+ return false;
+ }
+
+if(alleles_snp.size() <=1)
+ {
+ //Rprintf("SNP %s has less than 2 alleles (it has %s). SNP skiped.\n", snp->snpname.c_str(), alleles_snp.c_str());
+ warnings_file<<"SNP "<<snp->snpname<<" has less than 2 alleles (it has "<<alleles_snp<<"). SNP skiped.\n";
+ return false;
+ }
+
+
+if(alleles_snp[0]=='0' && alleles_snp[1]=='0')
+ {
+ //Rprintf("SNP %s has undefind allels. SNP skiped..\n", snp->snpname.c_str());
+ warnings_file<<"SNP "<<snp->snpname<<" has undefind allels. SNP skiped\n";
+ return false;
+ }
+
+
+
+
+//exclude those geno group which has only one id
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+ {
+
+ if(is_na(snp->SD[i]) || is_na(snp->MEAN[i]) || is_na(snp->COUNTS[i]) || snp->COUNTS[i]<=1 )
+ {
+ snp->SD[i] = NA_value;
+ snp->MEAN[i] = NA_value;
+ snp->COUNTS[i] = 0;
+ continue;
+ }
+
+ static VARIABLE_TYPE var;
+ var = snp->SD[i]*snp->SD[i];
+ if(var <= 1.E-32)
+ {
+ snp->SD[i] = NA_value;
+ snp->MEAN[i] = NA_value;
+ snp->COUNTS[i] = 0;
+
+ //Rprintf("warning: genotypic group %s in snp %s has too small variance (variance=%f). This genotypic group is excluded from analysis.\n",
+ warnings_file<<"warning: genotypic group "<<snp->GENO[i]<<" in snp "<<snp->snpname<<" has too small variance (variance="<<var<<"). This genotypic group is excluded from analysis.\n";
+
+ }
+ }
+
+
+
+
+
+
+std::sort(alleles_snp.begin(), alleles_snp.end());
+
+
+if(alleles_snp[0] == '0')
+ {
+ alleles_snp[0] = alleles_snp[1];
+ alleles_snp[1] = '0';
+ }
+
+static std::string geno[3];
+
+
+geno[0].push_back(alleles_snp[0]);
+geno[0].push_back('/');
+geno[0].push_back(alleles_snp[0]);
+
+geno[1].push_back(alleles_snp[0]);
+geno[1].push_back('/');
+geno[1].push_back(alleles_snp[1]);
+
+geno[2].push_back(alleles_snp[1]);
+geno[2].push_back('/');
+geno[2].push_back(alleles_snp[1]);
+
+
+static std::string geno_hemozyg_switched;
+geno_hemozyg_switched.push_back(geno[1][2]);
+geno_hemozyg_switched.push_back('/');
+geno_hemozyg_switched.push_back(geno[1][0]);
+
+
+if(snp->GENO[0] == geno[0] && snp->GENO[1] == geno[1] && snp->GENO[2] == geno[2])
+ {
+ geno_hemozyg_switched = "";
+ geno[0] = "";
+ geno[1] = "";
+ geno[2] = "";
+ return true; //nothing to change
+ }
+
+
+static snp_var_data snp_new;
+
+//snp_new->snpname = snp->snpname;
+//snp_new->chromosome = snp->chromosome;
+
+
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+ {
+ if(snp->GENO[i] == geno[0])
+ {snp_new.GENO[0] = geno[0]; snp_new.COUNTS[0]=snp->COUNTS[i]; snp_new.MEAN[0]=snp->MEAN[i]; snp_new.SD[0]=snp->SD[i];}
+ if(snp->GENO[i] == geno[1] || snp->GENO[i]==geno_hemozyg_switched)
+ {snp_new.GENO[1] = geno[1]; snp_new.COUNTS[1]=snp->COUNTS[i]; snp_new.MEAN[1]=snp->MEAN[i]; snp_new.SD[1]=snp->SD[i];}
+ if(snp->GENO[i] == geno[2])
+ {snp_new.GENO[2] = geno[2]; snp_new.COUNTS[2]=snp->COUNTS[i]; snp_new.MEAN[2]=snp->MEAN[i]; snp_new.SD[2]=snp->SD[i];}
+ }
+
+geno_hemozyg_switched = "";
+geno[0] = "";
+geno[1] = "";
+geno[2] = "";
+
+
+
+
+
+
+
+
+
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+ {
+ snp->GENO[i] = snp_new.GENO[i];
+ snp->COUNTS[i] = snp_new.COUNTS[i];
+ snp->MEAN[i] = snp_new.MEAN[i];
+ snp->SD[i] = snp_new.SD[i];
+ }
+
+snp_new.reset();
+
+
+
+
+return true;
+}
+//___________________________________________________________
+
+
+
+
+//___________________________________________________________
+//put new snp into the storage. If this snp is there already tham metaanalyse it
+bool include_snp(Snp_store_type * snps_storage, snp_var_data* snp, std::ofstream & warnings_file, char *testname)
+{
+
+
+Snp_store_type::iterator iter_map = snps_storage->find(snp->snpname);
+
+char delim=' ';
+
+
+
+if(iter_map == snps_storage->end())
+ {
+ snps_storage->insert(std::pair<std::string, snp_var_data*>(snp->snpname, snp));
+ }
+else
+ {
+ snp_var_data* snp_current = iter_map->second;
+ static bool is_snp_ok;
+ is_snp_ok = check(snp_current, snp, warnings_file);
+
+ if(!is_snp_ok) {return false;}
+
+ (*snps_storage)[snp->snpname.c_str()] = snp_var_meta(iter_map->second, snp, testname);
+
+ //bless god souls of these objects...
+ delete snp_current;
+ delete snp;
+ }
+
+return true;
+}
+//___________________________________________________________
+
+
+
+//metaanalysis of data from two snps
+//___________________________________________________________
+snp_var_data* snp_var_meta(snp_var_data* snp1, snp_var_data* snp2, char* testname)
+{
+snp_var_data* snp_meta = new snp_var_data;
+
+
+
+snp_meta->chromosome = snp1->chromosome;
+snp_meta->snpname = snp1->snpname;
+
+VARIABLE_TYPE meta_mean_beta[GENO_TYPES_NUM], meta_mean_se[GENO_TYPES_NUM];
+
+
+static unsigned one = 1;
+
+for(int i=0 ; i<GENO_TYPES_NUM ; i++)
+ {
+ //metanalysis for variance
+
+ static std::string meta_codding;
+
+ snp_meta->GENO[i] = snp1->GENO[i];
+
+ if(snp1->COUNTS[i] != 0 && snp2->COUNTS[i] != 0 )
+ {
+
+ static VARIABLE_TYPE SD_of_the_mean_snp1, SD_of_the_mean_snp2;
+
+ SD_of_the_mean_snp1 = snp1->SD[i]/sqrt(double(snp1->COUNTS[i]));
+ SD_of_the_mean_snp2 = snp2->SD[i]/sqrt(double(snp2->COUNTS[i]));
+
+ //metanalysis for mean
+ inverse_variance_metaanalysis(&snp1->MEAN[i], &snp2->MEAN[i],
+ &SD_of_the_mean_snp1, &SD_of_the_mean_snp2,
+ &one,
+ meta_mean_beta,
+ meta_mean_se, testname);
+
+ snp_meta->MEAN[i] = meta_mean_beta[0];
+ snp_meta->COUNTS[i] = snp1->COUNTS[i] + snp2->COUNTS[i];
+ snp_meta->SD[i] = meta_mean_se[0]*sqrt(double(snp_meta->COUNTS[i]));
+ }
+ else
+ {
+ if(snp1->COUNTS[i] == 0)
+ {
+ snp_meta->SD[i] = snp2->SD[i];
+ snp_meta->COUNTS[i] = snp2->COUNTS[i];
+ snp_meta->MEAN[i] = snp2->MEAN[i];
+ }
+ else if(snp2->COUNTS[i] == 0)
+ {
+ snp_meta->SD[i] = snp1->SD[i];
+ snp_meta->COUNTS[i] = snp1->COUNTS[i];
+ snp_meta->MEAN[i] = snp1->MEAN[i];
+ }
+ else
+ {
+ error("Upss.. :-) something strange occured... wrong file format probably. Create small example of your data where you have an error and send it to developer.\n");
+ }
+ }
+ }
+return snp_meta;
+}
+//___________________________________________________________
+
+
+
+
+
+
+
+//___________________________________________________________
+bool is_na(const VARIABLE_TYPE val, const VARIABLE_TYPE na_reference)
+{
+static VARIABLE_TYPE delta = fabs(na_reference/1E6);
+if(val > na_reference-delta && val < na_reference+delta) return true;
+else return false;
+}
+//___________________________________________________________
+
+
+
+
+
+bool is_na(const int val, const int na_reference) //is numerical value recognized as NA
+{
+if(val == na_reference) return true;
+else return false;
+}
+
+
+
+//save all data into plink like format
+//___________________________________________________________
+void save_snps_data_into_file(Snp_store_type *snps_data, const char *output_filename, char delim)
+{
+//print header
+
+const unsigned pp_maxsnp=10;
+
+std::ofstream file;
+
+
+file.open(output_filename);
+
+
+if(!file.is_open()){error("Can not open file %s\n", output_filename);}
+
+
+
+//file<<setiosflags(std::ios::right);
+
+file.precision(precision_output);
+
+file << std::setw(4) << chromosome_column_name.c_str() << delim
+ << std::setw(pp_maxsnp) << snpname_column_name.c_str() << delim
+ << std::setw(6) << value_column_name.c_str() << delim
+ << std::setw(8) << g11_column_name.c_str() << delim
+ << std::setw(8) << g12_column_name.c_str() << delim
+ << std::setw(8) << g22_column_name.c_str() << "\n";
+
+
+
+static Snp_store_type::iterator iter_map;
+
+for(Snp_store_type::const_iterator i=snps_data->begin() ; i!=snps_data->end() ; ++i)
+ {
+ //geno:
+ file << std::setw(4) << i->second->chromosome << delim
+ << std::setw(pp_maxsnp) << i->second->snpname << delim
+ << std::setw(6) << geno_value_name << delim
+ << std::setw(8) << i->second->GENO[0] << delim
+ << std::setw(8) << i->second->GENO[1] << delim
+ << std::setw(8) << i->second->GENO[2] << "\n";
+
+
+
+ //counts:
+ file << std::setw(4) << i->second->chromosome << delim
+ << std::setw(pp_maxsnp) << i->second->snpname << delim
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/genabel -r 617
More information about the Genabel-commits
mailing list