[Genabel-commits] r1030 - in pkg/ProbABEL: src tests tests/verified_results
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Nov 28 00:51:20 CET 2012
Author: lckarssen
Date: 2012-11-28 00:51:20 +0100 (Wed, 28 Nov 2012)
New Revision: 1030
Added:
pkg/ProbABEL/tests/check_dose_input.sh
pkg/ProbABEL/tests/verified_results/height_base_add.out.txt
Modified:
pkg/ProbABEL/src/gendata.cpp
pkg/ProbABEL/src/gendata.h
pkg/ProbABEL/tests/Makefile.am
Log:
ProbABEL:
- Removed sscanf() call because cppcheck was complaining about a possible buffer overflow. sscanf() was used to check for the presence of the Mach/Minimac -> arrow in the ID names (when using a plain text dose file).
- Added a check with a dose file that doesn't contain arrows (->).
- Tried to fix a bug where valgrind reported a jump based on a possibly uninitialised value. This was caused by the atof() call when reading in dosages from a text file. Unfortunately valgrind seems to be broken, because the present solution based on the string class gives other errors: "Invalid read of size 8" when encountering a NaN value in the dosages. This has been reported as a bug in Valgrind.
- cppcheck complained that several variables in the gendata class should be initialised using initialisation lists for a possible speedup. Fixed now.
Modified: pkg/ProbABEL/src/gendata.cpp
===================================================================
--- pkg/ProbABEL/src/gendata.cpp 2012-11-27 22:27:07 UTC (rev 1029)
+++ pkg/ProbABEL/src/gendata.cpp 2012-11-27 23:51:20 UTC (rev 1030)
@@ -35,11 +35,8 @@
report_error("cannot get gendata");
}
-gendata::gendata()
+gendata::gendata() : nsnps(0), nids(0), ngpreds(0), DAG(NULL), DAGmask(NULL)
{
- nsnps = nids = ngpreds = 0;
- DAG = NULL;
- DAGmask = NULL;
}
void gendata::re_gendata(string filename, unsigned int insnps,
@@ -85,6 +82,7 @@
report_error("nids != mneasured (%i != %i)\n", nids, nmeasured);
}
+
void gendata::re_gendata(char * fname, unsigned int insnps,
unsigned int ingpreds, unsigned int npeople, unsigned int nmeasured,
unsigned short int * allmeasured, int skipd, std::string * idnames)
@@ -105,68 +103,70 @@
std::cerr << "gendata: cannot open file " << fname << endl;
}
- char tmp[100], tmpn[100];
std::string tmpid, tmpstr;
int k = 0;
for (unsigned int i = 0; i < npeople; i++)
+ {
if (allmeasured[i] == 1)
{
if (skipd > 0)
{
- // int ttt;
- char ttt[100];
- infile >> tmp;
- // sscanf(tmp,"%d->%s",&ttt, tmpn);
- // these changes are thanks to BMM & BP :)
- // sscanf(tmp,"%s->%s",&ttt, tmpn);
- // sscanf(tmp,"%[^->]->%[^->]",&ttt, tmpn);
- tmpstr = tmp;
- if (tmpstr.find("->") != string::npos)
+ // Read the genotype data and look for the signature
+ // arrow of MaCH/minimac. If found only use the part
+ // after the arrow as ID.
+ infile >> tmpstr;
+ size_t strpos = tmpstr.find("->") ;
+ if (strpos != string::npos)
{
- sscanf(tmp, "%[^->]->%s", ttt, tmpn);
- tmpid = tmpn;
+ tmpid = tmpstr.substr(strpos+2, string::npos);
} else
{
tmpid = tmpstr;
- // std::cout << tmp << ";" << ttt << ";" << tmpn
- // << tmpid.c_str() << ";" << idnames[k].c_str() << "\n";
}
if (tmpid != idnames[k])
{
cerr << "phenotype file and dose or probability file "
- << "did not match at line " << i + 2 << "(" << tmpid
- << " != " << idnames[k] << ")" << endl;
+ << "did not match at line " << i + 2 << "(" << tmpid
+ << " != " << idnames[k] << ")" << endl;
infile.close();
exit(1);
}
}
+
for (int j = 1; j < skipd; j++)
{
- infile >> tmp;
+ infile >> tmpstr;
}
+
for (int j = 0; j < (nsnps * ngpreds); j++)
{
if (infile.good())
{
- infile >> tmp;
+ infile >> tmpstr;
+ // tmpstr contains the dosage in string form. Convert
+ // it to float (if tmpstr is NaN it will be set to nan).
+ // Note that Valgrind 3.7.0 gives "Invalid read of
+ // size 8" error messages here. A bug in Valgrind?!
+ float dosage = strtod(tmpstr.c_str(), (char **) NULL);
+ G.put(dosage, k, j);
} else
{
- std::cerr
- << "cannot read dose-file: check skipd and ngpreds parameters\n";
+ std::cerr << "cannot read dose-file: "
+ << "check skipd and ngpreds parameters\n";
infile.close();
exit(1);
}
- G.put(atof(tmp), k, j);
}
k++;
} else
{
for (int j = 0; j < skipd; j++)
- infile >> tmp;
+ infile >> tmpstr;
for (int j = 0; j < (nsnps * ngpreds); j++)
- infile >> tmp;
+ infile >> tmpstr;
}
+ }
infile.close();
}
// HERE NEED A NEW CONSTRUCTOR BASED ON DATABELBASECPP OBJECT
Modified: pkg/ProbABEL/src/gendata.h
===================================================================
--- pkg/ProbABEL/src/gendata.h 2012-11-27 22:27:07 UTC (rev 1029)
+++ pkg/ProbABEL/src/gendata.h 2012-11-27 23:51:20 UTC (rev 1030)
@@ -24,13 +24,17 @@
unsigned int nids;
unsigned int ngpreds;
gendata();
+
void re_gendata(char * fname, unsigned int insnps, unsigned int ingpreds,
unsigned int npeople, unsigned int nmeasured,
unsigned short int * allmeasured, int skipd, std::string * idnames);
+
void re_gendata(string filename, unsigned int insnps, unsigned int ingpreds,
unsigned int npeople, unsigned int nmeasured,
unsigned short int * allmeasured, std::string * idnames);
+
void get_var(int var, float * data);
+
~gendata();
// MAKE THAT PRIVATE, ACCESS THROUGH GET_SNP
Modified: pkg/ProbABEL/tests/Makefile.am
===================================================================
--- pkg/ProbABEL/tests/Makefile.am 2012-11-27 22:27:07 UTC (rev 1029)
+++ pkg/ProbABEL/tests/Makefile.am 2012-11-27 23:51:20 UTC (rev 1030)
@@ -2,13 +2,14 @@
AUTOMAKE_OPTIONS = foreign color-tests
-tests_files = check_probabel.pl_chunk.sh
+tests_files = check_probabel.pl_chunk.sh check_dose_input.sh
verified_results = verified_results/height_add.out.txt \
verified_results/height_ngp2_add.out.txt \
verified_results/height_ngp2_2df.out.txt \
verified_results/height_ngp2_domin.out.txt \
verified_results/height_ngp2_over_domin.out.txt \
- verified_results/height_ngp2_recess.out.txt
+ verified_results/height_ngp2_recess.out.txt \
+ verified_results/height_base_add.out.txt
dose_files = chr1.chunk1.dose chr1.chunk2.dose chr2.chunk1.dose \
chr2.chunk2.dose chr1.dose chr2.dose
@@ -26,9 +27,9 @@
height_domin.out.txt height_over_domin.out.txt height_recess.out.txt \
height_ngp2_recess.out.txt height_ngp2_domin.out.txt \
height_ngp2_over_domin.out.txt height_ngp2_2df.out.txt \
- height_ngp2_add.out.txt
+ height_ngp2_add.out.txt height_base_add.out.txt
-other_files = probabel.pl probabel_config.cfg height.PHE
+other_files = probabel.pl probabel_config.cfg height.PHE test.mldose
@@ -36,7 +37,7 @@
dist_tests_DATA = $(tests_files) $(verified_results)
TESTS_ENVIRONMENT = sh
-check_SCRIPTS = check_probabel.pl_chunk.sh
+check_SCRIPTS = $(tests_files)
TESTS = $(check_SCRIPTS)
Added: pkg/ProbABEL/tests/check_dose_input.sh
===================================================================
--- pkg/ProbABEL/tests/check_dose_input.sh (rev 0)
+++ pkg/ProbABEL/tests/check_dose_input.sh 2012-11-27 23:51:20 UTC (rev 1030)
@@ -0,0 +1,33 @@
+#!/bin/sh
+#
+# This script tests whether dose data without a MaCH/minimac-style
+# arrow is read correctly by palinear (and by palogist, since reading
+# genetic data is done using the same code).
+
+echo "Checking palinear with dose data without '->'"
+if [ -z ${srcdir} ]; then
+ srcdir="."
+fi
+
+exampledir="${srcdir}/../examples/"
+results="${srcdir}/verified_results/"
+outfile="height_base_add.out.txt"
+
+sed 's/^[[:digit:]]*->//' $exampledir/test.mldose > test.mldose
+
+
+../src/palinear \
+ -p ${exampledir}/height.txt \
+ -d test.mldose \
+ -i ${exampledir}/test.mlinfo \
+ -m ${exampledir}/test.map \
+ -c 19 \
+ -o height_base
+
+echo -n " Verifying $outfile: "
+if diff $outfile $results/$outfile; then
+ echo -e "\t\tOK"
+else
+ echo -e "\t\tFAILED"
+ exit 1
+fi
Property changes on: pkg/ProbABEL/tests/check_dose_input.sh
___________________________________________________________________
Added: svn:executable
+ *
Added: pkg/ProbABEL/tests/verified_results/height_base_add.out.txt
===================================================================
--- pkg/ProbABEL/tests/verified_results/height_base_add.out.txt (rev 0)
+++ pkg/ProbABEL/tests/verified_results/height_base_add.out.txt 2012-11-27 23:51:20 UTC (rev 1030)
@@ -0,0 +1,6 @@
+name A1 A2 Freq1 MAF Quality Rsq n Mean_predictor_allele chrom position beta_SNP_add sebeta_SNP_add loglik
+rs7247199 G A 0.5847 0.415 0.9299 0.8666 182 0.56444 19 204938 -0.218693 0.734966 -428.971
+rs8102643 C T 0.5847 0.415 0.9308 0.8685 182 0.564412 19 207859 -0.218352 0.734214 -428.971
+rs8102615 T A 0.5006 0.4702 0.9375 0.8932 181 0.498997 19 211970 0.280289 0.728449 -427.081
+rs8105536 G A 0.5783 0.4213 0.9353 0.8832 182 0.561132 19 212033 -0.216918 0.7259 -428.971
+rs2312724 T C 0.9122 0.0877 0.9841 0.9232 182 0.929684 19 217034 2.23335 1.30142 -427.523
More information about the Genabel-commits
mailing list