[Genabel-commits] r1030 - in pkg/ProbABEL: src tests tests/verified_results

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Nov 28 00:51:20 CET 2012


Author: lckarssen
Date: 2012-11-28 00:51:20 +0100 (Wed, 28 Nov 2012)
New Revision: 1030

Added:
   pkg/ProbABEL/tests/check_dose_input.sh
   pkg/ProbABEL/tests/verified_results/height_base_add.out.txt
Modified:
   pkg/ProbABEL/src/gendata.cpp
   pkg/ProbABEL/src/gendata.h
   pkg/ProbABEL/tests/Makefile.am
Log:
ProbABEL:
- Removed sscanf() call because cppcheck was complaining about a possible buffer overflow. sscanf() was used to check for the presence of the Mach/Minimac -> arrow in the ID names (when using a plain text dose file). 

- Added a check with a dose file that doesn't contain arrows (->).

- Tried to fix a bug where valgrind reported a jump based on a possibly uninitialised value. This was caused by the atof() call when reading in dosages from a text file. Unfortunately valgrind seems to be broken, because the present solution based on the string class gives other errors: "Invalid read of size 8" when encountering a NaN value in the dosages. This has been reported as a bug in Valgrind.

- cppcheck complained that several variables in the gendata class should be initialised using initialisation lists for a possible speedup. Fixed now.


Modified: pkg/ProbABEL/src/gendata.cpp
===================================================================
--- pkg/ProbABEL/src/gendata.cpp	2012-11-27 22:27:07 UTC (rev 1029)
+++ pkg/ProbABEL/src/gendata.cpp	2012-11-27 23:51:20 UTC (rev 1030)
@@ -35,11 +35,8 @@
         report_error("cannot get gendata");
 }
 
-gendata::gendata()
+gendata::gendata() : nsnps(0), nids(0), ngpreds(0), DAG(NULL), DAGmask(NULL)
 {
-    nsnps = nids = ngpreds = 0;
-    DAG = NULL;
-    DAGmask = NULL;
 }
 
 void gendata::re_gendata(string filename, unsigned int insnps,
@@ -85,6 +82,7 @@
         report_error("nids != mneasured (%i != %i)\n", nids, nmeasured);
 
 }
+
 void gendata::re_gendata(char * fname, unsigned int insnps,
         unsigned int ingpreds, unsigned int npeople, unsigned int nmeasured,
         unsigned short int * allmeasured, int skipd, std::string * idnames)
@@ -105,68 +103,70 @@
         std::cerr << "gendata: cannot open file " << fname << endl;
     }
 
-    char tmp[100], tmpn[100];
     std::string tmpid, tmpstr;
 
     int k = 0;
     for (unsigned int i = 0; i < npeople; i++)
+    {
         if (allmeasured[i] == 1)
         {
             if (skipd > 0)
             {
-                //				int ttt;
-                char ttt[100];
-                infile >> tmp;
-                //				sscanf(tmp,"%d->%s",&ttt, tmpn);
-                //		these changes are thanks to BMM & BP :)
-                //				sscanf(tmp,"%s->%s",&ttt, tmpn);
-                //				sscanf(tmp,"%[^->]->%[^->]",&ttt, tmpn);
-                tmpstr = tmp;
-                if (tmpstr.find("->") != string::npos)
+                // Read the genotype data and look for the signature
+                // arrow of MaCH/minimac. If found only use the part
+                // after the arrow as ID.
+                infile >> tmpstr;
+                size_t strpos = tmpstr.find("->") ;
+                if (strpos != string::npos)
                 {
-                    sscanf(tmp, "%[^->]->%s", ttt, tmpn);
-                    tmpid = tmpn;
+                    tmpid = tmpstr.substr(strpos+2, string::npos);
                 } else
                 {
                     tmpid = tmpstr;
-                    // std::cout << tmp << ";" << ttt << ";" << tmpn
-                    // << tmpid.c_str() << ";" << idnames[k].c_str() << "\n";
                 }
                 if (tmpid != idnames[k])
                 {
                     cerr << "phenotype file and dose or probability file "
-                            << "did not match at line " << i + 2 << "(" << tmpid
-                            << " != " << idnames[k] << ")" << endl;
+                         << "did not match at line " << i + 2 << "(" << tmpid
+                         << " != " << idnames[k] << ")" << endl;
                     infile.close();
                     exit(1);
                 }
             }
+
             for (int j = 1; j < skipd; j++)
             {
-                infile >> tmp;
+                infile >> tmpstr;
             }
+
             for (int j = 0; j < (nsnps * ngpreds); j++)
             {
                 if (infile.good())
                 {
-                    infile >> tmp;
+                    infile >> tmpstr;
+                    // tmpstr contains the dosage in string form. Convert
+                    // it to float (if tmpstr is NaN it will be set to nan).
+                    // Note that Valgrind 3.7.0 gives "Invalid read of
+                    // size 8" error messages here. A bug in Valgrind?!
+                    float dosage = strtod(tmpstr.c_str(), (char **) NULL);
+                    G.put(dosage, k, j);
                 } else
                 {
-                    std::cerr
-                            << "cannot read dose-file: check skipd and ngpreds parameters\n";
+                    std::cerr << "cannot read dose-file: "
+                              << "check skipd and ngpreds parameters\n";
                     infile.close();
                     exit(1);
                 }
-                G.put(atof(tmp), k, j);
             }
             k++;
         } else
         {
             for (int j = 0; j < skipd; j++)
-                infile >> tmp;
+                infile >> tmpstr;
             for (int j = 0; j < (nsnps * ngpreds); j++)
-                infile >> tmp;
+                infile >> tmpstr;
         }
+    }
     infile.close();
 }
 // HERE NEED A NEW CONSTRUCTOR BASED ON DATABELBASECPP OBJECT

Modified: pkg/ProbABEL/src/gendata.h
===================================================================
--- pkg/ProbABEL/src/gendata.h	2012-11-27 22:27:07 UTC (rev 1029)
+++ pkg/ProbABEL/src/gendata.h	2012-11-27 23:51:20 UTC (rev 1030)
@@ -24,13 +24,17 @@
     unsigned int nids;
     unsigned int ngpreds;
     gendata();
+
     void re_gendata(char * fname, unsigned int insnps, unsigned int ingpreds,
             unsigned int npeople, unsigned int nmeasured,
             unsigned short int * allmeasured, int skipd, std::string * idnames);
+
     void re_gendata(string filename, unsigned int insnps, unsigned int ingpreds,
             unsigned int npeople, unsigned int nmeasured,
             unsigned short int * allmeasured, std::string * idnames);
+
     void get_var(int var, float * data);
+
     ~gendata();
 
     // MAKE THAT PRIVATE, ACCESS THROUGH GET_SNP

Modified: pkg/ProbABEL/tests/Makefile.am
===================================================================
--- pkg/ProbABEL/tests/Makefile.am	2012-11-27 22:27:07 UTC (rev 1029)
+++ pkg/ProbABEL/tests/Makefile.am	2012-11-27 23:51:20 UTC (rev 1030)
@@ -2,13 +2,14 @@
 
 AUTOMAKE_OPTIONS = foreign color-tests
 
-tests_files = check_probabel.pl_chunk.sh
+tests_files = check_probabel.pl_chunk.sh check_dose_input.sh
 verified_results = verified_results/height_add.out.txt	\
  verified_results/height_ngp2_add.out.txt			\
  verified_results/height_ngp2_2df.out.txt			\
  verified_results/height_ngp2_domin.out.txt			\
  verified_results/height_ngp2_over_domin.out.txt		\
- verified_results/height_ngp2_recess.out.txt
+ verified_results/height_ngp2_recess.out.txt                    \
+ verified_results/height_base_add.out.txt
 
 dose_files = chr1.chunk1.dose chr1.chunk2.dose chr2.chunk1.dose	\
  chr2.chunk2.dose chr1.dose chr2.dose
@@ -26,9 +27,9 @@
  height_domin.out.txt height_over_domin.out.txt height_recess.out.txt	\
  height_ngp2_recess.out.txt height_ngp2_domin.out.txt			\
  height_ngp2_over_domin.out.txt height_ngp2_2df.out.txt			\
- height_ngp2_add.out.txt
+ height_ngp2_add.out.txt height_base_add.out.txt
 
-other_files = probabel.pl probabel_config.cfg height.PHE
+other_files = probabel.pl probabel_config.cfg height.PHE test.mldose
 
 
 
@@ -36,7 +37,7 @@
 dist_tests_DATA = $(tests_files) $(verified_results)
 
 TESTS_ENVIRONMENT = sh
-check_SCRIPTS = check_probabel.pl_chunk.sh
+check_SCRIPTS = $(tests_files)
 
 TESTS = $(check_SCRIPTS)
 

Added: pkg/ProbABEL/tests/check_dose_input.sh
===================================================================
--- pkg/ProbABEL/tests/check_dose_input.sh	                        (rev 0)
+++ pkg/ProbABEL/tests/check_dose_input.sh	2012-11-27 23:51:20 UTC (rev 1030)
@@ -0,0 +1,33 @@
+#!/bin/sh
+#
+# This script tests whether dose data without a MaCH/minimac-style
+# arrow is read correctly by palinear (and by palogist, since reading
+# genetic data is done using the same code).
+
+echo "Checking palinear with dose data without '->'"
+if [ -z ${srcdir} ]; then
+    srcdir="."
+fi
+
+exampledir="${srcdir}/../examples/"
+results="${srcdir}/verified_results/"
+outfile="height_base_add.out.txt"
+
+sed 's/^[[:digit:]]*->//' $exampledir/test.mldose > test.mldose
+
+
+../src/palinear \
+    -p ${exampledir}/height.txt \
+    -d test.mldose \
+    -i ${exampledir}/test.mlinfo \
+    -m ${exampledir}/test.map \
+    -c 19 \
+    -o height_base
+
+echo -n "  Verifying $outfile: "
+if diff $outfile $results/$outfile; then
+    echo -e "\t\tOK"
+else
+    echo -e "\t\tFAILED"
+    exit 1
+fi


Property changes on: pkg/ProbABEL/tests/check_dose_input.sh
___________________________________________________________________
Added: svn:executable
   + *

Added: pkg/ProbABEL/tests/verified_results/height_base_add.out.txt
===================================================================
--- pkg/ProbABEL/tests/verified_results/height_base_add.out.txt	                        (rev 0)
+++ pkg/ProbABEL/tests/verified_results/height_base_add.out.txt	2012-11-27 23:51:20 UTC (rev 1030)
@@ -0,0 +1,6 @@
+name A1 A2 Freq1 MAF Quality Rsq n Mean_predictor_allele chrom position beta_SNP_add sebeta_SNP_add loglik
+rs7247199 G A 0.5847 0.415 0.9299 0.8666 182 0.56444 19 204938 -0.218693 0.734966 -428.971
+rs8102643 C T 0.5847 0.415 0.9308 0.8685 182 0.564412 19 207859 -0.218352 0.734214 -428.971
+rs8102615 T A 0.5006 0.4702 0.9375 0.8932 181 0.498997 19 211970 0.280289 0.728449 -427.081
+rs8105536 G A 0.5783 0.4213 0.9353 0.8832 182 0.561132 19 212033 -0.216918 0.7259 -428.971
+rs2312724 T C 0.9122 0.0877 0.9841 0.9232 182 0.929684 19 217034 2.23335 1.30142 -427.523



More information about the Genabel-commits mailing list