[Genabel-commits] r1528 - pkg/ProbABEL/src

Mon Jan 6 11:47:44 CET 2014

Author: lckarssen
Date: 2014-01-06 11:47:44 +0100 (Mon, 06 Jan 2014)
New Revision: 1528

Modified:
   pkg/ProbABEL/src/coxph_data.cpp
   pkg/ProbABEL/src/gendata.cpp
   pkg/ProbABEL/src/gendata.h
   pkg/ProbABEL/src/regdata.cpp
Log:
ProbABEL: intermediate commit. Most checks fail.

Maarten's profiling showed that the conversion to double when reading in genetic data resulted in a huge performance bottleneck. Since the genetic data is not stored with more than a few significant digits anyway, it doesn't even make sense to use double precision. 

With this commit I removed the conversion to double, but now most checks fail (mostly when comparing prob vs. dose input). Needs more time to fix. 


Modified: pkg/ProbABEL/src/coxph_data.cpp
===================================================================

--- pkg/ProbABEL/src/coxph_data.cpp	2014-01-02 16:32:42 UTC (rev 1527)
+++ pkg/ProbABEL/src/coxph_data.cpp	2014-01-06 10:47:44 UTC (rev 1528)
@@ -125,7 +125,7 @@
     {
         for (int j = 0; j < ngpreds; j++)
         {
-            double *snpdata = new double[nids];
+            float *snpdata = new float[nids];
             gend.get_var(snpnum * ngpreds + j, snpdata);
             for (int i = 0; i < nids; i++)
             {
@@ -216,7 +216,7 @@
     freq   = 0.0;
 
     for (int j = 0; j < ngpreds; j++) {
-        double *snpdata = new double[nids];
+        float *snpdata = new float[nids];
         for (int i = 0; i < nids; i++) {
             masked_data[i] = 0;
         }

Modified: pkg/ProbABEL/src/gendata.cpp
===================================================================
--- pkg/ProbABEL/src/gendata.cpp	2014-01-02 16:32:42 UTC (rev 1527)
+++ pkg/ProbABEL/src/gendata.cpp	2014-01-06 10:47:44 UTC (rev 1528)
@@ -17,7 +17,7 @@
 #endif
 #include "utilities.h"
 
-void gendata::get_var(int var, double * data)
+void gendata::get_var(int var, float * data)
 {
     // Read the genetic data for SNP 'var' and store in the array 'data'
 
@@ -30,7 +30,7 @@
     }
     else if (DAG != NULL)       // Read from fv file
     {
-        double *tmpdata = new double[DAG->getNumObservations()];
+        float *tmpdata = new float[DAG->getNumObservations()];
         DAG->readVariableAs((unsigned long int) var, tmpdata);
 
         unsigned int j = 0;
@@ -38,33 +38,7 @@
         {
             if (!DAGmask[i])
             {
-                // A dirty trick to get rid of conversion
-                // errors. Instead of casting float data to double we
-                // convert the data to string and then do strtod()
-                std::ostringstream strs;
-                strs << tmpdata[i];
-                std::string str = strs.str();
-                double val;
-                char *endptr;
-                errno = 0;      // To distinguish success/failure
-                                // after strtod()
-                val = strtod(str.c_str(), &endptr);
-
-                if ((errno == ERANGE && (val == HUGE_VALF || val == HUGE_VALL))
-                    || (errno != 0 && val == 0)) {
-                    perror("Error while reading genetic data (strtod)");
-                    exit(EXIT_FAILURE);
-                }
-
-                if (endptr == str.c_str()) {
-                    cerr << "No digits were found while reading genetic data"
-                         << " (individual " << i + 1
-                         << ", position " << var + 1 << ")"
-                         << endl;
-                    exit(EXIT_FAILURE);
-                }
-                /* If we got here, strtod() successfully parsed a number */
-                data[j++] = val;
+                data[j++] = tmpdata[i];
             }
         }
         delete[] tmpdata;

Modified: pkg/ProbABEL/src/gendata.h
===================================================================
--- pkg/ProbABEL/src/gendata.h	2014-01-02 16:32:42 UTC (rev 1527)
+++ pkg/ProbABEL/src/gendata.h	2014-01-06 10:47:44 UTC (rev 1528)
@@ -32,7 +32,7 @@
             unsigned int npeople, unsigned int nmeasured,
             unsigned short int * allmeasured, std::string * idnames);
 
-    void get_var(int var, double * data);
+    void get_var(int var, float * data);
 
     ~gendata();
 

Modified: pkg/ProbABEL/src/regdata.cpp
===================================================================
--- pkg/ProbABEL/src/regdata.cpp	2014-01-02 16:32:42 UTC (rev 1527)
+++ pkg/ProbABEL/src/regdata.cpp	2014-01-06 10:47:44 UTC (rev 1528)
@@ -90,7 +90,7 @@
     if (snpnum > 0)
         for (int j = 0; j < ngpreds; j++)
         {
-            double *snpdata = new double[nids];
+            float *snpdata = new float[nids];
             gend.get_var(snpnum * ngpreds + j, snpdata);
             for (int i = 0; i < nids; i++)
             {
@@ -115,7 +115,7 @@
     // matrix X
     for (int j = 0; j < ngpreds; j++)
     {
-        double *snpdata = new double[nids];
+        float *snpdata = new float[nids];
         for (int i = 0; i < nids; i++)
         {
             masked_data[i] = 0;