[Genabel-commits] r1540 - pkg/ProbABEL/src

Sat Jan 11 11:35:31 CET 2014

Author: lckarssen
Date: 2014-01-11 11:35:31 +0100 (Sat, 11 Jan 2014)
New Revision: 1540

Modified:
   pkg/ProbABEL/src/coxph_data.cpp
   pkg/ProbABEL/src/gendata.cpp
   pkg/ProbABEL/src/gendata.h
   pkg/ProbABEL/src/regdata.cpp
Log:
ProbABEL: change back to using doubles instead of floats. With floats reading from text files worked, but reading from filevector files gave rounding errors (even though the fv files were of type FLOAT). Now I've replaced the conversion that was present in v0.4.2 from C++ style to C style and this seems to work.


Modified: pkg/ProbABEL/src/coxph_data.cpp
===================================================================

--- pkg/ProbABEL/src/coxph_data.cpp	2014-01-11 00:56:55 UTC (rev 1539)
+++ pkg/ProbABEL/src/coxph_data.cpp	2014-01-11 10:35:31 UTC (rev 1540)
@@ -125,7 +125,7 @@
     {
         for (int j = 0; j < ngpreds; j++)
         {
-            float *snpdata = new float[nids];
+            double *snpdata = new double[nids];
             gend.get_var(snpnum * ngpreds + j, snpdata);
             for (int i = 0; i < nids; i++)
             {
@@ -216,7 +216,7 @@
     freq   = 0.0;
 
     for (int j = 0; j < ngpreds; j++) {
-        float *snpdata = new float[nids];
+        double *snpdata = new double[nids];
         for (int i = 0; i < nids; i++) {
             masked_data[i] = 0;
         }

Modified: pkg/ProbABEL/src/gendata.cpp
===================================================================
--- pkg/ProbABEL/src/gendata.cpp	2014-01-11 00:56:55 UTC (rev 1539)
+++ pkg/ProbABEL/src/gendata.cpp	2014-01-11 10:35:31 UTC (rev 1540)
@@ -17,7 +17,7 @@
 #endif
 #include "utilities.h"
 
-void gendata::get_var(int var, float * data)
+void gendata::get_var(int var, double * data)
 {
     // Read the genetic data for SNP 'var' and store in the array 'data'
 
@@ -30,7 +30,9 @@
     }
     else if (DAG != NULL)       // Read from fv file
     {
-        float *tmpdata = new float[DAG->getNumObservations()];
+        // cout << "Data Type: " << dataTypeToString(DAG->getElementType())
+        //      << endl;
+        double *tmpdata = new double[DAG->getNumObservations()];
         DAG->readVariableAs((unsigned long int) var, tmpdata);
 
         unsigned int j = 0;
@@ -38,7 +40,33 @@
         {
             if (!DAGmask[i])
             {
-                data[j++] = tmpdata[i];
+                // A dirty trick to get rid of conversion
+                // errors. Instead of casting float data to double we
+                // convert the data to string and then do strtod()
+                char tmpstr[1048576];
+                snprintf (tmpstr, sizeof(tmpstr), "%f", tmpdata[i]);
+
+                double val;
+                char *endptr;
+                errno = 0;      // To distinguish success/failure
+                                // after strtod()
+                val = strtod(tmpstr, &endptr);
+
+                if ((errno == ERANGE && (val == HUGE_VALF || val == HUGE_VALL))
+                    || (errno != 0 && val == 0)) {
+                    perror("Error while reading genetic data (strtod)");
+                    exit(EXIT_FAILURE);
+                }
+
+                if (endptr == tmpstr) {
+                    cerr << "No digits were found while reading genetic data"
+                         << " (individual " << i + 1
+                         << ", position " << var + 1 << ")"
+                         << endl;
+                    exit(EXIT_FAILURE);
+                }
+                /* If we got here, strtod() successfully parsed a number */
+                data[j++] = val;
             }
         }
         delete[] tmpdata;
@@ -167,8 +195,9 @@
                 if (infile.good())
                 {
                     infile >> tmpstr;
-                    // tmpstr contains the dosage in string form. Convert
-                    // it to double (if tmpstr is NaN it will be set to nan).
+                    // tmpstr contains the dosage/probability in
+                    // string form. Convert it to double (if tmpstr is
+                    // NaN it will be set to nan).
                     double dosage;
                     char *endptr;
                     errno = 0;      // To distinguish success/failure

Modified: pkg/ProbABEL/src/gendata.h
===================================================================
--- pkg/ProbABEL/src/gendata.h	2014-01-11 00:56:55 UTC (rev 1539)
+++ pkg/ProbABEL/src/gendata.h	2014-01-11 10:35:31 UTC (rev 1540)
@@ -32,7 +32,7 @@
             unsigned int npeople, unsigned int nmeasured,
             unsigned short int * allmeasured, std::string * idnames);
 
-    void get_var(int var, float * data);
+    void get_var(int var, double * data);
 
     ~gendata();
 

Modified: pkg/ProbABEL/src/regdata.cpp
===================================================================
--- pkg/ProbABEL/src/regdata.cpp	2014-01-11 00:56:55 UTC (rev 1539)
+++ pkg/ProbABEL/src/regdata.cpp	2014-01-11 10:35:31 UTC (rev 1540)
@@ -90,7 +90,7 @@
     if (snpnum > 0)
         for (int j = 0; j < ngpreds; j++)
         {
-            float *snpdata = new float[nids];
+            double *snpdata = new double[nids];
             gend.get_var(snpnum * ngpreds + j, snpdata);
             for (int i = 0; i < nids; i++)
             {
@@ -115,7 +115,7 @@
     // matrix X
     for (int j = 0; j < ngpreds; j++)
     {
-        float *snpdata = new float[nids];
+        double *snpdata = new double[nids];
         for (int i = 0; i < nids; i++)
         {
             masked_data[i] = 0;
@@ -125,6 +125,7 @@
 
         for (int i = 0; i < nids; i++) {
             X.put(snpdata[i], i, (ncov - j));
+
             if (std::isnan(snpdata[i])) {
                 masked_data[i] = 1;
                 // SNP not masked