[Genabel-commits] r1660 - branches/ProbABEL-0.50/src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Mar 25 22:24:03 CET 2014


Author: maartenk
Date: 2014-03-25 22:24:03 +0100 (Tue, 25 Mar 2014)
New Revision: 1660

Modified:
   branches/ProbABEL-0.50/src/gendata.cpp
   branches/ProbABEL-0.50/src/main.cpp
Log:
-  reading mldose/mlprob are timed and shown in output
-Replaced for reading mldose/mlprob files the buildin strtod function to home build version. This speeds up the reading the file about a two fold

Modified: branches/ProbABEL-0.50/src/gendata.cpp
===================================================================
--- branches/ProbABEL-0.50/src/gendata.cpp	2014-03-21 23:09:15 UTC (rev 1659)
+++ branches/ProbABEL-0.50/src/gendata.cpp	2014-03-25 21:24:03 UTC (rev 1660)
@@ -28,6 +28,7 @@
 
 #include <string>
 #include <errno.h>
+#include <limits>
 #include "gendata.h"
 #include "fvlib/FileVector.h"
 #if EIGEN
@@ -39,6 +40,57 @@
 #endif
 #include "utilities.h"
 
+double mldose_strtod(const char *str_pointer) {
+    // This function is inspired on some answers found at stackoverflow :
+    // eg question 5678932
+    int sign = 0;
+    double result = 0;
+    //check if not a null pointer
+    if (!*str_pointer ){
+        return std::numeric_limits<double>::quiet_NaN();
+    }
+    //skip whitespace
+    while (*str_pointer == ' ')
+    {
+        str_pointer++;
+    }
+    //set sign to -1 if negative: multiply by sign just before return
+    if (*str_pointer == '-')
+    {
+        str_pointer++;
+        sign = -1;
+    }
+    //read digits before dot
+    while (*str_pointer <= '9' && *str_pointer >= '0'){
+        result = result * 10 + (*str_pointer++ - '0');
+    }
+    //read digit after dot
+    if (*str_pointer == '.')
+    {
+        double decimal_counter = 1.0;
+        str_pointer++;
+        while (*str_pointer <= '9' && *str_pointer >= '0')
+        {
+            decimal_counter *= 0.1;
+            result += (*str_pointer++ - '0') * decimal_counter;
+        }
+    }
+    //str_pointer should be null since all characters are read.
+    if (*str_pointer){
+        perror("Error while reading genetic data (mldose_strtod)");
+                          exit(EXIT_FAILURE);
+    }
+    //correct for negative number
+    if (sign == -1){
+        return sign * result;
+    }else{
+        return result;
+    }
+
+}
+
+
+
 void gendata::get_var(int var, double * data)
 {
     // Read the genetic data for SNP 'var' and store in the array 'data'
@@ -165,8 +217,10 @@
     DAG     = NULL;
     //	int nids_all = npeople;
 
+
     G.reinit(nids, (nsnps * ngpreds));
 
+    
     std::ifstream infile;
 
     infile.open(fname);
@@ -224,14 +278,15 @@
                     char *endptr;
                     errno = 0;      // To distinguish success/failure
                                     // after strtod()
-                    dosage = strtod(tmpstr.c_str(), &endptr);
 
-                    if ((errno == ERANGE &&
-                         (dosage == HUGE_VALF || dosage == HUGE_VALL))
-                        || (errno != 0 && dosage == 0)) {
-                        perror("Error while reading genetic data (strtod)");
-                        exit(EXIT_FAILURE);
-                    }
+                    dosage = mldose_strtod(tmpstr.c_str());
+                    //dosage = strtod(tmpstr.c_str(), &endptr);
+//                    if ((errno == ERANGE &&
+//                         (dosage == HUGE_VALF || dosage == HUGE_VALL))
+//                        || (errno != 0 && dosage == 0)) {
+//                        perror("Error while reading genetic data (strtod)");
+//                        exit(EXIT_FAILURE);
+//                    }
 
                     if (endptr == tmpstr.c_str()) {
                         cerr << "No digits were found while reading genetic data"
@@ -263,6 +318,7 @@
         }
     }
     infile.close();
+
 }
 // HERE NEED A NEW CONSTRUCTOR BASED ON DATABELBASECPP OBJECT
 gendata::~gendata()

Modified: branches/ProbABEL-0.50/src/main.cpp
===================================================================
--- branches/ProbABEL-0.50/src/main.cpp	2014-03-21 23:09:15 UTC (rev 1659)
+++ branches/ProbABEL-0.50/src/main.cpp	2014-03-25 21:24:03 UTC (rev 1660)
@@ -65,6 +65,9 @@
 #include <iomanip>
 #include <vector>
 
+#include <ctime> //needed for timing loading non file vector format
+
+
 #if EIGEN
 #include "eigen_mematrix.h"
 #include "eigen_mematrix.cpp"
@@ -124,10 +127,18 @@
     cout << "Reading genotype data... " << flush;
     if (!input_var.getIsFvf())
     {
+        // make clock to time loading of the non filevector file
+        std::clock_t    start;
+        start = std::clock();
+
         // use the non-filevector input format
         gtd.re_gendata(input_var.getGenfilename(), nsnps,
                        input_var.getNgpreds(), phd.nids_all, phd.nids,
                        phd.allmeasured, input_var.getSkipd(), phd.idnames);
+
+        double milisec=((std::clock() - start) / (double)(CLOCKS_PER_SEC / 1000))/1000;
+        cout << "done in "<< milisec<< " seconds.\n" << flush;
+
     }
     else
     {
@@ -136,8 +147,9 @@
         gtd.re_gendata(input_var.getStrGenfilename(), nsnps,
                        input_var.getNgpreds(), phd.nids_all, phd.nids,
                        phd.allmeasured, phd.idnames);
+        cout << "done.\n" << flush;
+
     }
-    cout << "done.\n" << flush;
 
 
     // estimate null model



More information about the Genabel-commits mailing list