[Genabel-commits] r1664 - branches/ProbABEL-0.50/src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Mar 28 20:12:41 CET 2014


Author: maartenk
Date: 2014-03-28 20:12:41 +0100 (Fri, 28 Mar 2014)
New Revision: 1664

Modified:
   branches/ProbABEL-0.50/src/gendata.cpp
   branches/ProbABEL-0.50/src/gendata.h
Log:
new implementation of reading in numbers of mldose file: this version is about a 10(!) fold faster than in ProABEL 0.42 

Modified: branches/ProbABEL-0.50/src/gendata.cpp
===================================================================
--- branches/ProbABEL-0.50/src/gendata.cpp	2014-03-27 21:16:16 UTC (rev 1663)
+++ branches/ProbABEL-0.50/src/gendata.cpp	2014-03-28 19:12:41 UTC (rev 1664)
@@ -40,58 +40,69 @@
 #endif
 #include "utilities.h"
 
-double mldose_strtod(const char *str_pointer) {
-    // This function is inspired on some answers found at stackoverflow :
-    // eg question 5678932
-    int sign = 0;
-    double result = 0;
-    //check if not a null pointer or NaN (right now checks only first character)
-//TODO: make catching of NaN more rigid
-    if (!*str_pointer | *str_pointer == 'N'){
-        return std::numeric_limits<double>::quiet_NaN();
+
+void gendata::mldose_line_to_matrix(int k,const char *all_numbers,int amount_of_numbers){
+    int j = 0;
+    //check if not a null pointer
+    if (!*all_numbers){
+        perror("Error while reading genetic data (expected pointer to char but found a null pointer)");
+                       exit(EXIT_FAILURE);
     }
-    //skip whitespace
-    while (*str_pointer == ' ')
+    while (j<amount_of_numbers)
     {
-        str_pointer++;
-    }
-    //set sign to -1 if negative: multiply by sign just before return
-    if (*str_pointer == '-')
-    {
-        str_pointer++;
-        sign = -1;
-    }
-    //read digits before dot
-    while (*str_pointer <= '9' && *str_pointer >= '0'){
-        result = result * 10 + (*str_pointer++ - '0');
-    }
-    //read digit after dot
-    if (*str_pointer == '.')
-    {
-        double decimal_counter = 1.0;
-        str_pointer++;
-        while (*str_pointer <= '9' && *str_pointer >= '0')
+        double result = 0;
+        //skip whitespace
+        while (*all_numbers == ' ')
         {
-            decimal_counter *= 0.1;
-            result += (*str_pointer++ - '0') * decimal_counter;
+            all_numbers++;
         }
+        //check NaN (right now checks only first character)
+        //TODO: make catching of NaN more rigid
+        if (*all_numbers == 'N')
+        {
+            result = std::numeric_limits<double>::quiet_NaN();
+            //skip other characters of NaN
+            while ((*all_numbers == 'a') | (*all_numbers == 'N'))
+            {
+                all_numbers++;
+            }
+        }
+        else
+        {
+            int sign = 0;
+            //set sign to -1 if negative: multiply by sign just before return
+            if (*all_numbers == '-')
+            {
+                all_numbers++;
+                sign = -1;
+            }
+            //read digits before dot
+            while (*all_numbers <= '9' && *all_numbers >= '0')
+            {
+                result = result * 10 + (*all_numbers++ - '0');
+            }
+            //read digit after dot
+            if (*all_numbers == '.')
+            {
+                double decimal_counter = 1.0;
+                all_numbers++;
+                while (*all_numbers <= '9' && *all_numbers >= '0')
+                {
+                    decimal_counter *= 0.1;
+                    result += (*all_numbers++ - '0') * decimal_counter;
+                }
+            }
+            //correct for negative number
+            if (sign == -1)
+            {
+                result = sign * result;
+            }
+        }
+        G.put(result, k, j);
+        j++;
     }
-    //str_pointer should be null since all characters are read.
-    if (*str_pointer){
-        perror("Error while reading genetic data (mldose_strtod)");
-                          exit(EXIT_FAILURE);
-    }
-    //correct for negative number
-    if (sign == -1){
-        return sign * result;
-    }else{
-        return result;
-    }
-
 }
 
-
-
 void gendata::get_var(int var, double * data)
 {
     // Read the genetic data for SNP 'var' and store in the array 'data'
@@ -246,7 +257,7 @@
                 size_t strpos = tmpstr.find("->");
                 if (strpos != string::npos)
                 {
-                    tmpid = tmpstr.substr(strpos+2, string::npos);
+                    tmpid = tmpstr.substr(strpos + 2, string::npos);
                 }
                 else
                 {
@@ -255,8 +266,8 @@
                 if (tmpid != idnames[k])
                 {
                     cerr << "phenotype file and dose or probability file "
-                         << "did not match at line " << i + 2 << " (" << tmpid
-                         << " != " << idnames[k] << ")" << endl;
+                            << "did not match at line " << i + 2 << " ("
+                            << tmpid << " != " << idnames[k] << ")" << endl;
                     infile.close();
                     exit(1);
                 }
@@ -267,47 +278,58 @@
                 infile >> tmpstr;
             }
 
-            for (unsigned int j = 0; j < (nsnps * ngpreds); j++)
+            int oldstyle = 0;
+            if (oldstyle == 1)
             {
-                if (infile.good())
+                for (unsigned int j = 0; j < (nsnps * ngpreds); j++)
                 {
-                    infile >> inStr;
-                    // tmpstr contains the dosage/probability in
-                    // string form. Convert it to double (if tmpstr is
-                    // NaN it will be set to nan).
-                    double dosage;
-                    char *endptr;
-                    errno = 0;      // To distinguish success/failure
-                                    // after strtod()
+                    if (infile.good())
+                    {
+                        infile >> inStr;
+                        // tmpstr contains the dosage/probability in
+                        // string form. Convert it to double (if tmpstr is
+                        // NaN it will be set to nan).
+                        double dosage;
+                        char *endptr;
+                        errno = 0;      // To distinguish success/failure
+                                        // after strtod()
 
-                    dosage = mldose_strtod(inStr);
-                    //dosage = strtod(tmpstr.c_str(), &endptr);
-//                    if ((errno == ERANGE &&
-//                         (dosage == HUGE_VALF || dosage == HUGE_VALL))
-//                        || (errno != 0 && dosage == 0)) {
-//                        perror("Error while reading genetic data (strtod)");
-//                        exit(EXIT_FAILURE);
-//                    }
+                        dosage = strtod(inStr, &endptr);
+                        if ((errno == ERANGE
+                                && (dosage == HUGE_VALF || dosage == HUGE_VALL))
+                                || (errno != 0 && dosage == 0))
+                        {
+                            perror("Error while reading genetic data (strtod)");
+                            exit(EXIT_FAILURE);
+                        }
 
-                    if (endptr == tmpstr.c_str()) {
-                        cerr << "No digits were found while reading genetic data"
-                             << " (individual " << i + 1
-                             << ", position " << j + 1 << ")"
-                             << endl;
-                        exit(EXIT_FAILURE);
+                        if (endptr == tmpstr.c_str())
+                        {
+                            cerr
+                                    << "No digits were found while reading genetic data"
+                                    << " (individual " << i + 1 << ", position "
+                                    << j + 1 << ")" << endl;
+                            exit(EXIT_FAILURE);
+                        }
+                        /* If we got here, strtod() successfully parsed a number */
+                        G.put(dosage, k, j);
                     }
-
-                    /* If we got here, strtod() successfully parsed a number */
-                    G.put(dosage, k, j);
+                    else
+                    {
+                        std::cerr << "cannot read dose-file: " << fname
+                                << "check skipd and ngpreds parameters\n";
+                        infile.close();
+                        exit(1);
+                    }
                 }
-                else
-                {
-                    std::cerr << "cannot read dose-file: " << fname
-                              << "check skipd and ngpreds parameters\n";
-                    infile.close();
-                    exit(1);
-                }
             }
+            else
+            {
+                std::string all_numbers;
+                all_numbers.reserve(nsnps * ngpreds * 7);
+                std::getline(infile, all_numbers);
+                mldose_line_to_matrix(k, all_numbers.c_str(), nsnps * ngpreds);
+            }
             k++;
         }
         else

Modified: branches/ProbABEL-0.50/src/gendata.h
===================================================================
--- branches/ProbABEL-0.50/src/gendata.h	2014-03-27 21:16:16 UTC (rev 1663)
+++ branches/ProbABEL-0.50/src/gendata.h	2014-03-28 19:12:41 UTC (rev 1664)
@@ -44,7 +44,7 @@
     unsigned int nids;
     unsigned int ngpreds;
     gendata();
-    double convert(   char* source,  char** endPtr );
+    void mldose_line_to_matrix(int k,const char *all_numbers,int amount_of_numbers);
 
     void re_gendata(char * fname, unsigned int insnps, unsigned int ingpreds,
             unsigned int npeople, unsigned int nmeasured,



More information about the Genabel-commits mailing list