[Genabel-commits] r1664 - branches/ProbABEL-0.50/src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Mar 28 20:12:41 CET 2014
Author: maartenk
Date: 2014-03-28 20:12:41 +0100 (Fri, 28 Mar 2014)
New Revision: 1664
Modified:
branches/ProbABEL-0.50/src/gendata.cpp
branches/ProbABEL-0.50/src/gendata.h
Log:
new implementation of reading in numbers of mldose file: this version is about a 10(!) fold faster than in ProABEL 0.42
Modified: branches/ProbABEL-0.50/src/gendata.cpp
===================================================================
--- branches/ProbABEL-0.50/src/gendata.cpp 2014-03-27 21:16:16 UTC (rev 1663)
+++ branches/ProbABEL-0.50/src/gendata.cpp 2014-03-28 19:12:41 UTC (rev 1664)
@@ -40,58 +40,69 @@
#endif
#include "utilities.h"
-double mldose_strtod(const char *str_pointer) {
- // This function is inspired on some answers found at stackoverflow :
- // eg question 5678932
- int sign = 0;
- double result = 0;
- //check if not a null pointer or NaN (right now checks only first character)
-//TODO: make catching of NaN more rigid
- if (!*str_pointer | *str_pointer == 'N'){
- return std::numeric_limits<double>::quiet_NaN();
+
+void gendata::mldose_line_to_matrix(int k,const char *all_numbers,int amount_of_numbers){
+ int j = 0;
+ //check if not a null pointer
+ if (!*all_numbers){
+ perror("Error while reading genetic data (expected pointer to char but found a null pointer)");
+ exit(EXIT_FAILURE);
}
- //skip whitespace
- while (*str_pointer == ' ')
+ while (j<amount_of_numbers)
{
- str_pointer++;
- }
- //set sign to -1 if negative: multiply by sign just before return
- if (*str_pointer == '-')
- {
- str_pointer++;
- sign = -1;
- }
- //read digits before dot
- while (*str_pointer <= '9' && *str_pointer >= '0'){
- result = result * 10 + (*str_pointer++ - '0');
- }
- //read digit after dot
- if (*str_pointer == '.')
- {
- double decimal_counter = 1.0;
- str_pointer++;
- while (*str_pointer <= '9' && *str_pointer >= '0')
+ double result = 0;
+ //skip whitespace
+ while (*all_numbers == ' ')
{
- decimal_counter *= 0.1;
- result += (*str_pointer++ - '0') * decimal_counter;
+ all_numbers++;
}
+ //check NaN (right now checks only first character)
+ //TODO: make catching of NaN more rigid
+ if (*all_numbers == 'N')
+ {
+ result = std::numeric_limits<double>::quiet_NaN();
+ //skip other characters of NaN
+ while ((*all_numbers == 'a') | (*all_numbers == 'N'))
+ {
+ all_numbers++;
+ }
+ }
+ else
+ {
+ int sign = 0;
+ //set sign to -1 if negative: multiply by sign just before return
+ if (*all_numbers == '-')
+ {
+ all_numbers++;
+ sign = -1;
+ }
+ //read digits before dot
+ while (*all_numbers <= '9' && *all_numbers >= '0')
+ {
+ result = result * 10 + (*all_numbers++ - '0');
+ }
+ //read digit after dot
+ if (*all_numbers == '.')
+ {
+ double decimal_counter = 1.0;
+ all_numbers++;
+ while (*all_numbers <= '9' && *all_numbers >= '0')
+ {
+ decimal_counter *= 0.1;
+ result += (*all_numbers++ - '0') * decimal_counter;
+ }
+ }
+ //correct for negative number
+ if (sign == -1)
+ {
+ result = sign * result;
+ }
+ }
+ G.put(result, k, j);
+ j++;
}
- //str_pointer should be null since all characters are read.
- if (*str_pointer){
- perror("Error while reading genetic data (mldose_strtod)");
- exit(EXIT_FAILURE);
- }
- //correct for negative number
- if (sign == -1){
- return sign * result;
- }else{
- return result;
- }
-
}
-
-
void gendata::get_var(int var, double * data)
{
// Read the genetic data for SNP 'var' and store in the array 'data'
@@ -246,7 +257,7 @@
size_t strpos = tmpstr.find("->");
if (strpos != string::npos)
{
- tmpid = tmpstr.substr(strpos+2, string::npos);
+ tmpid = tmpstr.substr(strpos + 2, string::npos);
}
else
{
@@ -255,8 +266,8 @@
if (tmpid != idnames[k])
{
cerr << "phenotype file and dose or probability file "
- << "did not match at line " << i + 2 << " (" << tmpid
- << " != " << idnames[k] << ")" << endl;
+ << "did not match at line " << i + 2 << " ("
+ << tmpid << " != " << idnames[k] << ")" << endl;
infile.close();
exit(1);
}
@@ -267,47 +278,58 @@
infile >> tmpstr;
}
- for (unsigned int j = 0; j < (nsnps * ngpreds); j++)
+ int oldstyle = 0;
+ if (oldstyle == 1)
{
- if (infile.good())
+ for (unsigned int j = 0; j < (nsnps * ngpreds); j++)
{
- infile >> inStr;
- // tmpstr contains the dosage/probability in
- // string form. Convert it to double (if tmpstr is
- // NaN it will be set to nan).
- double dosage;
- char *endptr;
- errno = 0; // To distinguish success/failure
- // after strtod()
+ if (infile.good())
+ {
+ infile >> inStr;
+ // tmpstr contains the dosage/probability in
+ // string form. Convert it to double (if tmpstr is
+ // NaN it will be set to nan).
+ double dosage;
+ char *endptr;
+ errno = 0; // To distinguish success/failure
+ // after strtod()
- dosage = mldose_strtod(inStr);
- //dosage = strtod(tmpstr.c_str(), &endptr);
-// if ((errno == ERANGE &&
-// (dosage == HUGE_VALF || dosage == HUGE_VALL))
-// || (errno != 0 && dosage == 0)) {
-// perror("Error while reading genetic data (strtod)");
-// exit(EXIT_FAILURE);
-// }
+ dosage = strtod(inStr, &endptr);
+ if ((errno == ERANGE
+ && (dosage == HUGE_VALF || dosage == HUGE_VALL))
+ || (errno != 0 && dosage == 0))
+ {
+ perror("Error while reading genetic data (strtod)");
+ exit(EXIT_FAILURE);
+ }
- if (endptr == tmpstr.c_str()) {
- cerr << "No digits were found while reading genetic data"
- << " (individual " << i + 1
- << ", position " << j + 1 << ")"
- << endl;
- exit(EXIT_FAILURE);
+ if (endptr == tmpstr.c_str())
+ {
+ cerr
+ << "No digits were found while reading genetic data"
+ << " (individual " << i + 1 << ", position "
+ << j + 1 << ")" << endl;
+ exit(EXIT_FAILURE);
+ }
+ /* If we got here, strtod() successfully parsed a number */
+ G.put(dosage, k, j);
}
-
- /* If we got here, strtod() successfully parsed a number */
- G.put(dosage, k, j);
+ else
+ {
+ std::cerr << "cannot read dose-file: " << fname
+ << "check skipd and ngpreds parameters\n";
+ infile.close();
+ exit(1);
+ }
}
- else
- {
- std::cerr << "cannot read dose-file: " << fname
- << "check skipd and ngpreds parameters\n";
- infile.close();
- exit(1);
- }
}
+ else
+ {
+ std::string all_numbers;
+ all_numbers.reserve(nsnps * ngpreds * 7);
+ std::getline(infile, all_numbers);
+ mldose_line_to_matrix(k, all_numbers.c_str(), nsnps * ngpreds);
+ }
k++;
}
else
Modified: branches/ProbABEL-0.50/src/gendata.h
===================================================================
--- branches/ProbABEL-0.50/src/gendata.h 2014-03-27 21:16:16 UTC (rev 1663)
+++ branches/ProbABEL-0.50/src/gendata.h 2014-03-28 19:12:41 UTC (rev 1664)
@@ -44,7 +44,7 @@
unsigned int nids;
unsigned int ngpreds;
gendata();
- double convert( char* source, char** endPtr );
+ void mldose_line_to_matrix(int k,const char *all_numbers,int amount_of_numbers);
void re_gendata(char * fname, unsigned int insnps, unsigned int ingpreds,
unsigned int npeople, unsigned int nmeasured,
More information about the Genabel-commits
mailing list