[Genabel-commits] r1275 - pkg/ProbABEL/src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Jul 23 00:42:35 CEST 2013
Author: lckarssen
Date: 2013-07-23 00:42:34 +0200 (Tue, 23 Jul 2013)
New Revision: 1275
Modified:
pkg/ProbABEL/src/main.cpp
Log:
Added some comments to the source of ProbABEL's main.cpp.
Modified: pkg/ProbABEL/src/main.cpp
===================================================================
--- pkg/ProbABEL/src/main.cpp 2013-07-22 22:17:21 UTC (rev 1274)
+++ pkg/ProbABEL/src/main.cpp 2013-07-22 22:42:34 UTC (rev 1275)
@@ -333,6 +333,7 @@
}
}
+
int get_start_position(const cmdvars& input_var, int model,
int number_of_rows_or_columns)
{
@@ -454,12 +455,15 @@
#endif
std::cout << " formed regression object ...";
- //________________________________________________________________
- //Maksim, 9 Jan, 2009
+
+ // Open a vector of files that will be used for output. Depending
+ // on the number of genomic predictors we either open 5 files (one
+ // for each model if we have prob data) or one (if we have dosage
+ // data).
std::string outfilename_str(input_var.getOutfilename());
std::vector<std::ofstream*> outfile;
- //All models output.One file per each model
+ // Prob data: All models output. One file per model
if (input_var.getNgpreds() == 2)
{
open_files_for_output(outfile, outfilename_str);
@@ -467,7 +471,7 @@
{
create_header_1(outfile, input_var, phd, interaction_cox);
}
- } else //Only additive model. Only one output file
+ } else //Dosage data: Only additive model => only one output file
{
outfile.push_back(
new std::ofstream((outfilename_str + "_add.out.txt").c_str()));
@@ -483,7 +487,7 @@
{
create_header2(outfile, input_var, phd, interaction_cox);
}
- }
+ } // END else: we have dosage data => only one file
int maxmod = 5;
@@ -495,6 +499,9 @@
//Oct 26, 2009
std::vector<std::ostringstream *> chi2;
+ // Create string streams for betas, SEs, etc. These are used to
+ // later store the various output values that will be written to
+ // files.
for (int i = 0; i < maxmod; i++)
{
beta_sebeta.push_back(new std::ostringstream());
@@ -524,11 +531,13 @@
gtd.get_var(csnp * 2, snpdata1);
gtd.get_var(csnp * 2 + 1, snpdata2);
for (unsigned int ii = 0; ii < gtd.nids; ii++)
+ {
if (!isnan(snpdata1[ii]) && !isnan(snpdata2[ii]))
{
gcount++;
freq += snpdata1[ii] + snpdata2[ii] * 0.5;
}
+ }
}
else // Only one predictor (dosage data)
{
@@ -543,7 +552,8 @@
}
}
}
- freq /= static_cast<double>(gcount);
+ freq /= static_cast<double>(gcount); // Allele frequency
+
int poly = 1;
if (fabs(freq) < 1.e-16 || fabs(1. - freq) < 1.e-16)
{
@@ -555,8 +565,7 @@
poly = 0;
}
- // All models output. One file per model
- //
+ // Prob data: All models output. One file per model
if (input_var.getNgpreds() == 2)
{
// Write mlinfo to output:
@@ -567,15 +576,17 @@
}
} else
{
+ // Dosage data: only additive model
int file = 0;
write_mlinfo(outfile, file, mli, csnp, input_var, gcount, freq);
- maxmod = 1;
+ maxmod = 1; // We can only calculate the additive
+ // model with dosage data
}
- // Run regression for each model
+ // Run regression for each model for the current SNP
for (int model = 0; model < maxmod; model++)
{
- if (poly) // Allele freq is not too rare; still ngpreds=2
+ if (poly) // Allele freq is not too rare
{
#if LOGISTIC
logistic_reg rd(rgd);
@@ -622,6 +633,8 @@
start_pos = get_start_position(input_var, model,
number_of_rows_or_columns);
+ // The regression coefficients for the SNPs are in the
+ // last rows of beta[] and sebeta[].
for (int pos = start_pos; pos < rd.beta.nrow; pos++)
{
*beta_sebeta[model] << input_var.getSep()
@@ -648,23 +661,24 @@
<< rd.covariance[pos - 2];
}
- } //end ngpred=2
+ } // END ngpreds=2
else
{
*covvalue[model] << rd.covariance[pos - 1];
}
- } //end model
+ } //END model == 0
else
{
*covvalue[model] << rd.covariance[pos - 1];
- } //end ngpred==1
- }
+ } // END model != 0
+ } // END if pos > start_pos
}
#endif
//Oct 26, 2009
- }
+ } // END for(pos = start_pos; pos < rd.beta.nrow; pos++)
+
//calculate chi2
//________________________________
//cout << rd.loglik<<" "<<input_var.getNgpreds() << "\n";
@@ -704,7 +718,7 @@
number_of_rows_or_columns);
if (input_var.getInteraction() != 0 && !input_var.getAllcov()
- && input_var.getNgpreds() != 2)
+ && input_var.getNgpreds() != 2)
{
start_pos++;
}
@@ -751,7 +765,7 @@
//Oct 26, 2009
*chi2[model] << "nan";
} else
- { //ngpreds=1
+ { //ngpreds==1 (and SNP is rare)
if (input_var.getInverseFilename() == NULL)
{
// Han Chen
@@ -764,11 +778,13 @@
#endif
//Oct 26, 2009
*chi2[model] << "nan";
- }
- }
- }
- }
- //end of model cycle
+ } // END if getInverseFilename == NULL
+ } // END ngpreds == 1 (and SNP is rare)
+ } // END else: SNP is rare
+ } // END of model cycle
+
+
+ // Start writing beta's, se_beta's etc. to file
if (input_var.getNgpreds() == 2)
{
//Han Chen
@@ -836,30 +852,31 @@
{
*outfile[0] << beta_sebeta[0]->str() << "\n";
}
- //} // end ngpreds ==1
+ } // End ngpreds == 1 when writing output files
- }
-//
-//clean chi2
- for (int i = 0; i < 5; i++)
+ // Clean chi2 and other streams
+ for (int model = 0; model < maxmod; model++)
{
- beta_sebeta[i]->str("");
+ beta_sebeta[model]->str("");
//Han Chen
- covvalue[i]->str("");
+ covvalue[model]->str("");
//Oct 26, 2009
- chi2[i]->str("");
+ chi2[model]->str("");
}
+
update_progress_to_cmd_line(csnp, nsnps);
- }
+ } // END for loop over all SNPs
+
+ // We're almost done. All computations have finished, time to
+ // clean up.
+
std::cout << setprecision(2) << fixed;
std::cout << "\b\b\b\b\b\b\b\b\b" << 100.;
std::cout << "%... done\n";
- //________________________________________________________________
- //Maksim, 9 Jan, 2009
-
+ // Close output files
for (unsigned int i = 0; i < outfile.size(); i++)
{
outfile[i]->close();
More information about the Genabel-commits
mailing list