[Genabel-commits] r1286 - pkg/ProbABEL/src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Aug 8 13:19:30 CEST 2013
Author: lckarssen
Date: 2013-08-08 13:19:30 +0200 (Thu, 08 Aug 2013)
New Revision: 1286
Modified:
pkg/ProbABEL/src/eigen_mematrix.cpp
pkg/ProbABEL/src/eigen_mematrix.h
pkg/ProbABEL/src/main.cpp
pkg/ProbABEL/src/mematri1.h
pkg/ProbABEL/src/mematrix.h
pkg/ProbABEL/src/reg1.cpp
pkg/ProbABEL/src/regdata.cpp
pkg/ProbABEL/src/regdata.h
Log:
Added chi^2 information to the ProbABEL output for linear regression.
NOTE: for palogist and pacoxph this still needs to be fixed!!!
The chi^2 values are based on the LRT. The null model is calculated at
the beginning (this was already part of ProbABEL for a long time). In
the case of missing genotype data the null model is recalculated for
that SNP only. So for people with imputed data there should be no
difference in computation time.
This is a bit of a rough implementation. Maybe some more work is
needed to make it better (in terms of programming style/efficiency).
Changes per file:
src/main.cpp:
- Some small (unrelated) changes to the way progress information is printed
- Changed output precision of beta, se_beta, chi^2 to 6 instead of 9 digits
- around line 700 is where the recalculation of the null model is done.
src/regdata.h, src/regdata.cpp:
- Add a function remove_snp_from_X() that removes the genotype data
from the design matrix. This is necessary, because in order to know
which individuals have missing genotype data (and therefore should
be excluded from the null estimation), we first need to have the
genotype data in.
src/reg1.cpp:
- At the beginning of apply_model() check if we are calculating the
null model. if so, we don't need to apply the genotypic model at
all.
src/eigen_mematrix.h, src/eigen_mematrix.cpp:
- Implement the delete_column() function. When transitioning to Eigen
this function wasn't used anywhere in the code, so it wasn't
carried over from the mematrix files.
src/mematri1.h, src/mematrix.h:
- Set the col/row number argument to const in the delete_column() and
delete_row() functions.
Modified: pkg/ProbABEL/src/eigen_mematrix.cpp
===================================================================
--- pkg/ProbABEL/src/eigen_mematrix.cpp 2013-08-08 10:07:32 UTC (rev 1285)
+++ pkg/ProbABEL/src/eigen_mematrix.cpp 2013-08-08 11:19:30 UTC (rev 1286)
@@ -362,4 +362,30 @@
return temp;
}
+
+template<class DT>
+void mematrix<DT>::delete_column(const int delcol)
+{
+ if (delcol > ncol || delcol < 0)
+ {
+ fprintf(stderr, "mematrix::delete_column: column out of range\n");
+ exit(1);
+ }
+
+ // Eigen::Matrix<DT,-1,-1,0,-1,-1> *auxdata =
+ // new Eigen::Matrix<DT,-1,-1,0,-1,-1>;
+ MatrixXd auxdata = data;
+
+ data.resize(data.rows(), data.cols()-1);
+
+ int rightColsSize = auxdata.cols() - delcol - 1;
+
+ data.leftCols(delcol) = auxdata.leftCols(delcol);
+ data.rightCols(rightColsSize) = auxdata.rightCols(rightColsSize);
+
+ ncol--;
+}
+
+
+
#endif
Modified: pkg/ProbABEL/src/eigen_mematrix.h
===================================================================
--- pkg/ProbABEL/src/eigen_mematrix.h 2013-08-08 10:07:32 UTC (rev 1285)
+++ pkg/ProbABEL/src/eigen_mematrix.h 2013-08-08 11:19:30 UTC (rev 1286)
@@ -37,6 +37,8 @@
mematrix operator*(const mematrix &M);
mematrix operator*(const mematrix *M);
+ void delete_column(const int delcol);
+
void reinit(int nr, int nc);
unsigned int getnrow(void)
Modified: pkg/ProbABEL/src/main.cpp
===================================================================
--- pkg/ProbABEL/src/main.cpp 2013-08-08 10:07:32 UTC (rev 1285)
+++ pkg/ProbABEL/src/main.cpp 2013-08-08 11:19:30 UTC (rev 1286)
@@ -208,9 +208,9 @@
<< input_var.getSep()
<< "sebeta_SNP_recA1";
*outfile[4] << input_var.getSep()
- << "beta_SNP_odom"
+ << "beta_SNP_odomA1"
<< input_var.getSep()
- << "sebeta_SNP_odom";
+ << "sebeta_SNP_odomA1";
if (input_var.getInteraction() != 0)
{
//Han Chen
@@ -263,7 +263,7 @@
*outfile[1] << input_var.getSep() << "chi2_SNP_A1\n"; // "loglik\n";
*outfile[2] << input_var.getSep() << "chi2_SNP_domA1\n";// "loglik\n";
*outfile[3] << input_var.getSep() << "chi2_SNP_recA1\n";// "loglik\n";
- *outfile[4] << input_var.getSep() << "chi2_SNP_odom\n"; // "loglik\n";
+ *outfile[4] << input_var.getSep() << "chi2_SNP_odomA1\n"; // "loglik\n";
}
void create_header2(std::vector<std::ofstream*>& outfile, cmdvars& input_var,
@@ -389,7 +389,7 @@
masked_matrix invvarmatrix;
- std::cout << "Reading data ..." << std::flush;
+ std::cout << "Reading data..." << std::flush;
if (input_var.getInverseFilename() != NULL)
{
loadInvSigma(input_var, phd, invvarmatrix);
@@ -412,7 +412,7 @@
phd.allmeasured, phd.idnames);
}
- std::cout << " loaded genotypic data ..." << std::flush;
+ std::cout << " loaded genotypic data..." << std::flush;
// estimate null model
#if COXPH
@@ -421,7 +421,7 @@
regdata nrgd = regdata(phd, gtd, -1, input_var.isIsInteractionExcluded());
#endif
- std::cout << " loaded null data ..." << std::flush;
+ std::cout << " loaded null data..." << std::flush;
#if LOGISTIC
logistic_reg nrd = logistic_reg(nrgd);
nrd.estimate(nrgd, 0, MAXITER, EPS, CHOLTOL, 0,
@@ -446,14 +446,14 @@
#endif
double null_loglik = nrd.loglik;
- std::cout << " estimated null model ...";
+ std::cout << " estimated null model...";
// end null
#if COXPH
coxph_data rgd(phd, gtd, 0);
#else
regdata rgd(phd, gtd, 0, input_var.isIsInteractionExcluded());
#endif
- std::cout << " formed regression object ...";
+ std::cout << " formed regression object...\n";
// Open a vector of files that will be used for output. Depending
@@ -505,13 +505,16 @@
for (int i = 0; i < maxmod; i++)
{
beta_sebeta.push_back(new std::ostringstream());
- beta_sebeta[i]->precision(9);
+ beta_sebeta[i]->precision(6);
+ //*beta_sebeta[i] << scientific;
//Han Chen
covvalue.push_back(new std::ostringstream());
- covvalue[i]->precision(9);
+ covvalue[i]->precision(6);
+ //*covvalue[i] << scientific;
//Oct 26, 2009
chi2.push_back(new std::ostringstream());
- chi2[i]->precision(9);
+ chi2[i]->precision(6);
+ //*chi2[i] << scientific;
}
@@ -565,10 +568,10 @@
poly = 0;
}
+ // Write mlinfo information to the output file(s)
// Prob data: All models output. One file per model
if (input_var.getNgpreds() == 2)
{
- // Write mlinfo to output:
for (unsigned int file = 0; file < outfile.size(); file++)
{
write_mlinfo(outfile, file, mli, csnp, input_var,
@@ -679,7 +682,7 @@
} // END for(pos = start_pos; pos < rd.beta.nrow; pos++)
- //calculate chi2
+ //calculate chi^2
//________________________________
//cout << rd.loglik<<" "<<input_var.getNgpreds() << "\n";
@@ -690,23 +693,41 @@
if (input_var.getScore() == 0)
{
+ double loglik = rd.loglik;
if (gcount != gtd.nids)
{
// If SNP data is missing we didn't
// correctly compute the null likelihood
- *chi2[model] << "NaN";
+
+ // Recalculate null likelihood by
+ // stripping the SNP data column(s) from
+ // the X matrix in the regression object
+ // and run the null model estimation again
+ // for this SNP.
+// BEWARE, ONLY IMPLEMENTED FOR LINEAR REG!!!
+// TODO LCK
+#ifdef LINEAR
+ regdata new_rgd = rgd;
+ new_rgd.remove_snp_from_X();
+ linear_reg new_null_rd(new_rgd);
+ new_null_rd.estimate(new_rgd, 0, CHOLTOL, model,
+ input_var.getInteraction(),
+ input_var.getNgpreds(),
+ invvarmatrix,
+ input_var.getRobust(), 1);
+
+ *chi2[model] << 2. * (loglik - new_null_rd.loglik);
+#endif
}
else
{
// No missing SNP data, we can compute the LRT
- *chi2[model] << 2. * (rd.loglik - null_loglik);
+ *chi2[model] << 2. * (loglik - null_loglik);
}
- //*chi2[model] << rd.loglik;
} else
{
// We want score test output
*chi2[model] << rd.chi2_score;
- //*chi2[model] << "nan";
}
}
} // END first part of if(poly); allele not too rare
Modified: pkg/ProbABEL/src/mematri1.h
===================================================================
--- pkg/ProbABEL/src/mematri1.h 2013-08-08 10:07:32 UTC (rev 1285)
+++ pkg/ProbABEL/src/mematri1.h 2013-08-08 11:19:30 UTC (rev 1286)
@@ -301,7 +301,7 @@
}
template<class DT>
-void mematrix<DT>::delete_column(int delcol)
+void mematrix<DT>::delete_column(const int delcol)
{
if (delcol > ncol || delcol < 0)
{
@@ -333,7 +333,7 @@
}
template<class DT>
-void mematrix<DT>::delete_row(int delrow)
+void mematrix<DT>::delete_row(const int delrow)
{
if (delrow > nrow || delrow < 0)
{
Modified: pkg/ProbABEL/src/mematrix.h
===================================================================
--- pkg/ProbABEL/src/mematrix.h 2013-08-08 10:07:32 UTC (rev 1285)
+++ pkg/ProbABEL/src/mematrix.h 2013-08-08 11:19:30 UTC (rev 1286)
@@ -48,8 +48,8 @@
void put(DT value, int nr, int nc);
DT column_mean(int nc);
void print(void);
- void delete_column(int delcol);
- void delete_row(int delrow);
+ void delete_column(const int delcol);
+ void delete_row(const int delrow);
};
Modified: pkg/ProbABEL/src/reg1.cpp
===================================================================
--- pkg/ProbABEL/src/reg1.cpp 2013-08-08 10:07:32 UTC (rev 1285)
+++ pkg/ProbABEL/src/reg1.cpp 2013-08-08 11:19:30 UTC (rev 1286)
@@ -4,12 +4,22 @@
mematrix<double> apply_model(mematrix<double>& X, int model, int interaction,
int ngpreds, bool is_interaction_excluded,
bool iscox, int nullmodel)
+// if ngpreds==1 (dose data):
+// model 0 = additive 1 df
+// if ngpreds==2 (prob data):
// model 0 = 2 df
// model 1 = additive 1 df
// model 2 = dominant 1 df
// model 3 = recessive 1 df
// model 4 = over-dominant 1 df
{
+ if(nullmodel)
+ {
+ // No need to apply any genotypic model when calculating the
+ // null model
+ return (X);
+ }
+
if (model == 0)
{
if (interaction != 0 && !nullmodel)
@@ -295,12 +305,13 @@
if (verbose)
{
cout << rdata.is_interaction_excluded
- << " <-irdata.is_interaction_excluded\n";
+ << " <-rdata.is_interaction_excluded\n";
// std::cout << "invvarmatrix:\n";
// invvarmatrixin.masked_data->print();
std::cout << "rdata.X:\n";
rdata.X.print();
}
+
mematrix<double> X = apply_model(rdata.X, model, interaction, ngpreds,
rdata.is_interaction_excluded, false,
nullmodel);
@@ -311,6 +322,7 @@
std::cout << "Y:\n";
rdata.Y.print();
}
+
int length_beta = X.ncol;
beta.reinit(length_beta, 1);
sebeta.reinit(length_beta, 1);
Modified: pkg/ProbABEL/src/regdata.cpp
===================================================================
--- pkg/ProbABEL/src/regdata.cpp 2013-08-08 10:07:32 UTC (rev 1285)
+++ pkg/ProbABEL/src/regdata.cpp 2013-08-08 11:19:30 UTC (rev 1286)
@@ -39,7 +39,7 @@
for (int i = 0; i < nids; i++)
{
- masked_data[i] = 0;
+ masked_data[i] = obj.masked_data[i];
}
}
@@ -95,6 +95,9 @@
void regdata::update_snp(gendata &gend, int snpnum)
{
+ // Add genotypic data (dosage or probabilities) to the design
+ // matrix X.
+
for (int j = 0; j < ngpreds; j++)
{
double snpdata[nids];
@@ -109,11 +112,34 @@
{
X.put(snpdata[i], i, (ncov - j));
if (isnan(snpdata[i]))
+ {
masked_data[i] = 1;
+ }
}
}
}
+void regdata::remove_snp_from_X()
+{
+ // update_snp() adds SNP information to the design matrix. This
+ // function allows you to strip that information from X again.
+ // This is used for example when calculating the null model.
+
+ if(ngpreds == 1)
+ {
+ X.delete_column(X.ncol -1);
+ }
+ else if(ngpreds == 2)
+ {
+ X.delete_column(X.ncol -1);
+ X.delete_column(X.ncol -1);
+ }
+ else
+ {
+ cerr << "ngpreds should be 1 or 2. you should never come here!\n";
+ }
+}
+
regdata::~regdata()
{
delete[] regdata::masked_data;
Modified: pkg/ProbABEL/src/regdata.h
===================================================================
--- pkg/ProbABEL/src/regdata.h 2013-08-08 10:07:32 UTC (rev 1285)
+++ pkg/ProbABEL/src/regdata.h 2013-08-08 11:19:30 UTC (rev 1286)
@@ -34,6 +34,7 @@
bool ext_is_interaction_excluded);
mematrix<double> extract_genotypes();
void update_snp(gendata &gend, int snpnum);
+ void remove_snp_from_X();
regdata get_unmasked_data();
~regdata();
More information about the Genabel-commits
mailing list