[Genabel-commits] r1726 - pkg/ProbABEL/src

Tue May 13 23:17:30 CEST 2014

Author: lckarssen
Date: 2014-05-13 23:17:30 +0200 (Tue, 13 May 2014)
New Revision: 1726

Modified:
   pkg/ProbABEL/src/coxph_data.cpp
   pkg/ProbABEL/src/coxph_data.h
   pkg/ProbABEL/src/main.cpp
   pkg/ProbABEL/src/reg1.cpp
   pkg/ProbABEL/src/reg1.h
Log:
Remove the #define-d constants MAXITER, EPS and CHOLTOL from ProbABEL's main().
CHOLTOL is only used by the cox regression module, now that we moved to EIGEN-only, so it makes sense to define it as a constant in the coxph_data class.
MAXITER and EPS are used for logistic and Cox regression, so they are only defined in the logistic_reg and coxph_data classes.

This simplifies the estimate() and score() function calls, because they don't need these parameters anymore and as a result several lines in main() can now be shared among linear and logistic regression, changing almost identical code to identical code.



Modified: pkg/ProbABEL/src/coxph_data.cpp
===================================================================

--- pkg/ProbABEL/src/coxph_data.cpp	2014-05-11 12:11:19 UTC (rev 1725)
+++ pkg/ProbABEL/src/coxph_data.cpp	2014-05-13 21:17:30 UTC (rev 1726)
@@ -408,11 +408,10 @@
 
 
 void coxph_reg::estimate(const coxph_data &cdatain,
-                        int maxiter, double eps,
-                        double tol_chol, const int model,
-                        const int interaction, const int ngpreds,
-                        const bool iscox, const int nullmodel,
-                        const mlinfo &snpinfo, const int cursnp)
+                         const int model,
+                         const int interaction, const int ngpreds,
+                         const bool iscox, const int nullmodel,
+                         const mlinfo &snpinfo, const int cursnp)
 {
     coxph_data cdata = cdatain.get_unmasked_data();
 
@@ -426,8 +425,6 @@
         (cdata.offset).column_mean(0);
     mematrix<double> means(X.nrow, 1);
 
-    int maxiterinput = maxiter;
-
     for (int i = 0; i < X.nrow; i++)
     {
         beta[i] = 0.;
@@ -446,32 +443,44 @@
     // R's coxph()
     double sctest = 1.0;
 
-    coxfit2(&maxiter, &cdata.nids, &X.nrow, cdata.stime.data.data(),
+    // Set the maximum number of iterations that coxfit2() will run to
+    // the default value from the class definition.
+    int maxiterinput = MAXITER;
+    // Make separate variables epsinput and tolcholinput that are not
+    // const to send to coxfit2(), this way we won't have to alter
+    // that function (which is a good thing: we want to keep it as
+    // pristine as possible because it is copied from the R survival
+    // package).
+    double epsinput = EPS;
+    double tolcholinput = CHOLTOL;
+
+    coxfit2(&maxiterinput, &cdata.nids, &X.nrow, cdata.stime.data.data(),
             cdata.sstat.data.data(), X.data.data(), newoffset.data.data(),
             cdata.weights.data.data(), cdata.strata.data.data(),
             means.data.data(), beta.data.data(), u.data.data(),
-            imat.data.data(), loglik_int, &flag, work, &eps, &tol_chol,
-            &sctest);
+            imat.data.data(), loglik_int, &flag, work, &epsinput,
+            &tolcholinput, &sctest);
 
+    // After coxfit2() maxiterinput contains the actual number of
+    // iterations that were used. Store it in niter.
+    niter = maxiterinput;
 
-    niter = maxiter;
-
     // Check the results of the Cox fit; mirrored from the same checks
     // in coxph_fit.S from the R survival package
 
     bool setToZero = false;
 
-    if (flag < X.nrow && maxiter > 0) {
+    if (flag < X.nrow && niter > 0) {
         cerr << "Warning for " << snpinfo.name[cursnp]
              << ": X matrix deemed to be singular,"
              << " setting beta and se to 'nan'\n";
         setToZero = true;
     }
 
-    if (niter >= maxiterinput)
+    if (niter >= MAXITER)
     {
         cerr << "Warning for " << snpinfo.name[cursnp]
-             << ": nr of iterations > MAXITER (" << maxiterinput << "): "
+             << ": nr of iterations > the maximum (" << MAXITER << "): "
              << niter << endl;
     }
 
@@ -486,8 +495,8 @@
         MatrixXd imateigen = imat.data;
         VectorXd infs = ueigen.transpose() * imateigen;
         VectorXd betaeigen = beta.data;
-        if ( infs.norm() > eps ||
-             infs.norm() > sqrt(eps) * betaeigen.norm() )
+        if ( infs.norm() > EPS ||
+             infs.norm() > sqrt(EPS) * betaeigen.norm() )
         {
             cerr << "Warning for " << snpinfo.name[cursnp]
                  << ": beta may be infinite,"

Modified: pkg/ProbABEL/src/coxph_data.h
===================================================================
--- pkg/ProbABEL/src/coxph_data.h	2014-05-11 12:11:19 UTC (rev 1725)
+++ pkg/ProbABEL/src/coxph_data.h	2014-05-13 21:17:30 UTC (rev 1726)
@@ -199,10 +199,31 @@
 
 
     // Member functions
-    void estimate(const coxph_data &cdatain, int maxiter,
-                  double eps, double tol_chol, const int model,
+    void estimate(const coxph_data &cdatain, const int model,
                   const int interaction, const int ngpreds, const bool iscox,
                   const int nullmodel, const mlinfo &snpinfo, const int cursnp);
+
+ private:
+    /**
+     * \brief Constant that contains the maximum number of iterations
+     * done during the regression routine.
+     */
+    static const int MAXITER = 20;
+
+    /**
+     * \brief Constant containing the tolerance for
+     * convergence.
+     *
+     * Iteration continues until the percent change in loglikelihood
+     * is <= EPS.
+     */
+    static const double EPS = 1e-8;
+
+    /**
+     * \brief Constant containing the precision for the Cholesky
+     * decomposition.
+     */
+    static const double CHOLTOL = 1.5e-12;
 };
 
 #endif /* COXPH_DATA_H_ */

Modified: pkg/ProbABEL/src/main.cpp
===================================================================
--- pkg/ProbABEL/src/main.cpp	2014-05-11 12:11:19 UTC (rev 1725)
+++ pkg/ProbABEL/src/main.cpp	2014-05-13 21:17:30 UTC (rev 1726)
@@ -76,16 +76,7 @@
 #include "coxph_data.h"
 #include "main_functions_dump.h"
 
-/// Maximum number of iterations we allow the regression to run
-#define MAXITER 20
-/// Tolerance for convergence
-#define EPS 1.e-8
-/// Precision for the Cholesky decomposition
-#define CHOLTOL 1.5e-12
 
-
-
-
 /**
  * Main routine. The main logic of ProbABEL can be found here
  *
@@ -158,7 +149,7 @@
 #if LOGISTIC
     logistic_reg nrd = logistic_reg(nrgd);
 
-    nrd.estimate(0, MAXITER, EPS, 0,
+    nrd.estimate(0, 0,
                  input_var.getInteraction(),
                  input_var.getNgpreds(),
                  invvarmatrix,
@@ -170,12 +161,12 @@
 #if DEBUG
     std::cout << "[DEBUG] linear_reg nrd = linear_reg(nrgd); DONE.";
 #endif
-    nrd.estimate(0, CHOLTOL, 0, input_var.getInteraction(),
+    nrd.estimate(0, 0, input_var.getInteraction(),
                  input_var.getNgpreds(), invvarmatrix,
                  input_var.getRobust(), 1);
 #elif COXPH
     coxph_reg nrd = coxph_reg(nrgd);
-    nrd.estimate(nrgd,  MAXITER, EPS, CHOLTOL, 0,
+    nrd.estimate(nrgd, 0,
                  input_var.getInteraction(), input_var.getNgpreds(),
                  true, 1, mli, 0);
 #endif
@@ -302,41 +293,29 @@
             {
 #if LOGISTIC
                 logistic_reg rd(rgd);
-                if (input_var.getScore())
-                {
-                    rd.score(nrd.residuals, CHOLTOL, model,
-                             input_var.getInteraction(),
-                             input_var.getNgpreds(),
-                             invvarmatrix);
-                }
-                else
-                {
-                    rd.estimate(0, MAXITER, EPS, model,
-                                input_var.getInteraction(),
-                                input_var.getNgpreds(),
-                                invvarmatrix,
-                                input_var.getRobust());
-                }
 #elif LINEAR
                 linear_reg rd(rgd);
+#elif COXPH
+                coxph_reg rd(rgd);
+#endif
+#if !COXPH
                 if (input_var.getScore())
                 {
-                    rd.score(nrd.residuals,  CHOLTOL, model,
+                    rd.score(nrd.residuals, model,
                              input_var.getInteraction(),
                              input_var.getNgpreds(),
                              invvarmatrix);
                 }
                 else
                 {
-                    rd.estimate(0, CHOLTOL, model,
+                    rd.estimate(0, model,
                                 input_var.getInteraction(),
                                 input_var.getNgpreds(),
                                 invvarmatrix,
                                 input_var.getRobust());
                 }
-#elif COXPH
-                coxph_reg rd(rgd);
-                rd.estimate(rgd,  MAXITER, EPS, CHOLTOL, model,
+#else
+                rd.estimate(rgd, model,
                             input_var.getInteraction(),
                             input_var.getNgpreds(), true, 0, mli, csnp);
 #endif
@@ -408,34 +387,30 @@
                             // the X matrix in the regression object
                             // and run the null model estimation again
                             // for this SNP.
-#ifdef LINEAR
+#if !COXPH
                             regdata new_rgd = rgd;
+#else
+                            coxph_data new_rgd = rgd;
+#endif
+
                             new_rgd.remove_snp_from_X();
-                            linear_reg new_null_rd(new_rgd);
-                            new_null_rd.estimate(0,
-                                                 CHOLTOL, model,
-                                                 input_var.getInteraction(),
-                                                 input_var.getNgpreds(),
-                                                 invvarmatrix,
-                                                 input_var.getRobust(), 1);
 
+#ifdef LINEAR
+                            linear_reg new_null_rd(new_rgd);
 #elif LOGISTIC
-                            regdata new_rgd = rgd;
-                            new_rgd.remove_snp_from_X();
                             logistic_reg new_null_rd(new_rgd);
-                            new_null_rd.estimate(0, MAXITER, EPS,
-                                                  model,
+#endif
+#if !COXPH
+                            new_null_rd.estimate(0,
+                                                 model,
                                                  input_var.getInteraction(),
                                                  input_var.getNgpreds(),
                                                  invvarmatrix,
                                                  input_var.getRobust(), 1);
-
-#elif COXPH
-                            coxph_data new_rgd = rgd;
-                            new_rgd.remove_snp_from_X();
+#else
                             coxph_reg new_null_rd(new_rgd);
-                            new_null_rd.estimate(new_rgd, MAXITER,
-                                                 EPS, CHOLTOL, model,
+                            new_null_rd.estimate(new_rgd,
+                                                 model,
                                                  input_var.getInteraction(),
                                                  input_var.getNgpreds(),
                                                  true, 1, mli, csnp);

Modified: pkg/ProbABEL/src/reg1.cpp
===================================================================
--- pkg/ProbABEL/src/reg1.cpp	2014-05-11 12:11:19 UTC (rev 1725)
+++ pkg/ProbABEL/src/reg1.cpp	2014-05-13 21:17:30 UTC (rev 1726)
@@ -305,7 +305,7 @@
 
 
 void base_reg::base_score(const mematrix<double>& resid,
-                          const double tol_chol, const int model,
+                          const int model,
                           const int interaction,
                           const int ngpreds,
                           const masked_matrix& invvarmatrix,
@@ -497,7 +497,6 @@
  * \brief Estimate the parameters for linear regression.
  *
  * @param verbose
- * @param tol_chol
  * @param model
  * @param interaction
  * @param ngpreds
@@ -505,11 +504,13 @@
  * @param robust
  * @param nullmodel
  */
-void linear_reg::estimate(const int verbose, const double tol_chol,
-                          const int model, const int interaction,
+void linear_reg::estimate(const int verbose,
+                          const int model,
+                          const int interaction,
                           const int ngpreds,
                           masked_matrix& invvarmatrixin,
-                          const int robust, const int nullmodel) {
+                          const int robust,
+                          const int nullmodel) {
     // suda interaction parameter
     // model should come here
     //regdata rdata = rdatain.get_unmasked_data();
@@ -613,13 +614,13 @@
 
 
 void linear_reg::score(const mematrix<double>& resid,
-                       const double tol_chol, const int model,
+                       const int model,
                        const int interaction, const int ngpreds,
                        const masked_matrix& invvarmatrix,
                        int nullmodel)
 {
     //regdata rdata = rdatain.get_unmasked_data();
-    base_score(resid, tol_chol, model, interaction, ngpreds,
+    base_score(resid, model, interaction, ngpreds,
                invvarmatrix, nullmodel = 0);
     // TODO: find out why nullmodel is assigned 0 in the call above.
 }
@@ -645,8 +646,8 @@
 }
 
 
-void logistic_reg::estimate(const int verbose, const int maxiter,
-                            const double eps, const int model,
+void logistic_reg::estimate(const int verbose,
+                            const int model,
                             const int interaction, const int ngpreds,
                             masked_matrix& invvarmatrixin,
                             const int robust, const int nullmodel) {
@@ -721,7 +722,7 @@
      */
     niter = 0;
     double delta = 1.;
-    while (niter < maxiter && delta > eps)
+    while (niter < MAXITER && delta > EPS)
     {
         mematrix<double> eMu = (X) * beta;
         mematrix<double> eMu_us = eMu;
@@ -800,7 +801,7 @@
 
         delta = fabs(1. - (prevlik / loglik));
         niter++;
-    }  // END while (niter < maxiter && delta > eps)
+    }  // END while (niter < MAXITER && delta > EPS)
 
     sigma2 = 0.;
     mematrix<double> robust_sigma2(X.ncol, X.ncol);
@@ -881,11 +882,11 @@
 
 
 void logistic_reg::score(const mematrix<double>& resid,
-                         const double tol_chol, const int model,
+                         const int model,
                          const int interaction, const int ngpreds,
                          const masked_matrix& invvarmatrix,
                          int nullmodel) {
-    base_score(resid, tol_chol, model, interaction, ngpreds,
+    base_score(resid, model, interaction, ngpreds,
                invvarmatrix, nullmodel = 0);
     // TODO: find out why nullmodel is assigned 0 in the call above.
 }

Modified: pkg/ProbABEL/src/reg1.h
===================================================================
--- pkg/ProbABEL/src/reg1.h	2014-05-11 12:11:19 UTC (rev 1725)
+++ pkg/ProbABEL/src/reg1.h	2014-05-13 21:17:30 UTC (rev 1726)
@@ -84,7 +84,7 @@
     regdata reg_data;
 
     void base_score(const mematrix<double>& resid,
-                    const double tol_chol, const int model,
+                    const int model,
                     const int interaction, const int ngpreds,
                     const masked_matrix& invvarmatrix,
                     int nullmodel);
@@ -95,13 +95,13 @@
  public:
     linear_reg(const regdata& rdatain);
 
-    void estimate(const int verbose, const double tol_chol, const int model,
+    void estimate(const int verbose, const int model,
                   const int interaction, const int ngpreds,
                   masked_matrix& invvarmatrixin,
                   const int robust, const int nullmodel = 0);
 
     void score(const mematrix<double>& resid,
-               const double tol_chol, const int model, const int interaction,
+               const int model, const int interaction,
                const int ngpreds, const masked_matrix& invvarmatrix,
                int nullmodel = 0);
 
@@ -128,16 +128,30 @@
 
     logistic_reg(const regdata& rdatain);
 
-    void estimate(const int verbose, const int maxiter, const double eps,
+    void estimate(const int verbose,
                   const int model, const int interaction, const int ngpreds,
                   masked_matrix& invvarmatrixin, const int robust,
                   const int nullmodel = 0);
 
     // just a stupid copy from linear_reg
     void score(const mematrix<double>& resid,
-               const double tol_chol, const int model, const int interaction,
+               const int model, const int interaction,
                const int ngpreds, const masked_matrix& invvarmatrix,
                int nullmodel = 0);
+ private:
+    /**
+     * \brief Constant that contains the maximum number of iterations
+     * done during the regression routine.
+     */
+    static const int MAXITER = 20;
+
+    /**
+     * \brief Constant containing the tolerance for convergence.
+     *
+     * Iteration continues until the percent change in loglikelihood
+     * is <= EPS.
+     */
+    static const double EPS = 1e-8;
 };
 
 #endif // REG1_H_