[Genabel-commits] r1712 - pkg/ProbABEL/src

Mon Apr 28 18:59:21 CEST 2014

Author: lckarssen
Date: 2014-04-28 18:59:21 +0200 (Mon, 28 Apr 2014)
New Revision: 1712

Modified:
   pkg/ProbABEL/src/coxph_data.cpp
   pkg/ProbABEL/src/coxph_data.h
   pkg/ProbABEL/src/regdata.cpp
   pkg/ProbABEL/src/regdata.h
Log:
Added and updated Doxygen documentation for the regdata and coxph_data classes.
Moved the default coxph_data() constructor from the .h file to the .cpp file. Removed the empty destructor.


Modified: pkg/ProbABEL/src/coxph_data.cpp
===================================================================

--- pkg/ProbABEL/src/coxph_data.cpp	2014-04-28 16:28:42 UTC (rev 1711)
+++ pkg/ProbABEL/src/coxph_data.cpp	2014-04-28 16:59:21 UTC (rev 1712)
@@ -45,7 +45,10 @@
 #include "fvlib/Logger.h"
 #include "fvlib/Transposer.h"
 
-// compare for sort of times
+/**
+ * Definition of the compare function used for the sort of times. Used
+ * by the qsort() function.
+ */
 int cmpfun(const void *a, const void *b)
 {
     double el1 = *(double*) a;
@@ -67,10 +70,31 @@
     return -9;
 }
 
-coxph_data::coxph_data(const coxph_data &obj) : sstat(obj.sstat),
+
+/**
+ * Constructor. Initialises all values to zero.
+ */
+coxph_data::coxph_data()
+{
+    nids    = 0;
+    ncov    = 0;
+    ngpreds = 0;
+    gcount  = 0;
+    freq    = 0;
+}
+
+
+/**
+ * Copy constructor. Creates a coxph_data object by copying the values
+ * of another one.
+ *
+ * \param obj Reference to the coxph_data object to be copied to the new
+ * object.
+ */
+coxph_data::coxph_data(const coxph_data &obj) : X(obj.X),
+                                                sstat(obj.sstat),
                                                 offset(obj.offset),
                                                 strata(obj.strata),
-                                                X(obj.X),
                                                 order(obj.order)
 {
     nids        = obj.nids;
@@ -83,6 +107,18 @@
     masked_data = obj.masked_data;
 }
 
+
+/**
+ * Constructor that fills a coxph_data object with phenotype and genotype
+ * data.
+ *
+ * @param phed Reference to a phedata object with phenotype data
+ * @param gend Reference to a gendata object with genotype data
+ * @param snpnum The number of the SNP in the genotype data object to
+ * be added to the design matrix regdata::X. When set to a number < 0
+ * no SNP data is added to the design matrix (e.g. when calculating
+ * the null model).
+ */
 coxph_data::coxph_data(const phedata &phed, const gendata &gend,
                        const int snpnum)
 {
@@ -117,7 +153,6 @@
     order.reinit(nids, 1);
     for (int i = 0; i < nids; i++)
     {
-        //          X.put(1.,i,0);
         stime[i] = (phed.Y).get(i, 0);
         sstat[i] = static_cast<int>((phed.Y).get(i, 1));
         if (sstat[i] != 1 && sstat[i] != 0)
@@ -159,7 +194,7 @@
     // sort by time
     double *tmptime = new double[nids];
     int *passed_sorted = new int[nids];
-    std::fill (passed_sorted, passed_sorted + nids, 0);
+    std::fill(passed_sorted, passed_sorted + nids, 0);
 
 
     for (int i = 0; i < nids; i++)
@@ -211,6 +246,19 @@
     delete[] passed_sorted;
 }
 
+
+/**
+ * \brief Update the SNP dosages/probabilities in the design matrix
+ * coxph_data::X.
+ *
+ * Adds the genetic information for a new SNP to the design
+ * matrix.
+ *
+ * @param gend Object that contains the genetic data from which the
+ * dosages/probabilities will be added to the design matrix.
+ * @param snpnum Number of the SNP for which the dosage/probability
+ * data will be extracted from the gend object.
+ */
 void coxph_data::update_snp(const gendata *gend, const int snpnum) {
     /*
      * This is the main part of the fix of bug #1846
@@ -227,7 +275,7 @@
      * 'ncov-j' changes to 'ncov-j-1'
      */
 
-    // reset counter for frequency since it is a new snp
+    // reset counter for frequency since it is a new SNP
     gcount = 0;
     freq   = 0.0;
 
@@ -266,10 +314,12 @@
 
 
 /**
- * update_snp() adds SNP information to the design matrix. This
- * function allows you to strip that information from X again. This
- * is used for example when calculating the null model.
+ * \brief Remove SNP information from the design matrix coxph_data::X.
  *
+ * coxph_data::update_snp() adds SNP information to the design
+ * matrix. This function allows you to strip that information from X
+ * again. This is used for example when calculating the null model.
+ *
  */
 void coxph_data::remove_snp_from_X()
 {
@@ -290,11 +340,20 @@
 }
 
 
-coxph_data::~coxph_data()
-{
-}
-
-
+/**
+ * \brief Create a new coxph_data object that contains only the
+ * non-masked data.
+ *
+ * The non-masked data is extracted according to the data in the
+ * regdata::masked_data array. The resulting regdata::nids corresponds
+ * to the number of IDs for which genotype data is present.
+ *
+ * Note that the regdata::masked_data array of the new object should
+ * contain only zeros (i.e. not masked).
+ *
+ * @return A new regdata object containing only the rows from
+ * regdata::X and regdata::Y for which genotype data is present.
+ */
 coxph_data coxph_data::get_unmasked_data() const
 {
     coxph_data to;
@@ -336,7 +395,7 @@
 }
 
 
-coxph_reg::coxph_reg(coxph_data &cdatain)
+coxph_reg::coxph_reg(const coxph_data &cdatain)
 {
     coxph_data cdata = cdatain.get_unmasked_data();
     beta.reinit(cdata.X.nrow, 1);
@@ -387,8 +446,6 @@
     // R's coxph()
     double sctest = 1.0;
 
-    // When using Eigen coxfit2 needs to be called in a slightly
-    // different way (i.e. the .data()-part needs to be added).
     coxfit2(&maxiter, &cdata.nids, &X.nrow, cdata.stime.data.data(),
             cdata.sstat.data.data(), X.data.data(), newoffset.data.data(),
             cdata.weights.data.data(), cdata.strata.data.data(),
@@ -430,7 +487,7 @@
         VectorXd infs = ueigen.transpose() * imateigen;
         VectorXd betaeigen = beta.data;
         if ( infs.norm() > eps ||
-            infs.norm() > sqrt(eps) * betaeigen.norm() )
+             infs.norm() > sqrt(eps) * betaeigen.norm() )
         {
             cerr << "Warning for " << snpinfo.name[cursnp]
                  << ": beta may be infinite,"
@@ -438,7 +495,6 @@
 
             setToZero = true;
         }
-
     }
 
     for (int i = 0; i < X.nrow; i++)

Modified: pkg/ProbABEL/src/coxph_data.h
===================================================================
--- pkg/ProbABEL/src/coxph_data.h	2014-04-28 16:28:42 UTC (rev 1711)
+++ pkg/ProbABEL/src/coxph_data.h	2014-04-28 16:59:21 UTC (rev 1712)
@@ -1,8 +1,15 @@
-/*
- * coxph_data.h
+/**
+ * \file coxph_data.h
+ * \author Yurii S. Aulchenko
+ * \author M. Kooyman
+ * \author L.C. Karssen
+ * \author Maksim V. Struchalin
  *
+ * \brief Describes the coxph_data and coxph_reg classes used in for
+ * Cox Proportional Hazards regression. The coxph_data class is
+ * similar to the regdata class.
+ *
  *  Created on: Mar 31, 2012
- *      Author: mkooyman
  *
  *
  * Copyright (C) 2009--2014 Various members of the GenABEL team. See
@@ -37,37 +44,138 @@
 #include "gendata.h"
 #include "phedata.h"
 
+
+/**
+ * \brief A coxph_data object contains the data used for Cox PH
+ * regression.
+ *
+ * The Cox regression code in coxfit2.c requires all data to be sorted
+ * based on follow-up time, which is done in the constructor.
+ *
+ * A similar class for linear and logistic regression can be found in
+ * the regdata class.
+ */
 class coxph_data {
  public:
-    int nids;
+    int nids;                   /**< \brief Number of IDs/samples.  */
+
+    /**
+     * \brief Number of covariates + possible nr of genomic
+     * predictors.
+     *
+     * If snpnum >=0 then this equals the number of covariates + the
+     * number of regdata::ngpreds.
+     */
     int ncov;
+
+   /**
+     * \brief Number of genomic predictors, 1 for dosage data, 2 for
+     * probability data.
+     */
     int ngpreds;
+
+    /**
+     * \brief Number of non-masked genotypes.
+     */
     unsigned int gcount;
+
+    /**
+     * \brief Allele frequency.
+     *
+     * Calculation is only based on non-masked SNPs.
+     */
     double freq;
+
+    /**
+     * \brief The design matrix.
+     *
+     * The matrix dimensions are coxph_data::nids x coxph_data::ncov.
+     * After filling the matrix is it is time-sorted based on
+     * coxph_data::order. Note that coxfit2() expects the data to be
+     * in column-major order, so at the end of the constructor the
+     * matrix is transposed!
+     */
+    mematrix<double> X;
+
+    /**
+     * \brief A vector that contains the follow-up times for each
+     * individual, as read from the phenotype file.
+     *
+     * The vector is coxph_data::nids long. After reading the data
+     * from the phenotype file this vector is time-sorted based on
+     * coxph_data::order.
+     */
+    mematrix<double> stime;
+
+    /**
+     * \brief A vector containing the survival status of each
+     * individual, as read from the phenotype file.
+     *
+     * Values should be 0 or 1. The vector is coxph_data::nids
+     * long. After reading the data from the phenotype file this
+     * vector is time-sorted based on coxph_data::order.
+     */
+    mematrix<int> sstat;
+
+    /**
+     * \brief A vector containing case weights. These are set to 1.0
+     * in the constructor.
+     *
+     * The vector is coxph_data::nids long. After reading the data
+     * from the phenotype file this vector is time-sorted based on
+     * coxph_data::order.
+     */
     mematrix<double> weights;
-    mematrix<double> stime;
-    mematrix<int>    sstat;
+
+    /**
+     * \brief A vector containing an offset for the linear
+     * predictor. Set to 0.0 in the constructor. This is a mandatory
+     * input for coxfit2().
+     *
+     * The vector is coxph_data::nids long. After reading the data
+     * from the phenotype file this vector is time-sorted based on
+     * coxph_data::order.
+     */
     mematrix<double> offset;
-    mematrix<int>    strata;
-    mematrix<double> X;
-    mematrix<int>    order;
+
+    /**
+     * \brief A vector containing strata. See coxfit2() for more
+     * details. All strata are set to 0 in the constructor.
+     *
+     * The vector is coxph_data::nids long. After reading the data
+     * from the phenotype file this vector is time-sorted based on
+     * coxph_data::order.
+     */
+    mematrix<int> strata;
+
+    /**
+     * \brief A vector used to store the follow-up times in ascending
+     * order. It is used to reorder the other vectors and
+     * coxph_data::X.
+     *
+     * The vector is coxph_data::nids long.
+     */
+    mematrix<int> order;
+
+    /**
+     * \brief A vector that contains ones/zeros to indicate which data points
+     * should be omitted because no genetic data is present.
+     *
+     * The vector is regdata::nids long. A value of 1 or 'true' means
+     * that that ID/sample will be masked because the SNP data is NA
+     * for that ID.
+     */
     std::vector<bool> masked_data;
 
 
-    coxph_data()
-    {
-        nids        = 0;
-        ncov        = 0;
-        ngpreds     = 0;
-        gcount      = 0;
-        freq        = 0;
-    }
-
+    // Constructors and destructors
+    coxph_data();
     coxph_data(const coxph_data &obj);
     coxph_data(const phedata &phed, const gendata &gend, const int snpnum);
-    ~coxph_data();
+    // ~coxph_data();
 
 
+    // Member functions
     coxph_data get_unmasked_data() const;
     void update_snp(const gendata *gend, const int snpnum);
     void remove_snp_from_X();
@@ -76,6 +184,7 @@
 
 class coxph_reg {
  public:
+    // Variables
     mematrix<double> beta;
     mematrix<double> sebeta;
     mematrix<double> residuals;
@@ -85,9 +194,11 @@
     int niter;
 
 
-    coxph_reg(coxph_data &cdatain);
+    // Constructors and destructors
+    coxph_reg(const coxph_data &cdatain);
 
 
+    // Member functions
     void estimate(const coxph_data &cdatain, int maxiter,
                   double eps, double tol_chol, const int model,
                   const int interaction, const int ngpreds, const bool iscox,

Modified: pkg/ProbABEL/src/regdata.cpp
===================================================================
--- pkg/ProbABEL/src/regdata.cpp	2014-04-28 16:28:42 UTC (rev 1711)
+++ pkg/ProbABEL/src/regdata.cpp	2014-04-28 16:59:21 UTC (rev 1712)
@@ -152,7 +152,7 @@
 
 
 /**
- * Update the SNP dosages/probabilities in the design matrix
+ * \brief Update the SNP dosages/probabilities in the design matrix
  * regdata::X.
  *
  * Adds the genetic information for a new SNP to the design
@@ -207,6 +207,8 @@
 
 
 /**
+ * \brief Remove SNP information from the design matrix regdata::X.
+ *
  * update_snp() adds SNP information to the design matrix. This
  * function allows you to strip that information from X again.
  * This is used for example when calculating the null model.
@@ -229,18 +231,10 @@
     }
 }
 
-//
-//regdata::~regdata()
-//{
-//    //delete[] regdata::masked_data;
-//    // delete X;
-//    // delete Y;
-//}
 
-
 /**
- * Create a new regdata object that contains only the non-masked
- * data.
+ * \brief Create a new regdata object that contains only the
+ * non-masked data.
  *
  * The non-masked data is extracted according to the data in the
  * regdata::masked_data array. The resulting regdata::nids corresponds

Modified: pkg/ProbABEL/src/regdata.h
===================================================================
--- pkg/ProbABEL/src/regdata.h	2014-04-28 16:28:42 UTC (rev 1711)
+++ pkg/ProbABEL/src/regdata.h	2014-04-28 16:59:21 UTC (rev 1712)
@@ -55,7 +55,7 @@
     int nids;                   /**< Number of IDs/samples. */
 
     /**
-     * Number of covariates + possible nr of genomic predictors.
+     * \brief Number of covariates + possible nr of genomic predictors.
      *
      * If snpnum >=0 then this equals the number of covariates + the
      * number of regdata::ngpreds.
@@ -63,19 +63,18 @@
     int ncov;
 
     /**
-     * Number of genomic predictors, 1 for dosage data, 2 for
+     * \brief Number of genomic predictors, 1 for dosage data, 2 for
      * probability data.
-     *
      */
     int ngpreds;
 
      /**
-      * Number of outcomes, taken from phedata::noutcomes.
+      * \brief Number of outcomes, taken from phedata::noutcomes.
       */
     int noutcomes;
 
     /**
-     * Boolean that indicates whether the command line option
+     * \brief Boolean that indicates whether the command line option
      * --interaction_only was set.
      *
      * See cmdvars::is_interaction_excluded.
@@ -83,8 +82,8 @@
     bool is_interaction_excluded;
 
     /**
-     * A vector that contains ones/zeros to indicate which data points
-     * should be omitted because no genetic data is present.
+     * \brief A vector that contains ones/zeros to indicate which data
+     * points should be omitted because no genetic data is present.
      *
      * The vector is regdata::nids long. A value of 1 or 'true' means
      * that that ID/sample will be masked because the SNP data is NA
@@ -93,26 +92,26 @@
     std::vector<bool> masked_data;
 
     /**
-     * Number of non-masked genotypes.
+     * \brief Number of non-masked genotypes.
      */
     unsigned int gcount;
 
     /**
-     * Allele frequency.
+     * \brief Allele frequency.
      *
      * Calculation is only based on non-masked SNPs.
      */
     double freq;
 
     /**
-     * The design matrix.
+     * \brief The design matrix.
      *
      * The matrix dimensions are regdata::nids x (regdata::ncov + 1)
      */
     mematrix<double> X;
 
     /**
-     * Matrix containing the phenotype data.
+     * \brief Matrix containing the phenotype data.
      *
      * The matrix dimensions are regdata::nids x
      * regdata::noutcomes.