[Genabel-commits] r1712 - pkg/ProbABEL/src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Apr 28 18:59:21 CEST 2014
Author: lckarssen
Date: 2014-04-28 18:59:21 +0200 (Mon, 28 Apr 2014)
New Revision: 1712
Modified:
pkg/ProbABEL/src/coxph_data.cpp
pkg/ProbABEL/src/coxph_data.h
pkg/ProbABEL/src/regdata.cpp
pkg/ProbABEL/src/regdata.h
Log:
Added and updated Doxygen documentation for the regdata and coxph_data classes.
Moved the default coxph_data() constructor from the .h file to the .cpp file. Removed the empty destructor.
Modified: pkg/ProbABEL/src/coxph_data.cpp
===================================================================
--- pkg/ProbABEL/src/coxph_data.cpp 2014-04-28 16:28:42 UTC (rev 1711)
+++ pkg/ProbABEL/src/coxph_data.cpp 2014-04-28 16:59:21 UTC (rev 1712)
@@ -45,7 +45,10 @@
#include "fvlib/Logger.h"
#include "fvlib/Transposer.h"
-// compare for sort of times
+/**
+ * Definition of the compare function used for the sort of times. Used
+ * by the qsort() function.
+ */
int cmpfun(const void *a, const void *b)
{
double el1 = *(double*) a;
@@ -67,10 +70,31 @@
return -9;
}
-coxph_data::coxph_data(const coxph_data &obj) : sstat(obj.sstat),
+
+/**
+ * Constructor. Initialises all values to zero.
+ */
+coxph_data::coxph_data()
+{
+ nids = 0;
+ ncov = 0;
+ ngpreds = 0;
+ gcount = 0;
+ freq = 0;
+}
+
+
+/**
+ * Copy constructor. Creates a coxph_data object by copying the values
+ * of another one.
+ *
+ * \param obj Reference to the coxph_data object to be copied to the new
+ * object.
+ */
+coxph_data::coxph_data(const coxph_data &obj) : X(obj.X),
+ sstat(obj.sstat),
offset(obj.offset),
strata(obj.strata),
- X(obj.X),
order(obj.order)
{
nids = obj.nids;
@@ -83,6 +107,18 @@
masked_data = obj.masked_data;
}
+
+/**
+ * Constructor that fills a coxph_data object with phenotype and genotype
+ * data.
+ *
+ * @param phed Reference to a phedata object with phenotype data
+ * @param gend Reference to a gendata object with genotype data
+ * @param snpnum The number of the SNP in the genotype data object to
+ * be added to the design matrix regdata::X. When set to a number < 0
+ * no SNP data is added to the design matrix (e.g. when calculating
+ * the null model).
+ */
coxph_data::coxph_data(const phedata &phed, const gendata &gend,
const int snpnum)
{
@@ -117,7 +153,6 @@
order.reinit(nids, 1);
for (int i = 0; i < nids; i++)
{
- // X.put(1.,i,0);
stime[i] = (phed.Y).get(i, 0);
sstat[i] = static_cast<int>((phed.Y).get(i, 1));
if (sstat[i] != 1 && sstat[i] != 0)
@@ -159,7 +194,7 @@
// sort by time
double *tmptime = new double[nids];
int *passed_sorted = new int[nids];
- std::fill (passed_sorted, passed_sorted + nids, 0);
+ std::fill(passed_sorted, passed_sorted + nids, 0);
for (int i = 0; i < nids; i++)
@@ -211,6 +246,19 @@
delete[] passed_sorted;
}
+
+/**
+ * \brief Update the SNP dosages/probabilities in the design matrix
+ * coxph_data::X.
+ *
+ * Adds the genetic information for a new SNP to the design
+ * matrix.
+ *
+ * @param gend Object that contains the genetic data from which the
+ * dosages/probabilities will be added to the design matrix.
+ * @param snpnum Number of the SNP for which the dosage/probability
+ * data will be extracted from the gend object.
+ */
void coxph_data::update_snp(const gendata *gend, const int snpnum) {
/*
* This is the main part of the fix of bug #1846
@@ -227,7 +275,7 @@
* 'ncov-j' changes to 'ncov-j-1'
*/
- // reset counter for frequency since it is a new snp
+ // reset counter for frequency since it is a new SNP
gcount = 0;
freq = 0.0;
@@ -266,10 +314,12 @@
/**
- * update_snp() adds SNP information to the design matrix. This
- * function allows you to strip that information from X again. This
- * is used for example when calculating the null model.
+ * \brief Remove SNP information from the design matrix coxph_data::X.
*
+ * coxph_data::update_snp() adds SNP information to the design
+ * matrix. This function allows you to strip that information from X
+ * again. This is used for example when calculating the null model.
+ *
*/
void coxph_data::remove_snp_from_X()
{
@@ -290,11 +340,20 @@
}
-coxph_data::~coxph_data()
-{
-}
-
-
+/**
+ * \brief Create a new coxph_data object that contains only the
+ * non-masked data.
+ *
+ * The non-masked data is extracted according to the data in the
+ * regdata::masked_data array. The resulting regdata::nids corresponds
+ * to the number of IDs for which genotype data is present.
+ *
+ * Note that the regdata::masked_data array of the new object should
+ * contain only zeros (i.e. not masked).
+ *
+ * @return A new regdata object containing only the rows from
+ * regdata::X and regdata::Y for which genotype data is present.
+ */
coxph_data coxph_data::get_unmasked_data() const
{
coxph_data to;
@@ -336,7 +395,7 @@
}
-coxph_reg::coxph_reg(coxph_data &cdatain)
+coxph_reg::coxph_reg(const coxph_data &cdatain)
{
coxph_data cdata = cdatain.get_unmasked_data();
beta.reinit(cdata.X.nrow, 1);
@@ -387,8 +446,6 @@
// R's coxph()
double sctest = 1.0;
- // When using Eigen coxfit2 needs to be called in a slightly
- // different way (i.e. the .data()-part needs to be added).
coxfit2(&maxiter, &cdata.nids, &X.nrow, cdata.stime.data.data(),
cdata.sstat.data.data(), X.data.data(), newoffset.data.data(),
cdata.weights.data.data(), cdata.strata.data.data(),
@@ -430,7 +487,7 @@
VectorXd infs = ueigen.transpose() * imateigen;
VectorXd betaeigen = beta.data;
if ( infs.norm() > eps ||
- infs.norm() > sqrt(eps) * betaeigen.norm() )
+ infs.norm() > sqrt(eps) * betaeigen.norm() )
{
cerr << "Warning for " << snpinfo.name[cursnp]
<< ": beta may be infinite,"
@@ -438,7 +495,6 @@
setToZero = true;
}
-
}
for (int i = 0; i < X.nrow; i++)
Modified: pkg/ProbABEL/src/coxph_data.h
===================================================================
--- pkg/ProbABEL/src/coxph_data.h 2014-04-28 16:28:42 UTC (rev 1711)
+++ pkg/ProbABEL/src/coxph_data.h 2014-04-28 16:59:21 UTC (rev 1712)
@@ -1,8 +1,15 @@
-/*
- * coxph_data.h
+/**
+ * \file coxph_data.h
+ * \author Yurii S. Aulchenko
+ * \author M. Kooyman
+ * \author L.C. Karssen
+ * \author Maksim V. Struchalin
*
+ * \brief Describes the coxph_data and coxph_reg classes used in for
+ * Cox Proportional Hazards regression. The coxph_data class is
+ * similar to the regdata class.
+ *
* Created on: Mar 31, 2012
- * Author: mkooyman
*
*
* Copyright (C) 2009--2014 Various members of the GenABEL team. See
@@ -37,37 +44,138 @@
#include "gendata.h"
#include "phedata.h"
+
+/**
+ * \brief A coxph_data object contains the data used for Cox PH
+ * regression.
+ *
+ * The Cox regression code in coxfit2.c requires all data to be sorted
+ * based on follow-up time, which is done in the constructor.
+ *
+ * A similar class for linear and logistic regression can be found in
+ * the regdata class.
+ */
class coxph_data {
public:
- int nids;
+ int nids; /**< \brief Number of IDs/samples. */
+
+ /**
+ * \brief Number of covariates + possible nr of genomic
+ * predictors.
+ *
+ * If snpnum >=0 then this equals the number of covariates + the
+ * number of regdata::ngpreds.
+ */
int ncov;
+
+ /**
+ * \brief Number of genomic predictors, 1 for dosage data, 2 for
+ * probability data.
+ */
int ngpreds;
+
+ /**
+ * \brief Number of non-masked genotypes.
+ */
unsigned int gcount;
+
+ /**
+ * \brief Allele frequency.
+ *
+ * Calculation is only based on non-masked SNPs.
+ */
double freq;
+
+ /**
+ * \brief The design matrix.
+ *
+ * The matrix dimensions are coxph_data::nids x coxph_data::ncov.
+ * After filling the matrix is it is time-sorted based on
+ * coxph_data::order. Note that coxfit2() expects the data to be
+ * in column-major order, so at the end of the constructor the
+ * matrix is transposed!
+ */
+ mematrix<double> X;
+
+ /**
+ * \brief A vector that contains the follow-up times for each
+ * individual, as read from the phenotype file.
+ *
+ * The vector is coxph_data::nids long. After reading the data
+ * from the phenotype file this vector is time-sorted based on
+ * coxph_data::order.
+ */
+ mematrix<double> stime;
+
+ /**
+ * \brief A vector containing the survival status of each
+ * individual, as read from the phenotype file.
+ *
+ * Values should be 0 or 1. The vector is coxph_data::nids
+ * long. After reading the data from the phenotype file this
+ * vector is time-sorted based on coxph_data::order.
+ */
+ mematrix<int> sstat;
+
+ /**
+ * \brief A vector containing case weights. These are set to 1.0
+ * in the constructor.
+ *
+ * The vector is coxph_data::nids long. After reading the data
+ * from the phenotype file this vector is time-sorted based on
+ * coxph_data::order.
+ */
mematrix<double> weights;
- mematrix<double> stime;
- mematrix<int> sstat;
+
+ /**
+ * \brief A vector containing an offset for the linear
+ * predictor. Set to 0.0 in the constructor. This is a mandatory
+ * input for coxfit2().
+ *
+ * The vector is coxph_data::nids long. After reading the data
+ * from the phenotype file this vector is time-sorted based on
+ * coxph_data::order.
+ */
mematrix<double> offset;
- mematrix<int> strata;
- mematrix<double> X;
- mematrix<int> order;
+
+ /**
+ * \brief A vector containing strata. See coxfit2() for more
+ * details. All strata are set to 0 in the constructor.
+ *
+ * The vector is coxph_data::nids long. After reading the data
+ * from the phenotype file this vector is time-sorted based on
+ * coxph_data::order.
+ */
+ mematrix<int> strata;
+
+ /**
+ * \brief A vector used to store the follow-up times in ascending
+ * order. It is used to reorder the other vectors and
+ * coxph_data::X.
+ *
+ * The vector is coxph_data::nids long.
+ */
+ mematrix<int> order;
+
+ /**
+ * \brief A vector that contains ones/zeros to indicate which data points
+ * should be omitted because no genetic data is present.
+ *
+ * The vector is regdata::nids long. A value of 1 or 'true' means
+ * that that ID/sample will be masked because the SNP data is NA
+ * for that ID.
+ */
std::vector<bool> masked_data;
- coxph_data()
- {
- nids = 0;
- ncov = 0;
- ngpreds = 0;
- gcount = 0;
- freq = 0;
- }
-
+ // Constructors and destructors
+ coxph_data();
coxph_data(const coxph_data &obj);
coxph_data(const phedata &phed, const gendata &gend, const int snpnum);
- ~coxph_data();
+ // ~coxph_data();
+ // Member functions
coxph_data get_unmasked_data() const;
void update_snp(const gendata *gend, const int snpnum);
void remove_snp_from_X();
@@ -76,6 +184,7 @@
class coxph_reg {
public:
+ // Variables
mematrix<double> beta;
mematrix<double> sebeta;
mematrix<double> residuals;
@@ -85,9 +194,11 @@
int niter;
- coxph_reg(coxph_data &cdatain);
+ // Constructors and destructors
+ coxph_reg(const coxph_data &cdatain);
+ // Member functions
void estimate(const coxph_data &cdatain, int maxiter,
double eps, double tol_chol, const int model,
const int interaction, const int ngpreds, const bool iscox,
Modified: pkg/ProbABEL/src/regdata.cpp
===================================================================
--- pkg/ProbABEL/src/regdata.cpp 2014-04-28 16:28:42 UTC (rev 1711)
+++ pkg/ProbABEL/src/regdata.cpp 2014-04-28 16:59:21 UTC (rev 1712)
@@ -152,7 +152,7 @@
/**
- * Update the SNP dosages/probabilities in the design matrix
+ * \brief Update the SNP dosages/probabilities in the design matrix
* regdata::X.
*
* Adds the genetic information for a new SNP to the design
@@ -207,6 +207,8 @@
/**
+ * \brief Remove SNP information from the design matrix regdata::X.
+ *
* update_snp() adds SNP information to the design matrix. This
* function allows you to strip that information from X again.
* This is used for example when calculating the null model.
@@ -229,18 +231,10 @@
}
}
-//
-//regdata::~regdata()
-//{
-// //delete[] regdata::masked_data;
-// // delete X;
-// // delete Y;
-//}
-
/**
- * Create a new regdata object that contains only the non-masked
- * data.
+ * \brief Create a new regdata object that contains only the
+ * non-masked data.
*
* The non-masked data is extracted according to the data in the
* regdata::masked_data array. The resulting regdata::nids corresponds
Modified: pkg/ProbABEL/src/regdata.h
===================================================================
--- pkg/ProbABEL/src/regdata.h 2014-04-28 16:28:42 UTC (rev 1711)
+++ pkg/ProbABEL/src/regdata.h 2014-04-28 16:59:21 UTC (rev 1712)
@@ -55,7 +55,7 @@
int nids; /**< Number of IDs/samples. */
/**
- * Number of covariates + possible nr of genomic predictors.
+ * \brief Number of covariates + possible nr of genomic predictors.
*
* If snpnum >=0 then this equals the number of covariates + the
* number of regdata::ngpreds.
@@ -63,19 +63,18 @@
int ncov;
/**
- * Number of genomic predictors, 1 for dosage data, 2 for
+ * \brief Number of genomic predictors, 1 for dosage data, 2 for
* probability data.
- *
*/
int ngpreds;
/**
- * Number of outcomes, taken from phedata::noutcomes.
+ * \brief Number of outcomes, taken from phedata::noutcomes.
*/
int noutcomes;
/**
- * Boolean that indicates whether the command line option
+ * \brief Boolean that indicates whether the command line option
* --interaction_only was set.
*
* See cmdvars::is_interaction_excluded.
@@ -83,8 +82,8 @@
bool is_interaction_excluded;
/**
- * A vector that contains ones/zeros to indicate which data points
- * should be omitted because no genetic data is present.
+ * \brief A vector that contains ones/zeros to indicate which data
+ * points should be omitted because no genetic data is present.
*
* The vector is regdata::nids long. A value of 1 or 'true' means
* that that ID/sample will be masked because the SNP data is NA
@@ -93,26 +92,26 @@
std::vector<bool> masked_data;
/**
- * Number of non-masked genotypes.
+ * \brief Number of non-masked genotypes.
*/
unsigned int gcount;
/**
- * Allele frequency.
+ * \brief Allele frequency.
*
* Calculation is only based on non-masked SNPs.
*/
double freq;
/**
- * The design matrix.
+ * \brief The design matrix.
*
* The matrix dimensions are regdata::nids x (regdata::ncov + 1)
*/
mematrix<double> X;
/**
- * Matrix containing the phenotype data.
+ * \brief Matrix containing the phenotype data.
*
* The matrix dimensions are regdata::nids x
* regdata::noutcomes.
More information about the Genabel-commits
mailing list