[Genabel-commits] r1718 - in branches/ProbABEL-pvals/ProbABEL: . checks/R-tests doc src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Apr 30 14:16:36 CEST 2014
Author: lckarssen
Date: 2014-04-30 14:16:35 +0200 (Wed, 30 Apr 2014)
New Revision: 1718
Removed:
branches/ProbABEL-pvals/ProbABEL/src/cholesky.cpp
branches/ProbABEL-pvals/ProbABEL/src/cholesky.h
branches/ProbABEL-pvals/ProbABEL/src/mematri1.h
branches/ProbABEL-pvals/ProbABEL/src/mematrix.h
Modified:
branches/ProbABEL-pvals/ProbABEL/checks/R-tests/Makefile.am
branches/ProbABEL-pvals/ProbABEL/configure.ac
branches/ProbABEL-pvals/ProbABEL/doc/INSTALL
branches/ProbABEL-pvals/ProbABEL/src/Makefile.am
branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.cpp
branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.h
branches/ProbABEL-pvals/ProbABEL/src/coxph_data.cpp
branches/ProbABEL-pvals/ProbABEL/src/coxph_data.h
branches/ProbABEL-pvals/ProbABEL/src/data.cpp
branches/ProbABEL-pvals/ProbABEL/src/eigen_mematrix.cpp
branches/ProbABEL-pvals/ProbABEL/src/eigen_mematrix.h
branches/ProbABEL-pvals/ProbABEL/src/fvlib
branches/ProbABEL-pvals/ProbABEL/src/gendata.cpp
branches/ProbABEL-pvals/ProbABEL/src/gendata.h
branches/ProbABEL-pvals/ProbABEL/src/main.cpp
branches/ProbABEL-pvals/ProbABEL/src/main_functions_dump.cpp
branches/ProbABEL-pvals/ProbABEL/src/maskedmatrix.cpp
branches/ProbABEL-pvals/ProbABEL/src/maskedmatrix.h
branches/ProbABEL-pvals/ProbABEL/src/phedata.cpp
branches/ProbABEL-pvals/ProbABEL/src/phedata.h
branches/ProbABEL-pvals/ProbABEL/src/reg1.cpp
branches/ProbABEL-pvals/ProbABEL/src/reg1.h
branches/ProbABEL-pvals/ProbABEL/src/regdata.cpp
branches/ProbABEL-pvals/ProbABEL/src/regdata.h
branches/ProbABEL-pvals/ProbABEL/src/testchol.cpp
branches/ProbABEL-pvals/ProbABEL/src/usage.cpp
Log:
Merged trunk r1716 into the ProbABEL-pvals branch.
Code compiles, but doesn't pass all checks yet (it didn't pass them before this commit either, so nothing's lost).
Modified: branches/ProbABEL-pvals/ProbABEL/checks/R-tests/Makefile.am
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/checks/R-tests/Makefile.am 2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/checks/R-tests/Makefile.am 2014-04-30 12:16:35 UTC (rev 1718)
@@ -30,11 +30,6 @@
## The palogist R test still doesn't run correctly.
XFAIL_TESTS = run_R_test_palogist.sh
-## The pacoxph R test fails on SNP 6 when EIGEN is not enabled.
-if !WITH_EIGEN
-XFAIL_TESTS += run_R_test_pacox.sh
-endif
-
EXTRA_DIST = $(check_SCRIPTS) $(R_test_files)
Modified: branches/ProbABEL-pvals/ProbABEL/configure.ac
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/configure.ac 2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/configure.ac 2014-04-30 12:16:35 UTC (rev 1718)
@@ -19,20 +19,45 @@
AM_PROG_CC_C_O
AC_PROG_INSTALL
AC_PROG_LN_S
-if test -z "$CXXFLAGS"; then
- # User did not set CXXFLAGS, so we can put in our own defaults
- CXXFLAGS="-g -O2 -DNDEBUG -Wextra"
+
+
+# Add an option to enable some default compiler options used during
+# development
+AC_ARG_ENABLE([dev-options],
+ [AS_HELP_STRING([--enable-dev-options], [enable compile options
+ for developers])],
+ [devoptions=yes],
+ [devoptions=no])
+
+if test "x$devoptions" = "xyes"; then
+ AC_MSG_NOTICE([Building with developer options enabled])
+ if test -z "$CXXFLAGS"; then
+ # User did not set CXXFLAGS, so we can put in our own defaults
+ CXXFLAGS=""
+ fi
+ if test -z "$CPPFLAGS"; then
+ # User did not set CPPFLAGS, so we can put in our own defaults
+ CPPFLAGS="-Wall"
+ fi
+ CXXFLAGS+=" -g -Wextra"
fi
-if test -z "$CPPFLAGS"; then
- # User did not set CPPFLAGS, so we can put in our own defaults
- CPPFLAGS="-Wall"
+
+if test "x$devoptions" = "xno"; then
+ if test -z "$CXXFLAGS"; then
+ # User did not set CXXFLAGS, so we can put in our own defaults
+ CXXFLAGS="-O2 -DNDEBUG"
+ fi
+ if test -z "$CPPFLAGS"; then
+ # User did not set CPPFLAGS, so we can put in our own defaults
+ CPPFLAGS+="-Wall"
+ fi
fi
+
# If CXXFLAGS/CPPFLAGS are already set AC_PROG_CXX will not overwrite them
# with its own defaults
AC_PROG_CXX
-
-#Tell compiler to build not R version of filevector
+# Tell the compiler to build not R version of filevector
CXXFLAGS+=" -D_NOT_R_FILEVECTOR"
@@ -48,40 +73,22 @@
stdint.h stdlib.h string.h sys/param.h \
wchar.h wctype.h])
+# Option that lets the user specify a path to the EIGEN include files.
+AC_ARG_WITH([eigen-include-path],
+ [AS_HELP_STRING([--with-eigen-include-path],
+ [location of the Eigen headers, defaults to /usr/include/eigen3])],
+ [CXXFLAGS+=" -I${withval}"
+ CPPFLAGS+=" -I${withval}"],
+ [CXXFLAGS+=' -I/usr/include/eigen3'
+ CPPFLAGS+=' -I/usr/include/eigen3'])
-# See if we want use of the Eigen library enabled (yes by default) and if so,
-# whether we can find the library files.
-AC_ARG_WITH([eigen],
- AS_HELP_STRING([--with-eigen], [Use the Eigen template library for fast \
- linear algebra (Eigen can be downloaded \
- from eigen.tuxfamily.org); this is enabled \
- by default]
- )
-)
+# Check for the EIGEN header files
+AC_CHECK_HEADERS([Eigen/Dense Eigen/LU])
-if test "x$with_eigen" != "xno"; then
- AC_MSG_NOTICE([building using the Eigen headers enabled])
-
- AC_ARG_WITH([eigen-include-path],
- [AS_HELP_STRING([--with-eigen-include-path],
- [location of the Eigen headers, defaults to /usr/include/eigen3])],
- [CXXFLAGS+=" -I${withval}"
- CPPFLAGS+=" -I${withval}"],
- [CXXFLAGS+=' -I/usr/include/eigen3'
- CPPFLAGS+=' -I/usr/include/eigen3'])
-
- # Check for the EIGEN header files
- AC_CHECK_HEADERS([Eigen/Dense Eigen/LU])
-
- if test x$ac_cv_header_Eigen_Dense = xno; then
- AC_MSG_ERROR([Could not find the Eigen header files. Did you specify \
---with-eigen-include-path correctly? Or use --without-eigen \
-to disable use of fast linear algebra.])
- fi
-else
- AC_MSG_NOTICE([not using Eigen for linear algebra])
+if test x$ac_cv_header_Eigen_Dense = xno; then
+ AC_MSG_ERROR([Could not find the Eigen header files. Did you specify \
+--with-eigen-include-path correctly?])
fi
-AM_CONDITIONAL([WITH_EIGEN], test "x$with_eigen" != "xno")
# See if we can use the Boost libraries for calculating p-values
Modified: branches/ProbABEL-pvals/ProbABEL/doc/INSTALL
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/doc/INSTALL 2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/doc/INSTALL 2014-04-30 12:16:35 UTC (rev 1718)
@@ -22,8 +22,10 @@
If ./configure cannot find the Eigen library it will say so. To
disable the use of Eigen (even if the library is present) use:
./configure --without-eigen
+ Note that we do not recommend building ProbABEL without EIGEN as
+ the non-EIGEN code is tested less well and we are preparing to
+ phase out the non-EIGEN code.
-
* Compiling for Linux
If you downloaded the source from SVN (this is not necessary when
installing from the distributed .tar.gz file), run:
@@ -49,7 +51,7 @@
probabel_config.cfg.example. For probabel to work please rename
this file to probabel_config.cfg.
- To see options, run
+ To see additional options, run
./configure --help
The most notable option is
@@ -64,6 +66,16 @@
* Options for developers
+ Some default development compiler flags can be enabled by running
+ configure like this:
+./configure --enable-dev-options
+ The actual development values of CXXFLAGS and CPPFLAGS can be seen in
+ configure.ac or by adding --disable-silent-rules to ./configure. If
+ you want to specify your own flags run configure like this:
+CXXFLAGS="-O3 -Wextra" CPPFLAGS="-Wall" ./configure
+ or later, during the make process:
+make CXXFLAGS="-O3 -Wextra" CPPFLAGS="-Wall"
+
To generate a .tar.gz package for distribution run:
./configure
make dist
Modified: branches/ProbABEL-pvals/ProbABEL/src/Makefile.am
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/Makefile.am 2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/Makefile.am 2014-04-30 12:16:35 UTC (rev 1718)
@@ -3,14 +3,13 @@
## Using wildcards in these lists doesn't work. Also GNU make's
## ($wildcard,) doesn't work. It gives warning message about
## portability, but in the end doesn't work, I tried :-).
-REGFILES = data.h data.cpp gendata.h gendata.cpp mematrix.h \
- mematri1.h command_line_settings.h command_line_settings.cpp reg1.h \
- usage.h usage.cpp main.cpp utilities.h utilities.cpp phedata.h \
- phedata.cpp cholesky.h cholesky.cpp regdata.h regdata.cpp \
- maskedmatrix.cpp maskedmatrix.h reg1.cpp main_functions_dump.h \
+REGFILES = data.h data.cpp gendata.h gendata.cpp eigen_mematrix.h \
+ eigen_mematrix.cpp command_line_settings.h command_line_settings.cpp \
+ reg1.h usage.h usage.cpp main.cpp utilities.h utilities.cpp \
+ phedata.h phedata.cpp regdata.h regdata.cpp maskedmatrix.cpp \
+ maskedmatrix.h reg1.cpp main_functions_dump.h \
main_functions_dump.cpp
-EIGENFILES = eigen_mematrix.h eigen_mematrix.cpp
## Filevector files:
FVSRC = fvlib/AbstractMatrix.cpp fvlib/CastUtils.cpp \
@@ -55,28 +54,19 @@
palinear_SOURCES = $(REGFILES) $(FVSRC) $(FVHEADERS)
palinear_CXXFLAGS = -DLINEAR $(AM_CXXFLAGS)
palinear_CPPFLAGS = $(AM_CPPFLAGS)
-if WITH_EIGEN
palinear_SOURCES += $(EIGENFILES)
-palinear_CXXFLAGS += -DEIGEN
-endif
palogist_SOURCES = $(REGFILES) $(FVSRC) $(FVHEADERS)
palogist_CXXFLAGS = -DLOGISTIC $(AM_CXXFLAGS)
palogist_CPPFLAGS = $(AM_CPPFLAGS)
-if WITH_EIGEN
palogist_SOURCES += $(EIGENFILES)
-palogist_CXXFLAGS += -DEIGEN
-endif
pacoxph_SOURCES = $(COXSRC) $(REGFILES) $(FVSRC) $(FVHEADERS) \
$(RHEADERS) survS.h survproto.h coxph_data.h coxph_data.cpp
pacoxph_CXXFLAGS = -DCOXPH -I $(top_srcdir)/src/include $(AM_CXXFLAGS)
pacoxph_CPPFLAGS = $(AM_CPPFLAGS)
pacoxph_CFLAGS = -DCOXPH -I $(top_srcdir)/src/include $(AM_CFLAGS)
-if WITH_EIGEN
pacoxph_SOURCES += $(EIGENFILES)
-pacoxph_CXXFLAGS += -DEIGEN
-endif
if WITH_BOOST
palinear_CXXFLAGS += -DWITH_BOOST
Deleted: branches/ProbABEL-pvals/ProbABEL/src/cholesky.cpp
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/cholesky.cpp 2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/cholesky.cpp 2014-04-30 12:16:35 UTC (rev 1718)
@@ -1,154 +0,0 @@
-/*
- * cholesky.cpp
- *
- * Created on: Mar 15, 2012
- * Author: mkooyman
- */
-#include <string>
-#include <cstdarg>
-#include <cstdio>
-#include <cstdlib>
-
-#if EIGEN
-#include "eigen_mematrix.h"
-#include "eigen_mematrix.cpp"
-#else
-#include "mematrix.h"
-#include "mematri1.h"
-#endif
-
-
-/* SCCS @(#)cholesky2.c 5.2 10/27/98
- ** subroutine to do Cholesky decompostion on a matrix: C = FDF'
- ** where F is lower triangular with 1's on the diagonal, and D is diagonal
- **
- ** arguments are:
- ** n the size of the matrix to be factored
- ** **matrix a ragged array containing an n by n submatrix to be factored
- ** toler the threshold value for detecting "singularity"
- **
- ** The factorization is returned in the lower triangle, D occupies the
- ** diagonal and the upper triangle is left undisturbed.
- ** The lower triangle need not be filled in at the start.
- **
- ** Return value: the rank of the matrix (non-negative definite), or -rank
- ** it not SPD or NND
- **
- ** If a column is deemed to be redundant, then that diagonal is set to zero.
- **
- ** Terry Therneau
- */
-
-// modified Yurii Aulchenko 2008-05-20
-int cholesky2_mm(mematrix<double> &matrix, double toler)
-{
- if (matrix.ncol != matrix.nrow)
- {
- fprintf(stderr, "cholesky2_mm: matrix should be square\n");
- exit(1);
- }
- int n = matrix.ncol;
- double temp;
- int i, j, k;
- double eps, pivot;
- int rank;
- int nonneg;
-
- nonneg = 1;
- eps = 0;
- for (i = 0; i < n; i++)
- {
- if (matrix[i * n + i] > eps)
- eps = matrix[i * n + i];
- for (j = (i + 1); j < n; j++)
- matrix[j * n + i] = matrix[i * n + j];
- }
- eps *= toler;
-
- rank = 0;
- for (i = 0; i < n; i++)
- {
- pivot = matrix[i * n + i];
- if (pivot < eps)
- {
- matrix[i * n + i] = 0;
- if (pivot < -8 * eps)
- nonneg = -1;
- }
- else
- {
- rank++;
- for (j = (i + 1); j < n; j++)
- {
- temp = matrix[j * n + i] / pivot;
- matrix[j * n + i] = temp;
- matrix[j * n + j] -= temp * temp * pivot;
- for (k = (j + 1); k < n; k++)
- matrix[k * n + j] -= temp * matrix[k * n + i];
- }
- }
- }
- return (rank * nonneg);
-}
-
-void chinv2_mm(mematrix<double> &matrix)
-{
- if (matrix.ncol != matrix.nrow)
- {
- fprintf(stderr, "cholesky2_mm: matrix should be square\n");
- exit(1);
- }
-
- int n = matrix.ncol;
- double temp;
- int i, j, k;
-
- /*
- ** invert the cholesky in the lower triangle
- ** take full advantage of the cholesky's diagonal of 1's
- */
- for (i = 0; i < n; i++)
- {
- if (matrix[i * n + i] > 0)
- {
- matrix[i * n + i] = 1 / matrix[i * n + i]; /*this line inverts D */
- for (j = (i + 1); j < n; j++)
- {
- matrix[j * n + i] = -matrix[j * n + i];
- for (k = 0; k < i; k++) /*sweep operator */
- matrix[j * n + k] += matrix[j * n + i] * matrix[i * n + k];
- }
- }
- }
-
- /*
- ** lower triangle now contains inverse of cholesky
- ** calculate F'DF (inverse of cholesky decomp process) to get inverse
- ** of original matrix
- */
- for (i = 0; i < n; i++)
- {
- if (matrix[i * n + i] == 0)
- { /* singular row */
- for (j = 0; j < i; j++)
- matrix[j * n + i] = 0;
- for (j = i; j < n; j++)
- matrix[i * n + j] = 0;
- }
- else
- {
- for (j = (i + 1); j < n; j++)
- {
- temp = matrix[j * n + i] * matrix[j * n + j];
- if (j != i)
- matrix[i * n + j] = temp;
- for (k = i; k < j; k++)
- matrix[i * n + k] += temp * matrix[j * n + k];
- }
- }
- }
- // ugly fix to return only inverse
- for (int col = 1; col < n; col++)
- for (int row = 0; row < col; row++)
- matrix[col * n + row] = matrix[row * n + col];
-}
Deleted: branches/ProbABEL-pvals/ProbABEL/src/cholesky.h
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/cholesky.h 2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/cholesky.h 2014-04-30 12:16:35 UTC (rev 1718)
@@ -1,21 +0,0 @@
-/*
- * cholesky.h
- *
- * Created on: Mar 15, 2012
- * Author: mkooyman
- */
-
-#ifndef CHOLESKY_H_
-#define CHOLESKY_H_
-
-#if EIGEN
-#include "eigen_mematrix.h"
-#include "eigen_mematrix.cpp"
-#else
-#include "mematrix.h"
-#endif
-
-int cholesky2_mm(mematrix<double> &matrix, double toler);
-void chinv2_mm(mematrix<double> &matrix);
-
-#endif /* CHOLESKY_H_ */
Modified: branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.cpp
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.cpp 2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.cpp 2014-04-30 12:16:35 UTC (rev 1718)
@@ -31,9 +31,7 @@
#include <iostream>
#include "usage.h"
#include "command_line_settings.h"
-#if EIGEN
#include "eigen_mematrix.h"
-#endif
// config.h and fvlib/FileVector.h are included for the upper case variables
#if HAVE_CONFIG_H
@@ -152,6 +150,12 @@
}
+/**
+ * Process the command line arguments and save them in a cmdvars object.
+ *
+ * @param argc Number of command line arguments
+ * @param argv Values of the command line arguments
+ */
void cmdvars::set_variables(int argc, char * argv[])
{
int next_option;
Modified: branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.h
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.h 2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.h 2014-04-30 12:16:35 UTC (rev 1718)
@@ -3,8 +3,8 @@
*
* Created on: Apr 2; int 2012
* Author: mkooyman
-*
*
+ *
* Copyright (C) 2009--2014 Various members of the GenABEL team. See
* the SVN commit logs for more details.
*
Modified: branches/ProbABEL-pvals/ProbABEL/src/coxph_data.cpp
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/coxph_data.cpp 2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/coxph_data.cpp 2014-04-30 12:16:35 UTC (rev 1718)
@@ -28,6 +28,7 @@
#include "coxph_data.h"
#include <iostream>
+#include <algorithm>
#include <cmath>
extern "C" {
#include "survproto.h"
@@ -44,7 +45,10 @@
#include "fvlib/Logger.h"
#include "fvlib/Transposer.h"
-// compare for sort of times
+/**
+ * Definition of the compare function used for the sort of times. Used
+ * by the qsort() function.
+ */
int cmpfun(const void *a, const void *b)
{
double el1 = *(double*) a;
@@ -66,34 +70,63 @@
return -9;
}
-coxph_data::coxph_data(const coxph_data &obj) : sstat(obj.sstat),
+
+/**
+ * Constructor. Initialises all values to zero.
+ */
+coxph_data::coxph_data()
+{
+ nids = 0;
+ ncov = 0;
+ ngpreds = 0;
+ gcount = 0;
+ freq = 0;
+}
+
+
+/**
+ * Copy constructor. Creates a coxph_data object by copying the values
+ * of another one.
+ *
+ * \param obj Reference to the coxph_data object to be copied to the new
+ * object.
+ */
+coxph_data::coxph_data(const coxph_data &obj) : X(obj.X),
+ stime(obj.stime),
+ sstat(obj.sstat),
+ weights(obj.weights),
offset(obj.offset),
strata(obj.strata),
- X(obj.X),
- order(obj.order)
+ order(obj.order),
+ masked_data(obj.masked_data)
{
nids = obj.nids;
ncov = obj.ncov;
ngpreds = obj.ngpreds;
- weights = obj.weights;
- stime = obj.stime;
gcount = 0;
freq = 0;
- masked_data = new unsigned short int[nids];
-
- std::copy(obj.masked_data, obj.masked_data+nids,masked_data);
}
-coxph_data::coxph_data(phedata &phed, gendata &gend, const int snpnum)
+
+/**
+ * Constructor that fills a coxph_data object with phenotype and genotype
+ * data.
+ *
+ * @param phed Reference to a phedata object with phenotype data
+ * @param gend Reference to a gendata object with genotype data
+ * @param snpnum The number of the SNP in the genotype data object to
+ * be added to the design matrix regdata::X. When set to a number < 0
+ * no SNP data is added to the design matrix (e.g. when calculating
+ * the null model).
+ */
+coxph_data::coxph_data(const phedata &phed, const gendata &gend,
+ const int snpnum)
{
freq = 0;
gcount = 0;
nids = gend.nids;
- masked_data = new unsigned short int[nids];
+ masked_data = std::vector<bool>(nids, false);
-
- std::fill(masked_data,masked_data+nids,0);
-
ngpreds = gend.ngpreds;
if (snpnum >= 0)
{
@@ -120,7 +153,6 @@
order.reinit(nids, 1);
for (int i = 0; i < nids; i++)
{
- // X.put(1.,i,0);
stime[i] = (phed.Y).get(i, 0);
sstat[i] = static_cast<int>((phed.Y).get(i, 1));
if (sstat[i] != 1 && sstat[i] != 0)
@@ -162,7 +194,7 @@
// sort by time
double *tmptime = new double[nids];
int *passed_sorted = new int[nids];
- std::fill (passed_sorted,passed_sorted+nids,0);
+ std::fill(passed_sorted, passed_sorted + nids, 0);
for (int i = 0; i < nids; i++)
@@ -214,35 +246,49 @@
delete[] passed_sorted;
}
-void coxph_data::update_snp(gendata *gend, const int snpnum) {
- /**
+
+/**
+ * \brief Update the SNP dosages/probabilities in the design matrix
+ * coxph_data::X.
+ *
+ * Adds the genetic information for a new SNP to the design
+ * matrix.
+ *
+ * @param gend Object that contains the genetic data from which the
+ * dosages/probabilities will be added to the design matrix.
+ * @param snpnum Number of the SNP for which the dosage/probability
+ * data will be extracted from the gend object.
+ */
+void coxph_data::update_snp(const gendata *gend, const int snpnum) {
+ /*
* This is the main part of the fix of bug #1846
* (C) of the fix:
* UMC St Radboud Nijmegen,
* Dept of Epidemiology & Biostatistics,
* led by Prof. B. Kiemeney
- *
+ */
+ /**
* Note this sorts by "order"!!!
* Here we deal with transposed X, hence last two arguments are swapped
- * compared to the other 'update_snp'
+ * compared to regdata::update_snp().
* Also, the starting column-1 is not necessary for cox X therefore
* 'ncov-j' changes to 'ncov-j-1'
- **/
+ */
- // reset counter for frequency since it is a new snp
+ // reset counter for frequency since it is a new SNP
gcount = 0;
freq = 0.0;
for (int j = 0; j < ngpreds; j++) {
double *snpdata = new double[nids];
- std::fill (masked_data,masked_data+nids,0);
+ masked_data = std::vector<bool>(nids, false);
gend->get_var(snpnum * ngpreds + j, snpdata);
for (int i = 0; i < nids; i++) {
X.put(snpdata[i], (ncov - j - 1), order[i]);
if (std::isnan(snpdata[i])) {
- masked_data[order[i]] = 1;
+ masked_data[order[i]] = true;
// snp not masked
} else {
// check for first predictor
@@ -254,24 +300,26 @@
freq += snpdata[i];
}
} else if (j == 1) {
- // add second genotype in two predicor data form
+ // Add second genotype in two predicor data form
freq += snpdata[i] * 0.5;
}
} // end std::isnan(snpdata[i]) snp
- } // end i for loop
+ } // end for loop: i=0 to nids
delete[] snpdata;
- } // end ngpreds loop
+ } // End for loop: j=0 to ngpreds
freq /= static_cast<double>(gcount); // Allele frequency
}
/**
- * update_snp() adds SNP information to the design matrix. This
- * function allows you to strip that information from X again. This
- * is used for example when calculating the null model.
+ * \brief Remove SNP information from the design matrix coxph_data::X.
*
+ * coxph_data::update_snp() adds SNP information to the design
+ * matrix. This function allows you to strip that information from X
+ * again. This is used for example when calculating the null model.
+ *
*/
void coxph_data::remove_snp_from_X()
{
@@ -292,27 +340,25 @@
}
-coxph_data::~coxph_data()
+/**
+ * \brief Create a new coxph_data object that contains only the
+ * non-masked data.
+ *
+ * The non-masked data is extracted according to the data in the
+ * regdata::masked_data array. The resulting regdata::nids corresponds
+ * to the number of IDs for which genotype data is present.
+ *
+ * Note that the regdata::masked_data array of the new object should
+ * contain only zeros (i.e. not masked).
+ *
+ * @return A new regdata object containing only the rows from
+ * regdata::X and regdata::Y for which genotype data is present.
+ */
+coxph_data coxph_data::get_unmasked_data() const
{
- delete[] coxph_data::masked_data;
- // delete X;
- // delete sstat;
- // delete stime;
- // delete weights;
- // delete offset;
- // delete strata;
- // delete order;
-}
+ coxph_data to;
-
-coxph_data coxph_data::get_unmasked_data()
-{
- coxph_data to; // = coxph_data(*this);
-
- // filter missing data
-
- int nmeasured=std::count (masked_data, masked_data+nids, 0);
-
+ int nmeasured = std::count(masked_data.begin(), masked_data.end(), 0);
to.nids = nmeasured;
to.ncov = ncov;
to.ngpreds = ngpreds;
@@ -344,14 +390,12 @@
}
}
- //delete [] to.masked_data;
- to.masked_data = new unsigned short int[to.nids];
- std::fill(to.masked_data,to.masked_data+to.nids,0);
+ to.masked_data = masked_data;
return (to);
}
-coxph_reg::coxph_reg(coxph_data &cdatain)
+coxph_reg::coxph_reg(const coxph_data &cdatain)
{
coxph_data cdata = cdatain.get_unmasked_data();
beta.reinit(cdata.X.nrow, 1);
@@ -363,7 +407,7 @@
}
-void coxph_reg::estimate(coxph_data &cdatain,
+void coxph_reg::estimate(const coxph_data &cdatain,
int maxiter, double eps,
double tol_chol, const int model,
const int interaction, const int ngpreds,
@@ -402,24 +446,14 @@
// R's coxph()
double sctest = 1.0;
- // When using Eigen coxfit2 needs to be called in a slightly
- // different way (i.e. the .data()-part needs to be added).
-#if EIGEN
coxfit2(&maxiter, &cdata.nids, &X.nrow, cdata.stime.data.data(),
cdata.sstat.data.data(), X.data.data(), newoffset.data.data(),
cdata.weights.data.data(), cdata.strata.data.data(),
means.data.data(), beta.data.data(), u.data.data(),
imat.data.data(), loglik_int, &flag, work, &eps, &tol_chol,
&sctest);
-#else
- coxfit2(&maxiter, &cdata.nids, &X.nrow, cdata.stime.data,
- cdata.sstat.data, X.data, newoffset.data,
- cdata.weights.data, cdata.strata.data,
- means.data, beta.data, u.data,
- imat.data, loglik_int, &flag, work, &eps, &tol_chol,
- &sctest);
-#endif
+
niter = maxiter;
// Check the results of the Cox fit; mirrored from the same checks
@@ -448,13 +482,12 @@
<< " setting beta and se to 'nan'\n";
setToZero = true;
} else {
-#if EIGEN
VectorXd ueigen = u.data;
MatrixXd imateigen = imat.data;
VectorXd infs = ueigen.transpose() * imateigen;
VectorXd betaeigen = beta.data;
if ( infs.norm() > eps ||
- infs.norm() > sqrt(eps) * betaeigen.norm() )
+ infs.norm() > sqrt(eps) * betaeigen.norm() )
{
cerr << "Warning for " << snpinfo.name[cursnp]
<< ": beta may be infinite,"
@@ -462,12 +495,6 @@
setToZero = true;
}
-#else
- cerr << "Warning for " << snpinfo.name[cursnp]
- << ": can't check for infinite betas."
- << " Please compile ProbABEL with Eigen support to fix this."
- << endl;
-#endif
}
for (int i = 0; i < X.nrow; i++)
Modified: branches/ProbABEL-pvals/ProbABEL/src/coxph_data.h
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/coxph_data.h 2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/coxph_data.h 2014-04-30 12:16:35 UTC (rev 1718)
@@ -1,8 +1,15 @@
-/*
- * coxph_data.h
+/**
+ * \file coxph_data.h
+ * \author Yurii S. Aulchenko
+ * \author M. Kooyman
+ * \author L.C. Karssen
+ * \author Maksim V. Struchalin
*
+ * \brief Describes the coxph_data and coxph_reg classes used in for
+ * Cox Proportional Hazards regression. The coxph_data class is
+ * similar to the regdata class.
+ *
* Created on: Mar 31, 2012
- * Author: mkooyman
*
*
* Copyright (C) 2009--2014 Various members of the GenABEL team. See
@@ -29,57 +36,155 @@
#ifndef COXPH_DATA_H_
#define COXPH_DATA_H_
-#if EIGEN
#include "eigen_mematrix.h"
#include "eigen_mematrix.cpp"
-#else
-#include "mematrix.h"
-#include "mematri1.h"
-#endif
#include "data.h"
#include "reg1.h"
#include "gendata.h"
#include "phedata.h"
+
+/**
+ * \brief A coxph_data object contains the data used for Cox PH
+ * regression.
+ *
+ * The Cox regression code in coxfit2.c requires all data to be sorted
+ * based on follow-up time, which is done in the constructor.
+ *
+ * A similar class for linear and logistic regression can be found in
+ * the regdata class.
+ */
class coxph_data {
public:
- coxph_data get_unmasked_data();
+ int nids; /**< \brief Number of IDs/samples. */
- coxph_data()
- {
- nids = 0;
- ncov = 0;
- ngpreds = 0;
- masked_data = NULL;
- gcount = 0;
- freq = 0;
- }
+ /**
+ * \brief Number of covariates + possible nr of genomic
+ * predictors.
+ *
+ * If snpnum >=0 then this equals the number of covariates + the
+ * number of regdata::ngpreds.
+ */
+ int ncov;
- coxph_data(const coxph_data &obj);
- coxph_data(phedata &phed, gendata &gend, const int snpnum);
- void update_snp(gendata *gend, const int snpnum);
- void remove_snp_from_X();
- ~coxph_data();
+ /**
+ * \brief Number of genomic predictors, 1 for dosage data, 2 for
+ * probability data.
+ */
+ int ngpreds;
- int nids;
- int ncov;
- int ngpreds;
+ /**
+ * \brief Number of non-masked genotypes.
+ */
unsigned int gcount;
+
+ /**
+ * \brief Allele frequency.
+ *
+ * Calculation is only based on non-masked SNPs.
+ */
double freq;
+
+ /**
+ * \brief The design matrix.
+ *
+ * The matrix dimensions are coxph_data::nids x coxph_data::ncov.
+ * After filling the matrix is it is time-sorted based on
+ * coxph_data::order. Note that coxfit2() expects the data to be
+ * in column-major order, so at the end of the constructor the
+ * matrix is transposed!
+ */
+ mematrix<double> X;
+
+ /**
+ * \brief A vector that contains the follow-up times for each
+ * individual, as read from the phenotype file.
+ *
+ * The vector is coxph_data::nids long. After reading the data
+ * from the phenotype file this vector is time-sorted based on
+ * coxph_data::order.
+ */
+ mematrix<double> stime;
+
+ /**
+ * \brief A vector containing the survival status of each
+ * individual, as read from the phenotype file.
+ *
+ * Values should be 0 or 1. The vector is coxph_data::nids
+ * long. After reading the data from the phenotype file this
+ * vector is time-sorted based on coxph_data::order.
+ */
+ mematrix<int> sstat;
+
+ /**
+ * \brief A vector containing case weights. These are set to 1.0
+ * in the constructor.
+ *
+ * The vector is coxph_data::nids long. After reading the data
+ * from the phenotype file this vector is time-sorted based on
+ * coxph_data::order.
+ */
mematrix<double> weights;
- mematrix<double> stime;
- mematrix<int> sstat;
+
+ /**
+ * \brief A vector containing an offset for the linear
+ * predictor. Set to 0.0 in the constructor. This is a mandatory
+ * input for coxfit2().
+ *
+ * The vector is coxph_data::nids long. After reading the data
+ * from the phenotype file this vector is time-sorted based on
+ * coxph_data::order.
+ */
mematrix<double> offset;
- mematrix<int> strata;
- mematrix<double> X;
- mematrix<int> order;
- unsigned short int * masked_data;
+
+ /**
+ * \brief A vector containing strata. See coxfit2() for more
+ * details. All strata are set to 0 in the constructor.
+ *
+ * The vector is coxph_data::nids long. After reading the data
+ * from the phenotype file this vector is time-sorted based on
+ * coxph_data::order.
+ */
+ mematrix<int> strata;
+
+ /**
+ * \brief A vector used to store the follow-up times in ascending
+ * order. It is used to reorder the other vectors and
+ * coxph_data::X.
+ *
+ * The vector is coxph_data::nids long.
+ */
+ mematrix<int> order;
+
+ /**
+ * \brief A vector that contains ones/zeros to indicate which data points
+ * should be omitted because no genetic data is present.
+ *
+ * The vector is regdata::nids long. A value of 1 or 'true' means
+ * that that ID/sample will be masked because the SNP data is NA
+ * for that ID.
+ */
+ std::vector<bool> masked_data;
+
+
+ // Constructors and destructors
+ coxph_data();
+ coxph_data(const coxph_data &obj);
+ coxph_data(const phedata &phed, const gendata &gend, const int snpnum);
+ // ~coxph_data();
+
+
+ // Member functions
+ coxph_data get_unmasked_data() const;
+ void update_snp(const gendata *gend, const int snpnum);
+ void remove_snp_from_X();
};
class coxph_reg {
public:
+ // Variables
mematrix<double> beta;
mematrix<double> sebeta;
mematrix<double> residuals;
@@ -88,8 +193,13 @@
double chi2_score;
int niter;
- coxph_reg(coxph_data &cdatain);
- void estimate(coxph_data &cdatain, int maxiter,
+
+ // Constructors and destructors
+ coxph_reg(const coxph_data &cdatain);
+
+
+ // Member functions
+ void estimate(const coxph_data &cdatain, int maxiter,
double eps, double tol_chol, const int model,
const int interaction, const int ngpreds, const bool iscox,
const int nullmodel, const mlinfo &snpinfo, const int cursnp);
Modified: branches/ProbABEL-pvals/ProbABEL/src/data.cpp
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/data.cpp 2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/data.cpp 2014-04-30 12:16:35 UTC (rev 1718)
@@ -38,54 +38,11 @@
#include "gendata.h"
#include "data.h"
-#if EIGEN
#include "eigen_mematrix.h"
#include "eigen_mematrix.cpp"
-#else
-#include "mematrix.h"
-#include "mematri1.h"
-#endif
#include "utilities.h"
-//TODO(unknown) This function is not used. Remove in near future
-//unsigned int Nmeasured(char * fname, int nphenocols, int npeople)
-//{
-//// first pass -- find unmeasured people
-// std::ifstream infile(fname);
-// if (!infile)
-// {
-// std::cerr << "Nmeasured: cannot open file " << fname << endl;
-// }
-// char tmp[100];
-//
-// for (int i = 0; i < nphenocols; i++)
-// {
-// infile >> tmp;
-// }
-//
-// unsigned short int * allmeasured = new unsigned short int[npeople];
-// int nids = 0;
-// for (int i = 0; i < npeople; i++)
-// {
-// allmeasured[i] = 1;
-// infile >> tmp;
-// for (int j = 1; j < nphenocols; j++)
-// {
-// infile >> tmp;
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/genabel -r 1718
More information about the Genabel-commits
mailing list