[Genabel-commits] r1718 - in branches/ProbABEL-pvals/ProbABEL: . checks/R-tests doc src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Apr 30 14:16:36 CEST 2014


Author: lckarssen
Date: 2014-04-30 14:16:35 +0200 (Wed, 30 Apr 2014)
New Revision: 1718

Removed:
   branches/ProbABEL-pvals/ProbABEL/src/cholesky.cpp
   branches/ProbABEL-pvals/ProbABEL/src/cholesky.h
   branches/ProbABEL-pvals/ProbABEL/src/mematri1.h
   branches/ProbABEL-pvals/ProbABEL/src/mematrix.h
Modified:
   branches/ProbABEL-pvals/ProbABEL/checks/R-tests/Makefile.am
   branches/ProbABEL-pvals/ProbABEL/configure.ac
   branches/ProbABEL-pvals/ProbABEL/doc/INSTALL
   branches/ProbABEL-pvals/ProbABEL/src/Makefile.am
   branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.cpp
   branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.h
   branches/ProbABEL-pvals/ProbABEL/src/coxph_data.cpp
   branches/ProbABEL-pvals/ProbABEL/src/coxph_data.h
   branches/ProbABEL-pvals/ProbABEL/src/data.cpp
   branches/ProbABEL-pvals/ProbABEL/src/eigen_mematrix.cpp
   branches/ProbABEL-pvals/ProbABEL/src/eigen_mematrix.h
   branches/ProbABEL-pvals/ProbABEL/src/fvlib
   branches/ProbABEL-pvals/ProbABEL/src/gendata.cpp
   branches/ProbABEL-pvals/ProbABEL/src/gendata.h
   branches/ProbABEL-pvals/ProbABEL/src/main.cpp
   branches/ProbABEL-pvals/ProbABEL/src/main_functions_dump.cpp
   branches/ProbABEL-pvals/ProbABEL/src/maskedmatrix.cpp
   branches/ProbABEL-pvals/ProbABEL/src/maskedmatrix.h
   branches/ProbABEL-pvals/ProbABEL/src/phedata.cpp
   branches/ProbABEL-pvals/ProbABEL/src/phedata.h
   branches/ProbABEL-pvals/ProbABEL/src/reg1.cpp
   branches/ProbABEL-pvals/ProbABEL/src/reg1.h
   branches/ProbABEL-pvals/ProbABEL/src/regdata.cpp
   branches/ProbABEL-pvals/ProbABEL/src/regdata.h
   branches/ProbABEL-pvals/ProbABEL/src/testchol.cpp
   branches/ProbABEL-pvals/ProbABEL/src/usage.cpp
Log:
Merged trunk r1716 into the ProbABEL-pvals branch.

Code compiles, but doesn't pass all checks yet (it didn't pass them before this commit either, so nothing's lost).


Modified: branches/ProbABEL-pvals/ProbABEL/checks/R-tests/Makefile.am
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/checks/R-tests/Makefile.am	2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/checks/R-tests/Makefile.am	2014-04-30 12:16:35 UTC (rev 1718)
@@ -30,11 +30,6 @@
 ## The palogist R test still doesn't run correctly.
 XFAIL_TESTS = run_R_test_palogist.sh
 
-## The pacoxph R test fails on SNP 6 when EIGEN is not enabled.
-if !WITH_EIGEN
-XFAIL_TESTS += run_R_test_pacox.sh
-endif
-
 EXTRA_DIST = $(check_SCRIPTS) $(R_test_files)
 
 

Modified: branches/ProbABEL-pvals/ProbABEL/configure.ac
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/configure.ac	2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/configure.ac	2014-04-30 12:16:35 UTC (rev 1718)
@@ -19,20 +19,45 @@
 AM_PROG_CC_C_O
 AC_PROG_INSTALL
 AC_PROG_LN_S
-if test -z "$CXXFLAGS"; then
-   # User did not set CXXFLAGS, so we can put in our own defaults
-    CXXFLAGS="-g -O2 -DNDEBUG -Wextra"
+
+
+# Add an option to enable some default compiler options used during
+# development
+AC_ARG_ENABLE([dev-options],
+    [AS_HELP_STRING([--enable-dev-options], [enable compile options
+    for developers])],
+    [devoptions=yes],
+    [devoptions=no])
+
+if test "x$devoptions" = "xyes"; then
+   AC_MSG_NOTICE([Building with developer options enabled])
+   if test -z "$CXXFLAGS"; then
+      # User did not set CXXFLAGS, so we can put in our own defaults
+      CXXFLAGS=""
+   fi
+   if test -z "$CPPFLAGS"; then
+      # User did not set CPPFLAGS, so we can put in our own defaults
+      CPPFLAGS="-Wall"
+   fi
+   CXXFLAGS+=" -g -Wextra"
 fi
-if test -z "$CPPFLAGS"; then
-   # User did not set CPPFLAGS, so we can put in our own defaults
-    CPPFLAGS="-Wall"
+
+if test "x$devoptions" = "xno"; then
+   if test -z "$CXXFLAGS"; then
+      # User did not set CXXFLAGS, so we can put in our own defaults
+      CXXFLAGS="-O2 -DNDEBUG"
+   fi
+   if test -z "$CPPFLAGS"; then
+      # User did not set CPPFLAGS, so we can put in our own defaults
+      CPPFLAGS+="-Wall"
+   fi
 fi
+
 # If CXXFLAGS/CPPFLAGS are already set AC_PROG_CXX will not overwrite them
 # with its own defaults
 AC_PROG_CXX
 
-
-#Tell compiler to build not R version of filevector
+# Tell the compiler to build not R version of filevector
 CXXFLAGS+=" -D_NOT_R_FILEVECTOR"
 
 
@@ -48,40 +73,22 @@
                           stdint.h stdlib.h string.h sys/param.h \
                           wchar.h wctype.h])
 
+# Option that lets the user specify a path to the EIGEN include files.
+AC_ARG_WITH([eigen-include-path],
+    [AS_HELP_STRING([--with-eigen-include-path],
+      [location of the Eigen headers, defaults to /usr/include/eigen3])],
+    [CXXFLAGS+=" -I${withval}"
+    CPPFLAGS+=" -I${withval}"],
+    [CXXFLAGS+=' -I/usr/include/eigen3'
+    CPPFLAGS+=' -I/usr/include/eigen3'])
 
-# See if we want use of the Eigen library enabled (yes by default) and if so,
-# whether we can find the library files.
-AC_ARG_WITH([eigen],
- AS_HELP_STRING([--with-eigen], [Use the Eigen template library for fast \
-                                    linear algebra (Eigen can be downloaded \
-                                    from eigen.tuxfamily.org); this is enabled \
-                                    by default]
-               )
-)
+# Check for the EIGEN header files
+AC_CHECK_HEADERS([Eigen/Dense Eigen/LU])
 
-if test "x$with_eigen" != "xno"; then
-    AC_MSG_NOTICE([building using the Eigen headers enabled])
-
-    AC_ARG_WITH([eigen-include-path],
-        [AS_HELP_STRING([--with-eigen-include-path],
-          [location of the Eigen headers, defaults to /usr/include/eigen3])],
-        [CXXFLAGS+=" -I${withval}"
-        CPPFLAGS+=" -I${withval}"],
-        [CXXFLAGS+=' -I/usr/include/eigen3'
-        CPPFLAGS+=' -I/usr/include/eigen3'])
-
-    # Check for the EIGEN header files
-    AC_CHECK_HEADERS([Eigen/Dense Eigen/LU])
-
-    if test x$ac_cv_header_Eigen_Dense = xno; then
-      AC_MSG_ERROR([Could not find the Eigen header files. Did you specify \
---with-eigen-include-path correctly? Or use --without-eigen \
-to disable use of fast linear algebra.])
-    fi
-else
-   AC_MSG_NOTICE([not using Eigen for linear algebra])
+if test x$ac_cv_header_Eigen_Dense = xno; then
+  AC_MSG_ERROR([Could not find the Eigen header files. Did you specify \
+--with-eigen-include-path correctly?])
 fi
-AM_CONDITIONAL([WITH_EIGEN], test "x$with_eigen" != "xno")
 
 
 # See if we can use the Boost libraries for calculating p-values

Modified: branches/ProbABEL-pvals/ProbABEL/doc/INSTALL
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/doc/INSTALL	2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/doc/INSTALL	2014-04-30 12:16:35 UTC (rev 1718)
@@ -22,8 +22,10 @@
   If ./configure cannot find the Eigen library it will say so. To
   disable the use of Eigen (even if the library is present) use:
 ./configure --without-eigen
+  Note that we do not recommend building ProbABEL without EIGEN as
+  the non-EIGEN code is tested less well and we are preparing to
+  phase out the non-EIGEN code.
 
-
 * Compiling for Linux
   If you downloaded the source from SVN (this is not necessary when
   installing from the distributed .tar.gz file), run:
@@ -49,7 +51,7 @@
   probabel_config.cfg.example. For probabel to work please rename
   this file to probabel_config.cfg.
 
-  To see options, run
+  To see additional options, run
 ./configure --help
 
   The most notable option is
@@ -64,6 +66,16 @@
 
 
 * Options for developers
+  Some default development compiler flags can be enabled by running
+  configure like this:
+./configure --enable-dev-options
+  The actual development values of CXXFLAGS and CPPFLAGS can be seen in
+  configure.ac or by adding --disable-silent-rules to ./configure. If
+  you want to specify your own flags run configure like this:
+CXXFLAGS="-O3 -Wextra" CPPFLAGS="-Wall" ./configure
+  or later, during the make process:
+make CXXFLAGS="-O3 -Wextra" CPPFLAGS="-Wall"
+
   To generate a .tar.gz package for distribution run:
 ./configure
 make dist

Modified: branches/ProbABEL-pvals/ProbABEL/src/Makefile.am
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/Makefile.am	2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/Makefile.am	2014-04-30 12:16:35 UTC (rev 1718)
@@ -3,14 +3,13 @@
 ## Using wildcards in these lists doesn't work. Also GNU make's
 ## ($wildcard,) doesn't work. It gives warning message about
 ## portability, but in the end doesn't work, I tried :-).
-REGFILES = data.h data.cpp gendata.h gendata.cpp mematrix.h		\
- mematri1.h command_line_settings.h command_line_settings.cpp reg1.h	\
- usage.h usage.cpp main.cpp utilities.h utilities.cpp phedata.h		\
- phedata.cpp cholesky.h cholesky.cpp regdata.h regdata.cpp		\
- maskedmatrix.cpp maskedmatrix.h reg1.cpp main_functions_dump.h		\
+REGFILES = data.h data.cpp gendata.h gendata.cpp eigen_mematrix.h	\
+ eigen_mematrix.cpp command_line_settings.h command_line_settings.cpp	\
+ reg1.h usage.h usage.cpp main.cpp utilities.h utilities.cpp		\
+ phedata.h phedata.cpp regdata.h regdata.cpp maskedmatrix.cpp		\
+ maskedmatrix.h reg1.cpp main_functions_dump.h				\
  main_functions_dump.cpp
 
-EIGENFILES = eigen_mematrix.h eigen_mematrix.cpp
 
 ## Filevector files:
 FVSRC = fvlib/AbstractMatrix.cpp fvlib/CastUtils.cpp			\
@@ -55,28 +54,19 @@
 palinear_SOURCES = $(REGFILES) $(FVSRC) $(FVHEADERS)
 palinear_CXXFLAGS = -DLINEAR $(AM_CXXFLAGS)
 palinear_CPPFLAGS = $(AM_CPPFLAGS)
-if WITH_EIGEN
 palinear_SOURCES += $(EIGENFILES)
-palinear_CXXFLAGS += -DEIGEN
-endif
 
 palogist_SOURCES = $(REGFILES) $(FVSRC) $(FVHEADERS)
 palogist_CXXFLAGS = -DLOGISTIC $(AM_CXXFLAGS)
 palogist_CPPFLAGS = $(AM_CPPFLAGS)
-if WITH_EIGEN
 palogist_SOURCES += $(EIGENFILES)
-palogist_CXXFLAGS += -DEIGEN
-endif
 
 pacoxph_SOURCES = $(COXSRC) $(REGFILES) $(FVSRC) $(FVHEADERS)	\
  $(RHEADERS) survS.h survproto.h coxph_data.h coxph_data.cpp
 pacoxph_CXXFLAGS = -DCOXPH -I $(top_srcdir)/src/include $(AM_CXXFLAGS)
 pacoxph_CPPFLAGS = $(AM_CPPFLAGS)
 pacoxph_CFLAGS = -DCOXPH -I $(top_srcdir)/src/include $(AM_CFLAGS)
-if WITH_EIGEN
 pacoxph_SOURCES += $(EIGENFILES)
-pacoxph_CXXFLAGS += -DEIGEN
-endif
 
 if WITH_BOOST
 palinear_CXXFLAGS += -DWITH_BOOST

Deleted: branches/ProbABEL-pvals/ProbABEL/src/cholesky.cpp
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/cholesky.cpp	2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/cholesky.cpp	2014-04-30 12:16:35 UTC (rev 1718)
@@ -1,154 +0,0 @@
-/*
- * cholesky.cpp
- *
- *  Created on: Mar 15, 2012
- *      Author: mkooyman
- */
-#include <string>
-#include <cstdarg>
-#include <cstdio>
-#include <cstdlib>
-
-#if EIGEN
-#include "eigen_mematrix.h"
-#include "eigen_mematrix.cpp"
-#else
-#include "mematrix.h"
-#include "mematri1.h"
-#endif
-
-
-/*  SCCS @(#)cholesky2.c    5.2 10/27/98
- ** subroutine to do Cholesky decompostion on a matrix: C = FDF'
- **   where F is lower triangular with 1's on the diagonal, and D is diagonal
- **
- ** arguments are:
- **     n         the size of the matrix to be factored
- **     **matrix  a ragged array containing an n by n submatrix to be factored
- **     toler     the threshold value for detecting "singularity"
- **
- **  The factorization is returned in the lower triangle, D occupies the
- **    diagonal and the upper triangle is left undisturbed.
- **    The lower triangle need not be filled in at the start.
- **
- **  Return value:  the rank of the matrix (non-negative definite), or -rank
- **     it not SPD or NND
- **
- **  If a column is deemed to be redundant, then that diagonal is set to zero.
- **
- **   Terry Therneau
- */
-
-// modified Yurii Aulchenko 2008-05-20
-int cholesky2_mm(mematrix<double> &matrix, double toler)
-{
-    if (matrix.ncol != matrix.nrow)
-    {
-        fprintf(stderr, "cholesky2_mm: matrix should be square\n");
-        exit(1);
-    }
-    int n = matrix.ncol;
-    double temp;
-    int i, j, k;
-    double eps, pivot;
-    int rank;
-    int nonneg;
-
-    nonneg = 1;
-    eps = 0;
-    for (i = 0; i < n; i++)
-    {
-        if (matrix[i * n + i] > eps)
-            eps = matrix[i * n + i];
-        for (j = (i + 1); j < n; j++)
-            matrix[j * n + i] = matrix[i * n + j];
-    }
-    eps *= toler;
-
-    rank = 0;
-    for (i = 0; i < n; i++)
-    {
-        pivot = matrix[i * n + i];
-        if (pivot < eps)
-        {
-            matrix[i * n + i] = 0;
-            if (pivot < -8 * eps)
-                nonneg = -1;
-        }
-        else
-        {
-            rank++;
-            for (j = (i + 1); j < n; j++)
-            {
-                temp = matrix[j * n + i] / pivot;
-                matrix[j * n + i] = temp;
-                matrix[j * n + j] -= temp * temp * pivot;
-                for (k = (j + 1); k < n; k++)
-                    matrix[k * n + j] -= temp * matrix[k * n + i];
-            }
-        }
-    }
-    return (rank * nonneg);
-}
-
-void chinv2_mm(mematrix<double> &matrix)
-{
-    if (matrix.ncol != matrix.nrow)
-    {
-        fprintf(stderr, "cholesky2_mm: matrix should be square\n");
-        exit(1);
-    }
-
-    int n = matrix.ncol;
-    double temp;
-    int i, j, k;
-
-    /*
-     ** invert the cholesky in the lower triangle
-     **   take full advantage of the cholesky's diagonal of 1's
-     */
-    for (i = 0; i < n; i++)
-    {
-        if (matrix[i * n + i] > 0)
-        {
-            matrix[i * n + i] = 1 / matrix[i * n + i]; /*this line inverts D */
-            for (j = (i + 1); j < n; j++)
-            {
-                matrix[j * n + i] = -matrix[j * n + i];
-                for (k = 0; k < i; k++) /*sweep operator */
-                    matrix[j * n + k] += matrix[j * n + i] * matrix[i * n + k];
-            }
-        }
-    }
-
-    /*
-     ** lower triangle now contains inverse of cholesky
-     ** calculate F'DF (inverse of cholesky decomp process) to get inverse
-     **   of original matrix
-     */
-    for (i = 0; i < n; i++)
-    {
-        if (matrix[i * n + i] == 0)
-        { /* singular row */
-            for (j = 0; j < i; j++)
-                matrix[j * n + i] = 0;
-            for (j = i; j < n; j++)
-                matrix[i * n + j] = 0;
-        }
-        else
-        {
-            for (j = (i + 1); j < n; j++)
-            {
-                temp = matrix[j * n + i] * matrix[j * n + j];
-                if (j != i)
-                    matrix[i * n + j] = temp;
-                for (k = i; k < j; k++)
-                    matrix[i * n + k] += temp * matrix[j * n + k];
-            }
-        }
-    }
-    // ugly fix to return only inverse
-    for (int col = 1; col < n; col++)
-        for (int row = 0; row < col; row++)
-            matrix[col * n + row] = matrix[row * n + col];
-}

Deleted: branches/ProbABEL-pvals/ProbABEL/src/cholesky.h
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/cholesky.h	2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/cholesky.h	2014-04-30 12:16:35 UTC (rev 1718)
@@ -1,21 +0,0 @@
-/*
- * cholesky.h
- *
- *  Created on: Mar 15, 2012
- *      Author: mkooyman
- */
-
-#ifndef CHOLESKY_H_
-#define CHOLESKY_H_
-
-#if EIGEN
-#include "eigen_mematrix.h"
-#include "eigen_mematrix.cpp"
-#else
-#include "mematrix.h"
-#endif
-
-int cholesky2_mm(mematrix<double> &matrix, double toler);
-void chinv2_mm(mematrix<double> &matrix);
-
-#endif /* CHOLESKY_H_ */

Modified: branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.cpp
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.cpp	2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.cpp	2014-04-30 12:16:35 UTC (rev 1718)
@@ -31,9 +31,7 @@
 #include <iostream>
 #include "usage.h"
 #include "command_line_settings.h"
-#if EIGEN
 #include "eigen_mematrix.h"
-#endif
 
 // config.h and fvlib/FileVector.h are included for the upper case variables
 #if HAVE_CONFIG_H
@@ -152,6 +150,12 @@
 }
 
 
+/**
+ * Process the command line arguments and save them in a cmdvars object.
+ *
+ * @param argc Number of command line arguments
+ * @param argv Values of the command line arguments
+ */
 void cmdvars::set_variables(int argc, char * argv[])
 {
     int next_option;

Modified: branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.h
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.h	2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/command_line_settings.h	2014-04-30 12:16:35 UTC (rev 1718)
@@ -3,8 +3,8 @@
  *
  *  Created on: Apr 2; int 2012
  *      Author: mkooyman
-*
  *
+ *
  * Copyright (C) 2009--2014 Various members of the GenABEL team. See
  * the SVN commit logs for more details.
  *

Modified: branches/ProbABEL-pvals/ProbABEL/src/coxph_data.cpp
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/coxph_data.cpp	2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/coxph_data.cpp	2014-04-30 12:16:35 UTC (rev 1718)
@@ -28,6 +28,7 @@
 
 #include "coxph_data.h"
 #include <iostream>
+#include <algorithm>
 #include <cmath>
 extern "C" {
 #include "survproto.h"
@@ -44,7 +45,10 @@
 #include "fvlib/Logger.h"
 #include "fvlib/Transposer.h"
 
-// compare for sort of times
+/**
+ * Definition of the compare function used for the sort of times. Used
+ * by the qsort() function.
+ */
 int cmpfun(const void *a, const void *b)
 {
     double el1 = *(double*) a;
@@ -66,34 +70,63 @@
     return -9;
 }
 
-coxph_data::coxph_data(const coxph_data &obj) : sstat(obj.sstat),
+
+/**
+ * Constructor. Initialises all values to zero.
+ */
+coxph_data::coxph_data()
+{
+    nids    = 0;
+    ncov    = 0;
+    ngpreds = 0;
+    gcount  = 0;
+    freq    = 0;
+}
+
+
+/**
+ * Copy constructor. Creates a coxph_data object by copying the values
+ * of another one.
+ *
+ * \param obj Reference to the coxph_data object to be copied to the new
+ * object.
+ */
+coxph_data::coxph_data(const coxph_data &obj) : X(obj.X),
+                                                stime(obj.stime),
+                                                sstat(obj.sstat),
+                                                weights(obj.weights),
                                                 offset(obj.offset),
                                                 strata(obj.strata),
-                                                X(obj.X),
-                                                order(obj.order)
+                                                order(obj.order),
+                                                masked_data(obj.masked_data)
 {
     nids        = obj.nids;
     ncov        = obj.ncov;
     ngpreds     = obj.ngpreds;
-    weights     = obj.weights;
-    stime       = obj.stime;
     gcount      = 0;
     freq        = 0;
-    masked_data = new unsigned short int[nids];
-
-    std::copy(obj.masked_data, obj.masked_data+nids,masked_data);
 }
 
-coxph_data::coxph_data(phedata &phed, gendata &gend, const int snpnum)
+
+/**
+ * Constructor that fills a coxph_data object with phenotype and genotype
+ * data.
+ *
+ * @param phed Reference to a phedata object with phenotype data
+ * @param gend Reference to a gendata object with genotype data
+ * @param snpnum The number of the SNP in the genotype data object to
+ * be added to the design matrix regdata::X. When set to a number < 0
+ * no SNP data is added to the design matrix (e.g. when calculating
+ * the null model).
+ */
+coxph_data::coxph_data(const phedata &phed, const gendata &gend,
+                       const int snpnum)
 {
     freq        = 0;
     gcount      = 0;
     nids        = gend.nids;
-    masked_data = new unsigned short int[nids];
+    masked_data = std::vector<bool>(nids, false);
 
-
-    std::fill(masked_data,masked_data+nids,0);
-
     ngpreds = gend.ngpreds;
     if (snpnum >= 0)
     {
@@ -120,7 +153,6 @@
     order.reinit(nids, 1);
     for (int i = 0; i < nids; i++)
     {
-        //          X.put(1.,i,0);
         stime[i] = (phed.Y).get(i, 0);
         sstat[i] = static_cast<int>((phed.Y).get(i, 1));
         if (sstat[i] != 1 && sstat[i] != 0)
@@ -162,7 +194,7 @@
     // sort by time
     double *tmptime = new double[nids];
     int *passed_sorted = new int[nids];
-    std::fill (passed_sorted,passed_sorted+nids,0);
+    std::fill(passed_sorted, passed_sorted + nids, 0);
 
 
     for (int i = 0; i < nids; i++)
@@ -214,35 +246,49 @@
     delete[] passed_sorted;
 }
 
-void coxph_data::update_snp(gendata *gend, const int snpnum) {
-    /**
+
+/**
+ * \brief Update the SNP dosages/probabilities in the design matrix
+ * coxph_data::X.
+ *
+ * Adds the genetic information for a new SNP to the design
+ * matrix.
+ *
+ * @param gend Object that contains the genetic data from which the
+ * dosages/probabilities will be added to the design matrix.
+ * @param snpnum Number of the SNP for which the dosage/probability
+ * data will be extracted from the gend object.
+ */
+void coxph_data::update_snp(const gendata *gend, const int snpnum) {
+    /*
      * This is the main part of the fix of bug #1846
      * (C) of the fix:
      *   UMC St Radboud Nijmegen,
      *   Dept of Epidemiology & Biostatistics,
      *   led by Prof. B. Kiemeney
-     *
+     */
+    /**
      * Note this sorts by "order"!!!
      * Here we deal with transposed X, hence last two arguments are swapped
-     * compared to the other 'update_snp'
+     * compared to regdata::update_snp().
      * Also, the starting column-1 is not necessary for cox X therefore
      * 'ncov-j' changes to 'ncov-j-1'
-     **/
+     */
 
-    // reset counter for frequency since it is a new snp
+    // reset counter for frequency since it is a new SNP
     gcount = 0;
     freq   = 0.0;
 
     for (int j = 0; j < ngpreds; j++) {
         double *snpdata = new double[nids];
-       std::fill (masked_data,masked_data+nids,0);
+        masked_data = std::vector<bool>(nids, false);
 
         gend->get_var(snpnum * ngpreds + j, snpdata);
 
         for (int i = 0; i < nids; i++) {
             X.put(snpdata[i], (ncov - j - 1), order[i]);
             if (std::isnan(snpdata[i])) {
-                masked_data[order[i]] = 1;
+                masked_data[order[i]] = true;
                 // snp not masked
             } else {
                 // check for first predictor
@@ -254,24 +300,26 @@
                         freq += snpdata[i];
                     }
                 } else if (j == 1) {
-                    // add second genotype in two predicor data form
+                    // Add second genotype in two predicor data form
                     freq += snpdata[i] * 0.5;
                 }
             }    // end std::isnan(snpdata[i]) snp
-        }    // end i for loop
+        }    // end for loop: i=0 to nids
 
         delete[] snpdata;
-    }    // end ngpreds loop
+    }    // End for loop: j=0 to ngpreds
 
     freq /= static_cast<double>(gcount);  // Allele frequency
 }
 
 
 /**
- * update_snp() adds SNP information to the design matrix. This
- * function allows you to strip that information from X again. This
- * is used for example when calculating the null model.
+ * \brief Remove SNP information from the design matrix coxph_data::X.
  *
+ * coxph_data::update_snp() adds SNP information to the design
+ * matrix. This function allows you to strip that information from X
+ * again. This is used for example when calculating the null model.
+ *
  */
 void coxph_data::remove_snp_from_X()
 {
@@ -292,27 +340,25 @@
 }
 
 
-coxph_data::~coxph_data()
+/**
+ * \brief Create a new coxph_data object that contains only the
+ * non-masked data.
+ *
+ * The non-masked data is extracted according to the data in the
+ * regdata::masked_data array. The resulting regdata::nids corresponds
+ * to the number of IDs for which genotype data is present.
+ *
+ * Note that the regdata::masked_data array of the new object should
+ * contain only zeros (i.e. not masked).
+ *
+ * @return A new regdata object containing only the rows from
+ * regdata::X and regdata::Y for which genotype data is present.
+ */
+coxph_data coxph_data::get_unmasked_data() const
 {
-    delete[] coxph_data::masked_data;
-    //      delete X;
-    //      delete sstat;
-    //      delete stime;
-    //      delete weights;
-    //      delete offset;
-    //      delete strata;
-    //      delete order;
-}
+    coxph_data to;
 
-
-coxph_data coxph_data::get_unmasked_data()
-{
-    coxph_data to;  // = coxph_data(*this);
-
-    // filter missing data
-
-    int nmeasured=std::count (masked_data, masked_data+nids, 0);
-
+    int nmeasured = std::count(masked_data.begin(), masked_data.end(), 0);
     to.nids = nmeasured;
     to.ncov = ncov;
     to.ngpreds = ngpreds;
@@ -344,14 +390,12 @@
         }
     }
 
-    //delete [] to.masked_data;
-    to.masked_data = new unsigned short int[to.nids];
-    std::fill(to.masked_data,to.masked_data+to.nids,0);
+    to.masked_data = masked_data;
     return (to);
 }
 
 
-coxph_reg::coxph_reg(coxph_data &cdatain)
+coxph_reg::coxph_reg(const coxph_data &cdatain)
 {
     coxph_data cdata = cdatain.get_unmasked_data();
     beta.reinit(cdata.X.nrow, 1);
@@ -363,7 +407,7 @@
 }
 
 
-void coxph_reg::estimate(coxph_data &cdatain,
+void coxph_reg::estimate(const coxph_data &cdatain,
                         int maxiter, double eps,
                         double tol_chol, const int model,
                         const int interaction, const int ngpreds,
@@ -402,24 +446,14 @@
     // R's coxph()
     double sctest = 1.0;
 
-    // When using Eigen coxfit2 needs to be called in a slightly
-    // different way (i.e. the .data()-part needs to be added).
-#if EIGEN
     coxfit2(&maxiter, &cdata.nids, &X.nrow, cdata.stime.data.data(),
             cdata.sstat.data.data(), X.data.data(), newoffset.data.data(),
             cdata.weights.data.data(), cdata.strata.data.data(),
             means.data.data(), beta.data.data(), u.data.data(),
             imat.data.data(), loglik_int, &flag, work, &eps, &tol_chol,
             &sctest);
-#else
-    coxfit2(&maxiter, &cdata.nids, &X.nrow, cdata.stime.data,
-            cdata.sstat.data, X.data, newoffset.data,
-            cdata.weights.data, cdata.strata.data,
-            means.data, beta.data, u.data,
-            imat.data, loglik_int, &flag, work, &eps, &tol_chol,
-            &sctest);
-#endif
 
+
     niter = maxiter;
 
     // Check the results of the Cox fit; mirrored from the same checks
@@ -448,13 +482,12 @@
              << " setting beta and se to 'nan'\n";
         setToZero = true;
     } else {
-#if EIGEN
         VectorXd ueigen = u.data;
         MatrixXd imateigen = imat.data;
         VectorXd infs = ueigen.transpose() * imateigen;
         VectorXd betaeigen = beta.data;
         if ( infs.norm() > eps ||
-            infs.norm() > sqrt(eps) * betaeigen.norm() )
+             infs.norm() > sqrt(eps) * betaeigen.norm() )
         {
             cerr << "Warning for " << snpinfo.name[cursnp]
                  << ": beta may be infinite,"
@@ -462,12 +495,6 @@
 
             setToZero = true;
         }
-#else
-        cerr << "Warning for " << snpinfo.name[cursnp]
-             << ": can't check for infinite betas."
-             << " Please compile ProbABEL with Eigen support to fix this."
-             << endl;
-#endif
     }
 
     for (int i = 0; i < X.nrow; i++)

Modified: branches/ProbABEL-pvals/ProbABEL/src/coxph_data.h
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/coxph_data.h	2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/coxph_data.h	2014-04-30 12:16:35 UTC (rev 1718)
@@ -1,8 +1,15 @@
-/*
- * coxph_data.h
+/**
+ * \file coxph_data.h
+ * \author Yurii S. Aulchenko
+ * \author M. Kooyman
+ * \author L.C. Karssen
+ * \author Maksim V. Struchalin
  *
+ * \brief Describes the coxph_data and coxph_reg classes used in for
+ * Cox Proportional Hazards regression. The coxph_data class is
+ * similar to the regdata class.
+ *
  *  Created on: Mar 31, 2012
- *      Author: mkooyman
  *
  *
  * Copyright (C) 2009--2014 Various members of the GenABEL team. See
@@ -29,57 +36,155 @@
 #ifndef COXPH_DATA_H_
 #define COXPH_DATA_H_
 
-#if EIGEN
 #include "eigen_mematrix.h"
 #include "eigen_mematrix.cpp"
-#else
-#include "mematrix.h"
-#include "mematri1.h"
-#endif
 
 #include "data.h"
 #include "reg1.h"
 #include "gendata.h"
 #include "phedata.h"
 
+
+/**
+ * \brief A coxph_data object contains the data used for Cox PH
+ * regression.
+ *
+ * The Cox regression code in coxfit2.c requires all data to be sorted
+ * based on follow-up time, which is done in the constructor.
+ *
+ * A similar class for linear and logistic regression can be found in
+ * the regdata class.
+ */
 class coxph_data {
  public:
-    coxph_data get_unmasked_data();
+    int nids;                   /**< \brief Number of IDs/samples.  */
 
-    coxph_data()
-    {
-        nids        = 0;
-        ncov        = 0;
-        ngpreds     = 0;
-        masked_data = NULL;
-        gcount      = 0;
-        freq        = 0;
-    }
+    /**
+     * \brief Number of covariates + possible nr of genomic
+     * predictors.
+     *
+     * If snpnum >=0 then this equals the number of covariates + the
+     * number of regdata::ngpreds.
+     */
+    int ncov;
 
-    coxph_data(const coxph_data &obj);
-    coxph_data(phedata &phed, gendata &gend, const int snpnum);
-    void update_snp(gendata *gend, const int snpnum);
-    void remove_snp_from_X();
-    ~coxph_data();
+   /**
+     * \brief Number of genomic predictors, 1 for dosage data, 2 for
+     * probability data.
+     */
+    int ngpreds;
 
-    int nids;
-    int ncov;
-    int ngpreds;
+    /**
+     * \brief Number of non-masked genotypes.
+     */
     unsigned int gcount;
+
+    /**
+     * \brief Allele frequency.
+     *
+     * Calculation is only based on non-masked SNPs.
+     */
     double freq;
+
+    /**
+     * \brief The design matrix.
+     *
+     * The matrix dimensions are coxph_data::nids x coxph_data::ncov.
+     * After filling the matrix is it is time-sorted based on
+     * coxph_data::order. Note that coxfit2() expects the data to be
+     * in column-major order, so at the end of the constructor the
+     * matrix is transposed!
+     */
+    mematrix<double> X;
+
+    /**
+     * \brief A vector that contains the follow-up times for each
+     * individual, as read from the phenotype file.
+     *
+     * The vector is coxph_data::nids long. After reading the data
+     * from the phenotype file this vector is time-sorted based on
+     * coxph_data::order.
+     */
+    mematrix<double> stime;
+
+    /**
+     * \brief A vector containing the survival status of each
+     * individual, as read from the phenotype file.
+     *
+     * Values should be 0 or 1. The vector is coxph_data::nids
+     * long. After reading the data from the phenotype file this
+     * vector is time-sorted based on coxph_data::order.
+     */
+    mematrix<int> sstat;
+
+    /**
+     * \brief A vector containing case weights. These are set to 1.0
+     * in the constructor.
+     *
+     * The vector is coxph_data::nids long. After reading the data
+     * from the phenotype file this vector is time-sorted based on
+     * coxph_data::order.
+     */
     mematrix<double> weights;
-    mematrix<double> stime;
-    mematrix<int>    sstat;
+
+    /**
+     * \brief A vector containing an offset for the linear
+     * predictor. Set to 0.0 in the constructor. This is a mandatory
+     * input for coxfit2().
+     *
+     * The vector is coxph_data::nids long. After reading the data
+     * from the phenotype file this vector is time-sorted based on
+     * coxph_data::order.
+     */
     mematrix<double> offset;
-    mematrix<int>    strata;
-    mematrix<double> X;
-    mematrix<int>    order;
-    unsigned short int * masked_data;
+
+    /**
+     * \brief A vector containing strata. See coxfit2() for more
+     * details. All strata are set to 0 in the constructor.
+     *
+     * The vector is coxph_data::nids long. After reading the data
+     * from the phenotype file this vector is time-sorted based on
+     * coxph_data::order.
+     */
+    mematrix<int> strata;
+
+    /**
+     * \brief A vector used to store the follow-up times in ascending
+     * order. It is used to reorder the other vectors and
+     * coxph_data::X.
+     *
+     * The vector is coxph_data::nids long.
+     */
+    mematrix<int> order;
+
+    /**
+     * \brief A vector that contains ones/zeros to indicate which data points
+     * should be omitted because no genetic data is present.
+     *
+     * The vector is regdata::nids long. A value of 1 or 'true' means
+     * that that ID/sample will be masked because the SNP data is NA
+     * for that ID.
+     */
+    std::vector<bool> masked_data;
+
+
+    // Constructors and destructors
+    coxph_data();
+    coxph_data(const coxph_data &obj);
+    coxph_data(const phedata &phed, const gendata &gend, const int snpnum);
+    // ~coxph_data();
+
+
+    // Member functions
+    coxph_data get_unmasked_data() const;
+    void update_snp(const gendata *gend, const int snpnum);
+    void remove_snp_from_X();
 };
 
 
 class coxph_reg {
  public:
+    // Variables
     mematrix<double> beta;
     mematrix<double> sebeta;
     mematrix<double> residuals;
@@ -88,8 +193,13 @@
     double chi2_score;
     int niter;
 
-    coxph_reg(coxph_data &cdatain);
-    void estimate(coxph_data &cdatain, int maxiter,
+
+    // Constructors and destructors
+    coxph_reg(const coxph_data &cdatain);
+
+
+    // Member functions
+    void estimate(const coxph_data &cdatain, int maxiter,
                   double eps, double tol_chol, const int model,
                   const int interaction, const int ngpreds, const bool iscox,
                   const int nullmodel, const mlinfo &snpinfo, const int cursnp);

Modified: branches/ProbABEL-pvals/ProbABEL/src/data.cpp
===================================================================
--- branches/ProbABEL-pvals/ProbABEL/src/data.cpp	2014-04-30 08:57:49 UTC (rev 1717)
+++ branches/ProbABEL-pvals/ProbABEL/src/data.cpp	2014-04-30 12:16:35 UTC (rev 1718)
@@ -38,54 +38,11 @@
 #include "gendata.h"
 #include "data.h"
 
-#if EIGEN
 #include "eigen_mematrix.h"
 #include "eigen_mematrix.cpp"
-#else
-#include "mematrix.h"
-#include "mematri1.h"
-#endif
 #include "utilities.h"
 
-//TODO(unknown) This function is not used. Remove in near future
-//unsigned int Nmeasured(char * fname, int nphenocols, int npeople)
-//{
-//// first pass -- find unmeasured people
-//    std::ifstream infile(fname);
-//    if (!infile)
-//    {
-//        std::cerr << "Nmeasured: cannot open file " << fname << endl;
-//    }
-//    char tmp[100];
-//
-//    for (int i = 0; i < nphenocols; i++)
-//    {
-//        infile >> tmp;
-//    }
-//
-//    unsigned short int * allmeasured = new unsigned short int[npeople];
-//    int nids = 0;
-//    for (int i = 0; i < npeople; i++)
-//    {
-//        allmeasured[i] = 1;
-//        infile >> tmp;
-//        for (int j = 1; j < nphenocols; j++)
-//        {
-//            infile >> tmp;
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/genabel -r 1718


More information about the Genabel-commits mailing list