[Genabel-commits] r1700 - pkg/ProbABEL/src

Sun Apr 27 11:01:45 CEST 2014

Author: maartenk
Date: 2014-04-27 11:01:42 +0200 (Sun, 27 Apr 2014)
New Revision: 1700

Removed:
   pkg/ProbABEL/src/cholesky.cpp
   pkg/ProbABEL/src/cholesky.h
Modified:
   pkg/ProbABEL/src/Makefile.am
   pkg/ProbABEL/src/fvlib
   pkg/ProbABEL/src/reg1.cpp
   pkg/ProbABEL/src/reg1.h
Log:
-removed dependency of reg1.* on cholesky.* since this is now done with EIGEN (remove about 150 lines of code from our codebase)
-removed cholesky.h and cholesky.cpp 
-added some consts to functions in reg1.*
-removed some whitespace in reg1.cpp

Modified: pkg/ProbABEL/src/Makefile.am
===================================================================

--- pkg/ProbABEL/src/Makefile.am	2014-04-25 06:26:38 UTC (rev 1699)
+++ pkg/ProbABEL/src/Makefile.am	2014-04-27 09:01:42 UTC (rev 1700)
@@ -6,8 +6,8 @@
 REGFILES = data.h data.cpp gendata.h gendata.cpp eigen_mematrix.h eigen_mematrix.cpp \
  command_line_settings.h command_line_settings.cpp reg1.h usage.h		\
  usage.cpp main.cpp utilities.h utilities.cpp phedata.h phedata.cpp	\
- cholesky.h cholesky.cpp regdata.h regdata.cpp  \
- maskedmatrix.cpp maskedmatrix.h reg1.cpp main_functions_dump.h main_functions_dump.cpp
+ regdata.h regdata.cpp maskedmatrix.cpp maskedmatrix.h \
+reg1.cpp main_functions_dump.h main_functions_dump.cpp
 
 
 ## Filevector files:

Deleted: pkg/ProbABEL/src/cholesky.cpp
===================================================================
--- pkg/ProbABEL/src/cholesky.cpp	2014-04-25 06:26:38 UTC (rev 1699)
+++ pkg/ProbABEL/src/cholesky.cpp	2014-04-27 09:01:42 UTC (rev 1700)
@@ -1,149 +0,0 @@
-/*
- * cholesky.cpp
- *
- *  Created on: Mar 15, 2012
- *      Author: mkooyman
- */
-#include <string>
-#include <cstdarg>
-#include <cstdio>
-#include <cstdlib>
-
-#include "eigen_mematrix.h"
-#include "eigen_mematrix.cpp"
-
-
-/*  SCCS @(#)cholesky2.c    5.2 10/27/98
- ** subroutine to do Cholesky decompostion on a matrix: C = FDF'
- **   where F is lower triangular with 1's on the diagonal, and D is diagonal
- **
- ** arguments are:
- **     n         the size of the matrix to be factored
- **     **matrix  a ragged array containing an n by n submatrix to be factored
- **     toler     the threshold value for detecting "singularity"
- **
- **  The factorization is returned in the lower triangle, D occupies the
- **    diagonal and the upper triangle is left undisturbed.
- **    The lower triangle need not be filled in at the start.
- **
- **  Return value:  the rank of the matrix (non-negative definite), or -rank
- **     it not SPD or NND
- **
- **  If a column is deemed to be redundant, then that diagonal is set to zero.
- **
- **   Terry Therneau
- */
-
-// modified Yurii Aulchenko 2008-05-20
-int cholesky2_mm(mematrix<double> &matrix, double toler)
-{
-    if (matrix.ncol != matrix.nrow)
-    {
-        fprintf(stderr, "cholesky2_mm: matrix should be square\n");
-        exit(1);
-    }
-    int n = matrix.ncol;
-    double temp;
-    int i, j, k;
-    double eps, pivot;
-    int rank;
-    int nonneg;
-
-    nonneg = 1;
-    eps = 0;
-    for (i = 0; i < n; i++)
-    {
-        if (matrix[i * n + i] > eps)
-            eps = matrix[i * n + i];
-        for (j = (i + 1); j < n; j++)
-            matrix[j * n + i] = matrix[i * n + j];
-    }
-    eps *= toler;
-
-    rank = 0;
-    for (i = 0; i < n; i++)
-    {
-        pivot = matrix[i * n + i];
-        if (pivot < eps)
-        {
-            matrix[i * n + i] = 0;
-            if (pivot < -8 * eps)
-                nonneg = -1;
-        }
-        else
-        {
-            rank++;
-            for (j = (i + 1); j < n; j++)
-            {
-                temp = matrix[j * n + i] / pivot;
-                matrix[j * n + i] = temp;
-                matrix[j * n + j] -= temp * temp * pivot;
-                for (k = (j + 1); k < n; k++)
-                    matrix[k * n + j] -= temp * matrix[k * n + i];
-            }
-        }
-    }
-    return (rank * nonneg);
-}
-
-void chinv2_mm(mematrix<double> &matrix)
-{
-    if (matrix.ncol != matrix.nrow)
-    {
-        fprintf(stderr, "cholesky2_mm: matrix should be square\n");
-        exit(1);
-    }
-
-    int n = matrix.ncol;
-    double temp;
-    int i, j, k;
-
-    /*
-     ** invert the cholesky in the lower triangle
-     **   take full advantage of the cholesky's diagonal of 1's
-     */
-    for (i = 0; i < n; i++)
-    {
-        if (matrix[i * n + i] > 0)
-        {
-            matrix[i * n + i] = 1 / matrix[i * n + i]; /*this line inverts D */
-            for (j = (i + 1); j < n; j++)
-            {
-                matrix[j * n + i] = -matrix[j * n + i];
-                for (k = 0; k < i; k++) /*sweep operator */
-                    matrix[j * n + k] += matrix[j * n + i] * matrix[i * n + k];
-            }
-        }
-    }
-
-    /*
-     ** lower triangle now contains inverse of cholesky
-     ** calculate F'DF (inverse of cholesky decomp process) to get inverse
-     **   of original matrix
-     */
-    for (i = 0; i < n; i++)
-    {
-        if (matrix[i * n + i] == 0)
-        { /* singular row */
-            for (j = 0; j < i; j++)
-                matrix[j * n + i] = 0;
-            for (j = i; j < n; j++)
-                matrix[i * n + j] = 0;
-        }
-        else
-        {
-            for (j = (i + 1); j < n; j++)
-            {
-                temp = matrix[j * n + i] * matrix[j * n + j];
-                if (j != i)
-                    matrix[i * n + j] = temp;
-                for (k = i; k < j; k++)
-                    matrix[i * n + k] += temp * matrix[j * n + k];
-            }
-        }
-    }
-    // ugly fix to return only inverse
-    for (int col = 1; col < n; col++)
-        for (int row = 0; row < col; row++)
-            matrix[col * n + row] = matrix[row * n + col];
-}

Deleted: pkg/ProbABEL/src/cholesky.h
===================================================================
--- pkg/ProbABEL/src/cholesky.h	2014-04-25 06:26:38 UTC (rev 1699)
+++ pkg/ProbABEL/src/cholesky.h	2014-04-27 09:01:42 UTC (rev 1700)
@@ -1,17 +0,0 @@
-/*
- * cholesky.h
- *
- *  Created on: Mar 15, 2012
- *      Author: mkooyman
- */
-
-#ifndef CHOLESKY_H_
-#define CHOLESKY_H_
-
-#include "eigen_mematrix.h"
-#include "eigen_mematrix.cpp"
-
-int cholesky2_mm(mematrix<double> &matrix, double toler);
-void chinv2_mm(mematrix<double> &matrix);
-
-#endif /* CHOLESKY_H_ */

Modified: pkg/ProbABEL/src/fvlib
===================================================================
--- pkg/ProbABEL/src/fvlib	2014-04-25 06:26:38 UTC (rev 1699)
+++ pkg/ProbABEL/src/fvlib	2014-04-27 09:01:42 UTC (rev 1700)
@@ -1 +1 @@
-link ../../../tags/filevector/v.1.0.0/fvlib
\ No newline at end of file
+link include/filevector/fvlib
\ No newline at end of file

Modified: pkg/ProbABEL/src/reg1.cpp
===================================================================
--- pkg/ProbABEL/src/reg1.cpp	2014-04-25 06:26:38 UTC (rev 1699)
+++ pkg/ProbABEL/src/reg1.cpp	2014-04-27 09:01:42 UTC (rev 1700)
@@ -275,10 +275,12 @@
             reg_data.is_interaction_excluded, false, nullmodel);
     beta.reinit(X.ncol, 1);
     sebeta.reinit(X.ncol, 1);
+    int length_beta=X.ncol;
     double N = static_cast<double>(resid.nrow);
     mematrix<double> tX = transpose(X);
-    if (invvarmatrix.length_of_mask != 0)
+    if (invvarmatrix.length_of_mask != 0){
         tX = tX * invvarmatrix.masked_data;
+    }
 
     mematrix<double> u = tX * resid;
     mematrix<double> v = tX * X;
@@ -287,12 +289,16 @@
     csum = csum * (1. / N);
     v = v - csum;
     // use cholesky to invert
-    mematrix<double> v_i = v;
-    cholesky2_mm(v_i, tol_chol);
-    chinv2_mm(v_i);
+
+    LDLT <MatrixXd> Ch = LDLT < MatrixXd > (v.data.selfadjointView<Lower>());
     // before was
     // mematrix<double> v_i = invert(v);
-    beta = v_i * u;
+    beta.data = Ch.solve(v.data.adjoint() * u.data);
+    //TODO(maartenk): set size of v_i directly or remove mematrix class
+    mematrix<double> v_i = v;
+    v_i.data = Ch.solve(MatrixXd(length_beta, length_beta).
+                                    Identity(length_beta, length_beta));
+
     double sr = 0.;
     double srr = 0.;
     for (int i = 0; i < resid.nrow; i++)
@@ -327,7 +333,7 @@
     sigma2 = (Y - tXW * beta_vec).squaredNorm();
     beta.data = beta_vec;
 }
-void linear_reg::LeastSquaredRegression(mematrix<double> X,LDLT<MatrixXd>& Ch) {
+void linear_reg::LeastSquaredRegression(const mematrix<double>& X, LDLT<MatrixXd>& Ch) {
     int m = X.ncol;
     MatrixXd txx = MatrixXd(m, m).setZero().selfadjointView<Lower>().rankUpdate(
             X.data.adjoint());
@@ -368,12 +374,11 @@
     //residuals[i] -= resid_sub;
     loglik -= (residuals.data.array().square() * halfrecsig2).sum();
     loglik -= static_cast<double>(reg_data.nids) * log(sqrt(sigma2));
-
 }
 
 
 
-void linear_reg::RobustSEandCovariance(mematrix<double> X, mematrix<double> robust_sigma2,
+void linear_reg::RobustSEandCovariance(const mematrix<double> &X, mematrix<double> robust_sigma2,
         MatrixXd tXX_inv, int offset) {
     MatrixXd Xresiduals = X.data.array().colwise()
             * residuals.data.col(0).array();
@@ -386,7 +391,7 @@
             robust_sigma2.data.bottomLeftCorner(offset, offset).diagonal();
 }
 
-void linear_reg::PlainSEandCovariance(double sigma2_internal, MatrixXd tXX_inv,
+void linear_reg::PlainSEandCovariance(double sigma2_internal,const MatrixXd &tXX_inv,
         int offset) {
     sebeta.data = (sigma2_internal * tXX_inv.diagonal().array()).sqrt();
     covariance.data = sigma2_internal
@@ -438,7 +443,6 @@
 
     double sigma2_internal;
 
-
     LDLT <MatrixXd> Ch;
     if (invvarmatrixin.length_of_mask != 0)
     {
@@ -481,10 +485,8 @@
 
     MatrixXd tXX_inv = Ch.solve(MatrixXd(length_beta, length_beta).
                                 Identity(length_beta, length_beta));
-
     mematrix<double> robust_sigma2(X.ncol, X.ncol);
 
-
     int offset = X.ncol- 1;
      //if additive and interaction and 2 predictors and more then 2 betas
      if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2){
@@ -499,10 +501,8 @@
     {
         PlainSEandCovariance(sigma2_internal, tXX_inv, offset);
     }
-
 }
 
-
 void linear_reg::score(mematrix<double>& resid,
         double tol_chol, int model, int interaction, int ngpreds,
         const masked_matrix& invvarmatrix, int nullmodel) {
@@ -511,7 +511,6 @@
             invvarmatrix, nullmodel = 0);
 }
 
-
 logistic_reg::logistic_reg(regdata& rdatain) {
     reg_data = rdatain.get_unmasked_data();
     int length_beta = reg_data.X.ncol;

Modified: pkg/ProbABEL/src/reg1.h
===================================================================
--- pkg/ProbABEL/src/reg1.h	2014-04-25 06:26:38 UTC (rev 1699)
+++ pkg/ProbABEL/src/reg1.h	2014-04-27 09:01:42 UTC (rev 1700)
@@ -50,11 +50,9 @@
 #ifndef REG1_H_
 #define REG1_H_
 #include <cmath>
-#include "cholesky.h"
 #include "regdata.h"
 #include "maskedmatrix.h"
 
-
 mematrix<double> apply_model(mematrix<double>& X, int model, int interaction,
         int ngpreds, bool is_interaction_excluded, bool iscox = false,
         int nullmodel = 0);
@@ -99,11 +97,10 @@
                             const masked_matrix& W_masked,
                             LDLT<MatrixXd>& Ch);
     void logLikelihood(const mematrix<double>& X);
-    void LeastSquaredRegression(mematrix<double> X, LDLT<MatrixXd>& Ch);
-    void RobustSEandCovariance(mematrix<double> X,
-                               mematrix<double> robust_sigma2,
-                               MatrixXd tXX_inv, int offset);
-    void PlainSEandCovariance(double sigma2_internal, MatrixXd tXX_inv,
+    void LeastSquaredRegression(const mematrix<double> & X,LDLT<MatrixXd>& Ch);
+    void RobustSEandCovariance(const mematrix<double> & X,
+            mematrix <double> robust_sigma2, MatrixXd tXX_inv, int offset);
+    void PlainSEandCovariance(double sigma2_internal, const MatrixXd & tXX_inv,
                               int offset);
 };