[Genabel-commits] r1612 - in branches/ProbABEL-0.50: checks/R-tests src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sat Feb 15 22:39:14 CET 2014
Author: maartenk
Date: 2014-02-15 22:39:14 +0100 (Sat, 15 Feb 2014)
New Revision: 1612
Modified:
branches/ProbABEL-0.50/checks/R-tests/run_models_in_R_palinear.R
branches/ProbABEL-0.50/src/command_line_settings.cpp
branches/ProbABEL-0.50/src/command_line_settings.h
branches/ProbABEL-0.50/src/data.cpp
branches/ProbABEL-0.50/src/data.h
branches/ProbABEL-0.50/src/main.cpp
branches/ProbABEL-0.50/src/main_functions_dump.cpp
branches/ProbABEL-0.50/src/maskedmatrix.cpp
branches/ProbABEL-0.50/src/maskedmatrix.h
branches/ProbABEL-0.50/src/reg1.cpp
branches/ProbABEL-0.50/src/reg1.h
Log:
-Fixed typo in run_models_in_R_palinear.R in a variable name (test will now succeed)
-Comment out 2 unused functions (getProgramName andNmeasured ) made todo's to remove them in the future
-Solved some cpplint and cppcheck warnings
Modified: branches/ProbABEL-0.50/checks/R-tests/run_models_in_R_palinear.R
===================================================================
--- branches/ProbABEL-0.50/checks/R-tests/run_models_in_R_palinear.R 2014-02-14 14:14:21 UTC (rev 1611)
+++ branches/ProbABEL-0.50/checks/R-tests/run_models_in_R_palinear.R 2014-02-15 21:39:14 UTC (rev 1612)
@@ -123,7 +123,7 @@
}
colnames(prob.2df.R) <- cols2df
rownames(prob.2df.R) <- NULL
-#stopifnot( all.equal(prob.2df.PA1[1:5,], prob.2df.R[1:5,], tol=tol) )
+stopifnot( all.equal(prob.2df.PA[1:5,], prob.2df.R[1:5,], tol=tol) )
cat("2df\n")
cat("\t\t\t\t\t\tOK\n")
Modified: branches/ProbABEL-0.50/src/command_line_settings.cpp
===================================================================
--- branches/ProbABEL-0.50/src/command_line_settings.cpp 2014-02-14 14:14:21 UTC (rev 1611)
+++ branches/ProbABEL-0.50/src/command_line_settings.cpp 2014-02-15 21:39:14 UTC (rev 1612)
@@ -120,12 +120,12 @@
{
return outfilename;
}
+//TODO(unknown) This function is not used. Remove in near future
+//char* cmdvars::getProgramName() const
+//{
+// return program_name;
+//}
-char* cmdvars::getProgramName() const
-{
- return program_name;
-}
-
int cmdvars::getRobust() const
{
return robust;
Modified: branches/ProbABEL-0.50/src/command_line_settings.h
===================================================================
--- branches/ProbABEL-0.50/src/command_line_settings.h 2014-02-14 14:14:21 UTC (rev 1611)
+++ branches/ProbABEL-0.50/src/command_line_settings.h 2014-02-15 21:39:14 UTC (rev 1612)
@@ -116,7 +116,8 @@
int getNoutcomes() const;
int getNpeople() const;
string getOutfilename() const;
- char* getProgramName() const;
+//TODO(unknown) This function is not used. Remove in near future
+// char* getProgramName() const;
int getRobust() const;
int getScore() const;
string getSep() const;
Modified: branches/ProbABEL-0.50/src/data.cpp
===================================================================
--- branches/ProbABEL-0.50/src/data.cpp 2014-02-14 14:14:21 UTC (rev 1611)
+++ branches/ProbABEL-0.50/src/data.cpp 2014-02-15 21:39:14 UTC (rev 1612)
@@ -47,49 +47,45 @@
#endif
#include "utilities.h"
+//TODO(unknown) This function is not used. Remove in near future
+//unsigned int Nmeasured(char * fname, int nphenocols, int npeople)
+//{
+//// first pass -- find unmeasured people
+// std::ifstream infile(fname);
+// if (!infile)
+// {
+// std::cerr << "Nmeasured: cannot open file " << fname << endl;
+// }
+// char tmp[100];
+//
+// for (int i = 0; i < nphenocols; i++)
+// {
+// infile >> tmp;
+// }
+//
+// unsigned short int * allmeasured = new unsigned short int[npeople];
+// int nids = 0;
+// for (int i = 0; i < npeople; i++)
+// {
+// allmeasured[i] = 1;
+// infile >> tmp;
+// for (int j = 1; j < nphenocols; j++)
+// {
+// infile >> tmp;
+// if (tmp[0] == 'N' || tmp[0] == 'n')
+// allmeasured[i] = 0;
+// }
+// if (allmeasured[i] == 1)
+// nids++;
+// }
+// infile.close();
+//
+// delete[] allmeasured;
+//
+// return (nids);
+//}
-unsigned int Nmeasured(char * fname, int nphenocols, int npeople)
-{
-//TODO: unused variables remove them for good if there is no reason to keep them
-//int ncov = nphenocols - 2;
-//int nids_all = npeople;
-// first pass -- find unmeasured people
- std::ifstream infile(fname);
- if (!infile)
- {
- std::cerr << "Nmeasured: cannot open file " << fname << endl;
- }
- char tmp[100];
-
- for (int i = 0; i < nphenocols; i++)
- {
- infile >> tmp;
- }
-
- unsigned short int * allmeasured = new unsigned short int[npeople];
- int nids = 0;
- for (int i = 0; i < npeople; i++)
- {
- allmeasured[i] = 1;
- infile >> tmp;
- for (int j = 1; j < nphenocols; j++)
- {
- infile >> tmp;
- if (tmp[0] == 'N' || tmp[0] == 'n')
- allmeasured[i] = 0;
- }
- if (allmeasured[i] == 1)
- nids++;
- }
- infile.close();
-
- delete[] allmeasured;
-
- return (nids);
-}
-
-
/**
* Read SNP information from an mlinfo file generated by the
* imputation software.
@@ -269,5 +265,3 @@
{
return matrix;
}
-
-//________________________________________Maksim_end
Modified: branches/ProbABEL-0.50/src/data.h
===================================================================
--- branches/ProbABEL-0.50/src/data.h 2014-02-14 14:14:21 UTC (rev 1611)
+++ branches/ProbABEL-0.50/src/data.h 2014-02-15 21:39:14 UTC (rev 1612)
@@ -31,8 +31,8 @@
#include <string>
extern bool is_interaction_excluded;
-
-unsigned int Nmeasured(char * fname, int nphenocols, int npeople);
+//TODO(unknown) This function is not used. Remove in near future
+//unsigned int Nmeasured(char * fname, int nphenocols, int npeople);
#include "phedata.h"
#include "gendata.h"
@@ -80,4 +80,4 @@
~InvSigma();
};
-#endif /* DATA_H_ */
+#endif//DATA_H_
Modified: branches/ProbABEL-0.50/src/main.cpp
===================================================================
--- branches/ProbABEL-0.50/src/main.cpp 2014-02-14 14:14:21 UTC (rev 1611)
+++ branches/ProbABEL-0.50/src/main.cpp 2014-02-15 21:39:14 UTC (rev 1612)
@@ -151,7 +151,7 @@
#if LOGISTIC
logistic_reg nrd = logistic_reg(nrgd);
- nrd.estimate( 0, MAXITER, EPS, 0,
+ nrd.estimate(0, MAXITER, EPS, 0,
input_var.getInteraction(),
input_var.getNgpreds(),
invvarmatrix,
@@ -163,7 +163,7 @@
#if DEBUG
std::cout << "[DEBUG] linear_reg nrd = linear_reg(nrgd); DONE.";
#endif
- nrd.estimate( 0, CHOLTOL, 0, input_var.getInteraction(),
+ nrd.estimate(0, CHOLTOL, 0, input_var.getInteraction(),
input_var.getNgpreds(), invvarmatrix,
input_var.getRobust(), 1);
#elif COXPH
@@ -199,7 +199,8 @@
{
create_header(outfile, input_var, phd, interaction_cox);
}
- } else // Dosage data: Only additive model => only one output file
+ }
+ else // Dosage data: Only additive model => only one output file
{
outfile.push_back(
new std::ofstream((outfilename_str + "_add.out.txt").c_str()));
@@ -278,8 +279,7 @@
write_mlinfo(outfile, file, mli, csnp, input_var,
rgd.gcount, rgd.freq);
}
- } else
- {
+ } else{
// Dosage data: only additive model
int file = 0;
write_mlinfo(outfile, file, mli, csnp, input_var,
@@ -304,7 +304,7 @@
}
else
{
- rd.estimate( 0, MAXITER, EPS, model,
+ rd.estimate(0, MAXITER, EPS, model,
input_var.getInteraction(),
input_var.getNgpreds(),
invvarmatrix,
@@ -321,7 +321,7 @@
}
else
{
- rd.estimate( 0, CHOLTOL, model,
+ rd.estimate(0, CHOLTOL, model,
input_var.getInteraction(),
input_var.getNgpreds(),
invvarmatrix,
@@ -405,7 +405,7 @@
regdata new_rgd = rgd;
new_rgd.remove_snp_from_X();
linear_reg new_null_rd(new_rgd);
- new_null_rd.estimate( 0,
+ new_null_rd.estimate(0,
CHOLTOL, model,
input_var.getInteraction(),
input_var.getNgpreds(),
@@ -416,7 +416,7 @@
regdata new_rgd = rgd;
new_rgd.remove_snp_from_X();
logistic_reg new_null_rd(new_rgd);
- new_null_rd.estimate( 0, MAXITER, EPS,
+ new_null_rd.estimate(0, MAXITER, EPS,
model,
input_var.getInteraction(),
input_var.getNgpreds(),
@@ -440,8 +440,7 @@
// No missing SNP data, we can compute the LRT
*chi2[model] << 2. * (loglik - null_loglik);
}
- } else
- {
+ } else{
// We want score test output
*chi2[model] << rd.chi2_score;
}
@@ -481,8 +480,7 @@
if (input_var.getNgpreds() == 0)
{
end_pos = rgd.X.ncol;
- } else
- {
+ } else{
end_pos = rgd.X.ncol - 1;
}
@@ -511,16 +509,15 @@
*covvalue[model] << "nan"
<< input_var.getSep()
<< "nan";
- } else
- {
+ } else{
*covvalue[model] << "nan";
}
}
#endif
// Oct 26, 2009
*chi2[model] << "nan";
- } else
- { // ngpreds==1 (and SNP is rare)
+ } else{
+ // ngpreds==1 (and SNP is rare)
if (input_var.getInverseFilename() == NULL)
{
// Han Chen
Modified: branches/ProbABEL-0.50/src/main_functions_dump.cpp
===================================================================
--- branches/ProbABEL-0.50/src/main_functions_dump.cpp 2014-02-14 14:14:21 UTC (rev 1611)
+++ branches/ProbABEL-0.50/src/main_functions_dump.cpp 2014-02-15 21:39:14 UTC (rev 1612)
@@ -417,8 +417,7 @@
if (input_var.getNgpreds() == 2)
{
start_pos = number_of_rows_or_columns - 2;
- } else
- {
+ } else{
start_pos = number_of_rows_or_columns - 1;
}
} else if (!input_var.getAllcov() && model == 0
@@ -427,8 +426,7 @@
if (input_var.getNgpreds() == 2)
{
start_pos = number_of_rows_or_columns - 4;
- } else
- {
+ } else{
start_pos = number_of_rows_or_columns - 2;
}
} else if (!input_var.getAllcov() && model != 0
@@ -439,8 +437,7 @@
&& input_var.getInteraction() != 0)
{
start_pos = number_of_rows_or_columns - 2;
- } else
- {
+ } else{
start_pos = 0;
}
Modified: branches/ProbABEL-0.50/src/maskedmatrix.cpp
===================================================================
--- branches/ProbABEL-0.50/src/maskedmatrix.cpp 2014-02-14 14:14:21 UTC (rev 1611)
+++ branches/ProbABEL-0.50/src/maskedmatrix.cpp 2014-02-15 21:39:14 UTC (rev 1612)
@@ -26,6 +26,9 @@
*/
+
+
+#include <algorithm>
#include "maskedmatrix.h"
#if EIGEN
#include "eigen_mematrix.h"
@@ -47,11 +50,8 @@
// matrix_original = M;
masked_data = &matrix_original;
mask_of_old = new unsigned short int[M.nrow];
- std::fill (mask_of_old,mask_of_old+M.nrow,0);
- //TODO:set length of mask for all types
+ std::fill(mask_of_old, mask_of_old+M.nrow, 0);
length_of_mask = M.nrow;
- //TODO:set type (row,column,symmetric)
- //type="symmetric";
}
void masked_matrix::set_matrix(const mematrix<double> &M)
@@ -59,11 +59,8 @@
matrix_original = M;
masked_data = &matrix_original;
mask_of_old = new unsigned short int[M.nrow];
- std::fill (mask_of_old,mask_of_old+M.nrow,0);
- //TODO:set length of mask for all types
+ std::fill(mask_of_old, mask_of_old+M.nrow, 0);
length_of_mask = M.nrow;
- //TODO:set type (row,column,symmetric)
- //type="symmetric";
}
masked_matrix::~masked_matrix()
@@ -85,7 +82,7 @@
else
{
//Check update mask is the same as old matrix
- if (std::equal (newmask, newmask+length_of_mask, mask_of_old))
+ if (std::equal(newmask, newmask+length_of_mask, mask_of_old))
{
//new mask is the same as old matrix
masked_data = &matrix_masked_data;
@@ -94,7 +91,7 @@
{
// new mask differs from old matrix and create new.
// mask_of_old = newmask;
- std::copy(newmask, newmask+length_of_mask,mask_of_old);
+ std::copy(newmask, newmask+length_of_mask, mask_of_old);
mask_symmetric(nmeasured);
masked_data = &matrix_masked_data;
}
Modified: branches/ProbABEL-0.50/src/maskedmatrix.h
===================================================================
--- branches/ProbABEL-0.50/src/maskedmatrix.h 2014-02-14 14:14:21 UTC (rev 1611)
+++ branches/ProbABEL-0.50/src/maskedmatrix.h 2014-02-15 21:39:14 UTC (rev 1612)
@@ -55,4 +55,4 @@
void mask_symmetric(int nmeasured);
};
-#endif /* MASKEDMATRIX_H_ */
+#endif//MASKEDMATRIX_H_
Modified: branches/ProbABEL-0.50/src/reg1.cpp
===================================================================
--- branches/ProbABEL-0.50/src/reg1.cpp 2014-02-14 14:14:21 UTC (rev 1611)
+++ branches/ProbABEL-0.50/src/reg1.cpp 2014-02-15 21:39:14 UTC (rev 1612)
@@ -91,14 +91,14 @@
{
col_new++;
nX_without_interact_phe[row
- * nX_without_interact_phe.ncol + col_new] =
+ * nX_without_interact_phe.ncol + col_new] =
nX[row * nX.ncol + col];
}
if (col != interaction - 1 && iscox)
{
col_new++;
nX_without_interact_phe[row
- * nX_without_interact_phe.ncol + col_new] =
+ * nX_without_interact_phe.ncol + col_new] =
nX[row * nX.ncol + col];
}
} // interaction_only, model==0, ngpreds==2
@@ -148,14 +148,14 @@
{
col_new++;
nX_without_interact_phe[row
- * nX_without_interact_phe.ncol + col_new] =
+ * nX_without_interact_phe.ncol + col_new] =
nX[row * nX.ncol + col];
}
if (col != interaction - 1 && iscox)
{
col_new++;
nX_without_interact_phe[row
- * nX_without_interact_phe.ncol + col_new] =
+ * nX_without_interact_phe.ncol + col_new] =
nX[row * nX.ncol + col];
}
}
@@ -311,7 +311,7 @@
}
-void linear_reg::estimate( int verbose, double tol_chol,
+void linear_reg::estimate(int verbose, double tol_chol,
int model, int interaction, int ngpreds, masked_matrix& invvarmatrixin,
int robust, int nullmodel) {
// suda interaction parameter
@@ -357,7 +357,7 @@
double sigma2_internal;
#if EIGEN
- LDLT <MatrixXd> Ch ;
+ LDLT <MatrixXd> Ch;
#else
mematrix<double> tXX_i;
#endif
@@ -373,13 +373,15 @@
//Oct 26, 2009
#if EIGEN
- MatrixXd tXW = X.data.transpose() * invvarmatrixin.masked_data->data; // flops 5997000
+ // next line is 5997000 flops
+ MatrixXd tXW = X.data.transpose() * invvarmatrixin.masked_data->data;
Ch = LDLT <MatrixXd>(tXW * X.data); // 17991 flops
- beta.data = Ch.solve(tXW * reg_data.Y.data);
- sigma2 = (reg_data.Y.data - tXW.transpose() * beta.data).squaredNorm() ; //flops: 1000+5000+3000
+ beta.data = Ch.solve(tXW * reg_data.Y.data);//5997 flops
+ //next line is: 1000+5000+3000= 9000 flops
+ sigma2 = (reg_data.Y.data - tXW.transpose() * beta.data).squaredNorm();
#else
-
- mematrix<double> tXW = transpose(X) * invvarmatrixin.masked_data; // flops 5997000
+ // next line is 5997000 flops
+ mematrix<double> tXW = transpose(X) * invvarmatrixin.masked_data;
tXX_i = tXW * X; // 17991 flops
// use cholesky to invert
cholesky2_mm(tXX_i, tol_chol);
@@ -387,11 +389,12 @@
beta = tXX_i * (tXW * reg_data.Y); // flops 15+5997
// now compute residual variance
sigma2 = 0.;
+ //next line is: 1000+5000+= 6000 flops
mematrix<double> sigma2_matrix = reg_data.Y - (transpose(tXW) * beta); //flops: 1000+5000
for (int i = 0; i < sigma2_matrix.nrow; i++)
{
double val = sigma2_matrix.get(i, 0);
- sigma2 += val * val; // flops: 3000
+ sigma2 += val * val; // flops: 3000 (iterations counted)
}
#endif
double N = X.nrow;
@@ -409,9 +412,9 @@
int m = X.ncol;
MatrixXd txx = MatrixXd(m, m).setZero().selfadjointView<Lower>().\
rankUpdate(X.data.adjoint());
- Ch=LDLT <MatrixXd>(txx.selfadjointView<Lower>());
- beta.data= Ch.solve(X.data.adjoint() * reg_data.Y.data);
- sigma2 = (reg_data.Y.data - (X.data * beta.data)).squaredNorm() ;
+ Ch = LDLT <MatrixXd>(txx.selfadjointView<Lower>());
+ beta.data = Ch.solve(X.data.adjoint() * reg_data.Y.data);
+ sigma2 = (reg_data.Y.data - (X.data * beta.data)).squaredNorm();
#else
mematrix<double> tX = transpose(X);
@@ -431,7 +434,7 @@
}
#endif
double N = static_cast<double>(X.nrow);
- double P=static_cast<double>(length_beta);
+ double P = static_cast<double>(length_beta);
sigma2_internal = sigma2 / (N - P);
sigma2 /= N;
}
@@ -456,16 +459,16 @@
#if EIGEN
double intercept = beta.get(0, 0);
- residuals.data= reg_data.Y.data.array()-intercept;
+ residuals.data = reg_data.Y.data.array()-intercept;
//matrix.
- ArrayXXd betacol = beta.data.block(1,0,beta.data.rows()-1,1)\
+ ArrayXXd betacol = beta.data.block(1, 0, beta.data.rows()-1, 1)\
.array().transpose();
- ArrayXXd resid_sub = (X.data.block(0,1,X.data.rows(),X.data.cols()-1)\
- *betacol.matrix().asDiagonal()).rowwise().sum() ;
+ ArrayXXd resid_sub = (X.data.block(0, 1, X.data.rows(), X.data.cols()-1)\
+ *betacol.matrix().asDiagonal()).rowwise().sum();
//std::cout << resid_sub << std::endl;
- residuals.data-=resid_sub.matrix();
+ residuals.data -= resid_sub.matrix();
//residuals[i] -= resid_sub;
- loglik-=(residuals.data.array().square()*halfrecsig2).sum();
+ loglik -= (residuals.data.array().square()*halfrecsig2).sum();
//loglik -= halfrecsig2 * residuals[i] * residuals[i];
@@ -483,7 +486,7 @@
loglik -= static_cast<double>(reg_data.nids) * log(sqrt(sigma2));
#if EIGEN
- MatrixXd tXX_inv=Ch.solve(MatrixXd(length_beta, length_beta).
+ MatrixXd tXX_inv = Ch.solve(MatrixXd(length_beta, length_beta).
Identity(length_beta,length_beta));
#endif
@@ -512,7 +515,6 @@
#endif
-
}
//cout << "estimate 0\n";
#if EIGEN
@@ -526,18 +528,17 @@
(sigma2_internal
* tXX_inv.diagonal().array()).sqrt();
}
- int offset=X.ncol- 1;
+ int offset = X.ncol- 1;
//if additive and interaction and 2 predictors and more then 2 betas
if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2){
- offset=X.ncol - 2;
+ offset = X.ncol - 2;
}
if (robust)
{
covariance.data = robust_sigma2.data.bottomLeftCorner(
offset, offset).diagonal();
-
}
else
{
@@ -603,13 +604,12 @@
}
}
#endif
-
}
void linear_reg::score(mematrix<double>& resid,
double tol_chol, int model, int interaction, int ngpreds,
const masked_matrix& invvarmatrix, int nullmodel) {
- // regdata rdata = rdatain.get_unmasked_data();
+ //regdata rdata = rdatain.get_unmasked_data();
base_score(resid, tol_chol, model, interaction, ngpreds,
invvarmatrix, nullmodel = 0);
}
@@ -632,7 +632,7 @@
chi2_score = -1.;
}
-void logistic_reg::estimate( int verbose, int maxiter,
+void logistic_reg::estimate(int verbose, int maxiter,
double eps, int model, int interaction, int ngpreds,
masked_matrix& invvarmatrixin, int robust, int nullmodel) {
// In contrast to the 'linear' case 'invvarmatrix' contains the
@@ -656,7 +656,6 @@
if (length_beta > 1)
{
-
if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2)
{
covariance.reinit(length_beta - 2, 1);
@@ -687,7 +686,7 @@
if (invvarmatrix.nrow != 0 && invvarmatrix.ncol != 0)
{
- //TODO(maarten):invvarmatix is symmetric:is there an more effective way?
+ //TODO(maarten) invvarmatix is symmetric:is there an more effective way?
tX = tX * invvarmatrix;
}
/*
@@ -706,7 +705,6 @@
*/
niter = 0;
double delta = 1.;
- double prevlik = 0.;
while (niter < maxiter && delta > eps)
{
mematrix<double> eMu = (X) * beta;
@@ -776,7 +774,7 @@
}
// std::cout << "beta:\n"; beta.print();
// compute likelihood
- prevlik = loglik;
+ double prevlik = loglik;
loglik = 0.;
for (int i = 0; i < eMu.nrow; i++)
loglik += reg_data.Y[i] * eMu_us[i] - log(1. + exp(eMu_us[i]));
Modified: branches/ProbABEL-0.50/src/reg1.h
===================================================================
--- branches/ProbABEL-0.50/src/reg1.h 2014-02-14 14:14:21 UTC (rev 1611)
+++ branches/ProbABEL-0.50/src/reg1.h 2014-02-15 21:39:14 UTC (rev 1612)
@@ -85,13 +85,13 @@
linear_reg(regdata& rdatain);
~linear_reg()
{
- delete [] reg_data.masked_data ;
+ delete [] reg_data.masked_data;
// delete beta;
// delete sebeta;
// delete residuals;
}
- void estimate( int verbose, double tol_chol, int model,
+ void estimate(int verbose, double tol_chol, int model,
int interaction, int ngpreds,
masked_matrix& invvarmatrixin,
int robust, int nullmodel = 0);
@@ -108,12 +108,12 @@
logistic_reg(regdata& rdatain);
~logistic_reg()
{
- delete [] reg_data.masked_data ;
+ delete [] reg_data.masked_data;
// delete beta;
// delete sebeta;
}
- void estimate( int verbose, int maxiter, double eps,
+ void estimate(int verbose, int maxiter, double eps,
int model, int interaction, int ngpreds,
masked_matrix& invvarmatrixin, int robust,
int nullmodel = 0);
@@ -123,4 +123,4 @@
masked_matrix& invvarmatrix, int nullmodel = 0);
};
-#endif
+#endif//REG1_H_
More information about the Genabel-commits
mailing list