[Genabel-commits] r1589 - branches/ProbABEL-0.50/src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Feb 3 23:29:18 CET 2014
Author: maartenk
Date: 2014-02-03 23:29:17 +0100 (Mon, 03 Feb 2014)
New Revision: 1589
Modified:
branches/ProbABEL-0.50/src/main.cpp
branches/ProbABEL-0.50/src/reg1.cpp
branches/ProbABEL-0.50/src/reg1.h
branches/ProbABEL-0.50/src/regdata.cpp
branches/ProbABEL-0.50/src/regdata.h
Log:
Removes mutiple masking the data per snp in linear and logistic regression. This commit speeds up palinear by ~11%. This introduces minor memory leaks.
Modified: branches/ProbABEL-0.50/src/main.cpp
===================================================================
--- branches/ProbABEL-0.50/src/main.cpp 2014-02-03 22:05:56 UTC (rev 1588)
+++ branches/ProbABEL-0.50/src/main.cpp 2014-02-03 22:29:17 UTC (rev 1589)
@@ -126,7 +126,8 @@
std::cout << " loaded null data..." << std::flush;
#if LOGISTIC
logistic_reg nrd = logistic_reg(nrgd);
- nrd.estimate(nrgd, 0, MAXITER, EPS, CHOLTOL, 0,
+
+ nrd.estimate( 0, MAXITER, EPS, CHOLTOL, 0,
input_var.getInteraction(),
input_var.getNgpreds(),
invvarmatrix,
@@ -138,7 +139,7 @@
#if DEBUG
std::cout << "[DEBUG] linear_reg nrd = linear_reg(nrgd); DONE.";
#endif
- nrd.estimate(nrgd, 0, CHOLTOL, 0, input_var.getInteraction(),
+ nrd.estimate( 0, CHOLTOL, 0, input_var.getInteraction(),
input_var.getNgpreds(), invvarmatrix,
input_var.getRobust(), 1);
#elif COXPH
@@ -272,14 +273,14 @@
logistic_reg rd(rgd);
if (input_var.getScore())
{
- rd.score(nrd.residuals, rgd, 0, CHOLTOL, model,
+ rd.score(nrd.residuals, 0, CHOLTOL, model,
input_var.getInteraction(),
input_var.getNgpreds(),
invvarmatrix);
}
else
{
- rd.estimate(rgd, 0, MAXITER, EPS, CHOLTOL, model,
+ rd.estimate( 0, MAXITER, EPS, CHOLTOL, model,
input_var.getInteraction(),
input_var.getNgpreds(),
invvarmatrix,
@@ -289,14 +290,14 @@
linear_reg rd(rgd);
if (input_var.getScore())
{
- rd.score(nrd.residuals, rgd, 0, CHOLTOL, model,
+ rd.score(nrd.residuals, 0, CHOLTOL, model,
input_var.getInteraction(),
input_var.getNgpreds(),
invvarmatrix);
}
else
{
- rd.estimate(rgd, 0, CHOLTOL, model,
+ rd.estimate( 0, CHOLTOL, model,
input_var.getInteraction(),
input_var.getNgpreds(),
invvarmatrix,
@@ -380,7 +381,7 @@
regdata new_rgd = rgd;
new_rgd.remove_snp_from_X();
linear_reg new_null_rd(new_rgd);
- new_null_rd.estimate(new_rgd, 0,
+ new_null_rd.estimate( 0,
CHOLTOL, model,
input_var.getInteraction(),
input_var.getNgpreds(),
@@ -391,7 +392,7 @@
regdata new_rgd = rgd;
new_rgd.remove_snp_from_X();
logistic_reg new_null_rd(new_rgd);
- new_null_rd.estimate(new_rgd, 0, MAXITER, EPS,
+ new_null_rd.estimate( 0, MAXITER, EPS,
CHOLTOL, model,
input_var.getInteraction(),
input_var.getNgpreds(),
Modified: branches/ProbABEL-0.50/src/reg1.cpp
===================================================================
--- branches/ProbABEL-0.50/src/reg1.cpp 2014-02-03 22:05:56 UTC (rev 1588)
+++ branches/ProbABEL-0.50/src/reg1.cpp 2014-02-03 22:29:17 UTC (rev 1589)
@@ -224,10 +224,10 @@
}
linear_reg::linear_reg(regdata& rdatain) {
- regdata rdata = rdatain.get_unmasked_data();
+ reg_data = rdatain.get_unmasked_data();
// std::cout << "linear_reg: " << rdata.nids << " " << (rdata.X).ncol
// << " " << (rdata.Y).ncol << "\n";
- int length_beta = (rdata.X).ncol;
+ int length_beta = (reg_data.X).ncol;
beta.reinit(length_beta, 1);
sebeta.reinit(length_beta, 1);
//Han Chen
@@ -236,18 +236,18 @@
covariance.reinit(length_beta - 1, 1);
}
//Oct 26, 2009
- residuals.reinit(rdata.nids, 1);
+ residuals.reinit(reg_data.nids, 1);
sigma2 = -1.;
loglik = -9.999e+32;
chi2_score = -1.;
}
-void base_reg::base_score(mematrix<double>& resid, regdata& rdata, int verbose,
+void base_reg::base_score(mematrix<double>& resid, int verbose,
double tol_chol, int model, int interaction, int ngpreds,
const masked_matrix& invvarmatrix, int nullmodel) {
- mematrix<double> oX = rdata.extract_genotypes();
+ mematrix<double> oX = reg_data.extract_genotypes();
mematrix<double> X = apply_model(oX, model, interaction, ngpreds,
- rdata.is_interaction_excluded, false, nullmodel);
+ reg_data.is_interaction_excluded, false, nullmodel);
beta.reinit(X.ncol, 1);
sebeta.reinit(X.ncol, 1);
double N = static_cast<double>(resid.nrow);
@@ -286,31 +286,31 @@
}
-void linear_reg::estimate(regdata& rdatain, int verbose, double tol_chol,
+void linear_reg::estimate( int verbose, double tol_chol,
int model, int interaction, int ngpreds, masked_matrix& invvarmatrixin,
int robust, int nullmodel) {
// suda interaction parameter
// model should come here
- regdata rdata = rdatain.get_unmasked_data();
+ //regdata rdata = rdatain.get_unmasked_data();
if (verbose)
{
- cout << rdata.is_interaction_excluded
+ cout << reg_data.is_interaction_excluded
<< " <-rdata.is_interaction_excluded\n";
// std::cout << "invvarmatrix:\n";
// invvarmatrixin.masked_data->print();
std::cout << "rdata.X:\n";
- rdata.X.print();
+ reg_data.X.print();
}
- mematrix<double> X = apply_model(rdata.X, model, interaction, ngpreds,
- rdata.is_interaction_excluded, false, nullmodel);
+ mematrix<double> X = apply_model(reg_data.X, model, interaction, ngpreds,
+ reg_data.is_interaction_excluded, false, nullmodel);
if (verbose)
{
std::cout << "X:\n";
X.print();
std::cout << "Y:\n";
- rdata.Y.print();
+ reg_data.Y.print();
}
int length_beta = X.ncol;
@@ -337,7 +337,7 @@
if (invvarmatrixin.length_of_mask != 0)
{
//retrieve masked data W
- invvarmatrixin.update_mask(rdatain.masked_data);
+ invvarmatrixin.update_mask(reg_data.masked_data);
// This regression is Weighted Least Square: used for mmscore :
// FLOPS count are calculated for 3*1000 matrix as follow:
@@ -353,10 +353,10 @@
// use cholesky to invert
cholesky2_mm(tXX_i, tol_chol);
chinv2_mm(tXX_i);
- beta = tXX_i * (tXW * rdata.Y); // flops 15+5997
+ beta = tXX_i * (tXW * reg_data.Y); // flops 15+5997
// now compute residual variance
sigma2 = 0.;
- mematrix<double> sigma2_matrix = rdata.Y - (transpose(tXW) * beta); //flops: 1000+5000
+ mematrix<double> sigma2_matrix = reg_data.Y - (transpose(tXW) * beta); //flops: 1000+5000
for (int i = 0; i < sigma2_matrix.nrow; i++)
{
double val = sigma2_matrix.get(i, 0);
@@ -377,7 +377,7 @@
int m = X.ncol;
MatrixXd txx = MatrixXd(m, m).setZero().selfadjointView<Lower>().rankUpdate(X.data.adjoint());
Ch=LDLT <MatrixXd>(txx.selfadjointView<Lower>());
- beta.data= Ch.solve(X.data.adjoint() * rdata.Y.data);
+ beta.data= Ch.solve(X.data.adjoint() * reg_data.Y.data);
tXX_i.data=Ch.solve(MatrixXd(m, m).Identity(m,m));
tXX_i.nrow=tXX_i.data.rows();
@@ -390,12 +390,12 @@
tXX_i = tX * X;
cholesky2_mm(tXX_i, tol_chol);
chinv2_mm(tXX_i);
- beta = tXX_i * (tX * (rdata.Y));
+ beta = tXX_i * (tX * (reg_data.Y));
#endif
// now compute residual variance
sigma2 = 0.;
- mematrix<double> sigma2_matrix = rdata.Y - (X * beta);
+ mematrix<double> sigma2_matrix = reg_data.Y - (X * beta);
#if EIGEN
sigma2 = sigma2_matrix.data.squaredNorm() ;
#else
@@ -431,7 +431,7 @@
#if EIGEN
double intercept = beta.get(0, 0);
- residuals.data= rdata.Y.data.array()-intercept;
+ residuals.data= reg_data.Y.data.array()-intercept;
//matrix.
ArrayXXd betacol = beta.data.block(1,0,beta.data.rows()-1,1).array().transpose();
ArrayXXd resid_sub = (X.data.block(0,1,X.data.rows(),X.data.cols()-1)*betacol.matrix().asDiagonal()).rowwise().sum() ;
@@ -443,9 +443,9 @@
//loglik -= halfrecsig2 * residuals[i] * residuals[i];
#else
- for (int i = 0; i < rdata.nids; i++)
+ for (int i = 0; i < reg_data.nids; i++)
{
- double resid = rdata.Y[i] - beta.get(0, 0); // intercept
+ double resid = reg_data.Y[i] - beta.get(0, 0); // intercept
for (int j = 1; j < beta.nrow; j++){
resid -= beta.get(j, 0) * X.get(i, j);
}
@@ -454,7 +454,7 @@
}
#endif
- loglik -= static_cast<double>(rdata.nids) * log(sqrt(sigma2));
+ loglik -= static_cast<double>(reg_data.nids) * log(sqrt(sigma2));
#if EIGEN
MatrixXd tXX_inv=Ch.solve(MatrixXd(length_beta, length_beta).Identity(length_beta,length_beta));
#endif
@@ -576,17 +576,17 @@
}
-void linear_reg::score(mematrix<double>& resid, regdata& rdatain, int verbose,
+void linear_reg::score(mematrix<double>& resid, int verbose,
double tol_chol, int model, int interaction, int ngpreds,
const masked_matrix& invvarmatrix, int nullmodel) {
- regdata rdata = rdatain.get_unmasked_data();
- base_score(resid, rdata, verbose, tol_chol, model, interaction, ngpreds,
+ // regdata rdata = rdatain.get_unmasked_data();
+ base_score(resid, verbose, tol_chol, model, interaction, ngpreds,
invvarmatrix, nullmodel = 0);
}
logistic_reg::logistic_reg(regdata& rdatain) {
- regdata rdata = rdatain.get_unmasked_data();
- int length_beta = (rdata.X).ncol;
+ reg_data = rdatain.get_unmasked_data();
+ int length_beta = reg_data.X.ncol;
beta.reinit(length_beta, 1);
sebeta.reinit(length_beta, 1);
//Han Chen
@@ -595,37 +595,38 @@
covariance.reinit(length_beta - 1, 1);
}
//Oct 26, 2009
- residuals.reinit((rdata.X).nrow, 1);
+ residuals.reinit(reg_data.X.nrow, 1);
sigma2 = -1.;
loglik = -9.999e+32; // should actually be MAX of the corresponding type
niter = -1;
chi2_score = -1.;
}
-void logistic_reg::estimate(regdata& rdatain, int verbose, int maxiter,
+void logistic_reg::estimate( int verbose, int maxiter,
double eps, double tol_chol, int model, int interaction, int ngpreds,
masked_matrix& invvarmatrixin, int robust, int nullmodel) {
// In contrast to the 'linear' case 'invvarmatrix' contains the
// inverse of correlation matrix (not the inverse of var-cov matrix)
// h2.object$InvSigma * h.object2$h2an$estimate[length(h2$h2an$estimate)]
// the inverse of var-cov matrix scaled by total variance
- regdata rdata = rdatain.get_unmasked_data();
+ //regdata rdata = rdatain.get_unmasked_data();
// a lot of code duplicated between linear and logistic...
// e.g. a piece below...
mematrix<double> invvarmatrix;
if (invvarmatrixin.length_of_mask != 0)
{
- invvarmatrixin.update_mask(rdatain.masked_data);
+ invvarmatrixin.update_mask(reg_data.masked_data);
}
-
- mematrix<double> X = apply_model(rdata.X, model, interaction, ngpreds,
- rdata.is_interaction_excluded, false, nullmodel);
+ mematrix<double> X = apply_model(reg_data.X, model, interaction, ngpreds,
+ reg_data.is_interaction_excluded, false, nullmodel);
int length_beta = X.ncol;
beta.reinit(length_beta, 1);
sebeta.reinit(length_beta, 1);
//Han Chen
+
if (length_beta > 1)
{
+
if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2)
{
covariance.reinit(length_beta - 2, 1);
@@ -635,13 +636,14 @@
covariance.reinit(length_beta - 1, 1);
}
}
+
//Oct 26, 2009
mematrix<double> W((X).nrow, 1);
mematrix<double> z((X).nrow, 1);
mematrix<double> tXWX(length_beta, length_beta);
mematrix<double> tXWX_i(length_beta, length_beta);
mematrix<double> tXWz(length_beta, 1);
- double prev = (rdata.Y).column_mean(0);
+ double prev = (reg_data.Y).column_mean(0);
if (prev >= 1. || prev <= 0.)
{
std::cerr << "prevalence not within (0,1)\n";
@@ -652,6 +654,7 @@
beta.put(log(prev / (1. - prev)), 0, 0);
mematrix<double> tX = transpose(X);
+
if (invvarmatrix.nrow != 0 && invvarmatrix.ncol != 0)
{
//TODO(maarten):invvarmatix is symmetric:is there an more effective way?
@@ -684,12 +687,12 @@
double value = emu;
double zval;
value = exp(value) / (1. + exp(value));
- residuals[i] = (rdata.Y).get(i, 0) - value;
+ residuals[i] = (reg_data.Y).get(i, 0) - value;
eMu.put(value, i, 0);
W.put(value * (1. - value), i, 0);
zval = emu
+ (1. / (value * (1. - value)))
- * (((rdata.Y).get(i, 0)) - value);
+ * (((reg_data.Y).get(i, 0)) - value);
z.put(zval, i, 0);
}
@@ -746,7 +749,7 @@
prevlik = loglik;
loglik = 0.;
for (int i = 0; i < eMu.nrow; i++)
- loglik += rdata.Y[i] * eMu_us[i] - log(1. + exp(eMu_us[i]));
+ loglik += reg_data.Y[i] * eMu_us[i] - log(1. + exp(eMu_us[i]));
delta = fabs(1. - (prevlik / loglik));
niter++;
@@ -829,9 +832,9 @@
// exit(1);
}
-void logistic_reg::score(mematrix<double>& resid, regdata& rdata, int verbose,
+void logistic_reg::score(mematrix<double>& resid, int verbose,
double tol_chol, int model, int interaction, int ngpreds,
masked_matrix& invvarmatrix, int nullmodel) {
- base_score(resid, rdata, verbose, tol_chol, model, interaction, ngpreds,
+ base_score(resid, verbose, tol_chol, model, interaction, ngpreds,
invvarmatrix, nullmodel = 0);
}
Modified: branches/ProbABEL-0.50/src/reg1.h
===================================================================
--- branches/ProbABEL-0.50/src/reg1.h 2014-02-03 22:05:56 UTC (rev 1588)
+++ branches/ProbABEL-0.50/src/reg1.h 2014-02-03 22:29:17 UTC (rev 1589)
@@ -49,8 +49,9 @@
double sigma2;
double loglik;
double chi2_score;
+ regdata reg_data;
- void base_score(mematrix<double>& resid, regdata& rdata, int verbose,
+ void base_score(mematrix<double>& resid, int verbose,
double tol_chol, int model, int interaction, int ngpreds,
const masked_matrix& invvarmatrix, int nullmodel);
};
@@ -60,17 +61,18 @@
linear_reg(regdata& rdatain);
~linear_reg()
{
+ delete [] reg_data.masked_data ;
// delete beta;
// delete sebeta;
// delete residuals;
}
- void estimate(regdata& rdatain, int verbose, double tol_chol, int model,
+ void estimate( int verbose, double tol_chol, int model,
int interaction, int ngpreds,
masked_matrix& invvarmatrixin,
int robust, int nullmodel = 0);
- void score(mematrix<double>& resid, regdata& rdatain, int verbose,
+ void score(mematrix<double>& resid, int verbose,
double tol_chol, int model, int interaction, int ngpreds,
const masked_matrix& invvarmatrix, int nullmodel = 0);
};
@@ -82,16 +84,17 @@
logistic_reg(regdata& rdatain);
~logistic_reg()
{
+ delete [] reg_data.masked_data ;
// delete beta;
// delete sebeta;
}
- void estimate(regdata& rdatain, int verbose, int maxiter, double eps,
+ void estimate( int verbose, int maxiter, double eps,
double tol_chol, int model, int interaction, int ngpreds,
masked_matrix& invvarmatrixin, int robust,
int nullmodel = 0);
// just a stupid copy from linear_reg
- void score(mematrix<double>& resid, regdata& rdata, int verbose,
+ void score(mematrix<double>& resid, int verbose,
double tol_chol, int model, int interaction, int ngpreds,
masked_matrix& invvarmatrix, int nullmodel = 0);
};
Modified: branches/ProbABEL-0.50/src/regdata.cpp
===================================================================
--- branches/ProbABEL-0.50/src/regdata.cpp 2014-02-03 22:05:56 UTC (rev 1588)
+++ branches/ProbABEL-0.50/src/regdata.cpp 2014-02-03 22:29:17 UTC (rev 1589)
@@ -175,15 +175,15 @@
}
}
+//
+//regdata::~regdata()
+//{
+// //delete[] regdata::masked_data;
+// // delete X;
+// // delete Y;
+//}
-regdata::~regdata()
-{
- delete[] regdata::masked_data;
- // delete X;
- // delete Y;
-}
-
regdata regdata::get_unmasked_data()
{
regdata to; // = regdata(*this);
Modified: branches/ProbABEL-0.50/src/regdata.h
===================================================================
--- branches/ProbABEL-0.50/src/regdata.h 2014-02-03 22:05:56 UTC (rev 1588)
+++ branches/ProbABEL-0.50/src/regdata.h 2014-02-03 22:29:17 UTC (rev 1589)
@@ -38,7 +38,7 @@
void update_snp(gendata *gend, const int snpnum);
void remove_snp_from_X();
regdata get_unmasked_data();
- ~regdata();
+ //~regdata();
private:
};
More information about the Genabel-commits
mailing list