[GenABEL-dev] [Genabel-commits] r1589 - branches/ProbABEL-0.50/src

L.C. Karssen lennart at karssen.org
Tue Feb 18 13:59:05 CET 2014


Hi Maarten,

This is another even older commit waiting for review.
One question: In the log message you mention this leads to minor memory
leaks. Any idea why? I assume they have been detected by valgrind? Or
was it cppcheck that warns about them?

Or have they been resolved in the mean time?


Thanks,

Lennart.


On 03-02-14 23:29, noreply at r-forge.r-project.org wrote:
> Author: maartenk
> Date: 2014-02-03 23:29:17 +0100 (Mon, 03 Feb 2014)
> New Revision: 1589
> 
> Modified:
>    branches/ProbABEL-0.50/src/main.cpp
>    branches/ProbABEL-0.50/src/reg1.cpp
>    branches/ProbABEL-0.50/src/reg1.h
>    branches/ProbABEL-0.50/src/regdata.cpp
>    branches/ProbABEL-0.50/src/regdata.h
> Log:
> Removes mutiple masking the data per snp in linear and logistic regression. This commit speeds up palinear by ~11%. This introduces minor memory leaks.
> 
> Modified: branches/ProbABEL-0.50/src/main.cpp
> ===================================================================
> --- branches/ProbABEL-0.50/src/main.cpp	2014-02-03 22:05:56 UTC (rev 1588)
> +++ branches/ProbABEL-0.50/src/main.cpp	2014-02-03 22:29:17 UTC (rev 1589)
> @@ -126,7 +126,8 @@
>      std::cout << " loaded null data..." << std::flush;
>  #if LOGISTIC
>      logistic_reg nrd = logistic_reg(nrgd);
> -    nrd.estimate(nrgd, 0, MAXITER, EPS, CHOLTOL, 0,
> +
> +    nrd.estimate( 0, MAXITER, EPS, CHOLTOL, 0,
>                   input_var.getInteraction(),
>                   input_var.getNgpreds(),
>                   invvarmatrix,
> @@ -138,7 +139,7 @@
>  #if DEBUG
>      std::cout << "[DEBUG] linear_reg nrd = linear_reg(nrgd); DONE.";
>  #endif
> -    nrd.estimate(nrgd, 0, CHOLTOL, 0, input_var.getInteraction(),
> +    nrd.estimate( 0, CHOLTOL, 0, input_var.getInteraction(),
>                   input_var.getNgpreds(), invvarmatrix,
>                   input_var.getRobust(), 1);
>  #elif COXPH
> @@ -272,14 +273,14 @@
>                  logistic_reg rd(rgd);
>                  if (input_var.getScore())
>                  {
> -                    rd.score(nrd.residuals, rgd, 0, CHOLTOL, model,
> +                    rd.score(nrd.residuals,  0, CHOLTOL, model,
>                               input_var.getInteraction(),
>                               input_var.getNgpreds(),
>                               invvarmatrix);
>                  }
>                  else
>                  {
> -                    rd.estimate(rgd, 0, MAXITER, EPS, CHOLTOL, model,
> +                    rd.estimate( 0, MAXITER, EPS, CHOLTOL, model,
>                                  input_var.getInteraction(),
>                                  input_var.getNgpreds(),
>                                  invvarmatrix,
> @@ -289,14 +290,14 @@
>                  linear_reg rd(rgd);
>                  if (input_var.getScore())
>                  {
> -                    rd.score(nrd.residuals, rgd, 0, CHOLTOL, model,
> +                    rd.score(nrd.residuals, 0, CHOLTOL, model,
>                               input_var.getInteraction(),
>                               input_var.getNgpreds(),
>                               invvarmatrix);
>                  }
>                  else
>                  {
> -                    rd.estimate(rgd, 0, CHOLTOL, model,
> +                    rd.estimate( 0, CHOLTOL, model,
>                                  input_var.getInteraction(),
>                                  input_var.getNgpreds(),
>                                  invvarmatrix,
> @@ -380,7 +381,7 @@
>                              regdata new_rgd = rgd;
>                              new_rgd.remove_snp_from_X();
>                              linear_reg new_null_rd(new_rgd);
> -                            new_null_rd.estimate(new_rgd, 0,
> +                            new_null_rd.estimate( 0,
>                                                   CHOLTOL, model,
>                                                   input_var.getInteraction(),
>                                                   input_var.getNgpreds(),
> @@ -391,7 +392,7 @@
>                              regdata new_rgd = rgd;
>                              new_rgd.remove_snp_from_X();
>                              logistic_reg new_null_rd(new_rgd);
> -                            new_null_rd.estimate(new_rgd, 0, MAXITER, EPS,
> +                            new_null_rd.estimate( 0, MAXITER, EPS,
>                                                   CHOLTOL, model,
>                                                   input_var.getInteraction(),
>                                                   input_var.getNgpreds(),
> 
> Modified: branches/ProbABEL-0.50/src/reg1.cpp
> ===================================================================
> --- branches/ProbABEL-0.50/src/reg1.cpp	2014-02-03 22:05:56 UTC (rev 1588)
> +++ branches/ProbABEL-0.50/src/reg1.cpp	2014-02-03 22:29:17 UTC (rev 1589)
> @@ -224,10 +224,10 @@
>  }
>  
>  linear_reg::linear_reg(regdata& rdatain) {
> -    regdata rdata = rdatain.get_unmasked_data();
> +    reg_data = rdatain.get_unmasked_data();
>      // std::cout << "linear_reg: " << rdata.nids << " " << (rdata.X).ncol
>      //           << " " << (rdata.Y).ncol << "\n";
> -    int length_beta = (rdata.X).ncol;
> +    int length_beta = (reg_data.X).ncol;
>      beta.reinit(length_beta, 1);
>      sebeta.reinit(length_beta, 1);
>      //Han Chen
> @@ -236,18 +236,18 @@
>          covariance.reinit(length_beta - 1, 1);
>      }
>      //Oct 26, 2009
> -    residuals.reinit(rdata.nids, 1);
> +    residuals.reinit(reg_data.nids, 1);
>      sigma2 = -1.;
>      loglik = -9.999e+32;
>      chi2_score = -1.;
>  }
>  
> -void base_reg::base_score(mematrix<double>& resid, regdata& rdata, int verbose,
> +void base_reg::base_score(mematrix<double>& resid, int verbose,
>          double tol_chol, int model, int interaction, int ngpreds,
>          const masked_matrix& invvarmatrix, int nullmodel) {
> -    mematrix<double> oX = rdata.extract_genotypes();
> +    mematrix<double> oX = reg_data.extract_genotypes();
>      mematrix<double> X = apply_model(oX, model, interaction, ngpreds,
> -            rdata.is_interaction_excluded, false, nullmodel);
> +            reg_data.is_interaction_excluded, false, nullmodel);
>      beta.reinit(X.ncol, 1);
>      sebeta.reinit(X.ncol, 1);
>      double N = static_cast<double>(resid.nrow);
> @@ -286,31 +286,31 @@
>  }
>  
>  
> -void linear_reg::estimate(regdata& rdatain, int verbose, double tol_chol,
> +void linear_reg::estimate( int verbose, double tol_chol,
>          int model, int interaction, int ngpreds, masked_matrix& invvarmatrixin,
>          int robust, int nullmodel) {
>      // suda interaction parameter
>      // model should come here
> -    regdata rdata = rdatain.get_unmasked_data();
> +    //regdata rdata = rdatain.get_unmasked_data();
>  
>      if (verbose)
>      {
> -        cout << rdata.is_interaction_excluded
> +        cout << reg_data.is_interaction_excluded
>                  << " <-rdata.is_interaction_excluded\n";
>          // std::cout << "invvarmatrix:\n";
>          // invvarmatrixin.masked_data->print();
>          std::cout << "rdata.X:\n";
> -        rdata.X.print();
> +        reg_data.X.print();
>      }
>  
> -    mematrix<double> X = apply_model(rdata.X, model, interaction, ngpreds,
> -            rdata.is_interaction_excluded, false, nullmodel);
> +    mematrix<double> X = apply_model(reg_data.X, model, interaction, ngpreds,
> +            reg_data.is_interaction_excluded, false, nullmodel);
>      if (verbose)
>      {
>          std::cout << "X:\n";
>          X.print();
>          std::cout << "Y:\n";
> -        rdata.Y.print();
> +        reg_data.Y.print();
>      }
>  
>      int length_beta = X.ncol;
> @@ -337,7 +337,7 @@
>      if (invvarmatrixin.length_of_mask != 0)
>      {
>          //retrieve masked data W
> -        invvarmatrixin.update_mask(rdatain.masked_data);
> +        invvarmatrixin.update_mask(reg_data.masked_data);
>  
>          // This regression is Weighted Least Square: used for mmscore :
>          // FLOPS count are calculated for 3*1000 matrix as follow:
> @@ -353,10 +353,10 @@
>          // use cholesky to invert
>          cholesky2_mm(tXX_i, tol_chol);
>          chinv2_mm(tXX_i);
> -        beta = tXX_i * (tXW * rdata.Y);        // flops 15+5997
> +        beta = tXX_i * (tXW * reg_data.Y);        // flops 15+5997
>          // now compute residual variance
>          sigma2 = 0.;
> -        mematrix<double> sigma2_matrix = rdata.Y - (transpose(tXW) * beta); //flops: 1000+5000
> +        mematrix<double> sigma2_matrix = reg_data.Y - (transpose(tXW) * beta); //flops: 1000+5000
>          for (int i = 0; i < sigma2_matrix.nrow; i++)
>          {
>              double val = sigma2_matrix.get(i, 0);
> @@ -377,7 +377,7 @@
>          int m = X.ncol;
>          MatrixXd txx = MatrixXd(m, m).setZero().selfadjointView<Lower>().rankUpdate(X.data.adjoint());
>          Ch=LDLT <MatrixXd>(txx.selfadjointView<Lower>());
> -        beta.data= Ch.solve(X.data.adjoint() * rdata.Y.data);
> +        beta.data= Ch.solve(X.data.adjoint() * reg_data.Y.data);
>  
>         tXX_i.data=Ch.solve(MatrixXd(m, m).Identity(m,m));
>         tXX_i.nrow=tXX_i.data.rows();
> @@ -390,12 +390,12 @@
>                  tXX_i = tX * X;
>                  cholesky2_mm(tXX_i, tol_chol);
>                  chinv2_mm(tXX_i);
> -                beta = tXX_i * (tX * (rdata.Y));
> +                beta = tXX_i * (tX * (reg_data.Y));
>  #endif
>  
>          // now compute residual variance
>          sigma2 = 0.;
> -        mematrix<double> sigma2_matrix = rdata.Y - (X * beta);
> +        mematrix<double> sigma2_matrix = reg_data.Y - (X * beta);
>  #if EIGEN
>          sigma2 = sigma2_matrix.data.squaredNorm() ;
>  #else
> @@ -431,7 +431,7 @@
>  
>  #if EIGEN
>      double intercept = beta.get(0, 0);
> -    residuals.data= rdata.Y.data.array()-intercept;
> +    residuals.data= reg_data.Y.data.array()-intercept;
>      //matrix.
>      ArrayXXd betacol = beta.data.block(1,0,beta.data.rows()-1,1).array().transpose();
>      ArrayXXd resid_sub = (X.data.block(0,1,X.data.rows(),X.data.cols()-1)*betacol.matrix().asDiagonal()).rowwise().sum() ;
> @@ -443,9 +443,9 @@
>      //loglik -= halfrecsig2 * residuals[i] * residuals[i];
>  
>  #else
> -    for (int i = 0; i < rdata.nids; i++)
> +    for (int i = 0; i < reg_data.nids; i++)
>       {
> -         double resid = rdata.Y[i] - beta.get(0, 0); // intercept
> +         double resid = reg_data.Y[i] - beta.get(0, 0); // intercept
>           for (int j = 1; j < beta.nrow; j++){
>               resid -= beta.get(j, 0) * X.get(i, j);
>           }
> @@ -454,7 +454,7 @@
>       }
>  #endif
>  
> -    loglik -= static_cast<double>(rdata.nids) * log(sqrt(sigma2));
> +    loglik -= static_cast<double>(reg_data.nids) * log(sqrt(sigma2));
>  #if EIGEN
>      MatrixXd tXX_inv=Ch.solve(MatrixXd(length_beta, length_beta).Identity(length_beta,length_beta));
>  #endif
> @@ -576,17 +576,17 @@
>  
>  }
>  
> -void linear_reg::score(mematrix<double>& resid, regdata& rdatain, int verbose,
> +void linear_reg::score(mematrix<double>& resid, int verbose,
>          double tol_chol, int model, int interaction, int ngpreds,
>          const masked_matrix& invvarmatrix, int nullmodel) {
> -    regdata rdata = rdatain.get_unmasked_data();
> -    base_score(resid, rdata, verbose, tol_chol, model, interaction, ngpreds,
> +   // regdata rdata = rdatain.get_unmasked_data();
> +    base_score(resid, verbose, tol_chol, model, interaction, ngpreds,
>              invvarmatrix, nullmodel = 0);
>  }
>  
>  logistic_reg::logistic_reg(regdata& rdatain) {
> -    regdata rdata = rdatain.get_unmasked_data();
> -    int length_beta = (rdata.X).ncol;
> +    reg_data = rdatain.get_unmasked_data();
> +    int length_beta = reg_data.X.ncol;
>      beta.reinit(length_beta, 1);
>      sebeta.reinit(length_beta, 1);
>      //Han Chen
> @@ -595,37 +595,38 @@
>          covariance.reinit(length_beta - 1, 1);
>      }
>      //Oct 26, 2009
> -    residuals.reinit((rdata.X).nrow, 1);
> +    residuals.reinit(reg_data.X.nrow, 1);
>      sigma2 = -1.;
>      loglik = -9.999e+32; // should actually be MAX of the corresponding type
>      niter = -1;
>      chi2_score = -1.;
>  }
>  
> -void logistic_reg::estimate(regdata& rdatain, int verbose, int maxiter,
> +void logistic_reg::estimate( int verbose, int maxiter,
>          double eps, double tol_chol, int model, int interaction, int ngpreds,
>          masked_matrix& invvarmatrixin, int robust, int nullmodel) {
>      // In contrast to the 'linear' case 'invvarmatrix' contains the
>      // inverse of correlation matrix (not the inverse of var-cov matrix)
>      // h2.object$InvSigma * h.object2$h2an$estimate[length(h2$h2an$estimate)]
>      // the inverse of var-cov matrix scaled by total variance
> -    regdata rdata = rdatain.get_unmasked_data();
> +    //regdata rdata = rdatain.get_unmasked_data();
>      // a lot of code duplicated between linear and logistic...
>      // e.g. a piece below...
>      mematrix<double> invvarmatrix;
>      if (invvarmatrixin.length_of_mask != 0)
>      {
> -        invvarmatrixin.update_mask(rdatain.masked_data);
> +        invvarmatrixin.update_mask(reg_data.masked_data);
>      }
> -
> -    mematrix<double> X = apply_model(rdata.X, model, interaction, ngpreds,
> -            rdata.is_interaction_excluded, false, nullmodel);
> +    mematrix<double> X = apply_model(reg_data.X, model, interaction, ngpreds,
> +            reg_data.is_interaction_excluded, false, nullmodel);
>      int length_beta = X.ncol;
>      beta.reinit(length_beta, 1);
>      sebeta.reinit(length_beta, 1);
>      //Han Chen
> +
>      if (length_beta > 1)
>      {
> +
>          if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2)
>          {
>              covariance.reinit(length_beta - 2, 1);
> @@ -635,13 +636,14 @@
>              covariance.reinit(length_beta - 1, 1);
>          }
>      }
> +
>      //Oct 26, 2009
>      mematrix<double> W((X).nrow, 1);
>      mematrix<double> z((X).nrow, 1);
>      mematrix<double> tXWX(length_beta, length_beta);
>      mematrix<double> tXWX_i(length_beta, length_beta);
>      mematrix<double> tXWz(length_beta, 1);
> -    double prev = (rdata.Y).column_mean(0);
> +    double prev = (reg_data.Y).column_mean(0);
>      if (prev >= 1. || prev <= 0.)
>      {
>          std::cerr << "prevalence not within (0,1)\n";
> @@ -652,6 +654,7 @@
>  
>      beta.put(log(prev / (1. - prev)), 0, 0);
>      mematrix<double> tX = transpose(X);
> +
>      if (invvarmatrix.nrow != 0 && invvarmatrix.ncol != 0)
>      {
>          //TODO(maarten):invvarmatix is symmetric:is there an more effective way?
> @@ -684,12 +687,12 @@
>              double value = emu;
>              double zval;
>              value = exp(value) / (1. + exp(value));
> -            residuals[i] = (rdata.Y).get(i, 0) - value;
> +            residuals[i] = (reg_data.Y).get(i, 0) - value;
>              eMu.put(value, i, 0);
>              W.put(value * (1. - value), i, 0);
>              zval = emu
>                      + (1. / (value * (1. - value)))
> -                            * (((rdata.Y).get(i, 0)) - value);
> +                            * (((reg_data.Y).get(i, 0)) - value);
>              z.put(zval, i, 0);
>          }
>  
> @@ -746,7 +749,7 @@
>          prevlik = loglik;
>          loglik = 0.;
>          for (int i = 0; i < eMu.nrow; i++)
> -            loglik += rdata.Y[i] * eMu_us[i] - log(1. + exp(eMu_us[i]));
> +            loglik += reg_data.Y[i] * eMu_us[i] - log(1. + exp(eMu_us[i]));
>  
>          delta = fabs(1. - (prevlik / loglik));
>          niter++;
> @@ -829,9 +832,9 @@
>      // exit(1);
>  }
>  
> -void logistic_reg::score(mematrix<double>& resid, regdata& rdata, int verbose,
> +void logistic_reg::score(mematrix<double>& resid, int verbose,
>          double tol_chol, int model, int interaction, int ngpreds,
>          masked_matrix& invvarmatrix, int nullmodel) {
> -    base_score(resid, rdata, verbose, tol_chol, model, interaction, ngpreds,
> +    base_score(resid,  verbose, tol_chol, model, interaction, ngpreds,
>              invvarmatrix, nullmodel = 0);
>  }
> 
> Modified: branches/ProbABEL-0.50/src/reg1.h
> ===================================================================
> --- branches/ProbABEL-0.50/src/reg1.h	2014-02-03 22:05:56 UTC (rev 1588)
> +++ branches/ProbABEL-0.50/src/reg1.h	2014-02-03 22:29:17 UTC (rev 1589)
> @@ -49,8 +49,9 @@
>      double sigma2;
>      double loglik;
>      double chi2_score;
> +    regdata reg_data;
>  
> -    void base_score(mematrix<double>& resid, regdata& rdata, int verbose,
> +    void base_score(mematrix<double>& resid,  int verbose,
>              double tol_chol, int model, int interaction, int ngpreds,
>              const masked_matrix& invvarmatrix, int nullmodel);
>  };
> @@ -60,17 +61,18 @@
>      linear_reg(regdata& rdatain);
>      ~linear_reg()
>      {
> +        delete [] reg_data.masked_data ;
>          //		delete beta;
>          //		delete sebeta;
>          //		delete residuals;
>      }
>  
> -    void estimate(regdata& rdatain, int verbose, double tol_chol, int model,
> +    void estimate( int verbose, double tol_chol, int model,
>                    int interaction, int ngpreds,
>                    masked_matrix& invvarmatrixin,
>                    int robust, int nullmodel = 0);
>  
> -    void score(mematrix<double>& resid, regdata& rdatain, int verbose,
> +    void score(mematrix<double>& resid,  int verbose,
>                 double tol_chol, int model, int interaction, int ngpreds,
>                 const masked_matrix& invvarmatrix, int nullmodel = 0);
>  };
> @@ -82,16 +84,17 @@
>      logistic_reg(regdata& rdatain);
>      ~logistic_reg()
>      {
> +        delete [] reg_data.masked_data ;
>          //		delete beta;
>          //		delete sebeta;
>      }
>  
> -    void estimate(regdata& rdatain, int verbose, int maxiter, double eps,
> +    void estimate( int verbose, int maxiter, double eps,
>                    double tol_chol, int model, int interaction, int ngpreds,
>                    masked_matrix& invvarmatrixin, int robust,
>                    int nullmodel = 0);
>      // just a stupid copy from linear_reg
> -    void score(mematrix<double>& resid, regdata& rdata, int verbose,
> +    void score(mematrix<double>& resid,  int verbose,
>                 double tol_chol, int model, int interaction, int ngpreds,
>                 masked_matrix& invvarmatrix, int nullmodel = 0);
>  };
> 
> Modified: branches/ProbABEL-0.50/src/regdata.cpp
> ===================================================================
> --- branches/ProbABEL-0.50/src/regdata.cpp	2014-02-03 22:05:56 UTC (rev 1588)
> +++ branches/ProbABEL-0.50/src/regdata.cpp	2014-02-03 22:29:17 UTC (rev 1589)
> @@ -175,15 +175,15 @@
>      }
>  }
>  
> +//
> +//regdata::~regdata()
> +//{
> +//    //delete[] regdata::masked_data;
> +//    // delete X;
> +//    // delete Y;
> +//}
>  
> -regdata::~regdata()
> -{
> -    delete[] regdata::masked_data;
> -    // delete X;
> -    // delete Y;
> -}
>  
> -
>  regdata regdata::get_unmasked_data()
>  {
>      regdata to;  // = regdata(*this);
> 
> Modified: branches/ProbABEL-0.50/src/regdata.h
> ===================================================================
> --- branches/ProbABEL-0.50/src/regdata.h	2014-02-03 22:05:56 UTC (rev 1588)
> +++ branches/ProbABEL-0.50/src/regdata.h	2014-02-03 22:29:17 UTC (rev 1589)
> @@ -38,7 +38,7 @@
>      void update_snp(gendata *gend, const int snpnum);
>      void remove_snp_from_X();
>      regdata get_unmasked_data();
> -    ~regdata();
> +    //~regdata();
>  
>   private:
>  };
> 
> _______________________________________________
> Genabel-commits mailing list
> Genabel-commits at lists.r-forge.r-project.org
> https://lists.r-forge.r-project.org/cgi-bin/mailman/listinfo/genabel-commits
> 

-- 
*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
L.C. Karssen
Utrecht
The Netherlands

lennart at karssen.org
http://blog.karssen.org
GPG key ID: A88F554A
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 230 bytes
Desc: OpenPGP digital signature
URL: <http://lists.r-forge.r-project.org/pipermail/genabel-devel/attachments/20140218/f409478a/attachment-0001.sig>


More information about the genabel-devel mailing list