[GenABEL-dev] [Genabel-commits] r1286 - pkg/ProbABEL/src

Yurii Aulchenko yurii.aulchenko at gmail.com
Tue Aug 13 17:53:44 CEST 2013


This is a long-awaited-for improvement! - great work!

----------------------
Yurii Aulchenko
(sent from mobile device)

On 8 Aug 2013, at 15:19, "noreply at r-forge.r-project.org"
<noreply at r-forge.r-project.org> wrote:

> Author: lckarssen
> Date: 2013-08-08 13:19:30 +0200 (Thu, 08 Aug 2013)
> New Revision: 1286
>
> Modified:
>   pkg/ProbABEL/src/eigen_mematrix.cpp
>   pkg/ProbABEL/src/eigen_mematrix.h
>   pkg/ProbABEL/src/main.cpp
>   pkg/ProbABEL/src/mematri1.h
>   pkg/ProbABEL/src/mematrix.h
>   pkg/ProbABEL/src/reg1.cpp
>   pkg/ProbABEL/src/regdata.cpp
>   pkg/ProbABEL/src/regdata.h
> Log:
> Added chi^2 information to the ProbABEL output for linear regression.
> NOTE: for palogist and pacoxph this still needs to be fixed!!!
>
> The chi^2 values are based on the LRT. The null model is calculated at
> the beginning (this was already part of ProbABEL for a long time). In
> the case of missing genotype data the null model is recalculated for
> that SNP only. So for people with imputed data there should be no
> difference in computation time.
>
> This is a bit of a rough implementation. Maybe some more work is
> needed to make it better (in terms of programming style/efficiency).
>
> Changes per file:
> src/main.cpp:
> - Some small (unrelated) changes to the way progress information is printed
> - Changed output precision of beta, se_beta, chi^2 to 6 instead of 9 digits
> - around line 700 is where the recalculation of the null model is done.
> src/regdata.h, src/regdata.cpp:
> - Add a function remove_snp_from_X() that removes the genotype data
>   from the design matrix. This is necessary, because in order to know
>   which individuals have missing genotype data (and therefore should
>   be excluded from the null estimation), we first need to have the
>   genotype data in.
> src/reg1.cpp:
> - At the beginning of apply_model() check if we are calculating the
>   null model. if so, we don't need to apply the genotypic model at
>   all.
> src/eigen_mematrix.h, src/eigen_mematrix.cpp:
> - Implement the delete_column() function. When transitioning to Eigen
>   this function wasn't used anywhere in the code, so it wasn't
>   carried over from the mematrix files.
> src/mematri1.h, src/mematrix.h:
> - Set the col/row number argument to const in the delete_column() and
>   delete_row() functions.
>
>
>
>
>
> Modified: pkg/ProbABEL/src/eigen_mematrix.cpp
> ===================================================================
> --- pkg/ProbABEL/src/eigen_mematrix.cpp    2013-08-08 10:07:32 UTC (rev 1285)
> +++ pkg/ProbABEL/src/eigen_mematrix.cpp    2013-08-08 11:19:30 UTC (rev 1286)
> @@ -362,4 +362,30 @@
>     return temp;
> }
>
> +
> +template<class DT>
> +void mematrix<DT>::delete_column(const int delcol)
> +{
> +    if (delcol > ncol || delcol < 0)
> +    {
> +        fprintf(stderr, "mematrix::delete_column: column out of range\n");
> +        exit(1);
> +    }
> +
> +    // Eigen::Matrix<DT,-1,-1,0,-1,-1> *auxdata =
> +    //     new Eigen::Matrix<DT,-1,-1,0,-1,-1>;
> +    MatrixXd auxdata = data;
> +
> +    data.resize(data.rows(), data.cols()-1);
> +
> +    int rightColsSize = auxdata.cols() - delcol - 1;
> +
> +    data.leftCols(delcol) = auxdata.leftCols(delcol);
> +    data.rightCols(rightColsSize) = auxdata.rightCols(rightColsSize);
> +
> +    ncol--;
> +}
> +
> +
> +
> #endif
>
> Modified: pkg/ProbABEL/src/eigen_mematrix.h
> ===================================================================
> --- pkg/ProbABEL/src/eigen_mematrix.h    2013-08-08 10:07:32 UTC (rev 1285)
> +++ pkg/ProbABEL/src/eigen_mematrix.h    2013-08-08 11:19:30 UTC (rev 1286)
> @@ -37,6 +37,8 @@
>     mematrix operator*(const mematrix &M);
>     mematrix operator*(const mematrix *M);
>
> +    void delete_column(const int delcol);
> +
>     void reinit(int nr, int nc);
>
>     unsigned int getnrow(void)
>
> Modified: pkg/ProbABEL/src/main.cpp
> ===================================================================
> --- pkg/ProbABEL/src/main.cpp    2013-08-08 10:07:32 UTC (rev 1285)
> +++ pkg/ProbABEL/src/main.cpp    2013-08-08 11:19:30 UTC (rev 1286)
> @@ -208,9 +208,9 @@
>                 << input_var.getSep()
>                 << "sebeta_SNP_recA1";
>     *outfile[4] << input_var.getSep()
> -                << "beta_SNP_odom"
> +                << "beta_SNP_odomA1"
>                 << input_var.getSep()
> -                << "sebeta_SNP_odom";
> +                << "sebeta_SNP_odomA1";
>     if (input_var.getInteraction() != 0)
>     {
>         //Han Chen
> @@ -263,7 +263,7 @@
>     *outfile[1] << input_var.getSep() << "chi2_SNP_A1\n";   // "loglik\n";
>     *outfile[2] << input_var.getSep() << "chi2_SNP_domA1\n";// "loglik\n";
>     *outfile[3] << input_var.getSep() << "chi2_SNP_recA1\n";// "loglik\n";
> -    *outfile[4] << input_var.getSep() << "chi2_SNP_odom\n"; // "loglik\n";
> +    *outfile[4] << input_var.getSep() << "chi2_SNP_odomA1\n"; // "loglik\n";
> }
>
> void create_header2(std::vector<std::ofstream*>& outfile, cmdvars& input_var,
> @@ -389,7 +389,7 @@
>
>     masked_matrix invvarmatrix;
>
> -    std::cout << "Reading data ..." << std::flush;
> +    std::cout << "Reading data..." << std::flush;
>     if (input_var.getInverseFilename() != NULL)
>     {
>         loadInvSigma(input_var, phd, invvarmatrix);
> @@ -412,7 +412,7 @@
>                        phd.allmeasured, phd.idnames);
>     }
>
> -    std::cout << " loaded genotypic data ..." << std::flush;
> +    std::cout << " loaded genotypic data..." << std::flush;
>
>     // estimate null model
> #if COXPH
> @@ -421,7 +421,7 @@
>     regdata nrgd = regdata(phd, gtd, -1, input_var.isIsInteractionExcluded());
> #endif
>
> -    std::cout << " loaded null data ..." << std::flush;
> +    std::cout << " loaded null data..." << std::flush;
> #if LOGISTIC
>     logistic_reg nrd = logistic_reg(nrgd);
>     nrd.estimate(nrgd, 0, MAXITER, EPS, CHOLTOL, 0,
> @@ -446,14 +446,14 @@
> #endif
>     double null_loglik = nrd.loglik;
>
> -    std::cout << " estimated null model ...";
> +    std::cout << " estimated null model...";
>     // end null
> #if COXPH
>     coxph_data rgd(phd, gtd, 0);
> #else
>     regdata rgd(phd, gtd, 0, input_var.isIsInteractionExcluded());
> #endif
> -    std::cout << " formed regression object ...";
> +    std::cout << " formed regression object...\n";
>
>
>     // Open a vector of files that will be used for output. Depending
> @@ -505,13 +505,16 @@
>     for (int i = 0; i < maxmod; i++)
>     {
>         beta_sebeta.push_back(new std::ostringstream());
> -        beta_sebeta[i]->precision(9);
> +        beta_sebeta[i]->precision(6);
> +        //*beta_sebeta[i] << scientific;
>         //Han Chen
>         covvalue.push_back(new std::ostringstream());
> -        covvalue[i]->precision(9);
> +        covvalue[i]->precision(6);
> +        //*covvalue[i] << scientific;
>         //Oct 26, 2009
>         chi2.push_back(new std::ostringstream());
> -        chi2[i]->precision(9);
> +        chi2[i]->precision(6);
> +        //*chi2[i] << scientific;
>     }
>
>
> @@ -565,10 +568,10 @@
>             poly = 0;
>         }
>
> +        // Write mlinfo information to the output file(s)
>         // Prob data: All models output. One file per model
>         if (input_var.getNgpreds() == 2)
>         {
> -            // Write mlinfo to output:
>             for (unsigned int file = 0; file < outfile.size(); file++)
>             {
>                 write_mlinfo(outfile, file, mli, csnp, input_var,
> @@ -679,7 +682,7 @@
>                 } // END for(pos = start_pos; pos < rd.beta.nrow; pos++)
>
>
> -                //calculate chi2
> +                //calculate chi^2
>                 //________________________________
>                 //cout <<  rd.loglik<<" "<<input_var.getNgpreds() << "\n";
>
> @@ -690,23 +693,41 @@
>
>                     if (input_var.getScore() == 0)
>                     {
> +                        double loglik = rd.loglik;
>                         if (gcount != gtd.nids)
>                         {
>                             // If SNP data is missing we didn't
>                             // correctly compute the null likelihood
> -                            *chi2[model] << "NaN";
> +
> +                            // Recalculate null likelihood by
> +                            // stripping the SNP data column(s) from
> +                            // the X matrix in the regression object
> +                            // and run the null model estimation again
> +                            // for this SNP.
> +// BEWARE, ONLY IMPLEMENTED FOR LINEAR REG!!!
> +// TODO LCK
> +#ifdef LINEAR
> +                            regdata new_rgd = rgd;
> +                            new_rgd.remove_snp_from_X();
> +                            linear_reg new_null_rd(new_rgd);
> +                            new_null_rd.estimate(new_rgd, 0, CHOLTOL, model,
> +                                                 input_var.getInteraction(),
> +                                                 input_var.getNgpreds(),
> +                                                 invvarmatrix,
> +                                                 input_var.getRobust(), 1);
> +
> +                            *chi2[model] << 2. * (loglik - new_null_rd.loglik);
> +#endif
>                         }
>                         else
>                         {
>                             // No missing SNP data, we can compute the LRT
> -                            *chi2[model] << 2. * (rd.loglik - null_loglik);
> +                            *chi2[model] << 2. * (loglik - null_loglik);
>                         }
> -                        //*chi2[model] << rd.loglik;
>                     } else
>                     {
>                         // We want score test output
>                         *chi2[model] << rd.chi2_score;
> -                        //*chi2[model] << "nan";
>                     }
>                 }
>             } // END first part of if(poly); allele not too rare
>
> Modified: pkg/ProbABEL/src/mematri1.h
> ===================================================================
> --- pkg/ProbABEL/src/mematri1.h    2013-08-08 10:07:32 UTC (rev 1285)
> +++ pkg/ProbABEL/src/mematri1.h    2013-08-08 11:19:30 UTC (rev 1286)
> @@ -301,7 +301,7 @@
> }
>
> template<class DT>
> -void mematrix<DT>::delete_column(int delcol)
> +void mematrix<DT>::delete_column(const int delcol)
> {
>     if (delcol > ncol || delcol < 0)
>     {
> @@ -333,7 +333,7 @@
> }
>
> template<class DT>
> -void mematrix<DT>::delete_row(int delrow)
> +void mematrix<DT>::delete_row(const int delrow)
> {
>     if (delrow > nrow || delrow < 0)
>     {
>
> Modified: pkg/ProbABEL/src/mematrix.h
> ===================================================================
> --- pkg/ProbABEL/src/mematrix.h    2013-08-08 10:07:32 UTC (rev 1285)
> +++ pkg/ProbABEL/src/mematrix.h    2013-08-08 11:19:30 UTC (rev 1286)
> @@ -48,8 +48,8 @@
>     void put(DT value, int nr, int nc);
>     DT column_mean(int nc);
>     void print(void);
> -    void delete_column(int delcol);
> -    void delete_row(int delrow);
> +    void delete_column(const int delcol);
> +    void delete_row(const int delrow);
>
> };
>
>
> Modified: pkg/ProbABEL/src/reg1.cpp
> ===================================================================
> --- pkg/ProbABEL/src/reg1.cpp    2013-08-08 10:07:32 UTC (rev 1285)
> +++ pkg/ProbABEL/src/reg1.cpp    2013-08-08 11:19:30 UTC (rev 1286)
> @@ -4,12 +4,22 @@
> mematrix<double> apply_model(mematrix<double>& X, int model, int interaction,
>                              int ngpreds, bool is_interaction_excluded,
>                              bool iscox, int nullmodel)
> +// if ngpreds==1 (dose data):
> +// model 0 = additive 1 df
> +// if ngpreds==2 (prob data):
> // model 0 = 2 df
> // model 1 = additive 1 df
> // model 2 = dominant 1 df
> // model 3 = recessive 1 df
> // model 4 = over-dominant 1 df
> {
> +    if(nullmodel)
> +    {
> +        // No need to apply any genotypic model when calculating the
> +        // null model
> +        return (X);
> +    }
> +
>     if (model == 0)
>     {
>         if (interaction != 0 && !nullmodel)
> @@ -295,12 +305,13 @@
>     if (verbose)
>     {
>         cout << rdata.is_interaction_excluded
> -                << " <-irdata.is_interaction_excluded\n";
> +             << " <-rdata.is_interaction_excluded\n";
>         // std::cout << "invvarmatrix:\n";
>         // invvarmatrixin.masked_data->print();
>         std::cout << "rdata.X:\n";
>         rdata.X.print();
>     }
> +
>     mematrix<double> X = apply_model(rdata.X, model, interaction, ngpreds,
>                                      rdata.is_interaction_excluded, false,
>                                      nullmodel);
> @@ -311,6 +322,7 @@
>         std::cout << "Y:\n";
>         rdata.Y.print();
>     }
> +
>     int length_beta = X.ncol;
>     beta.reinit(length_beta, 1);
>     sebeta.reinit(length_beta, 1);
>
> Modified: pkg/ProbABEL/src/regdata.cpp
> ===================================================================
> --- pkg/ProbABEL/src/regdata.cpp    2013-08-08 10:07:32 UTC (rev 1285)
> +++ pkg/ProbABEL/src/regdata.cpp    2013-08-08 11:19:30 UTC (rev 1286)
> @@ -39,7 +39,7 @@
>
>     for (int i = 0; i < nids; i++)
>     {
> -        masked_data[i] = 0;
> +        masked_data[i] = obj.masked_data[i];
>     }
> }
>
> @@ -95,6 +95,9 @@
>
> void regdata::update_snp(gendata &gend, int snpnum)
> {
> +    // Add genotypic data (dosage or probabilities) to the design
> +    // matrix X.
> +
>     for (int j = 0; j < ngpreds; j++)
>     {
>         double snpdata[nids];
> @@ -109,11 +112,34 @@
>         {
>             X.put(snpdata[i], i, (ncov - j));
>             if (isnan(snpdata[i]))
> +            {
>                 masked_data[i] = 1;
> +            }
>         }
>     }
> }
>
> +void regdata::remove_snp_from_X()
> +{
> +    // update_snp() adds SNP information to the design matrix. This
> +    // function allows you to strip that information from X again.
> +    // This is used for example when calculating the null model.
> +
> +    if(ngpreds == 1)
> +    {
> +        X.delete_column(X.ncol -1);
> +    }
> +    else if(ngpreds == 2)
> +    {
> +        X.delete_column(X.ncol -1);
> +        X.delete_column(X.ncol -1);
> +    }
> +    else
> +    {
> +        cerr << "ngpreds should be 1 or 2. you should never come here!\n";
> +    }
> +}
> +
> regdata::~regdata()
> {
>     delete[] regdata::masked_data;
>
> Modified: pkg/ProbABEL/src/regdata.h
> ===================================================================
> --- pkg/ProbABEL/src/regdata.h    2013-08-08 10:07:32 UTC (rev 1285)
> +++ pkg/ProbABEL/src/regdata.h    2013-08-08 11:19:30 UTC (rev 1286)
> @@ -34,6 +34,7 @@
>             bool ext_is_interaction_excluded);
>     mematrix<double> extract_genotypes();
>     void update_snp(gendata &gend, int snpnum);
> +    void remove_snp_from_X();
>     regdata get_unmasked_data();
>     ~regdata();
>
>
> _______________________________________________
> Genabel-commits mailing list
> Genabel-commits at lists.r-forge.r-project.org
> https://lists.r-forge.r-project.org/cgi-bin/mailman/listinfo/genabel-commits


More information about the genabel-devel mailing list