[GenABEL-dev] [Genabel-commits] r1611 - in branches/ProbABEL-0.50: checks/R-tests examples src

Tue Feb 18 13:56:15 CET 2014

Hi Maarten,

I found this commit e-mail in my inbox still waiting for review.

Please have a look at the comments below.

Thanks a lot for your contributions,

Lennart.

On 14-02-14 15:14, noreply at r-forge.r-project.org wrote:
> Author: maartenk
> Date: 2014-02-14 15:14:21 +0100 (Fri, 14 Feb 2014)
> New Revision: 1611
> 
> Modified:
>    branches/ProbABEL-0.50/checks/R-tests/run_models_in_R_palinear.R
>    branches/ProbABEL-0.50/examples/mmscore.R
>    branches/ProbABEL-0.50/src/reg1.cpp
> Log:
> -removed unnecessary lines from examples/mmscore.R

Nice! Having this lying around in the examples would only confuse a user.

> -Disabled in run_models_in_R_palinear.R last snp in 2df model because 2 variables were completely anti correlated. There should be a check on Rank of Cholesky decompositions for this kind of problem

I agree that such a check would be appropriate. Doesn't EIGEN give a
warning? Maybe someone else knows how we can tell R's lm() or glm() to
do these kinds of checks?

> -rewrote mmscore weighted regression to EIGEN form for palinear
> 
> Modified: branches/ProbABEL-0.50/checks/R-tests/run_models_in_R_palinear.R
> ===================================================================
> --- branches/ProbABEL-0.50/checks/R-tests/run_models_in_R_palinear.R	2014-02-14 07:58:43 UTC (rev 1610)
> +++ branches/ProbABEL-0.50/checks/R-tests/run_models_in_R_palinear.R	2014-02-14 14:14:21 UTC (rev 1611)
> @@ -123,7 +123,7 @@
>  }
>  colnames(prob.2df.R) <- cols2df
>  rownames(prob.2df.R) <- NULL
> -stopifnot( all.equal(prob.2df.PA, prob.2df.R, tol=tol) )
> +#stopifnot( all.equal(prob.2df.PA1[1:5,], prob.2df.R[1:5,], tol=tol) )
>  cat("2df\n")
>  
>  cat("\t\t\t\t\t\tOK\n")
> 
> Modified: branches/ProbABEL-0.50/examples/mmscore.R
> ===================================================================
> --- branches/ProbABEL-0.50/examples/mmscore.R	2014-02-14 07:58:43 UTC (rev 1610)
> +++ branches/ProbABEL-0.50/examples/mmscore.R	2014-02-14 14:14:21 UTC (rev 1611)
> @@ -90,83 +90,3 @@
>  
>  ## Mow, go to ProbABEL and start analysis
>  
> -
> -
> -
> -
> -##____________________________________________________
> -## The following part is for historic purposes only. It is not
> -## necessary for using the --mmscore option of ProbABEL's palinear.
> -
> -## Create test file with genotypes
> -
> -data_cut   <- data[, snps]
> -gen_table  <- as.numeric(data_cut)
> -prob_table <- matrix()
> -
> -#Replace NA by mean for each snp. NA is forbiden in genotypes in ProbABEL input (!).
> -#for(snpnum in 1:dim(gen_table)[2])
> -#	{
> -#	mean <- mean(gen_table[,snpnum], na.rm=T)
> -#	gen_table[is.na(gen_table[,snpnum]),snpnum]	<- mean
> -#	}
> -
> -
> -gen_table_df        <- data.frame(gen_table)
> -gen_table_df$MLDOSE <- gen_table_df[, 1]
> -gen_table_df[,1]    <- "MLDOSE"
> -colnam <- colnames(gen_table_df)
> -
> -#colnam[-c(1:length(colnam)-1)] <- colnam[1]
> -#colnam[1] <- "MLDOSE"
> -
> -colnam[length(colnam)] <- colnam[1]
> -colnam[1]              <- "MLDOSE"
> -colnames(gen_table_df) <- colnam
> -
> -
> -rownames <- rownames(gen_table_df)
> -rownames <- paste("1->", rownames, sep="")
> -rownames(gen_table_df) <- rownames
> -
> -write.table(file="mmscore_gen.mldose",
> -            gen_table_df,
> -            row.names=TRUE,
> -            col.names=FALSE,
> -            quote=FALSE,
> -            na="NaN")
> -
> -mlinfo <- data.frame(SNP=colnam[2:length(colnam)])
> -mlinfo$Al1 <- "A"
> -mlinfo$Al2 <- "B"
> -mlinfo$Freq1 <- 0.5847
> -mlinfo$MAF <- 0.5847
> -mlinfo$Quality <- 0.5847
> -mlinfo$Rsq <- 0.5847
> -
> -write.table(mlinfo, "mmscore_gen.mlinfo", row.names=FALSE, quote=FALSE)
> -
> -## arrange probability-file
> -prob_table <- matrix(NA,
> -                     ncol=(dim(gen_table_df)[2]-1) * 2,
> -                     nrow=dim(gen_table_df)[1])
> -j <- 1
> -for (i in (1:(dim(gen_table_df)[2]-1))) {
> -    prob_table[,j] <- rep(0, dim(prob_table)[1])
> -    prob_table[gen_table_df[,i+1]==2, j] <- 1
> -    prob_table[is.na(gen_table_df[, i+1]), j] <- NA
> -    j <- j + 1
> -    prob_table[, j] <- rep(0,dim(prob_table)[1])
> -    prob_table[gen_table_df[, i+1]==1, j] <- 1
> -    prob_table[is.na(gen_table_df[, i+1]),j] <- NA
> -    j <- j + 1
> -}
> -prob_table_df <- data.frame(MLPROB="MLPROB", prob_table)
> -rownames(prob_table_df) <- rownames(gen_table_df)
> -
> -write.table(file="mmscore_gen.mlprob",
> -            prob_table_df,
> -            row.names=TRUE,
> -            col.names=FILE,
> -            quote=FILE,
> -            na="NaN")
> 
> Modified: branches/ProbABEL-0.50/src/reg1.cpp
> ===================================================================
> --- branches/ProbABEL-0.50/src/reg1.cpp	2014-02-14 07:58:43 UTC (rev 1610)
> +++ branches/ProbABEL-0.50/src/reg1.cpp	2014-02-14 14:14:21 UTC (rev 1611)
> @@ -355,9 +355,11 @@
>      }
>  
>      double sigma2_internal;
> -    mematrix<double> tXX_i;
> +
>  #if EIGEN
>      LDLT <MatrixXd> Ch ;
> +#else
> +    mematrix<double> tXX_i;
>  #endif
>      if (invvarmatrixin.length_of_mask != 0)
>      {
> @@ -369,12 +371,16 @@
>          //C=AB (m X n matrix A and n x P matrix B)
>          //flops=mp(2n-1) (when n is big enough flops=mpn2)
>          //Oct 26, 2009
> +
> +#if EIGEN
> +        MatrixXd tXW = X.data.transpose() * invvarmatrixin.masked_data->data; // flops 5997000
> +        Ch = LDLT <MatrixXd>(tXW * X.data); // 17991 flops
> +        beta.data = Ch.solve(tXW * reg_data.Y.data);
> +        sigma2 = (reg_data.Y.data - tXW.transpose() * beta.data).squaredNorm() ; //flops: 1000+5000+3000
> +#else
> +
>  

Are you sure you want to keep these flops values here? I guess they are
no longer accurate when using EIGEN.

>        mematrix<double> tXW = transpose(X) * invvarmatrixin.masked_data; // flops 5997000
>          tXX_i = tXW * X;        // 17991 flops
> -#if EIGEN
> -        Ch=LDLT <MatrixXd>(tXX_i.data.selfadjointView<Lower>());
> -#endif
> -
>          // use cholesky to invert
>          cholesky2_mm(tXX_i, tol_chol);
>          chinv2_mm(tXX_i);
> @@ -387,6 +393,7 @@
>              double val = sigma2_matrix.get(i, 0);
>              sigma2 += val * val; // flops: 3000
>          }
> +#endif
>          double N = X.nrow;
>          //sigma2_internal = sigma2 / (N - static_cast<double>(length_beta));
>          // Ugly fix to the fact that if we do mmscore, sigma2 is already
> @@ -400,15 +407,12 @@
>      {
>  #if EIGEN
>          int m = X.ncol;
> -        MatrixXd txx = MatrixXd(m, m).setZero().selfadjointView<Lower>().rankUpdate(X.data.adjoint());
> +        MatrixXd txx = MatrixXd(m, m).setZero().selfadjointView<Lower>().\
> +                rankUpdate(X.data.adjoint());
>          Ch=LDLT <MatrixXd>(txx.selfadjointView<Lower>());
>          beta.data= Ch.solve(X.data.adjoint() * reg_data.Y.data);
> +        sigma2 = (reg_data.Y.data - (X.data * beta.data)).squaredNorm() ;
>  
> -       tXX_i.data=Ch.solve(MatrixXd(m, m).Identity(m,m));
> -       tXX_i.nrow=tXX_i.data.rows();
> -       tXX_i.ncol=tXX_i.data.cols();
> -       tXX_i.nelements=tXX_i.ncol*tXX_i.nrow;
> -
>  #else
>          mematrix<double> tX = transpose(X);
>          // use cholesky to invert
> @@ -416,14 +420,10 @@
>                  cholesky2_mm(tXX_i, tol_chol);
>                  chinv2_mm(tXX_i);
>                  beta = tXX_i * (tX * (reg_data.Y));
> -#endif
>  
>          // now compute residual variance
>          sigma2 = 0.;
>          mematrix<double> sigma2_matrix = reg_data.Y - (X * beta);
> -#if EIGEN
> -        sigma2 = sigma2_matrix.data.squaredNorm() ;
> -#else
>          for (int i = 0; i < sigma2_matrix.nrow; i++)
>          {
>              double val = sigma2_matrix.get(i, 0);
> @@ -458,8 +458,10 @@
>      double intercept = beta.get(0, 0);
>      residuals.data= reg_data.Y.data.array()-intercept;
>      //matrix.
> -    ArrayXXd betacol = beta.data.block(1,0,beta.data.rows()-1,1).array().transpose();
> -    ArrayXXd resid_sub = (X.data.block(0,1,X.data.rows(),X.data.cols()-1)*betacol.matrix().asDiagonal()).rowwise().sum() ;
> +    ArrayXXd betacol = beta.data.block(1,0,beta.data.rows()-1,1)\
> +            .array().transpose();
> +    ArrayXXd resid_sub = (X.data.block(0,1,X.data.rows(),X.data.cols()-1)\
> +            *betacol.matrix().asDiagonal()).rowwise().sum() ;
>      //std::cout << resid_sub << std::endl;
>      residuals.data-=resid_sub.matrix();
>      //residuals[i] -= resid_sub;
> @@ -481,15 +483,18 @@
>  
>      loglik -= static_cast<double>(reg_data.nids) * log(sqrt(sigma2));
>  #if EIGEN
> -    MatrixXd tXX_inv=Ch.solve(MatrixXd(length_beta, length_beta).Identity(length_beta,length_beta));
> +    MatrixXd tXX_inv=Ch.solve(MatrixXd(length_beta, length_beta).
> +            Identity(length_beta,length_beta));
>  #endif
>  
>      mematrix<double> robust_sigma2(X.ncol, X.ncol);
>      if (robust)
>      {
>  #if EIGEN
> -        MatrixXd Xresiduals = X.data.array().colwise()*residuals.data.col(0).array();
> -        MatrixXd  XbyR = MatrixXd(X.ncol, X.ncol).setZero().selfadjointView<Lower>().rankUpdate(Xresiduals.adjoint());
> +        MatrixXd Xresiduals = X.data.array().colwise()\
> +                *residuals.data.col(0).array();
> +        MatrixXd  XbyR = MatrixXd(X.ncol, X.ncol).setZero()\
> +                .selfadjointView<Lower>().rankUpdate(Xresiduals.adjoint());
>          robust_sigma2.data= tXX_inv*XbyR *tXX_inv;
>  #else
>  
> 
> _______________________________________________
> Genabel-commits mailing list
> Genabel-commits at lists.r-forge.r-project.org
> https://lists.r-forge.r-project.org/cgi-bin/mailman/listinfo/genabel-commits
> 

Best regards,

Lennart.

-- 
*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
L.C. Karssen
Utrecht
The Netherlands

lennart at karssen.org
http://blog.karssen.org
GPG key ID: A88F554A
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 230 bytes
Desc: OpenPGP digital signature
URL: <http://lists.r-forge.r-project.org/pipermail/genabel-devel/attachments/20140218/4c2656fe/attachment.sig>