[GenABEL-dev] [Genabel-commits] r1611 - in branches/ProbABEL-0.50: checks/R-tests examples src
L.C. Karssen
lennart at karssen.org
Tue Feb 18 13:56:15 CET 2014
Hi Maarten,
I found this commit e-mail in my inbox still waiting for review.
Please have a look at the comments below.
Thanks a lot for your contributions,
Lennart.
On 14-02-14 15:14, noreply at r-forge.r-project.org wrote:
> Author: maartenk
> Date: 2014-02-14 15:14:21 +0100 (Fri, 14 Feb 2014)
> New Revision: 1611
>
> Modified:
> branches/ProbABEL-0.50/checks/R-tests/run_models_in_R_palinear.R
> branches/ProbABEL-0.50/examples/mmscore.R
> branches/ProbABEL-0.50/src/reg1.cpp
> Log:
> -removed unnecessary lines from examples/mmscore.R
Nice! Having this lying around in the examples would only confuse a user.
> -Disabled in run_models_in_R_palinear.R last snp in 2df model because 2 variables were completely anti correlated. There should be a check on Rank of Cholesky decompositions for this kind of problem
I agree that such a check would be appropriate. Doesn't EIGEN give a
warning? Maybe someone else knows how we can tell R's lm() or glm() to
do these kinds of checks?
> -rewrote mmscore weighted regression to EIGEN form for palinear
>
> Modified: branches/ProbABEL-0.50/checks/R-tests/run_models_in_R_palinear.R
> ===================================================================
> --- branches/ProbABEL-0.50/checks/R-tests/run_models_in_R_palinear.R 2014-02-14 07:58:43 UTC (rev 1610)
> +++ branches/ProbABEL-0.50/checks/R-tests/run_models_in_R_palinear.R 2014-02-14 14:14:21 UTC (rev 1611)
> @@ -123,7 +123,7 @@
> }
> colnames(prob.2df.R) <- cols2df
> rownames(prob.2df.R) <- NULL
> -stopifnot( all.equal(prob.2df.PA, prob.2df.R, tol=tol) )
> +#stopifnot( all.equal(prob.2df.PA1[1:5,], prob.2df.R[1:5,], tol=tol) )
> cat("2df\n")
>
> cat("\t\t\t\t\t\tOK\n")
>
> Modified: branches/ProbABEL-0.50/examples/mmscore.R
> ===================================================================
> --- branches/ProbABEL-0.50/examples/mmscore.R 2014-02-14 07:58:43 UTC (rev 1610)
> +++ branches/ProbABEL-0.50/examples/mmscore.R 2014-02-14 14:14:21 UTC (rev 1611)
> @@ -90,83 +90,3 @@
>
> ## Mow, go to ProbABEL and start analysis
>
> -
> -
> -
> -
> -##____________________________________________________
> -## The following part is for historic purposes only. It is not
> -## necessary for using the --mmscore option of ProbABEL's palinear.
> -
> -## Create test file with genotypes
> -
> -data_cut <- data[, snps]
> -gen_table <- as.numeric(data_cut)
> -prob_table <- matrix()
> -
> -#Replace NA by mean for each snp. NA is forbiden in genotypes in ProbABEL input (!).
> -#for(snpnum in 1:dim(gen_table)[2])
> -# {
> -# mean <- mean(gen_table[,snpnum], na.rm=T)
> -# gen_table[is.na(gen_table[,snpnum]),snpnum] <- mean
> -# }
> -
> -
> -gen_table_df <- data.frame(gen_table)
> -gen_table_df$MLDOSE <- gen_table_df[, 1]
> -gen_table_df[,1] <- "MLDOSE"
> -colnam <- colnames(gen_table_df)
> -
> -#colnam[-c(1:length(colnam)-1)] <- colnam[1]
> -#colnam[1] <- "MLDOSE"
> -
> -colnam[length(colnam)] <- colnam[1]
> -colnam[1] <- "MLDOSE"
> -colnames(gen_table_df) <- colnam
> -
> -
> -rownames <- rownames(gen_table_df)
> -rownames <- paste("1->", rownames, sep="")
> -rownames(gen_table_df) <- rownames
> -
> -write.table(file="mmscore_gen.mldose",
> - gen_table_df,
> - row.names=TRUE,
> - col.names=FALSE,
> - quote=FALSE,
> - na="NaN")
> -
> -mlinfo <- data.frame(SNP=colnam[2:length(colnam)])
> -mlinfo$Al1 <- "A"
> -mlinfo$Al2 <- "B"
> -mlinfo$Freq1 <- 0.5847
> -mlinfo$MAF <- 0.5847
> -mlinfo$Quality <- 0.5847
> -mlinfo$Rsq <- 0.5847
> -
> -write.table(mlinfo, "mmscore_gen.mlinfo", row.names=FALSE, quote=FALSE)
> -
> -## arrange probability-file
> -prob_table <- matrix(NA,
> - ncol=(dim(gen_table_df)[2]-1) * 2,
> - nrow=dim(gen_table_df)[1])
> -j <- 1
> -for (i in (1:(dim(gen_table_df)[2]-1))) {
> - prob_table[,j] <- rep(0, dim(prob_table)[1])
> - prob_table[gen_table_df[,i+1]==2, j] <- 1
> - prob_table[is.na(gen_table_df[, i+1]), j] <- NA
> - j <- j + 1
> - prob_table[, j] <- rep(0,dim(prob_table)[1])
> - prob_table[gen_table_df[, i+1]==1, j] <- 1
> - prob_table[is.na(gen_table_df[, i+1]),j] <- NA
> - j <- j + 1
> -}
> -prob_table_df <- data.frame(MLPROB="MLPROB", prob_table)
> -rownames(prob_table_df) <- rownames(gen_table_df)
> -
> -write.table(file="mmscore_gen.mlprob",
> - prob_table_df,
> - row.names=TRUE,
> - col.names=FILE,
> - quote=FILE,
> - na="NaN")
>
> Modified: branches/ProbABEL-0.50/src/reg1.cpp
> ===================================================================
> --- branches/ProbABEL-0.50/src/reg1.cpp 2014-02-14 07:58:43 UTC (rev 1610)
> +++ branches/ProbABEL-0.50/src/reg1.cpp 2014-02-14 14:14:21 UTC (rev 1611)
> @@ -355,9 +355,11 @@
> }
>
> double sigma2_internal;
> - mematrix<double> tXX_i;
> +
> #if EIGEN
> LDLT <MatrixXd> Ch ;
> +#else
> + mematrix<double> tXX_i;
> #endif
> if (invvarmatrixin.length_of_mask != 0)
> {
> @@ -369,12 +371,16 @@
> //C=AB (m X n matrix A and n x P matrix B)
> //flops=mp(2n-1) (when n is big enough flops=mpn2)
> //Oct 26, 2009
> +
> +#if EIGEN
> + MatrixXd tXW = X.data.transpose() * invvarmatrixin.masked_data->data; // flops 5997000
> + Ch = LDLT <MatrixXd>(tXW * X.data); // 17991 flops
> + beta.data = Ch.solve(tXW * reg_data.Y.data);
> + sigma2 = (reg_data.Y.data - tXW.transpose() * beta.data).squaredNorm() ; //flops: 1000+5000+3000
> +#else
> +
>
Are you sure you want to keep these flops values here? I guess they are
no longer accurate when using EIGEN.
> mematrix<double> tXW = transpose(X) * invvarmatrixin.masked_data; // flops 5997000
> tXX_i = tXW * X; // 17991 flops
> -#if EIGEN
> - Ch=LDLT <MatrixXd>(tXX_i.data.selfadjointView<Lower>());
> -#endif
> -
> // use cholesky to invert
> cholesky2_mm(tXX_i, tol_chol);
> chinv2_mm(tXX_i);
> @@ -387,6 +393,7 @@
> double val = sigma2_matrix.get(i, 0);
> sigma2 += val * val; // flops: 3000
> }
> +#endif
> double N = X.nrow;
> //sigma2_internal = sigma2 / (N - static_cast<double>(length_beta));
> // Ugly fix to the fact that if we do mmscore, sigma2 is already
> @@ -400,15 +407,12 @@
> {
> #if EIGEN
> int m = X.ncol;
> - MatrixXd txx = MatrixXd(m, m).setZero().selfadjointView<Lower>().rankUpdate(X.data.adjoint());
> + MatrixXd txx = MatrixXd(m, m).setZero().selfadjointView<Lower>().\
> + rankUpdate(X.data.adjoint());
> Ch=LDLT <MatrixXd>(txx.selfadjointView<Lower>());
> beta.data= Ch.solve(X.data.adjoint() * reg_data.Y.data);
> + sigma2 = (reg_data.Y.data - (X.data * beta.data)).squaredNorm() ;
>
> - tXX_i.data=Ch.solve(MatrixXd(m, m).Identity(m,m));
> - tXX_i.nrow=tXX_i.data.rows();
> - tXX_i.ncol=tXX_i.data.cols();
> - tXX_i.nelements=tXX_i.ncol*tXX_i.nrow;
> -
> #else
> mematrix<double> tX = transpose(X);
> // use cholesky to invert
> @@ -416,14 +420,10 @@
> cholesky2_mm(tXX_i, tol_chol);
> chinv2_mm(tXX_i);
> beta = tXX_i * (tX * (reg_data.Y));
> -#endif
>
> // now compute residual variance
> sigma2 = 0.;
> mematrix<double> sigma2_matrix = reg_data.Y - (X * beta);
> -#if EIGEN
> - sigma2 = sigma2_matrix.data.squaredNorm() ;
> -#else
> for (int i = 0; i < sigma2_matrix.nrow; i++)
> {
> double val = sigma2_matrix.get(i, 0);
> @@ -458,8 +458,10 @@
> double intercept = beta.get(0, 0);
> residuals.data= reg_data.Y.data.array()-intercept;
> //matrix.
> - ArrayXXd betacol = beta.data.block(1,0,beta.data.rows()-1,1).array().transpose();
> - ArrayXXd resid_sub = (X.data.block(0,1,X.data.rows(),X.data.cols()-1)*betacol.matrix().asDiagonal()).rowwise().sum() ;
> + ArrayXXd betacol = beta.data.block(1,0,beta.data.rows()-1,1)\
> + .array().transpose();
> + ArrayXXd resid_sub = (X.data.block(0,1,X.data.rows(),X.data.cols()-1)\
> + *betacol.matrix().asDiagonal()).rowwise().sum() ;
> //std::cout << resid_sub << std::endl;
> residuals.data-=resid_sub.matrix();
> //residuals[i] -= resid_sub;
> @@ -481,15 +483,18 @@
>
> loglik -= static_cast<double>(reg_data.nids) * log(sqrt(sigma2));
> #if EIGEN
> - MatrixXd tXX_inv=Ch.solve(MatrixXd(length_beta, length_beta).Identity(length_beta,length_beta));
> + MatrixXd tXX_inv=Ch.solve(MatrixXd(length_beta, length_beta).
> + Identity(length_beta,length_beta));
> #endif
>
> mematrix<double> robust_sigma2(X.ncol, X.ncol);
> if (robust)
> {
> #if EIGEN
> - MatrixXd Xresiduals = X.data.array().colwise()*residuals.data.col(0).array();
> - MatrixXd XbyR = MatrixXd(X.ncol, X.ncol).setZero().selfadjointView<Lower>().rankUpdate(Xresiduals.adjoint());
> + MatrixXd Xresiduals = X.data.array().colwise()\
> + *residuals.data.col(0).array();
> + MatrixXd XbyR = MatrixXd(X.ncol, X.ncol).setZero()\
> + .selfadjointView<Lower>().rankUpdate(Xresiduals.adjoint());
> robust_sigma2.data= tXX_inv*XbyR *tXX_inv;
> #else
>
>
> _______________________________________________
> Genabel-commits mailing list
> Genabel-commits at lists.r-forge.r-project.org
> https://lists.r-forge.r-project.org/cgi-bin/mailman/listinfo/genabel-commits
>
Best regards,
Lennart.
--
*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
L.C. Karssen
Utrecht
The Netherlands
lennart at karssen.org
http://blog.karssen.org
GPG key ID: A88F554A
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 230 bytes
Desc: OpenPGP digital signature
URL: <http://lists.r-forge.r-project.org/pipermail/genabel-devel/attachments/20140218/4c2656fe/attachment.sig>
More information about the genabel-devel
mailing list