[Genabel-commits] r1291 - in pkg/ProbABEL: src tests/R-tests
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Aug 13 00:30:30 CEST 2013
Author: lckarssen
Date: 2013-08-13 00:30:29 +0200 (Tue, 13 Aug 2013)
New Revision: 1291
Modified:
pkg/ProbABEL/src/coxph_data.cpp
pkg/ProbABEL/src/coxph_data.h
pkg/ProbABEL/src/eigen_mematrix.cpp
pkg/ProbABEL/src/eigen_mematrix.h
pkg/ProbABEL/src/main.cpp
pkg/ProbABEL/src/regdata.cpp
pkg/ProbABEL/tests/R-tests/run_models_in_R_pacox.R
Log:
Added LRT-based chi^2 values to the output of pacoxph. See the commit message of r1286 for more info.
Changes per file:
src/main.cpp:
- Added null estimation call for pacoxph so that LRT is estimated correctly in case of missing data.
src/coxph_data.cpp, src/coxph_data.h:
- fix in copy constructor: masked data was set to 0 instead of copied.
- added check to see if call to coxfit2 was succesful
- added the remove_snp_from_X() function. Because X is transposed for the call to coxfit2() this necessitates the delete_row() \
function in eigen_mematrix.*
src/eigen_mematrix.cpp, src/eigen_mematrix.h:
- Added delete_row() function
- Removed some commented leftovers from the delete_column() function
src/regdata.cpp:
- minor change in error message to align it with its brother in coxph_data.cpp
tests/R-tests/run_models_in_R_pacox.R:
- Created checks for Cox regression and pacoxph output. Structure is now similar to run_models_in_R_palinear.R
Modified: pkg/ProbABEL/src/coxph_data.cpp
===================================================================
--- pkg/ProbABEL/src/coxph_data.cpp 2013-08-12 16:11:20 UTC (rev 1290)
+++ pkg/ProbABEL/src/coxph_data.cpp 2013-08-12 22:30:29 UTC (rev 1291)
@@ -59,7 +59,7 @@
for (int i = 0; i < nids; i++)
{
- masked_data[i] = 0;
+ masked_data[i] = obj.masked_data[i];
}
}
@@ -168,8 +168,10 @@
strata = reorder(strata, order);
offset = reorder(offset, order);
X = reorder(X, order);
- X = transpose(X);
+ // The coxfit2() function expects data in column major order.
+ X = transpose(X);
+
// X.print();
// offset.print();
// weights.print();
@@ -212,6 +214,30 @@
}
}
+
+void coxph_data::remove_snp_from_X()
+{
+ // update_snp() adds SNP information to the design matrix. This
+ // function allows you to strip that information from X again.
+ // This is used for example when calculating the null model.
+
+ if(ngpreds == 1)
+ {
+ X.delete_row(X.nrow -1);
+ }
+ else if(ngpreds == 2)
+ {
+ X.delete_row(X.nrow -1);
+ X.delete_row(X.nrow -1);
+ }
+ else
+ {
+ cerr << "Error: ngpreds should be 1 or 2. "
+ << "You should never come here!\n";
+ }
+}
+
+
coxph_data::~coxph_data()
{
delete[] coxph_data::masked_data;
@@ -283,7 +309,7 @@
coxph_data cdata = cdatain.get_unmasked_data();
beta.reinit(cdata.X.nrow, 1);
sebeta.reinit(cdata.X.nrow, 1);
- loglik = -9.999e+32;
+ loglik = - INFINITY;
sigma2 = -1.;
chi2_score = -1.;
niter = 0;
@@ -295,6 +321,7 @@
int nullmodel)
{
coxph_data cdata = cdatain.get_unmasked_data();
+
mematrix<double> X = t_apply_model(cdata.X, model, interaction, ngpreds,
iscox, nullmodel);
@@ -312,6 +339,7 @@
mematrix<double> u(X.nrow, 1);
mematrix<double> imat(X.nrow, X.nrow);
+
double work[X.ncol * 2 + 2 * (X.nrow) * (X.nrow) + 3 * (X.nrow)];
double loglik_int[2];
int flag;
@@ -335,6 +363,11 @@
&sctest);
#endif
+ if (flag == 1000)
+ {
+ cerr << "Error: Cox regression did not converge\n";
+ }
+
for (int i = 0; i < X.nrow; i++)
{
sebeta[i] = sqrt(imat.get(i, i));
Modified: pkg/ProbABEL/src/coxph_data.h
===================================================================
--- pkg/ProbABEL/src/coxph_data.h 2013-08-12 16:11:20 UTC (rev 1290)
+++ pkg/ProbABEL/src/coxph_data.h 2013-08-12 22:30:29 UTC (rev 1291)
@@ -35,6 +35,7 @@
coxph_data(const coxph_data &obj);
coxph_data(phedata &phed, gendata &gend, int snpnum);
void update_snp(gendata &gend, int snpnum);
+ void remove_snp_from_X();
~coxph_data();
int nids;
Modified: pkg/ProbABEL/src/eigen_mematrix.cpp
===================================================================
--- pkg/ProbABEL/src/eigen_mematrix.cpp 2013-08-12 16:11:20 UTC (rev 1290)
+++ pkg/ProbABEL/src/eigen_mematrix.cpp 2013-08-12 22:30:29 UTC (rev 1291)
@@ -372,10 +372,7 @@
exit(1);
}
- // Eigen::Matrix<DT,-1,-1,0,-1,-1> *auxdata =
- // new Eigen::Matrix<DT,-1,-1,0,-1,-1>;
MatrixXd auxdata = data;
-
data.resize(data.rows(), data.cols()-1);
int rightColsSize = auxdata.cols() - delcol - 1;
@@ -387,5 +384,27 @@
}
+template<class DT>
+void mematrix<DT>::delete_row(const int delrow)
+{
+ if (delrow > nrow || delrow < 0)
+ {
+ fprintf(stderr, "mematrix::delete_row: row out of range\n");
+ exit(1);
+ }
+
+ MatrixXd auxdata = data;
+ data.resize(data.rows()-1, data.cols());
+
+ int bottomRowsSize = auxdata.rows() - delrow - 1;
+
+ data.topRows(delrow) = auxdata.topRows(delrow);
+ data.bottomRows(bottomRowsSize) = auxdata.bottomRows(bottomRowsSize);
+
+ nrow--;
+}
+
+
+
#endif
Modified: pkg/ProbABEL/src/eigen_mematrix.h
===================================================================
--- pkg/ProbABEL/src/eigen_mematrix.h 2013-08-12 16:11:20 UTC (rev 1290)
+++ pkg/ProbABEL/src/eigen_mematrix.h 2013-08-12 22:30:29 UTC (rev 1291)
@@ -38,6 +38,7 @@
mematrix operator*(const mematrix *M);
void delete_column(const int delcol);
+ void delete_row(const int delrow);
void reinit(int nr, int nc);
Modified: pkg/ProbABEL/src/main.cpp
===================================================================
--- pkg/ProbABEL/src/main.cpp 2013-08-12 16:11:20 UTC (rev 1290)
+++ pkg/ProbABEL/src/main.cpp 2013-08-12 22:30:29 UTC (rev 1291)
@@ -716,8 +716,17 @@
invvarmatrix,
input_var.getRobust(), 1);
+#elif COXPH
+ coxph_data new_rgd = rgd;
+ new_rgd.remove_snp_from_X();
+ coxph_reg new_null_rd(new_rgd);
+ new_null_rd.estimate(new_rgd, 0, MAXITER, EPS,
+ CHOLTOL, model,
+ input_var.getInteraction(),
+ input_var.getNgpreds(),
+ true, 1);
+#endif
*chi2[model] << 2. * (loglik - new_null_rd.loglik);
-#endif
}
else
{
Modified: pkg/ProbABEL/src/regdata.cpp
===================================================================
--- pkg/ProbABEL/src/regdata.cpp 2013-08-12 16:11:20 UTC (rev 1290)
+++ pkg/ProbABEL/src/regdata.cpp 2013-08-12 22:30:29 UTC (rev 1291)
@@ -136,7 +136,8 @@
}
else
{
- cerr << "ngpreds should be 1 or 2. you should never come here!\n";
+ cerr << "Error: ngpreds should be 1 or 2. "
+ << "You should never come here!\n";
}
}
Modified: pkg/ProbABEL/tests/R-tests/run_models_in_R_pacox.R
===================================================================
--- pkg/ProbABEL/tests/R-tests/run_models_in_R_pacox.R 2013-08-12 16:11:20 UTC (rev 1290)
+++ pkg/ProbABEL/tests/R-tests/run_models_in_R_pacox.R 2013-08-12 22:30:29 UTC (rev 1291)
@@ -1,31 +1,39 @@
+cat("Checking Cox PH regression...\n")
library(survival)
+## Set tolerance for comparing various outputs
+tol <- 1e-5
+
###
# load the data
###
+example.path <- "../../examples/"
# load phenotypic data
pheno <- read.table("../../examples/coxph_data.txt", head=TRUE,
string=FALSE)
# load genetic DOSE data
-dose <- read.table("../../examples/test.mldose", head=FALSE,
- string=FALSE)
+dose <- read.table("../../examples/test.mldose",
+ head=FALSE, string=FALSE)
# remove "1->" from the names of dose-IDs
-idNames <- dose[, 1]
-idNames <- sub("[0-9]+->", "", idNames)
-dose[,1] <- idNames
-# check consistency of names
-table(dose[, 1] == pheno[, 1])
+idNames <- dose[, 1]
+idNames <- sub("[0-9]+->", "", idNames)
+dose[, 1] <- idNames
+cat("Dose: check consistency of names\t\t")
+stopifnot( all.equal(dose[, 1], pheno[, 1], tol) )
+cat("OK\n")
# load genetic PROB data
-prob <- read.table("../../examples/test.mlprob", head=FALSE, string=FALSE)
+prob <- read.table("../../examples/test.mlprob",
+ head=FALSE, string=FALSE)
# remove "1->" from the names of prob-IDs
-idNames <- prob[, 1]
-idNames <- sub("[0-9]+->", "", idNames)
-prob[,1] <- idNames
-# check consistency of names
-table(prob[,1] == pheno[,1])
+idNames <- prob[, 1]
+idNames <- sub("[0-9]+->", "", idNames)
+prob[, 1] <- idNames
+cat("Prob: check consistency of names\t\t")
+stopifnot( all.equal(prob[, 1], pheno[, 1], tol) )
+cat("OK\n")
# check consistency DOSE <-> PROB
doseFromProb <- matrix(NA, ncol=dim(dose)[2], nrow=dim(dose)[1])
@@ -33,79 +41,220 @@
indexHom <- 3 + ( i - 3 ) * 2
indexHet <- indexHom + 1
doseFromProb[, i] <- prob[, indexHom] * 2 + prob[, indexHet]
- print( table( ( dose[,i] - doseFromProb[,i] ) > 1e-8 ) )
}
+cat("Check consistency dose <-> prob gtdata\t\t")
+stopifnot( all.equal(dose[, 3:ncol(dose)],
+ as.data.frame(doseFromProb)[,3:ncol(doseFromProb)],
+ tol=tol )
+ )
+cat("OK\n")
-###
-# run analysis
-###
+####
+## Run ProbABEL to get the output data we want to compare/verify
+####
+cat("Running ProbABEL...\t\t\t\t")
+tmp <- system("cd ../../examples/; sh example_cox.sh; cd -",
+ intern=TRUE)
+cat("OK\n")
-# run ProbABEL
-system("cd ../../examples/; sh example_cox.sh; cd -")
-resPaAddDose <-
- read.table("../../examples/coxph_dose_add.out.txt",
- head=TRUE)[, c("beta_SNP_add","sebeta_SNP_add")]
-resPaAddProb <-
- read.table("../../examples/coxph_prob_add.out.txt",
- head=TRUE)[, c("beta_SNP_addA1","sebeta_SNP_addA1")]
-resPaDom <-
- read.table("../../examples/coxph_prob_domin.out.txt",
- head=TRUE)[, c("beta_SNP_domA1","sebeta_SNP_domA1")]
-resPaRec <-
- read.table("../../examples/coxph_prob_recess.out.txt",
- head=TRUE)[, c("beta_SNP_recA1","sebeta_SNP_recA1")]
-resPaOdom <-
- read.table("../../examples/coxph_prob_over_domin.out.txt",
- head=TRUE)[, c("beta_SNP_odom","sebeta_SNP_odom")]
+resPaAddDose <- read.table(
+ paste0(example.path, "coxph_dose_add.out.txt"),
+ head=TRUE)[,
+ c("beta_SNP_add",
+ "sebeta_SNP_add",
+ "chi2_SNP")]
+resPaAddProb <- read.table(
+ paste0(example.path, "coxph_prob_add.out.txt"),
+ head=TRUE)[, c("beta_SNP_addA1",
+ "sebeta_SNP_addA1",
+ "chi2_SNP_A1")]
+resPaDom <- read.table(
+ paste0(example.path, "coxph_prob_domin.out.txt"),
+ head=TRUE)[,
+ c("beta_SNP_domA1",
+ "sebeta_SNP_domA1",
+ "chi2_SNP_domA1")]
+resPaRec <- read.table(
+ paste0(example.path, "coxph_prob_recess.out.txt"),
+ head=TRUE)[,
+ c("beta_SNP_recA1",
+ "sebeta_SNP_recA1",
+ "chi2_SNP_recA1")]
+resPaOdom <- read.table(
+ paste0(example.path, "coxph_prob_over_domin.out.txt"),
+ head=TRUE)[,
+ c("beta_SNP_odomA1",
+ "sebeta_SNP_odomA1",
+ "chi2_SNP_odomA1")]
+resPa2df <- read.table(
+ paste0(example.path, "coxph_prob_2df.out.txt"),
+ head=TRUE)[,
+ c("beta_SNP_A1A2",
+ "sebeta_SNP_A1A2",
+ "beta_SNP_A1A1",
+ "sebeta_SNP_A1A1",
+ "chi2_SNP_2df")]
+####
+## run analysis in R
+####
attach(pheno)
+cat("Comparing R output with ProbABEL output\t\t")
# run analysis on dose
-emptyRes <- resPaAddDose
-emptyRes[] <- NA
-resRAddDose <- resRAddProb <- resRDom <- resRRec <- resROdom <- emptyRes
+## emptyRes <- resPaAddDose
+## emptyRes[] <- NA
+## resRAddDose <- resRAddProb <- resRDom <- resRRec <- resROdom <- emptyRes
+
+dose.add.R <- data.frame(beta_SNP_add = numeric(),
+ sebeta_SNP_add = numeric(),
+ chi2_SNP = numeric())
+prob.add.R <- data.frame(beta_SNP_addA1 = numeric(),
+ sebeta_SNP_addA1 = numeric(),
+ chi2_SNP_A1 = numeric())
for (i in 3:dim(dose)[2]) {
- smr <- summary( coxph(Surv(fupt_chd, chd) ~ sex + age +
- othercov + dose[,i] ) )
- resRAddDose[i-2,] <- smr$coef[4, c(1,3)]
- smr <- summary( coxph(Surv(fupt_chd, chd) ~ sex + age +
- othercov + doseFromProb[,i] ) )
- resRAddProb[i-2,] <- smr$coef[4, c(1,3)]
+ noNA <- which( !is.na(dose[, i]) )
+ model.0 <- coxph(
+ Surv(fupt_chd, chd)[noNA] ~ sex[noNA] + age[noNA] + othercov[noNA] )
+
+ model.dose <- coxph(Surv(fupt_chd, chd) ~ sex + age +
+ othercov + dose[,i] )
+ sm.dose <- summary(model.dose)$coef[4, c(1,3)]
+
+ model.prob <- coxph(Surv(fupt_chd, chd) ~ sex + age +
+ othercov + doseFromProb[,i] )
+ sm.prob <- summary(model.prob)$coef[4, c(1,3)]
+
+ lrt.dose <- 2 * ( model.dose$loglik[2] - model.0$loglik[2] )
+ lrt.prob <- 2 * ( model.prob$loglik[2] - model.0$loglik[2] )
+
+ row <- data.frame(
+ beta_SNP_add = sm.dose[1],
+ sebeta_SNP_add = sm.dose[2],
+ chi2_SNP = lrt.dose)
+ dose.add.R <- rbind(dose.add.R, row)
+
+ row <- data.frame(
+ beta_SNP_addA1 = sm.prob[1],
+ sebeta_SNP_addA1 = sm.prob[2],
+ chi2_SNP_A1 = lrt.prob)
+ prob.add.R <- rbind(prob.add.R, row)
}
-cat("additive model (dose):\n")
-resPaAddDose/resRAddDose
-cat("additive model (prob):\n")
-resPaAddProb/resRAddProb
+rownames(dose.add.R) <- NULL
+rownames(prob.add.R) <- NULL
+stopifnot( all.equal(resPaAddDose, dose.add.R, tol=tol) )
-cat("dominant model (prob):\n")
+stopifnot( all.equal(resPaAddProb, prob.add.R, tol=tol) )
+
+
+## dominant model
+prob.dom.R <- data.frame(beta_SNP_domA1 = numeric(),
+ sebeta_SNP_domA1 = numeric(),
+ chi2_SNP_domA1 = numeric())
for (i in 3:dim(dose)[2]) {
indexHom <- 3 + ( i - 3 ) * 2
indexHet <- indexHom + 1
regProb <- prob[,indexHom] + prob[,indexHet]
- smr <- summary( coxph(Surv(fupt_chd, chd) ~ sex + age +
- othercov + regProb ) )
- resRDom[i-2,] <- smr$coef[4, c(1,3)]
+
+ noNA <- which( !is.na(regProb) )
+ model.0 <- coxph( Surv(fupt_chd, chd)[noNA] ~ sex[noNA] +
+ age[noNA] + othercov[noNA])
+ model <- coxph( Surv(fupt_chd, chd) ~ sex + age +
+ othercov + regProb )
+ sm <- summary(model)$coef[4, c(1,3)]
+ lrt <- 2 * ( model$loglik[2] - model.0$loglik[2] )
+
+ row <- data.frame(
+ beta_SNP_domA1 = sm[1],
+ sebeta_SNP_domA1 = sm[2],
+ chi2_SNP_domA1 = lrt)
+ prob.dom.R <- rbind(prob.dom.R, row)
}
-resPaDom/resRDom
+rownames(prob.dom.R) <- NULL
+stopifnot( all.equal(resPaDom, prob.dom.R, tol=tol) )
-cat("recessive model (prob):\n")
+## recessive model
+prob.rec.R <- data.frame(beta_SNP_recA1 = numeric(),
+ sebeta_SNP_recA1 = numeric(),
+ chi2_SNP_recA1 = numeric())
for (i in 3:dim(dose)[2]) {
indexHom <- 3 + ( i - 3 ) * 2
indexHet <- indexHom + 1
regProb <- prob[,indexHom]
- smr <- summary( coxph(Surv(fupt_chd, chd) ~ sex + age +
- othercov + regProb ) )
- resRRec[i-2,] <- smr$coef[4, c(1,3)]
+
+ noNA <- which( !is.na(regProb) )
+ model.0 <- coxph( Surv(fupt_chd, chd)[noNA] ~ sex[noNA] +
+ age[noNA] + othercov[noNA])
+ model <- coxph( Surv(fupt_chd, chd) ~ sex + age +
+ othercov + regProb )
+ sm <- summary(model)$coef[4, c(1,3)]
+ lrt <- 2 * ( model$loglik[2] - model.0$loglik[2] )
+
+ row <- data.frame(
+ beta_SNP_recA1 = sm[1],
+ sebeta_SNP_recA1 = sm[2],
+ chi2_SNP_recA1 = lrt)
+ prob.rec.R <- rbind(prob.rec.R, row)
}
-resPaRec/resRRec
+rownames(prob.rec.R) <- NULL
+stopifnot( all.equal(resPaRec, prob.rec.R, tol=tol) )
-cat("over-dominant model (prob):\n")
+
+## over-dominant model
+prob.odom.R <- data.frame(beta_SNP_odomA1 = numeric(),
+ sebeta_SNP_odomA1 = numeric(),
+ chi2_SNP_odomA1 = numeric())
for (i in 3:dim(dose)[2]) {
indexHom <- 3 + ( i - 3 ) * 2
indexHet <- indexHom + 1
- regProb <- prob[,indexHet]
- smr <- summary( coxph(Surv(fupt_chd, chd) ~ sex + age +
- othercov + regProb ) )
- resROdom[i-2,] <- smr$coef[4, c(1,3)]
+ regProb <- prob[, indexHet]
+
+ noNA <- which( !is.na(regProb) )
+ model.0 <- coxph( Surv(fupt_chd, chd)[noNA] ~ sex[noNA] +
+ age[noNA] + othercov[noNA])
+ model <- coxph( Surv(fupt_chd, chd) ~ sex + age +
+ othercov + regProb )
+ sm <- summary(model)$coef[4, c(1,3)]
+ lrt <- 2 * ( model$loglik[2] - model.0$loglik[2] )
+
+ row <- data.frame(
+ beta_SNP_odomA1 = sm[1],
+ sebeta_SNP_odomA1 = sm[2],
+ chi2_SNP_odomA1 = lrt)
+ prob.odom.R <- rbind(prob.odom.R, row)
}
-resPaOdom/resROdom
+rownames(prob.odom.R) <- NULL
+stopifnot( all.equal(resPaOdom, prob.odom.R, tol=tol) )
+
+## 2df model
+prob.2df.R <- data.frame(beta_SNP_A1A2 = numeric(),
+ sebeta_SNP_A1A2 = numeric(),
+ beta_SNP_A1A1 = numeric(),
+ sebeta_SNP_A1A1 = numeric(),
+ chi2_SNP_2df = numeric())
+for (i in 3:dim(dose)[2]) {
+ indexHom <- 3 + ( i - 3 ) * 2
+ indexHet <- indexHom + 1
+ regProb <- prob[, indexHet]
+
+ noNA <- which( !is.na(regProb) )
+ model.0 <- coxph( Surv(fupt_chd, chd)[noNA] ~ sex[noNA] +
+ age[noNA] + othercov[noNA])
+ model <- coxph( Surv(fupt_chd, chd) ~ sex + age +
+ othercov + prob[, indexHet] + prob[, indexHom] )
+ smA1A2 <- summary(model)$coef[4, c(1,3)]
+ smA1A1 <- summary(model)$coef[5, c(1,3)]
+ lrt <- 2 * ( model$loglik[2] - model.0$loglik[2] )
+
+ row <- data.frame(
+ beta_SNP_A1A2 = smA1A2[1],
+ sebeta_SNP_A1A2 = smA1A2[2],
+ beta_SNP_A1A1 = smA1A1[1],
+ sebeta_SNP_A1A1 = smA1A1[2],
+ chi2_SNP_2df = lrt)
+ prob.2df.R <- rbind(prob.2df.R, row)
+}
+rownames(prob.2df.R) <- NULL
+stopifnot( all.equal(resPa2df, prob.2df.R, tol=tol) )
+
+cat("OK\n")
More information about the Genabel-commits
mailing list