[Genabel-commits] r1291 - in pkg/ProbABEL: src tests/R-tests

Tue Aug 13 00:30:30 CEST 2013

Author: lckarssen
Date: 2013-08-13 00:30:29 +0200 (Tue, 13 Aug 2013)
New Revision: 1291

Modified:
   pkg/ProbABEL/src/coxph_data.cpp
   pkg/ProbABEL/src/coxph_data.h
   pkg/ProbABEL/src/eigen_mematrix.cpp
   pkg/ProbABEL/src/eigen_mematrix.h
   pkg/ProbABEL/src/main.cpp
   pkg/ProbABEL/src/regdata.cpp
   pkg/ProbABEL/tests/R-tests/run_models_in_R_pacox.R
Log:
Added LRT-based	chi^2 values to	the output of pacoxph. See the commit message of r1286 for more	info.

Changes		per file:
src/main.cpp:
 - Added null estimation call for pacoxph so that LRT is estimated correctly in	case of	missing	data.
src/coxph_data.cpp, src/coxph_data.h:
 - fix in copy constructor: masked data	was set	to 0 instead of	copied.
 - added check to see if call to coxfit2 was succesful
 - added the remove_snp_from_X() function. Because X is	transposed for the call	to coxfit2() this necessitates the delete_row() \
function in eigen_mematrix.*
src/eigen_mematrix.cpp, src/eigen_mematrix.h:
 - Added delete_row() function
 - Removed some	      commented leftovers from the delete_column() function
src/regdata.cpp:
 - minor change in error message to align it with its brother in coxph_data.cpp
tests/R-tests/run_models_in_R_pacox.R:
 - Created checks for Cox regression and pacoxph output. Structure is now similar to run_models_in_R_palinear.R



Modified: pkg/ProbABEL/src/coxph_data.cpp
===================================================================

--- pkg/ProbABEL/src/coxph_data.cpp	2013-08-12 16:11:20 UTC (rev 1290)
+++ pkg/ProbABEL/src/coxph_data.cpp	2013-08-12 22:30:29 UTC (rev 1291)
@@ -59,7 +59,7 @@
 
     for (int i = 0; i < nids; i++)
     {
-        masked_data[i] = 0;
+        masked_data[i] = obj.masked_data[i];
     }
 }
 
@@ -168,8 +168,10 @@
     strata  = reorder(strata, order);
     offset  = reorder(offset, order);
     X       = reorder(X, order);
-    X       = transpose(X);
 
+    // The coxfit2() function expects data in column major order.
+    X = transpose(X);
+
     // X.print();
     // offset.print();
     // weights.print();
@@ -212,6 +214,30 @@
     }
 }
 
+
+void coxph_data::remove_snp_from_X()
+{
+    // update_snp() adds SNP information to the design matrix. This
+    // function allows you to strip that information from X again.
+    // This is used for example when calculating the null model.
+
+    if(ngpreds == 1)
+    {
+        X.delete_row(X.nrow -1);
+    }
+    else if(ngpreds == 2)
+    {
+        X.delete_row(X.nrow -1);
+        X.delete_row(X.nrow -1);
+    }
+    else
+    {
+        cerr << "Error: ngpreds should be 1 or 2. "
+             << "You should never come here!\n";
+    }
+}
+
+
 coxph_data::~coxph_data()
 {
     delete[] coxph_data::masked_data;
@@ -283,7 +309,7 @@
     coxph_data cdata = cdatain.get_unmasked_data();
     beta.reinit(cdata.X.nrow, 1);
     sebeta.reinit(cdata.X.nrow, 1);
-    loglik = -9.999e+32;
+    loglik = - INFINITY;
     sigma2 = -1.;
     chi2_score = -1.;
     niter = 0;
@@ -295,6 +321,7 @@
                          int nullmodel)
 {
     coxph_data cdata = cdatain.get_unmasked_data();
+
     mematrix<double> X = t_apply_model(cdata.X, model, interaction, ngpreds,
                                        iscox, nullmodel);
 
@@ -312,6 +339,7 @@
 
     mematrix<double> u(X.nrow, 1);
     mematrix<double> imat(X.nrow, X.nrow);
+
     double work[X.ncol * 2 + 2 * (X.nrow) * (X.nrow) + 3 * (X.nrow)];
     double loglik_int[2];
     int flag;
@@ -335,6 +363,11 @@
             &sctest);
 #endif
 
+    if (flag == 1000)
+    {
+        cerr << "Error: Cox regression did not converge\n";
+    }
+
     for (int i = 0; i < X.nrow; i++)
     {
         sebeta[i] = sqrt(imat.get(i, i));

Modified: pkg/ProbABEL/src/coxph_data.h
===================================================================
--- pkg/ProbABEL/src/coxph_data.h	2013-08-12 16:11:20 UTC (rev 1290)
+++ pkg/ProbABEL/src/coxph_data.h	2013-08-12 22:30:29 UTC (rev 1291)
@@ -35,6 +35,7 @@
     coxph_data(const coxph_data &obj);
     coxph_data(phedata &phed, gendata &gend, int snpnum);
     void update_snp(gendata &gend, int snpnum);
+    void remove_snp_from_X();
     ~coxph_data();
 
     int nids;

Modified: pkg/ProbABEL/src/eigen_mematrix.cpp
===================================================================
--- pkg/ProbABEL/src/eigen_mematrix.cpp	2013-08-12 16:11:20 UTC (rev 1290)
+++ pkg/ProbABEL/src/eigen_mematrix.cpp	2013-08-12 22:30:29 UTC (rev 1291)
@@ -372,10 +372,7 @@
         exit(1);
     }
 
-    // Eigen::Matrix<DT,-1,-1,0,-1,-1> *auxdata =
-    //     new Eigen::Matrix<DT,-1,-1,0,-1,-1>;
     MatrixXd auxdata = data;
-
     data.resize(data.rows(), data.cols()-1);
 
     int rightColsSize = auxdata.cols() - delcol - 1;
@@ -387,5 +384,27 @@
 }
 
 
+template<class DT>
+void mematrix<DT>::delete_row(const int delrow)
+{
+    if (delrow > nrow || delrow < 0)
+    {
+        fprintf(stderr, "mematrix::delete_row: row out of range\n");
+        exit(1);
+    }
 
+
+    MatrixXd auxdata = data;
+    data.resize(data.rows()-1, data.cols());
+
+    int bottomRowsSize = auxdata.rows() - delrow - 1;
+
+    data.topRows(delrow) = auxdata.topRows(delrow);
+    data.bottomRows(bottomRowsSize) = auxdata.bottomRows(bottomRowsSize);
+
+    nrow--;
+}
+
+
+
 #endif

Modified: pkg/ProbABEL/src/eigen_mematrix.h
===================================================================
--- pkg/ProbABEL/src/eigen_mematrix.h	2013-08-12 16:11:20 UTC (rev 1290)
+++ pkg/ProbABEL/src/eigen_mematrix.h	2013-08-12 22:30:29 UTC (rev 1291)
@@ -38,6 +38,7 @@
     mematrix operator*(const mematrix *M);
 
     void delete_column(const int delcol);
+    void delete_row(const int delrow);
 
     void reinit(int nr, int nc);
 

Modified: pkg/ProbABEL/src/main.cpp
===================================================================
--- pkg/ProbABEL/src/main.cpp	2013-08-12 16:11:20 UTC (rev 1290)
+++ pkg/ProbABEL/src/main.cpp	2013-08-12 22:30:29 UTC (rev 1291)
@@ -716,8 +716,17 @@
                                                  invvarmatrix,
                                                  input_var.getRobust(), 1);
 
+#elif COXPH
+                            coxph_data new_rgd = rgd;
+                            new_rgd.remove_snp_from_X();
+                            coxph_reg new_null_rd(new_rgd);
+                            new_null_rd.estimate(new_rgd, 0, MAXITER, EPS,
+                                                 CHOLTOL, model,
+                                                 input_var.getInteraction(),
+                                                 input_var.getNgpreds(),
+                                                 true, 1);
+#endif
                             *chi2[model] << 2. * (loglik - new_null_rd.loglik);
-#endif
                         }
                         else
                         {

Modified: pkg/ProbABEL/src/regdata.cpp
===================================================================
--- pkg/ProbABEL/src/regdata.cpp	2013-08-12 16:11:20 UTC (rev 1290)
+++ pkg/ProbABEL/src/regdata.cpp	2013-08-12 22:30:29 UTC (rev 1291)
@@ -136,7 +136,8 @@
     }
     else
     {
-        cerr << "ngpreds should be 1 or 2. you should never come here!\n";
+        cerr << "Error: ngpreds should be 1 or 2. "
+             << "You should never come here!\n";
     }
 }
 

Modified: pkg/ProbABEL/tests/R-tests/run_models_in_R_pacox.R
===================================================================
--- pkg/ProbABEL/tests/R-tests/run_models_in_R_pacox.R	2013-08-12 16:11:20 UTC (rev 1290)
+++ pkg/ProbABEL/tests/R-tests/run_models_in_R_pacox.R	2013-08-12 22:30:29 UTC (rev 1291)
@@ -1,31 +1,39 @@
+cat("Checking Cox PH regression...\n")
 library(survival)
 
+## Set tolerance for comparing various outputs
+tol <- 1e-5
+
 ###
 # load the data
 ###
+example.path <- "../../examples/"
 
 # load phenotypic data
 pheno <- read.table("../../examples/coxph_data.txt", head=TRUE,
                     string=FALSE)
 
 # load genetic DOSE data
-dose <- read.table("../../examples/test.mldose", head=FALSE,
-                   string=FALSE)
+dose <- read.table("../../examples/test.mldose",
+                   head=FALSE, string=FALSE)
 # remove "1->" from the names of dose-IDs
-idNames <- dose[, 1]
-idNames <- sub("[0-9]+->", "", idNames)
-dose[,1] <- idNames
-# check consistency of names
-table(dose[, 1] == pheno[, 1])
+idNames   <- dose[, 1]
+idNames   <- sub("[0-9]+->", "", idNames)
+dose[, 1] <- idNames
+cat("Dose: check consistency of names\t\t")
+stopifnot( all.equal(dose[, 1], pheno[, 1], tol) )
+cat("OK\n")
 
 # load genetic PROB data
-prob <- read.table("../../examples/test.mlprob", head=FALSE, string=FALSE)
+prob <- read.table("../../examples/test.mlprob",
+                   head=FALSE, string=FALSE)
 # remove "1->" from the names of prob-IDs
-idNames <- prob[, 1]
-idNames <- sub("[0-9]+->", "", idNames)
-prob[,1] <- idNames
-# check consistency of names
-table(prob[,1] == pheno[,1])
+idNames   <- prob[, 1]
+idNames   <- sub("[0-9]+->", "", idNames)
+prob[, 1] <- idNames
+cat("Prob: check consistency of names\t\t")
+stopifnot( all.equal(prob[, 1], pheno[, 1], tol) )
+cat("OK\n")
 
 # check consistency DOSE <-> PROB
 doseFromProb <- matrix(NA, ncol=dim(dose)[2], nrow=dim(dose)[1])
@@ -33,79 +41,220 @@
         indexHom <- 3 + ( i - 3 ) * 2
         indexHet <- indexHom + 1
         doseFromProb[, i] <- prob[, indexHom] * 2 + prob[, indexHet]
-        print( table( ( dose[,i] - doseFromProb[,i] ) > 1e-8 ) )
 }
+cat("Check consistency dose <-> prob gtdata\t\t")
+stopifnot( all.equal(dose[, 3:ncol(dose)],
+                     as.data.frame(doseFromProb)[,3:ncol(doseFromProb)],
+                     tol=tol )
+          )
+cat("OK\n")
 
-###
-# run analysis
-###
+####
+## Run ProbABEL to get the output data we want to compare/verify
+####
+cat("Running ProbABEL...\t\t\t\t")
+tmp <- system("cd ../../examples/; sh example_cox.sh; cd -",
+              intern=TRUE)
+cat("OK\n")
 
-# run ProbABEL
-system("cd ../../examples/; sh example_cox.sh; cd -")
-resPaAddDose <-
-  read.table("../../examples/coxph_dose_add.out.txt",
-             head=TRUE)[, c("beta_SNP_add","sebeta_SNP_add")]
-resPaAddProb <-
-  read.table("../../examples/coxph_prob_add.out.txt",
-             head=TRUE)[, c("beta_SNP_addA1","sebeta_SNP_addA1")]
-resPaDom <-
-  read.table("../../examples/coxph_prob_domin.out.txt",
-             head=TRUE)[, c("beta_SNP_domA1","sebeta_SNP_domA1")]
-resPaRec <-
-  read.table("../../examples/coxph_prob_recess.out.txt",
-             head=TRUE)[, c("beta_SNP_recA1","sebeta_SNP_recA1")]
-resPaOdom <-
-  read.table("../../examples/coxph_prob_over_domin.out.txt",
-             head=TRUE)[, c("beta_SNP_odom","sebeta_SNP_odom")]
+resPaAddDose <- read.table(
+    paste0(example.path, "coxph_dose_add.out.txt"),
+    head=TRUE)[,
+        c("beta_SNP_add",
+          "sebeta_SNP_add",
+          "chi2_SNP")]
+resPaAddProb <- read.table(
+    paste0(example.path, "coxph_prob_add.out.txt"),
+    head=TRUE)[, c("beta_SNP_addA1",
+        "sebeta_SNP_addA1",
+        "chi2_SNP_A1")]
+resPaDom <- read.table(
+    paste0(example.path, "coxph_prob_domin.out.txt"),
+    head=TRUE)[,
+        c("beta_SNP_domA1",
+          "sebeta_SNP_domA1",
+          "chi2_SNP_domA1")]
+resPaRec <- read.table(
+    paste0(example.path, "coxph_prob_recess.out.txt"),
+    head=TRUE)[,
+        c("beta_SNP_recA1",
+          "sebeta_SNP_recA1",
+          "chi2_SNP_recA1")]
+resPaOdom <- read.table(
+    paste0(example.path, "coxph_prob_over_domin.out.txt"),
+    head=TRUE)[,
+        c("beta_SNP_odomA1",
+          "sebeta_SNP_odomA1",
+          "chi2_SNP_odomA1")]
+resPa2df <- read.table(
+    paste0(example.path, "coxph_prob_2df.out.txt"),
+    head=TRUE)[,
+        c("beta_SNP_A1A2",
+          "sebeta_SNP_A1A2",
+          "beta_SNP_A1A1",
+          "sebeta_SNP_A1A1",
+          "chi2_SNP_2df")]
 
+####
+## run analysis in R
+####
 attach(pheno)
 
+cat("Comparing R output with ProbABEL output\t\t")
 # run analysis on dose
-emptyRes <- resPaAddDose
-emptyRes[] <- NA
-resRAddDose <- resRAddProb <- resRDom <- resRRec <- resROdom <- emptyRes
+## emptyRes <- resPaAddDose
+## emptyRes[] <- NA
+## resRAddDose <- resRAddProb <- resRDom <- resRRec <- resROdom <- emptyRes
+
+dose.add.R <- data.frame(beta_SNP_add   = numeric(),
+                         sebeta_SNP_add = numeric(),
+                         chi2_SNP       = numeric())
+prob.add.R <- data.frame(beta_SNP_addA1   = numeric(),
+                         sebeta_SNP_addA1 = numeric(),
+                         chi2_SNP_A1      = numeric())
 for (i in 3:dim(dose)[2]) {
-        smr <- summary( coxph(Surv(fupt_chd, chd) ~ sex + age +
-                              othercov + dose[,i] ) )
-        resRAddDose[i-2,] <-  smr$coef[4, c(1,3)]
-        smr <- summary( coxph(Surv(fupt_chd, chd) ~ sex + age +
-                              othercov + doseFromProb[,i] ) )
-        resRAddProb[i-2,] <- smr$coef[4, c(1,3)]
+        noNA <- which( !is.na(dose[, i]) )
+        model.0 <- coxph(
+            Surv(fupt_chd, chd)[noNA] ~ sex[noNA] + age[noNA] + othercov[noNA] )
+
+        model.dose <- coxph(Surv(fupt_chd, chd) ~ sex + age +
+                            othercov + dose[,i] )
+        sm.dose <- summary(model.dose)$coef[4, c(1,3)]
+
+        model.prob <- coxph(Surv(fupt_chd, chd) ~ sex + age +
+                            othercov + doseFromProb[,i] )
+        sm.prob <- summary(model.prob)$coef[4, c(1,3)]
+
+        lrt.dose <- 2 * ( model.dose$loglik[2] - model.0$loglik[2] )
+        lrt.prob <- 2 * ( model.prob$loglik[2] - model.0$loglik[2] )
+
+        row <- data.frame(
+            beta_SNP_add = sm.dose[1],
+            sebeta_SNP_add = sm.dose[2],
+            chi2_SNP = lrt.dose)
+        dose.add.R <- rbind(dose.add.R, row)
+
+        row <- data.frame(
+            beta_SNP_addA1 = sm.prob[1],
+            sebeta_SNP_addA1 = sm.prob[2],
+            chi2_SNP_A1 = lrt.prob)
+        prob.add.R <- rbind(prob.add.R, row)
 }
-cat("additive model (dose):\n")
-resPaAddDose/resRAddDose
-cat("additive model (prob):\n")
-resPaAddProb/resRAddProb
+rownames(dose.add.R) <- NULL
+rownames(prob.add.R) <- NULL
+stopifnot( all.equal(resPaAddDose, dose.add.R, tol=tol) )
 
-cat("dominant model (prob):\n")
+stopifnot( all.equal(resPaAddProb, prob.add.R, tol=tol) )
+
+
+## dominant model
+prob.dom.R <- data.frame(beta_SNP_domA1   = numeric(),
+                         sebeta_SNP_domA1 = numeric(),
+                         chi2_SNP_domA1   = numeric())
 for (i in 3:dim(dose)[2]) {
         indexHom <- 3 + ( i - 3 ) * 2
         indexHet <- indexHom + 1
         regProb <- prob[,indexHom] + prob[,indexHet]
-        smr <- summary( coxph(Surv(fupt_chd, chd) ~ sex + age +
-                              othercov + regProb ) )
-        resRDom[i-2,] <- smr$coef[4, c(1,3)]
+
+        noNA    <- which( !is.na(regProb) )
+        model.0 <- coxph( Surv(fupt_chd, chd)[noNA] ~ sex[noNA] +
+                         age[noNA] + othercov[noNA])
+        model   <- coxph( Surv(fupt_chd, chd) ~ sex + age +
+                         othercov + regProb )
+        sm      <- summary(model)$coef[4, c(1,3)]
+        lrt     <- 2 * ( model$loglik[2] - model.0$loglik[2] )
+
+        row <- data.frame(
+            beta_SNP_domA1   = sm[1],
+            sebeta_SNP_domA1 = sm[2],
+            chi2_SNP_domA1   = lrt)
+        prob.dom.R <- rbind(prob.dom.R, row)
 }
-resPaDom/resRDom
+rownames(prob.dom.R) <- NULL
+stopifnot( all.equal(resPaDom, prob.dom.R, tol=tol) )
 
-cat("recessive model (prob):\n")
+## recessive model
+prob.rec.R <- data.frame(beta_SNP_recA1   = numeric(),
+                         sebeta_SNP_recA1 = numeric(),
+                         chi2_SNP_recA1   = numeric())
 for (i in 3:dim(dose)[2]) {
         indexHom <- 3 + ( i - 3 ) * 2
         indexHet <- indexHom + 1
         regProb <- prob[,indexHom]
-        smr <- summary( coxph(Surv(fupt_chd, chd) ~ sex + age +
-                              othercov + regProb ) )
-        resRRec[i-2,] <- smr$coef[4, c(1,3)]
+
+        noNA    <- which( !is.na(regProb) )
+        model.0 <- coxph( Surv(fupt_chd, chd)[noNA] ~ sex[noNA] +
+                         age[noNA] + othercov[noNA])
+        model   <- coxph( Surv(fupt_chd, chd) ~ sex + age +
+                         othercov + regProb )
+        sm      <- summary(model)$coef[4, c(1,3)]
+        lrt     <- 2 * (  model$loglik[2] - model.0$loglik[2] )
+
+        row <- data.frame(
+            beta_SNP_recA1   = sm[1],
+            sebeta_SNP_recA1 = sm[2],
+            chi2_SNP_recA1   = lrt)
+        prob.rec.R <- rbind(prob.rec.R, row)
 }
-resPaRec/resRRec
+rownames(prob.rec.R) <- NULL
+stopifnot( all.equal(resPaRec, prob.rec.R, tol=tol) )
 
-cat("over-dominant model (prob):\n")
+
+## over-dominant model
+prob.odom.R <- data.frame(beta_SNP_odomA1   = numeric(),
+                         sebeta_SNP_odomA1 = numeric(),
+                         chi2_SNP_odomA1   = numeric())
 for (i in 3:dim(dose)[2]) {
         indexHom <- 3 + ( i - 3 ) * 2
         indexHet <- indexHom + 1
-        regProb <- prob[,indexHet]
-        smr <- summary( coxph(Surv(fupt_chd, chd) ~ sex + age +
-                              othercov + regProb ) )
-        resROdom[i-2,] <- smr$coef[4, c(1,3)]
+        regProb <- prob[, indexHet]
+
+        noNA    <- which( !is.na(regProb) )
+        model.0 <- coxph( Surv(fupt_chd, chd)[noNA] ~ sex[noNA] +
+                         age[noNA] + othercov[noNA])
+        model   <- coxph( Surv(fupt_chd, chd) ~ sex + age +
+                         othercov + regProb )
+        sm      <- summary(model)$coef[4, c(1,3)]
+        lrt     <- 2 * (  model$loglik[2] - model.0$loglik[2] )
+
+        row <- data.frame(
+            beta_SNP_odomA1   = sm[1],
+            sebeta_SNP_odomA1 = sm[2],
+            chi2_SNP_odomA1   = lrt)
+        prob.odom.R <- rbind(prob.odom.R, row)
 }
-resPaOdom/resROdom
+rownames(prob.odom.R) <- NULL
+stopifnot( all.equal(resPaOdom, prob.odom.R, tol=tol) )
+
+## 2df model
+prob.2df.R <- data.frame(beta_SNP_A1A2   = numeric(),
+                          sebeta_SNP_A1A2 = numeric(),
+                          beta_SNP_A1A1   = numeric(),
+                          sebeta_SNP_A1A1 = numeric(),
+                         chi2_SNP_2df     = numeric())
+for (i in 3:dim(dose)[2]) {
+        indexHom <- 3 + ( i - 3 ) * 2
+        indexHet <- indexHom + 1
+        regProb <- prob[, indexHet]
+
+        noNA    <- which( !is.na(regProb) )
+        model.0 <- coxph( Surv(fupt_chd, chd)[noNA] ~ sex[noNA] +
+                         age[noNA] + othercov[noNA])
+        model   <- coxph( Surv(fupt_chd, chd) ~ sex + age +
+                         othercov + prob[, indexHet] + prob[, indexHom] )
+        smA1A2  <- summary(model)$coef[4, c(1,3)]
+        smA1A1  <- summary(model)$coef[5, c(1,3)]
+        lrt     <- 2 * (  model$loglik[2] - model.0$loglik[2] )
+
+        row <- data.frame(
+            beta_SNP_A1A2   = smA1A2[1],
+            sebeta_SNP_A1A2 = smA1A2[2],
+            beta_SNP_A1A1   = smA1A1[1],
+            sebeta_SNP_A1A1 = smA1A1[2],
+            chi2_SNP_2df   = lrt)
+        prob.2df.R <- rbind(prob.2df.R, row)
+}
+rownames(prob.2df.R) <- NULL
+stopifnot( all.equal(resPa2df, prob.2df.R, tol=tol) )
+
+cat("OK\n")