[Genabel-commits] r1322 - in pkg/ProbABEL: . checks/R-tests checks/inputfiles checks/verified_results doc src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Aug 29 23:25:08 CEST 2013
Author: lckarssen
Date: 2013-08-29 23:25:08 +0200 (Thu, 29 Aug 2013)
New Revision: 1322
Modified:
pkg/ProbABEL/checks/R-tests/initial_checks.R
pkg/ProbABEL/checks/R-tests/run_models_in_R_pacox.R
pkg/ProbABEL/checks/R-tests/run_models_in_R_palinear.R
pkg/ProbABEL/checks/R-tests/run_models_in_R_palogist.R
pkg/ProbABEL/checks/inputfiles/mmscore_gen.mlinfo
pkg/ProbABEL/checks/inputfiles/test.mlinfo
pkg/ProbABEL/checks/verified_results/height_add.out.txt
pkg/ProbABEL/checks/verified_results/height_base_add.out.txt
pkg/ProbABEL/checks/verified_results/height_ngp2_2df.out.txt
pkg/ProbABEL/checks/verified_results/height_ngp2_add.out.txt
pkg/ProbABEL/checks/verified_results/height_ngp2_domin.out.txt
pkg/ProbABEL/checks/verified_results/height_ngp2_over_domin.out.txt
pkg/ProbABEL/checks/verified_results/height_ngp2_recess.out.txt
pkg/ProbABEL/configure.ac
pkg/ProbABEL/doc/ChangeLog
pkg/ProbABEL/doc/packaging.txt
pkg/ProbABEL/src/main.cpp
Log:
Fix of ProbABEL bug #4854: When using mmscore, there is one (nan) column missing in the output for low-frequency SNPs. The .mlinfo test files are modified to include a SNP with very low Rsq, this triggers also the low-MAF section of the code.
Also includes a simplification of the R-based test scripts.
doc/packaging.txt:
small update of the text mentioning autoreconf -i
src/main.cpp:
the fix of the missing nan when running mmscore and having low-MAF
checks/inputfiles/mmscore_gen.mlinfo,
checks/inputfiles/test.mlinfo:
Changed the Rsq of one of the SNPs to trigger the 'nan' path of the code
checks/verified_results/height_ngp2_domin.out.txt
checks/verified_results/height_ngp2_add.out.txt
checks/verified_results/height_ngp2_2df.out.txt
checks/verified_results/height_base_add.out.txt
checks/verified_results/height_ngp2_over_domin.out.txt
checks/verified_results/height_add.out.txt
checks/verified_results/height_ngp2_recess.out.txt
Update the verified results accordingly
checks/R-tests/run_models_in_R_palogist.R
checks/R-tests/run_models_in_R_pacox.R
checks/R-tests/initial_checks.R
checks/R-tests/run_models_in_R_palinear.R
While we are at it: simplify the R-test scripts to make maintenance easier. Only the 2df model still needs to be done/"harmonised".
configure.ac:
Updated version number
Modified: pkg/ProbABEL/checks/R-tests/initial_checks.R
===================================================================
--- pkg/ProbABEL/checks/R-tests/initial_checks.R 2013-08-27 19:24:17 UTC (rev 1321)
+++ pkg/ProbABEL/checks/R-tests/initial_checks.R 2013-08-29 21:25:08 UTC (rev 1322)
@@ -5,6 +5,11 @@
## Set tolerance for comparing various outputs
tol <- 1e-5
+## R^2 threshold from the mlinfo file. If R^2 smaller than this
+## threshold, beta, se_beta and chi^2 are set to NaN.
+## Check this value against the one in the ProbABEL code (main.cpp),
+## look for the variables freq and poly.
+rsq.thresh <- 1e-16
####
## load the data
@@ -29,7 +34,7 @@
## load genetic PROB data
prob <- read.table(paste0(inputfiles.path, "test.mlprob"),
- head=FALSE, string=FALSE)
+ header=FALSE, stringsAsFactors=FALSE)
## remove "1->" from the names of prob-IDs
idNames <- prob[, 1]
idNames <- sub("[0-9]+->", "", idNames)
@@ -51,3 +56,38 @@
tol=tol )
)
cat("OK\n")
+
+
+## Read the imputed Rsq from the info file
+Rsq <- read.table(paste0(inputfiles.path, "test.mlinfo"),
+ header=TRUE,
+ stringsAsFactors=FALSE)[, c("Rsq")]
+
+
+## Define column names of the various ProbABEL output file headers
+colsAddDose <- c("Rsq",
+ "beta_SNP_add",
+ "sebeta_SNP_add",
+ "chi2_SNP")
+colsAddProb <- c("Rsq",
+ "beta_SNP_addA1",
+ "sebeta_SNP_addA1",
+ "chi2_SNP_A1")
+colsDom <- c("Rsq",
+ "beta_SNP_domA1",
+ "sebeta_SNP_domA1",
+ "chi2_SNP_domA1")
+colsRec <- c("Rsq",
+ "beta_SNP_recA1",
+ "sebeta_SNP_recA1",
+ "chi2_SNP_recA1")
+colsOdom <-c("Rsq",
+ "beta_SNP_odomA1",
+ "sebeta_SNP_odomA1",
+ "chi2_SNP_odomA1")
+cols2df <- c("Rsq",
+ "beta_SNP_A1A2",
+ "sebeta_SNP_A1A2",
+ "beta_SNP_A1A1",
+ "sebeta_SNP_A1A1",
+ "chi2_SNP_2df")
Modified: pkg/ProbABEL/checks/R-tests/run_models_in_R_pacox.R
===================================================================
--- pkg/ProbABEL/checks/R-tests/run_models_in_R_pacox.R 2013-08-27 19:24:17 UTC (rev 1321)
+++ pkg/ProbABEL/checks/R-tests/run_models_in_R_pacox.R 2013-08-29 21:25:08 UTC (rev 1322)
@@ -22,41 +22,22 @@
resPaAddDose <- read.table(
paste0(tests.path, "coxph_dose_add.out.txt"),
- head=TRUE)[,
- c("beta_SNP_add",
- "sebeta_SNP_add",
- "chi2_SNP")]
+ head=TRUE)[, colsAddDose]
resPaAddProb <- read.table(
paste0(tests.path, "coxph_prob_add.out.txt"),
- head=TRUE)[, c("beta_SNP_addA1",
- "sebeta_SNP_addA1",
- "chi2_SNP_A1")]
+ head=TRUE)[, colsAddProb]
resPaDom <- read.table(
paste0(tests.path, "coxph_prob_domin.out.txt"),
- head=TRUE)[,
- c("beta_SNP_domA1",
- "sebeta_SNP_domA1",
- "chi2_SNP_domA1")]
+ head=TRUE)[, colsDom]
resPaRec <- read.table(
paste0(tests.path, "coxph_prob_recess.out.txt"),
- head=TRUE)[,
- c("beta_SNP_recA1",
- "sebeta_SNP_recA1",
- "chi2_SNP_recA1")]
+ head=TRUE)[, colsRec]
resPaOdom <- read.table(
paste0(tests.path, "coxph_prob_over_domin.out.txt"),
- head=TRUE)[,
- c("beta_SNP_odomA1",
- "sebeta_SNP_odomA1",
- "chi2_SNP_odomA1")]
+ head=TRUE)[, colsOdom]
resPa2df <- read.table(
paste0(tests.path, "coxph_prob_2df.out.txt"),
- head=TRUE)[,
- c("beta_SNP_A1A2",
- "sebeta_SNP_A1A2",
- "beta_SNP_A1A1",
- "sebeta_SNP_A1A1",
- "chi2_SNP_2df")]
+ head=TRUE)[, cols2df]
####
## run analysis in R
@@ -64,137 +45,79 @@
attach(pheno)
cat("Comparing R output with ProbABEL output\t\t")
-# run analysis on dose
-## emptyRes <- resPaAddDose
-## emptyRes[] <- NA
-## resRAddDose <- resRAddProb <- resRDom <- resRRec <- resROdom <- emptyRes
-dose.add.R <- data.frame(beta_SNP_add = numeric(),
- sebeta_SNP_add = numeric(),
- chi2_SNP = numeric())
-prob.add.R <- data.frame(beta_SNP_addA1 = numeric(),
- sebeta_SNP_addA1 = numeric(),
- chi2_SNP_A1 = numeric())
-for (i in 3:dim(dose)[2]) {
- noNA <- which( !is.na(dose[, i]) )
- model.0 <- coxph(
- Surv(fupt_chd, chd)[noNA] ~ sex[noNA] + age[noNA] + othercov[noNA] )
+run.model <- function(model0.txt, model.txt, snpdata) {
+ resultR <- data.frame()
+ for (i in 3:dim(dose)[2]) {
+ indexHom <- 3 + ( i - 3 ) * 2
+ indexHet <- indexHom + 1
+ snp <- eval(parse(text=snpdata))
- model.dose <- coxph(Surv(fupt_chd, chd) ~ sex + age +
- othercov + dose[,i] )
- sm.dose <- summary(model.dose)$coef[4, c(1,3)]
+ noNA <- which( !is.na(snp) )
+ model.0 <- eval(parse(text=model0.txt))
+ model <- eval(parse(text=model.txt))
+ sm <- summary(model)$coef[4, c(1,3)]
+ lrt <- 2 * ( model$loglik[2] - model.0$loglik[2] )
- model.prob <- coxph(Surv(fupt_chd, chd) ~ sex + age +
- othercov + doseFromProb[,i] )
- sm.prob <- summary(model.prob)$coef[4, c(1,3)]
+ rsq <- Rsq[i-2]
+ if( rsq < rsq.thresh) {
+ row <- c(rsq, NaN, NaN, NaN)
+ } else {
+ row <- c(rsq, sm[1], sm[2], lrt)
+ }
+ resultR <- rbind(resultR, row)
+ }
+ return(resultR)
+}
- lrt.dose <- 2 * ( model.dose$loglik[2] - model.0$loglik[2] )
- lrt.prob <- 2 * ( model.prob$loglik[2] - model.0$loglik[2] )
- row <- data.frame(
- beta_SNP_add = sm.dose[1],
- sebeta_SNP_add = sm.dose[2],
- chi2_SNP = lrt.dose)
- dose.add.R <- rbind(dose.add.R, row)
+model.fn.0 <-
+ "coxph( Surv(fupt_chd, chd)[noNA] ~ sex[noNA] + age[noNA] + othercov[noNA] )"
+model.fn <- "coxph( Surv(fupt_chd, chd) ~ sex + age + othercov + snp )"
- row <- data.frame(
- beta_SNP_addA1 = sm.prob[1],
- sebeta_SNP_addA1 = sm.prob[2],
- chi2_SNP_A1 = lrt.prob)
- prob.add.R <- rbind(prob.add.R, row)
-}
+## Additive model, dosages
+snpdose <- "dose[, i]"
+dose.add.R <- run.model(model.fn.0, model.fn, snpdose)
+colnames(dose.add.R) <- colsAddDose
rownames(dose.add.R) <- NULL
-rownames(prob.add.R) <- NULL
stopifnot( all.equal(resPaAddDose, dose.add.R, tol=tol) )
+cat("additive ")
+## Additive model, probabilities
+snpprob <- "doseFromProb[, i]"
+prob.add.R <- run.model(model.fn.0, model.fn, snpprob)
+colnames(prob.add.R) <- colsAddProb
+rownames(prob.add.R) <- NULL
stopifnot( all.equal(resPaAddProb, prob.add.R, tol=tol) )
+cat("additive ")
-
## dominant model
-prob.dom.R <- data.frame(beta_SNP_domA1 = numeric(),
- sebeta_SNP_domA1 = numeric(),
- chi2_SNP_domA1 = numeric())
-for (i in 3:dim(dose)[2]) {
- indexHom <- 3 + ( i - 3 ) * 2
- indexHet <- indexHom + 1
- regProb <- prob[,indexHom] + prob[,indexHet]
-
- noNA <- which( !is.na(regProb) )
- model.0 <- coxph( Surv(fupt_chd, chd)[noNA] ~ sex[noNA] +
- age[noNA] + othercov[noNA])
- model <- coxph( Surv(fupt_chd, chd) ~ sex + age +
- othercov + regProb )
- sm <- summary(model)$coef[4, c(1,3)]
- lrt <- 2 * ( model$loglik[2] - model.0$loglik[2] )
-
- row <- data.frame(
- beta_SNP_domA1 = sm[1],
- sebeta_SNP_domA1 = sm[2],
- chi2_SNP_domA1 = lrt)
- prob.dom.R <- rbind(prob.dom.R, row)
-}
+snpprob <- "prob[, indexHom] + prob[, indexHet]"
+prob.dom.R <- run.model(model.fn.0, model.fn, snpprob)
+colnames(prob.dom.R) <- colsDom
rownames(prob.dom.R) <- NULL
stopifnot( all.equal(resPaDom, prob.dom.R, tol=tol) )
+cat("dominant ")
## recessive model
-prob.rec.R <- data.frame(beta_SNP_recA1 = numeric(),
- sebeta_SNP_recA1 = numeric(),
- chi2_SNP_recA1 = numeric())
-for (i in 3:dim(dose)[2]) {
- indexHom <- 3 + ( i - 3 ) * 2
- indexHet <- indexHom + 1
- regProb <- prob[,indexHom]
-
- noNA <- which( !is.na(regProb) )
- model.0 <- coxph( Surv(fupt_chd, chd)[noNA] ~ sex[noNA] +
- age[noNA] + othercov[noNA])
- model <- coxph( Surv(fupt_chd, chd) ~ sex + age +
- othercov + regProb )
- sm <- summary(model)$coef[4, c(1,3)]
- lrt <- 2 * ( model$loglik[2] - model.0$loglik[2] )
-
- row <- data.frame(
- beta_SNP_recA1 = sm[1],
- sebeta_SNP_recA1 = sm[2],
- chi2_SNP_recA1 = lrt)
- prob.rec.R <- rbind(prob.rec.R, row)
-}
+snpprob <- "prob[, indexHom]"
+prob.rec.R <- run.model(model.fn.0, model.fn, snpprob)
+colnames(prob.rec.R) <- colsRec
rownames(prob.rec.R) <- NULL
stopifnot( all.equal(resPaRec, prob.rec.R, tol=tol) )
+cat("recessive ")
-
## over-dominant model
-prob.odom.R <- data.frame(beta_SNP_odomA1 = numeric(),
- sebeta_SNP_odomA1 = numeric(),
- chi2_SNP_odomA1 = numeric())
-for (i in 3:dim(dose)[2]) {
- indexHom <- 3 + ( i - 3 ) * 2
- indexHet <- indexHom + 1
- regProb <- prob[, indexHet]
-
- noNA <- which( !is.na(regProb) )
- model.0 <- coxph( Surv(fupt_chd, chd)[noNA] ~ sex[noNA] +
- age[noNA] + othercov[noNA])
- model <- coxph( Surv(fupt_chd, chd) ~ sex + age +
- othercov + regProb )
- sm <- summary(model)$coef[4, c(1,3)]
- lrt <- 2 * ( model$loglik[2] - model.0$loglik[2] )
-
- row <- data.frame(
- beta_SNP_odomA1 = sm[1],
- sebeta_SNP_odomA1 = sm[2],
- chi2_SNP_odomA1 = lrt)
- prob.odom.R <- rbind(prob.odom.R, row)
-}
+snpprob <- "prob[, indexHet]"
+prob.odom.R <- run.model(model.fn.0, model.fn, snpprob)
+colnames(prob.odom.R) <- colsOdom
rownames(prob.odom.R) <- NULL
stopifnot( all.equal(resPaOdom, prob.odom.R, tol=tol) )
+cat("overdominant ")
+
## 2df model
-prob.2df.R <- data.frame(beta_SNP_A1A2 = numeric(),
- sebeta_SNP_A1A2 = numeric(),
- beta_SNP_A1A1 = numeric(),
- sebeta_SNP_A1A1 = numeric(),
- chi2_SNP_2df = numeric())
+prob.2df.R <- data.frame()
for (i in 3:dim(dose)[2]) {
indexHom <- 3 + ( i - 3 ) * 2
indexHet <- indexHom + 1
@@ -209,15 +132,18 @@
smA1A1 <- summary(model)$coef[5, c(1,3)]
lrt <- 2 * ( model$loglik[2] - model.0$loglik[2] )
- row <- data.frame(
- beta_SNP_A1A2 = smA1A2[1],
- sebeta_SNP_A1A2 = smA1A2[2],
- beta_SNP_A1A1 = smA1A1[1],
- sebeta_SNP_A1A1 = smA1A1[2],
- chi2_SNP_2df = lrt)
+ rsq <- resPa2df[i-2, "Rsq"]
+ if( rsq < rsq.thresh) {
+ row <- c(rsq, NaN, NaN, NaN, NaN, NaN)
+ } else {
+ row <- c(rsq, smA1A2[1], smA1A2[2], smA1A1[1], smA1A1[2], lrt)
+
+ }
prob.2df.R <- rbind(prob.2df.R, row)
}
+colnames(prob.2df.R) <- cols2df
rownames(prob.2df.R) <- NULL
stopifnot( all.equal(resPa2df, prob.2df.R, tol=tol) )
+cat("2df\n")
-cat("OK\n")
+cat("\t\t\t\t\t\tOK\n")
Modified: pkg/ProbABEL/checks/R-tests/run_models_in_R_palinear.R
===================================================================
--- pkg/ProbABEL/checks/R-tests/run_models_in_R_palinear.R 2013-08-27 19:24:17 UTC (rev 1321)
+++ pkg/ProbABEL/checks/R-tests/run_models_in_R_palinear.R 2013-08-29 21:25:08 UTC (rev 1322)
@@ -21,41 +21,22 @@
resPaAddDose <- read.table(
paste0(tests.path, "height_base_add.out.txt"),
- head=TRUE)[,
- c("beta_SNP_add",
- "sebeta_SNP_add",
- "chi2_SNP")]
+ head=TRUE)[, colsAddDose]
resPaAddProb <- read.table(
paste0(tests.path, "height_ngp2_add.out.txt"),
- head=TRUE)[, c("beta_SNP_addA1",
- "sebeta_SNP_addA1",
- "chi2_SNP_A1")]
+ head=TRUE)[, colsAddProb]
resPaDom <- read.table(
paste0(tests.path, "height_ngp2_domin.out.txt"),
- head=TRUE)[,
- c("beta_SNP_domA1",
- "sebeta_SNP_domA1",
- "chi2_SNP_domA1")]
+ head=TRUE)[, colsDom]
resPaRec <- read.table(
paste0(tests.path, "height_ngp2_recess.out.txt"),
- head=TRUE)[,
- c("beta_SNP_recA1",
- "sebeta_SNP_recA1",
- "chi2_SNP_recA1")]
+ head=TRUE)[, colsRec]
resPaOdom <- read.table(
paste0(tests.path, "height_ngp2_over_domin.out.txt"),
- head=TRUE)[,
- c("beta_SNP_odomA1",
- "sebeta_SNP_odomA1",
- "chi2_SNP_odomA1")]
+ head=TRUE)[, colsOdom]
resPa2df <- read.table(
paste0(tests.path, "height_ngp2_2df.out.txt"),
- head=TRUE)[,
- c("beta_SNP_A1A2",
- "sebeta_SNP_A1A2",
- "beta_SNP_A1A1",
- "sebeta_SNP_A1A1",
- "chi2_SNP_2df")]
+ head=TRUE)[, cols2df]
####
## run analysis in R
@@ -63,126 +44,76 @@
attach(pheno)
cat("Comparing R output with ProbABEL output\t\t")
-## run analysis on dose
-dose.add.R <- data.frame(beta_SNP_add = numeric(),
- sebeta_SNP_add = numeric(),
- chi2_SNP = numeric())
-prob.add.R <- data.frame(beta_SNP_addA1 = numeric(),
- sebeta_SNP_addA1 = numeric(),
- chi2_SNP_A1 = numeric())
-for (i in 3:dim(dose)[2]) {
- noNA <- which( !is.na(dose[, i]) )
- model.0 <- lm( height[noNA] ~ sex[noNA] + age[noNA] )
- model.dose <- lm( height ~ sex + age + dose[, i] )
- sm.dose <- summary(model.dose)$coef[4, 1:2]
+run.model <- function(model0.txt, model.txt, snpdata) {
+ resultR <- data.frame()
+ for (i in 3:dim(dose)[2]) {
+ indexHom <- 3 + ( i - 3 ) * 2
+ indexHet <- indexHom + 1
+ snp <- eval(parse(text=snpdata))
- model.prob <- lm( height ~ sex + age + doseFromProb[, i] )
- sm.prob <- summary(model.prob)$coef[4, 1:2]
+ noNA <- which( !is.na(snp) )
+ model.0 <- eval(parse(text=model0.txt))
+ model <- eval(parse(text=model.txt))
+ sm <- summary(model)$coef[4, 1:2]
+ lrt <- 2 * ( logLik( model ) - logLik( model.0 ) )
- lrt.dose <- 2 * ( logLik( model.dose ) - logLik( model.0 ) )
- lrt.prob <- 2 * ( logLik( model.prob ) - logLik( model.0 ) )
+ rsq <- Rsq[i-2]
+ if( rsq < rsq.thresh) {
+ row <- c(rsq, NaN, NaN, NaN)
+ } else {
+ row <- c(rsq, sm[1], sm[2], lrt)
+ }
+ resultR <- rbind(resultR, row)
+ }
+ return(resultR)
+}
- row <- data.frame(
- beta_SNP_add = sm.dose[1],
- sebeta_SNP_add = sm.dose[2],
- chi2_SNP = lrt.dose)
- dose.add.R <- rbind(dose.add.R, row)
+model.fn.0 <- "lm( height[noNA] ~ sex[noNA] + age[noNA] )"
+model.fn <- "lm( height ~ sex + age + snp )"
- row <- data.frame(
- beta_SNP_addA1 = sm.prob[1],
- sebeta_SNP_addA1 = sm.prob[2],
- chi2_SNP_A1 = lrt.prob)
- prob.add.R <- rbind(prob.add.R, row)
-}
+## Additive model, dosages
+snpdose <- "dose[, i]"
+dose.add.R <- run.model(model.fn.0, model.fn, snpdose)
+colnames(dose.add.R) <- colsAddDose
rownames(dose.add.R) <- NULL
-rownames(prob.add.R) <- NULL
stopifnot( all.equal(resPaAddDose, dose.add.R, tol=tol) )
+cat("additive ")
+## Additive model, probabilities
+snpprob <- "doseFromProb[, i]"
+prob.add.R <- run.model(model.fn.0, model.fn, snpprob)
+colnames(prob.add.R) <- colsAddProb
+rownames(prob.add.R) <- NULL
stopifnot( all.equal(resPaAddProb, prob.add.R, tol=tol) )
+cat("additive ")
-
## dominant model
-prob.dom.R <- data.frame(beta_SNP_domA1 = numeric(),
- sebeta_SNP_domA1 = numeric(),
- chi2_SNP_domA1 = numeric())
-for (i in 3:dim(dose)[2]) {
- indexHom <- 3 + ( i - 3 ) * 2
- indexHet <- indexHom + 1
- regProb <- prob[, indexHom] + prob[, indexHet]
-
- noNA <- which( !is.na(regProb) )
- model.0 <- lm( height[noNA] ~ sex[noNA] + age[noNA] )
- model <- lm( height ~ sex + age + regProb )
- sm <- summary(model)$coef[4, 1:2]
- lrt <- 2 * ( logLik( model ) - logLik( model.0 ) )
-
- row <- data.frame(
- beta_SNP_domA1 = sm[1],
- sebeta_SNP_domA1 = sm[2],
- chi2_SNP_domA1 = lrt)
- prob.dom.R <- rbind(prob.dom.R, row)
-}
+snpprob <- "prob[, indexHom] + prob[, indexHet]"
+prob.dom.R <- run.model(model.fn.0, model.fn, snpprob)
+colnames(prob.dom.R) <- colsDom
rownames(prob.dom.R) <- NULL
stopifnot( all.equal(resPaDom, prob.dom.R, tol=tol) )
+cat("dominant ")
-
## recessive model
-prob.rec.R <- data.frame(beta_SNP_recA1 = numeric(),
- sebeta_SNP_recA1 = numeric(),
- chi2_SNP_recA1 = numeric())
-for (i in 3:dim(dose)[2]) {
- indexHom <- 3 + ( i - 3 ) * 2
- indexHet <- indexHom + 1
- regProb <- prob[, indexHom]
-
- noNA <- which( !is.na(regProb) )
- model.0 <- lm( height[noNA] ~ sex[noNA] + age[noNA] )
- model <- lm( height ~ sex + age + regProb )
- sm <- summary(model)$coef[4, 1:2]
- lrt <- 2 * ( logLik( model ) - logLik( model.0 ) )
-
- row <- data.frame(
- beta_SNP_recA1 = sm[1],
- sebeta_SNP_recA1 = sm[2],
- chi2_SNP_recA1 = lrt)
- prob.rec.R <- rbind(prob.rec.R, row)
-}
+snpprob <- "prob[, indexHom]"
+prob.rec.R <- run.model(model.fn.0, model.fn, snpprob)
+colnames(prob.rec.R) <- colsRec
rownames(prob.rec.R) <- NULL
stopifnot( all.equal(resPaRec, prob.rec.R, tol=tol) )
+cat("recessive ")
-
## over-dominant model
-prob.odom.R <- data.frame(beta_SNP_odomA1 = numeric(),
- sebeta_SNP_odomA1 = numeric(),
- chi2_SNP_odomA1 = numeric())
-for (i in 3:dim(dose)[2]) {
- indexHom <- 3 + ( i - 3 ) * 2
- indexHet <- indexHom + 1
- regProb <- prob[, indexHet]
-
- noNA <- which( !is.na(regProb) )
- model.0 <- lm( height[noNA] ~ sex[noNA] + age[noNA] )
- model <- lm( height ~ sex + age + regProb )
- sm <- summary(model)$coef[4, 1:2]
- lrt <- 2 * ( logLik( model ) - logLik( model.0 ) )
-
- row <- data.frame(
- beta_SNP_odomA1 = sm[1],
- sebeta_SNP_odomA1 = sm[2],
- chi2_SNP_odomA1 = lrt)
- prob.odom.R <- rbind(prob.odom.R, row)
-}
+snpprob <- "prob[, indexHet]"
+prob.odom.R <- run.model(model.fn.0, model.fn, snpprob)
+colnames(prob.odom.R) <- colsOdom
rownames(prob.odom.R) <- NULL
stopifnot( all.equal(resPaOdom, prob.odom.R, tol=tol) )
+cat("overdominant ")
-
## 2df model
-prob.2df.R <- data.frame(beta_SNP_A1A2 = numeric(),
- sebeta_SNP_A1A2 = numeric(),
- beta_SNP_A1A1 = numeric(),
- sebeta_SNP_A1A1 = numeric(),
- chi2_SNP_2df = numeric())
+prob.2df.R <- data.frame()
for (i in 3:dim(dose)[2]) {
indexHom <- 3 + ( i - 3 ) * 2
indexHet <- indexHom + 1
@@ -195,15 +126,18 @@
smA1A1 <- summary(model)$coef[5, 1:2]
lrt <- 2 * ( logLik( model ) - logLik( model.0 ) )
- row <- data.frame(
- beta_SNP_A1A2 = smA1A2[1],
- sebeta_SNP_A1A2 = smA1A2[2],
- beta_SNP_A1A1 = smA1A1[1],
- sebeta_SNP_A1A1 = smA1A1[2],
- chi2_SNP_2df = lrt)
+ rsq <- resPa2df[i-2, "Rsq"]
+ if( rsq < rsq.thresh) {
+ row <- c(rsq, NaN, NaN, NaN, NaN, NaN)
+ } else {
+ row <- c(rsq, smA1A2[1], smA1A2[2], smA1A1[1], smA1A1[2], lrt)
+
+ }
prob.2df.R <- rbind(prob.2df.R, row)
}
+colnames(prob.2df.R) <- cols2df
rownames(prob.2df.R) <- NULL
stopifnot( all.equal(resPa2df, prob.2df.R, tol=tol) )
+cat("2df\n")
-cat("OK\n")
+cat("\t\t\t\t\t\tOK\n")
Modified: pkg/ProbABEL/checks/R-tests/run_models_in_R_palogist.R
===================================================================
--- pkg/ProbABEL/checks/R-tests/run_models_in_R_palogist.R 2013-08-27 19:24:17 UTC (rev 1321)
+++ pkg/ProbABEL/checks/R-tests/run_models_in_R_palogist.R 2013-08-29 21:25:08 UTC (rev 1322)
@@ -11,7 +11,6 @@
source(paste0(srcdir, "initial_checks.R"))
-
####
## Run ProbABEL to get the output data we want to compare/verify
####
@@ -22,41 +21,22 @@
resPaAddDose <- read.table(
paste0(tests.path, "logist_add.out.txt"),
- head=TRUE)[,
- c("beta_SNP_add",
- "sebeta_SNP_add",
- "chi2_SNP")]
+ head=TRUE)[, colsAddDose]
resPaAddProb <- read.table(
paste0(tests.path, "logist_prob_add.out.txt"),
- head=TRUE)[, c("beta_SNP_addA1",
- "sebeta_SNP_addA1",
- "chi2_SNP_A1")]
+ head=TRUE)[, colsAddProb]
resPaDom <- read.table(
paste0(tests.path, "logist_prob_domin.out.txt"),
- head=TRUE)[,
- c("beta_SNP_domA1",
- "sebeta_SNP_domA1",
- "chi2_SNP_domA1")]
+ head=TRUE)[, colsDom]
resPaRec <- read.table(
paste0(tests.path, "logist_prob_recess.out.txt"),
- head=TRUE)[,
- c("beta_SNP_recA1",
- "sebeta_SNP_recA1",
- "chi2_SNP_recA1")]
+ head=TRUE)[, colsRec]
resPaOdom <- read.table(
paste0(tests.path, "logist_prob_over_domin.out.txt"),
- head=TRUE)[,
- c("beta_SNP_odomA1",
- "sebeta_SNP_odomA1",
- "chi2_SNP_odomA1")]
+ head=TRUE)[, colsOdom]
resPa2df <- read.table(
paste0(tests.path, "logist_prob_2df.out.txt"),
- head=TRUE)[,
- c("beta_SNP_A1A2",
- "sebeta_SNP_A1A2",
- "beta_SNP_A1A1",
- "sebeta_SNP_A1A1",
- "chi2_SNP_2df")]
+ head=TRUE)[, cols2df]
####
## run analysis in R
@@ -64,136 +44,79 @@
attach(pheno)
cat("Comparing R output with ProbABEL output\t\t")
-## run analysis on dose
-dose.add.R <- data.frame(beta_SNP_add = numeric(),
- sebeta_SNP_add = numeric(),
- chi2_SNP = numeric())
-prob.add.R <- data.frame(beta_SNP_addA1 = numeric(),
- sebeta_SNP_addA1 = numeric(),
- chi2_SNP_A1 = numeric())
-for (i in 3:dim(dose)[2]) {
- noNA <- which( !is.na(dose[, i]) )
- model.0 <- glm( chd[noNA] ~ sex[noNA] + age[noNA] + othercov[noNA],
- family=binomial)
- model.dose <- glm( chd ~ sex + age + othercov + dose[, i],
- family=binomial)
- sm.dose <- summary(model.dose)$coef[5, 1:2]
+run.model <- function(model0.txt, model.txt, snpdata) {
+ resultR <- data.frame()
+ for (i in 3:dim(dose)[2]) {
+ indexHom <- 3 + ( i - 3 ) * 2
+ indexHet <- indexHom + 1
+ snp <- eval(parse(text=snpdata))
- model.prob <- glm( chd ~ sex + age + othercov + doseFromProb[,
- i],
- family=binomial )
- sm.prob <- summary(model.prob)$coef[5, 1:2]
+ noNA <- which( !is.na(snp) )
+ model.0 <- eval(parse(text=model0.txt))
+ model <- eval(parse(text=model.txt))
+ sm <- summary(model)$coef[5, 1:2]
+ lrt <- 2 * ( logLik( model ) - logLik( model.0 ) )
- lrt.dose <- 2 * ( logLik( model.dose ) - logLik( model.0 ) )
- lrt.prob <- 2 * ( logLik( model.prob ) - logLik( model.0 ) )
+ rsq <- Rsq[i-2]
+ if( rsq < rsq.thresh) {
+ row <- c(rsq, NaN, NaN, NaN)
+ } else {
+ row <- c(rsq, sm[1], sm[2], lrt)
+ }
+ resultR <- rbind(resultR, row)
+ }
+ return(resultR)
+}
- row <- data.frame(
- beta_SNP_add = sm.dose[1],
- sebeta_SNP_add = sm.dose[2],
- chi2_SNP = lrt.dose)
- dose.add.R <- rbind(dose.add.R, row)
- row <- data.frame(
- beta_SNP_addA1 = sm.prob[1],
- sebeta_SNP_addA1 = sm.prob[2],
- chi2_SNP_A1 = lrt.prob)
- prob.add.R <- rbind(prob.add.R, row)
-}
+model.fn.0 <-
+ "glm( chd[noNA] ~ sex[noNA] + age[noNA] + othercov[noNA], family=binomial)"
+model.fn <- "glm( chd ~ sex + age + othercov + snp, family=binomial )"
+
+## Additive model, dosages
+snpdose <- "dose[, i]"
+dose.add.R <- run.model(model.fn.0, model.fn, snpdose)
+colnames(dose.add.R) <- colsAddDose
rownames(dose.add.R) <- NULL
-rownames(prob.add.R) <- NULL
stopifnot( all.equal(resPaAddDose, dose.add.R, tol=tol) )
+cat("additive ")
+## Additive model, probabilities
+snpprob <- "doseFromProb[, i]"
+prob.add.R <- run.model(model.fn.0, model.fn, snpprob)
+colnames(prob.add.R) <- colsAddProb
+rownames(prob.add.R) <- NULL
stopifnot( all.equal(resPaAddProb, prob.add.R, tol=tol) )
+cat("additive ")
-
## dominant model
-prob.dom.R <- data.frame(beta_SNP_domA1 = numeric(),
- sebeta_SNP_domA1 = numeric(),
- chi2_SNP_domA1 = numeric())
-for (i in 3:dim(dose)[2]) {
- indexHom <- 3 + ( i - 3 ) * 2
- indexHet <- indexHom + 1
- regProb <- prob[, indexHom] + prob[, indexHet]
-
- noNA <- which( !is.na(regProb) )
- model.0 <- glm( chd[noNA] ~ sex[noNA] + age[noNA] + othercov[noNA],
- family=binomial )
- model <- glm( chd ~ sex + age + othercov + regProb,
- family=binomial )
- sm <- summary(model)$coef[5, 1:2]
- lrt <- 2 * ( logLik( model ) - logLik( model.0 ) )
-
- row <- data.frame(
- beta_SNP_domA1 = sm[1],
- sebeta_SNP_domA1 = sm[2],
- chi2_SNP_domA1 = lrt)
- prob.dom.R <- rbind(prob.dom.R, row)
-}
+snpprob <- "prob[, indexHom] + prob[, indexHet]"
+prob.dom.R <- run.model(model.fn.0, model.fn, snpprob)
+colnames(prob.dom.R) <- colsDom
rownames(prob.dom.R) <- NULL
stopifnot( all.equal(resPaDom, prob.dom.R, tol=tol) )
+cat("dominant ")
-
## recessive model
-prob.rec.R <- data.frame(beta_SNP_recA1 = numeric(),
- sebeta_SNP_recA1 = numeric(),
- chi2_SNP_recA1 = numeric())
-for (i in 3:dim(dose)[2]) {
- indexHom <- 3 + ( i - 3 ) * 2
- indexHet <- indexHom + 1
- regProb <- prob[, indexHom]
-
- noNA <- which( !is.na(regProb) )
- model.0 <- glm( chd[noNA] ~ sex[noNA] + age[noNA] + othercov[noNA],
- family=binomial )
- model <- glm( chd ~ sex + age + othercov + regProb,
- family=binomial )
- sm <- summary(model)$coef[5, 1:2]
- lrt <- 2 * ( logLik( model ) - logLik( model.0 ) )
-
- row <- data.frame(
- beta_SNP_recA1 = sm[1],
- sebeta_SNP_recA1 = sm[2],
- chi2_SNP_recA1 = lrt)
- prob.rec.R <- rbind(prob.rec.R, row)
-}
+snpprob <- "prob[, indexHom]"
+prob.rec.R <- run.model(model.fn.0, model.fn, snpprob)
+colnames(prob.rec.R) <- colsRec
rownames(prob.rec.R) <- NULL
stopifnot( all.equal(resPaRec, prob.rec.R, tol=tol) )
+cat("recessive ")
-
## over-dominant model
-prob.odom.R <- data.frame(beta_SNP_odomA1 = numeric(),
- sebeta_SNP_odomA1 = numeric(),
- chi2_SNP_odomA1 = numeric())
-for (i in 3:dim(dose)[2]) {
- indexHom <- 3 + ( i - 3 ) * 2
- indexHet <- indexHom + 1
- regProb <- prob[, indexHet]
-
- noNA <- which( !is.na(regProb) )
- model.0 <- glm( chd[noNA] ~ sex[noNA] + age[noNA] + othercov[noNA],
- family=binomial )
- model <- glm( chd ~ sex + age + othercov + regProb,
- family=binomial )
- sm <- summary(model)$coef[5, 1:2]
- lrt <- 2 * ( logLik( model ) - logLik( model.0 ) )
-
- row <- data.frame(
- beta_SNP_odomA1 = sm[1],
- sebeta_SNP_odomA1 = sm[2],
- chi2_SNP_odomA1 = lrt)
- prob.odom.R <- rbind(prob.odom.R, row)
-}
+snpprob <- "prob[, indexHet]"
+prob.odom.R <- run.model(model.fn.0, model.fn, snpprob)
+colnames(prob.odom.R) <- colsOdom
rownames(prob.odom.R) <- NULL
stopifnot( all.equal(resPaOdom, prob.odom.R, tol=tol) )
+cat("overdominant ")
## 2df model
-prob.2df.R <- data.frame(beta_SNP_A1A2 = numeric(),
- sebeta_SNP_A1A2 = numeric(),
- beta_SNP_A1A1 = numeric(),
- sebeta_SNP_A1A1 = numeric(),
- chi2_SNP_2df = numeric())
+prob.2df.R <- data.frame()
for (i in 3:dim(dose)[2]) {
indexHom <- 3 + ( i - 3 ) * 2
indexHet <- indexHom + 1
@@ -208,15 +131,18 @@
smA1A1 <- summary(model)$coef[6, 1:2]
lrt <- 2 * ( logLik( model ) - logLik( model.0 ) )
- row <- data.frame(
- beta_SNP_A1A2 = smA1A2[1],
- sebeta_SNP_A1A2 = smA1A2[2],
- beta_SNP_A1A1 = smA1A1[1],
- sebeta_SNP_A1A1 = smA1A1[2],
- chi2_SNP_2df = lrt)
+ rsq <- resPa2df[i-2, "Rsq"]
+ if( rsq < rsq.thresh) {
+ row <- c(rsq, NaN, NaN, NaN, NaN, NaN)
+ } else {
+ row <- c(rsq, smA1A2[1], smA1A2[2], smA1A1[1], smA1A1[2], lrt)
+
+ }
prob.2df.R <- rbind(prob.2df.R, row)
}
+colnames(prob.2df.R) <- cols2df
rownames(prob.2df.R) <- NULL
stopifnot( all.equal(resPa2df, prob.2df.R, tol=tol) )
+cat("2df\n")
-cat("OK\n")
+cat("\t\t\t\t\t\tOK\n")
Modified: pkg/ProbABEL/checks/inputfiles/mmscore_gen.mlinfo
===================================================================
--- pkg/ProbABEL/checks/inputfiles/mmscore_gen.mlinfo 2013-08-27 19:24:17 UTC (rev 1321)
+++ pkg/ProbABEL/checks/inputfiles/mmscore_gen.mlinfo 2013-08-29 21:25:08 UTC (rev 1322)
@@ -1,6 +1,6 @@
SNP Al1 Al2 Freq1 MAF Quality Rsq
-rs735579 A B 0.5847 0.5847 0.5847 0.5847
-rs9088604 A B 0.5847 0.5847 0.5847 0.5847
+rs735579 AC B 0.5847 0.5847 0.5847 0.5847
+rs9088604 A CTG 0.5847 0.5847 0.5847 0.5847
rs1413801 A B 0.5847 0.5847 0.5847 0.5847
rs4911638 A B 0.5847 0.5847 0.5847 0.5847
-rs70099 A B 0.5847 0.5847 0.5847 0.5847
+rs70099 A B 0.5847 0.5847 0.5847 1.2e-17
Modified: pkg/ProbABEL/checks/inputfiles/test.mlinfo
===================================================================
--- pkg/ProbABEL/checks/inputfiles/test.mlinfo 2013-08-27 19:24:17 UTC (rev 1321)
+++ pkg/ProbABEL/checks/inputfiles/test.mlinfo 2013-08-29 21:25:08 UTC (rev 1322)
@@ -3,4 +3,4 @@
rs8102643 C TGGT 0.5847 0.4150 0.9308 0.8685
rs8102615 T A 0.5006 0.4702 0.9375 0.8932
rs8105536 G A 0.5783 0.4213 0.9353 0.8832
-rs2312724 T C 0.9122 0.0877 0.9841 0.9232
+rs2312724 T C 0.9122 0.0877 0.9841 1.3e-17
Modified: pkg/ProbABEL/checks/verified_results/height_add.out.txt
===================================================================
--- pkg/ProbABEL/checks/verified_results/height_add.out.txt 2013-08-27 19:24:17 UTC (rev 1321)
+++ pkg/ProbABEL/checks/verified_results/height_add.out.txt 2013-08-29 21:25:08 UTC (rev 1322)
@@ -3,4 +3,4 @@
rs8102643 C TGGT 0.5847 0.415 0.9308 0.8685 181 0.808177 1 207859 -25.7632 86.4877 0.0907169
rs8102615 T A 0.5006 0.4702 0.9375 0.8932 182 0.8665 2 211970 -40.7722 102.559 0.161525
rs8105536 G A 0.5783 0.4213 0.9353 0.8832 182 0.791701 2 212033 35.3602 81.7869 0.191023
-rs2312724 T C 0.9122 0.0877 0.9841 0.9232 182 0.933464 2 217034 -112.235 141.169 0.645146
+rs2312724 T C 0.9122 0.0877 0.9841 1.3e-17 182 0.933464 2 217034 nan nan nan
Modified: pkg/ProbABEL/checks/verified_results/height_base_add.out.txt
===================================================================
--- pkg/ProbABEL/checks/verified_results/height_base_add.out.txt 2013-08-27 19:24:17 UTC (rev 1321)
+++ pkg/ProbABEL/checks/verified_results/height_base_add.out.txt 2013-08-29 21:25:08 UTC (rev 1322)
@@ -3,4 +3,4 @@
rs8102643 C TGGT 0.5847 0.415 0.9308 0.8685 181 0.808177 19 207859 -25.7632 86.4877 0.0907169
rs8102615 T A 0.5006 0.4702 0.9375 0.8932 182 0.8665 19 211970 -40.7722 102.559 0.161525
rs8105536 G A 0.5783 0.4213 0.9353 0.8832 182 0.791701 19 212033 35.3602 81.7869 0.191023
-rs2312724 T C 0.9122 0.0877 0.9841 0.9232 182 0.933464 19 217034 -112.235 141.169 0.645146
+rs2312724 T C 0.9122 0.0877 0.9841 1.3e-17 182 0.933464 19 217034 nan nan nan
Modified: pkg/ProbABEL/checks/verified_results/height_ngp2_2df.out.txt
===================================================================
--- pkg/ProbABEL/checks/verified_results/height_ngp2_2df.out.txt 2013-08-27 19:24:17 UTC (rev 1321)
+++ pkg/ProbABEL/checks/verified_results/height_ngp2_2df.out.txt 2013-08-29 21:25:08 UTC (rev 1322)
@@ -3,4 +3,4 @@
rs8102643 C TGGT 0.5847 0.415 0.9308 0.8685 181 0.808177 1 207859 -820.466 1392.51 -646.111 1054.18 0.426657
rs8102615 T A 0.5006 0.4702 0.9375 0.8932 182 0.8665 2 211970 -239.796 1308.51 -246.734 1102.07 0.185459
rs8105536 G A 0.5783 0.4213 0.9353 0.8832 182 0.791701 2 212033 782.482 1474.58 604.324 1064.24 0.45561
-rs2312724 T C 0.9122 0.0877 0.9841 0.9232 182 0.933464 2 217034 1180.51 1104.39 912.561 1003.84 2.07178
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/genabel -r 1322
More information about the Genabel-commits
mailing list