[Genabel-commits] r902 - in branches/ProbABEL-pacox/v.0.1-3: examples examples/test_cox_ngpreds2 src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sun Apr 29 17:23:12 CEST 2012


Author: yurii
Date: 2012-04-29 17:23:11 +0200 (Sun, 29 Apr 2012)
New Revision: 902

Added:
   branches/ProbABEL-pacox/v.0.1-3/examples/example_palinear_simple.sh
   branches/ProbABEL-pacox/v.0.1-3/examples/test_cox_ngpreds2/
   branches/ProbABEL-pacox/v.0.1-3/examples/test_cox_ngpreds2/run_models_in_R.R
   branches/ProbABEL-pacox/v.0.1-3/examples/test_cox_ngpreds2/run_models_in_R_palinear.R
Modified:
   branches/ProbABEL-pacox/v.0.1-3/src/data.h
   branches/ProbABEL-pacox/v.0.1-3/src/reg1.h
Log:
fixing bug [#1846] (results from mldose and mlprob are not the same in Cox analysis). It turns out that probabilities P(A1A1) and P(A1A2) were swapped specifically in Cox 'update_snp' procedure. Added R scripts testing all models (except 2df) via comparison of pacoxph and R.

Added: branches/ProbABEL-pacox/v.0.1-3/examples/example_palinear_simple.sh
===================================================================
--- branches/ProbABEL-pacox/v.0.1-3/examples/example_palinear_simple.sh	                        (rev 0)
+++ branches/ProbABEL-pacox/v.0.1-3/examples/example_palinear_simple.sh	2012-04-29 15:23:11 UTC (rev 902)
@@ -0,0 +1,7 @@
+echo "analysing Linear model"
+
+../bin/palinear -p height.txt -d test.mldose.2 -i test.mlinfo -m test.map -c 19 -o height.out.txt.dose
+
+../bin/palinear -p height.txt -d test.mlprob --ngpreds=2 -i test.mlinfo -m test.map -c 19 -o height.out.txt.prob
+
+diff height.out.txt.dose_add.out.txt height.out.txt.prob_add.out.txt

Added: branches/ProbABEL-pacox/v.0.1-3/examples/test_cox_ngpreds2/run_models_in_R.R
===================================================================
--- branches/ProbABEL-pacox/v.0.1-3/examples/test_cox_ngpreds2/run_models_in_R.R	                        (rev 0)
+++ branches/ProbABEL-pacox/v.0.1-3/examples/test_cox_ngpreds2/run_models_in_R.R	2012-04-29 15:23:11 UTC (rev 902)
@@ -0,0 +1,92 @@
+library(survival)
+
+###
+# load the data
+###
+
+# load phenotypic data
+pheno <- read.table("../coxph_data.txt",head=TRUE,string=FALSE)
+
+# load genetic DOSE data
+dose <- read.table("../test.mldose.2",head=FALSE,string=FALSE)
+# remove "1->" from the names of dose-IDs
+idNames <- dose[,1]
+idNames <- sub("[0-9]+->","",idNames)
+dose[,1] <- idNames
+# check consistency of names
+table(dose[,1] == pheno[,1])
+
+# load genetic PROB data
+prob <- read.table("../test.mlprob",head=FALSE,string=FALSE)
+# remove "1->" from the names of dose-IDs
+idNames <- prob[,1]
+idNames <- sub("[0-9]+->","",idNames)
+prob[,1] <- idNames
+# check consistency of names
+table(prob[,1] == pheno[,1])
+
+# check consistency DOSE <-> PROB
+doseFromProb <- matrix(NA,ncol=dim(dose)[2],nrow=dim(dose)[1])
+for (i in 3:dim(dose)[2]) {
+	indexHom <- 3 + ( i - 3 ) * 2
+	indexHet <- indexHom + 1
+	doseFromProb[,i] <- prob[,indexHom]*2 + prob[,indexHet]
+	print( table( ( dose[,i] - doseFromProb[,i] ) > 1e-8 ) )
+}
+
+###
+# run analysis
+###
+
+# run ProbABEL
+system("cd ..; sh example_cox.sh; cd test_cox_ngpreds2")
+resPaAddDose <- read.table("../coxph.out.txt.dose_add.out.txt", head=TRUE)[,c("beta_SNP_add","sebeta_SNP_add")]
+resPaAddProb <- read.table("../coxph.out.txt.prob_add.out.txt", head=TRUE)[,c("beta_SNP_addA1","sebeta_SNP_addA1")]
+resPaDom <- read.table("../coxph.out.txt.prob_domin.out.txt", head=TRUE)[,c("beta_SNP_domA1","sebeta_SNP_domA1")]
+resPaRec <- read.table("../coxph.out.txt.prob_recess.out.txt", head=TRUE)[,c("beta_SNP_recA1","sebeta_SNP_recA1")]
+resPaOdom <- read.table("../coxph.out.txt.prob_over_domin.out.txt", head=TRUE)[,c("beta_SNP_odom","sebeta_SNP_odom")]
+
+attach(pheno)
+
+# run analysis on dose
+emptyRes <- resPaAddDose
+emptyRes[] <- NA
+resRAddDose <- resRAddProb <- resRDom <- resRRec <- resROdom <- emptyRes
+for (i in 3:dim(dose)[2]) {
+	smr <- summary( coxph(Surv(fupt_chd,chd) ~ sex + age + othercov + dose[,i] ) )
+	resRAddDose[i-2,] <-  smr$coef[4,c(1,3)]
+	smr <- summary( coxph(Surv(fupt_chd,chd) ~ sex + age + othercov + doseFromProb[,i] ) )
+	resRAddProb[i-2,] <- smr$coef[4,c(1,3)]
+}
+resPaAddDose/resRAddDose
+resPaAddProb/resRAddProb
+
+# dominant model
+for (i in 3:dim(dose)[2]) {
+	indexHom <- 3 + ( i - 3 ) * 2
+	indexHet <- indexHom + 1
+	regProb <- prob[,indexHom] + prob[,indexHet]
+	smr <- summary( coxph(Surv(fupt_chd,chd) ~ sex + age + othercov + regProb ) )
+	resRDom[i-2,] <- smr$coef[4,c(1,3)]
+}
+resPaDom/resRDom
+
+# recessive model
+for (i in 3:dim(dose)[2]) {
+	indexHom <- 3 + ( i - 3 ) * 2
+	indexHet <- indexHom + 1
+	regProb <- prob[,indexHom]
+	smr <- summary( coxph(Surv(fupt_chd,chd) ~ sex + age + othercov + regProb ) )
+	resRRec[i-2,] <- smr$coef[4,c(1,3)]
+}
+resPaRec/resRRec
+
+# over-dominant model
+for (i in 3:dim(dose)[2]) {
+	indexHom <- 3 + ( i - 3 ) * 2
+	indexHet <- indexHom + 1
+	regProb <- prob[,indexHet]
+	smr <- summary( coxph(Surv(fupt_chd,chd) ~ sex + age + othercov + regProb ) )
+	resROdom[i-2,] <- smr$coef[4,c(1,3)]
+}
+resPaOdom/resROdom


Property changes on: branches/ProbABEL-pacox/v.0.1-3/examples/test_cox_ngpreds2/run_models_in_R.R
___________________________________________________________________
Added: svn:mime-type
   + text/plain

Added: branches/ProbABEL-pacox/v.0.1-3/examples/test_cox_ngpreds2/run_models_in_R_palinear.R
===================================================================
--- branches/ProbABEL-pacox/v.0.1-3/examples/test_cox_ngpreds2/run_models_in_R_palinear.R	                        (rev 0)
+++ branches/ProbABEL-pacox/v.0.1-3/examples/test_cox_ngpreds2/run_models_in_R_palinear.R	2012-04-29 15:23:11 UTC (rev 902)
@@ -0,0 +1,74 @@
+###
+# load the data
+###
+
+# load phenotypic data
+pheno <- read.table("../height.txt",head=TRUE,string=FALSE)
+
+# load genetic DOSE data
+dose <- read.table("../test.mldose.2",head=FALSE,string=FALSE)
+# remove "1->" from the names of dose-IDs
+idNames <- dose[,1]
+idNames <- sub("[0-9]+->","",idNames)
+dose[,1] <- idNames
+# check consistency of names
+table(dose[,1] == pheno[,1])
+
+# load genetic PROB data
+prob <- read.table("../test.mlprob",head=FALSE,string=FALSE)
+# remove "1->" from the names of dose-IDs
+idNames <- prob[,1]
+idNames <- sub("[0-9]+->","",idNames)
+prob[,1] <- idNames
+# check consistency of names
+table(prob[,1] == pheno[,1])
+
+# check consistency DOSE <-> PROB
+doseFromProb <- matrix(NA,ncol=dim(dose)[2],nrow=dim(dose)[1])
+for (i in 3:dim(dose)[2]) {
+	indexHom <- 3 + ( i - 3 ) * 2
+	indexHet <- indexHom + 1
+	doseFromProb[,i] <- prob[,indexHom]*2 + prob[,indexHet]
+	print( table( ( dose[,i] - doseFromProb[,i] ) > 1e-8 ) )
+}
+
+###
+# run analysis
+###
+
+attach(pheno)
+
+# run analysis on dose
+for (i in 3:dim(dose)[2]) {
+	smr <- summary( lm( height ~ sex + age + dose[,i] ) )
+	print( smr$coef[4,1:2] )
+	smr <- summary( lm( height ~ sex + age + doseFromProb[,i] ) )
+	print( smr$coef[4,1:2] )
+}
+
+# dominant model
+for (i in 3:dim(dose)[2]) {
+	indexHom <- 3 + ( i - 3 ) * 2
+	indexHet <- indexHom + 1
+	regProb <- prob[,indexHom] + prob[,indexHet]
+	smr <- summary( lm( height ~ sex + age + regProb ) )
+	print( smr$coef[4,1:2] )
+}
+
+# recessive model
+for (i in 3:dim(dose)[2]) {
+	indexHom <- 3 + ( i - 3 ) * 2
+	indexHet <- indexHom + 1
+	regProb <- prob[,indexHom]
+	smr <- summary( lm( height ~ sex + age + regProb ) )
+	print( smr$coef[4,1:2] )
+}
+
+# over-dominant model
+for (i in 3:dim(dose)[2]) {
+	indexHom <- 3 + ( i - 3 ) * 2
+	indexHet <- indexHom + 1
+	regProb <- prob[,indexHet]
+	smr <- summary( lm( height ~ sex + age + regProb ) )
+	print( smr$coef[4,1:2] )
+}


Property changes on: branches/ProbABEL-pacox/v.0.1-3/examples/test_cox_ngpreds2/run_models_in_R_palinear.R
___________________________________________________________________
Added: svn:mime-type
   + text/plain

Modified: branches/ProbABEL-pacox/v.0.1-3/src/data.h
===================================================================
--- branches/ProbABEL-pacox/v.0.1-3/src/data.h	2012-04-29 11:38:41 UTC (rev 901)
+++ branches/ProbABEL-pacox/v.0.1-3/src/data.h	2012-04-29 15:23:11 UTC (rev 902)
@@ -362,7 +362,7 @@
 	{
 		for (int i=0;i<nids;i++) 
 		for (int j=0;j<ngpreds;j++) 
-			X.put((gend.G).get(i,(snpnum*ngpreds+j)),i,(ncov+1-j-1));
+			X.put((gend.G).get(i,(snpnum*ngpreds+j)),i,(ncov-j));
 //			X.put((gend.G).get(i,(snpnum*ngpreds+j)),i,(ncov-ngpreds+1+(ngpreds-j+1)));
 //			X.put((gend.G).get(i,(snpnum*ngpreds+j)),i,(ncov-ngpreds+1+j));
 	}
@@ -491,10 +491,17 @@
 	}
 	void update_snp(gendata &gend, int snpnum)
 	{
-		// note this sorts by "order"!!!
-		for (int i=0;i<nids;i++) 
-		for (int j=0;j<ngpreds;j++) 
-			X.put((gend.G).get(i,(snpnum*ngpreds+j)),(ncov-ngpreds+j),order[i]);
+		/** note this sorts by "order"!!!
+		 * Here we deal with transposed X, hence last two arguments are swapped
+		 * compared to the other 'update_snp'
+		 * Also, the starting column-1 is not necessary for cox X therefore
+		 * 'ncov-j' changes to 'ncov-j-1'
+		**/
+			for (int i=0;i<nids;i++)
+				for (int j=0;j<ngpreds;j++)
+					X.put((gend.G).get(i,(snpnum*ngpreds+j)),(ncov-j-1),order[i]);
+			// OLD VARIANT
+			//			X.put((gend.G).get(i,(snpnum*ngpreds+j)),(ncov-ngpreds+j),order[i]);
 	}
 	~coxph_data()
 	{

Modified: branches/ProbABEL-pacox/v.0.1-3/src/reg1.h
===================================================================
--- branches/ProbABEL-pacox/v.0.1-3/src/reg1.h	2012-04-29 11:38:41 UTC (rev 901)
+++ branches/ProbABEL-pacox/v.0.1-3/src/reg1.h	2012-04-29 15:23:11 UTC (rev 902)
@@ -22,7 +22,8 @@
 
 #include <cmath>
 
-mematrix<double> apply_model(mematrix<double> &X, int model, int interaction, int ngpreds, bool iscox=false, int nullmodel = 0)
+mematrix<double> apply_model(mematrix<double> &X, int model, int interaction,
+		int ngpreds, bool iscox=false, int nullmodel = 0)
 // model 0 = 2 df
 // model 1 = additive 1 df
 // model 2 = dominant 1 df
@@ -159,20 +160,20 @@
 		{
 		nX.reinit(X.nrow,(X.ncol-1));
 		}
-	int c1 = X.ncol-2;
-	int c2 = X.ncol-1;
+	int c1 = X.ncol-2; // column with Prob(A1A2)
+	int c2 = X.ncol-1; // column with Prob(A1A1). Note the order is swapped cf the file!
 	for (int i=0;i<X.nrow;i++)
 	for (int j=0;j<(X.ncol-2);j++)
 		nX[i*nX.ncol+j] = X[i*X.ncol+j];
 	for (int i=0;i<nX.nrow;i++)
 		{
-		if (model==1)
+		if (model==1) // additive
 			nX[i*nX.ncol+c1] = X[i*X.ncol+c1] + 2.*X[i*X.ncol+c2];
-		else if (model==2)
+		else if (model==2) // dominant
 			nX[i*nX.ncol+c1] = X[i*X.ncol+c1] + X[i*X.ncol+c2];
-		else if (model==3)
+		else if (model==3) // recessive
 			nX[i*nX.ncol+c1] = X[i*X.ncol+c2];
-		else if (model==4)
+		else if (model==4) // over-dominant
 			nX[i*nX.ncol+c1] = X[i*X.ncol+c1];
 		if(interaction != 0)	
 		nX[i*nX.ncol+c2] = X[i*nX.ncol+interaction] * nX[i*nX.ncol+c1];//Maksim: interaction with SNP



More information about the Genabel-commits mailing list