[Genabel-commits] r1610 - in pkg/VariABEL: R man src/ITERlib src/VARlib

Fri Feb 14 08:58:44 CET 2014

Author: maksim
Date: 2014-02-14 08:58:43 +0100 (Fri, 14 Feb 2014)
New Revision: 1610

Modified:
   pkg/VariABEL/R/var_test_gwaa.R
   pkg/VariABEL/man/var_test_gwaa.Rd
   pkg/VariABEL/src/ITERlib/iterator.cpp
   pkg/VariABEL/src/ITERlib/iterator_functions.cpp
   pkg/VariABEL/src/VARlib/var_homogeneity_test_C.cpp
Log:
The 2df test is implemented. Now, if analysis_type=2df, then the regression model includes P_AB and P_BB (probabilities of having genotypes AB and BB) and presence of association is tested jointly for P_AB and P_BB. We still need to test it on the possible bugs.

Modified: pkg/VariABEL/R/var_test_gwaa.R
===================================================================

--- pkg/VariABEL/R/var_test_gwaa.R	2014-02-13 20:42:52 UTC (rev 1609)
+++ pkg/VariABEL/R/var_test_gwaa.R	2014-02-14 07:58:43 UTC (rev 1610)
@@ -28,12 +28,13 @@
 {
 
 
-if(testname == "bartlett") {testname <- 0}
-else if(testname == "levene") {testname <- 1}
-else if(testname == "likelihood") {testname <- 2}
-else if(testname == "kolmogorov_smirnov") {testname <- 3}
-else if(testname == "svlm") {testname <- 4}
-else {stop(paste(testname, "is unsupportive type of test. Only levene and svlm are supported."))}
+#if(testname == "bartlett") {testname <- 0}
+#else if(testname == "levene") {testname <- 1}
+#else if(testname == "likelihood") {testname <- 2}
+#else if(testname == "kolmogorov_smirnov") {testname <- 3}
+if(testname == "svlm") {testname <- 4}
+#else {stop(paste(testname, "is unsupportive type of test. Only levene and svlm are supported."))}
+else {stop(paste(testname, "is unsupportive type of test. Only svlm is supported."))}
 
 
 if(analysis_type == "AAvsABvsBB") {analysis_type <- 0}
@@ -56,7 +57,6 @@
 
 if(is(formula, "formula"))
 	{
-   print("Formula=formula")
 	design_matrix <- model.matrix(formula, data = phenodata)
 #	design_matrix <- design_matrix_
 	trat_name <- as.character(formula[[2]])
@@ -96,16 +96,13 @@
 #	else if(is(formula, "numeric") || is(formula, "integer") || is(formula, "double"))
 	else if(is.character(formula)) 
 	{
-   print("Formula=char")
 	trait <- phenodata[,formula]
 	idnum <- length(trait)
 	
    
-	print(c("analysis_type=", analysis_type))	
 	if(analysis_type == 4) 
       {
       design_matrix_df <- data.frame(X.Intercept=rep(1,idnum), snp_AB=rep(0,idnum), snp_BB=rep(0,idnum))
-      print("design_matrix_df 2df")
       }
    else
       {
@@ -127,6 +124,7 @@
 
 if(class(genodata) == "snp.data")
 	{
+   if(analysis_type == 4) {stop("2df test is available only for imputed SNPs. You should use databel file format.")}
 	print("reading genotype info...")
 	genodata_info_df <- data.frame(name=genodata at snpnames,
 				 												 coding=as.character(genodata at coding),
@@ -242,7 +240,7 @@
 
 #print(cov_names)
 
-output_column_names <- c(colnames(genodata_info_df), "chisq", "df", "Intercept_effect", "Intercept_se")
+output_column_names <- c(colnames(genodata_info_df), "Intercept_effect", "Intercept_se")
 
 
 #print(output_column_names)
@@ -269,17 +267,34 @@
    output_column_names <- c(output_column_names, "snp_se_dispertion")
    }
 
+if(testname == 4)
+   {
+   statistics_name <- "F"
+   df_name <- "df1_df2"
+   output_column_names <- c(output_column_names, df_name, statistics_name)
+   colnames(results_df) <- output_column_names
+   
+   df1_ <- 1
+   if(analysis_type == 4) {df1_ <- 2}
+   results_df$P_value <- pf(results_df$F, df1=df1_, df2=results_df[,df_name], lower.tail=F)
+   results_df[,df_name] <- paste(df1_, results_df[,df_name],sep="_")
+   }
+else
+   {
+   statistics_name <- "chisq"
+   df_name <- "df"
+   output_column_names <- c(output_column_names, df_name, statistics_name)
+   colnames(results_df) <- output_column_names
+   results_df$P_value <- pchisq(results_df$chisq, df=results_df$df, lower.tail=F)
+   }
 
-colnames(results_df) <- output_column_names
+#if(testname != 4)
+#	{
+#	results_df <- results_df[,1:7]
+#	}
 
-if(testname != 4)
-	{
-	results_df <- results_df[,1:7]
-	}
 
+
+
 return(results_df)
 }
-
-
-
-

Modified: pkg/VariABEL/man/var_test_gwaa.Rd
===================================================================
--- pkg/VariABEL/man/var_test_gwaa.Rd	2014-02-13 20:42:52 UTC (rev 1609)
+++ pkg/VariABEL/man/var_test_gwaa.Rd	2014-02-14 07:58:43 UTC (rev 1610)
@@ -28,7 +28,7 @@
   each henotypic group is testes against other two, 
   \code{AAvsABandBB} - group AA tested against AB and BB,
   \code{ABvsAAandBB} - AB against AA and BB, \code{BBvsAAandAB} - BB
-  against AA and AB. Only available for typed SNPs. Note that the input parameter analysis_type is ignored if testname="svlm"}
+  against AA and AB, \code{2df} - two degrees of freedom test for testing interaction without assuming any genetic model. In this case, probabilities P_ABB and P_BB of having genotype AB and BB respectively are tested on a presence of interaction.}
 }
 
 \details{
@@ -43,13 +43,13 @@
 }
 
 \value{
-The output is a \code{data.frame} object. The table contains the chi^2 of
-the variance heterogeneity test (the name is \code{chisq}) the effects
+The output is a \code{data.frame} object. The table contains the F-statistic (and its P_value) for 'svlm' test and  chi^2 for other tests of
+the variance heterogeneity test (the name is \code{F} \code{chisq}) the effects
 and standard errors of all covariates included into the regression
 model, main SNP effect (the names are \code{snp_eff} and
 \code{snp_se}). In the case of the svlm test the columns
 \code{snp_eff_dispertion} and \code{snp_se_dispertion} contain effect of
-a SNP on the squared values of the trait.
+a SNP on the squared values of the trait. The column \code{df1_df2} contains numerator and denominator degrees of freedom for F-statistic.
 }
 
 

Modified: pkg/VariABEL/src/ITERlib/iterator.cpp
===================================================================
--- pkg/VariABEL/src/ITERlib/iterator.cpp	2014-02-13 20:42:52 UTC (rev 1609)
+++ pkg/VariABEL/src/ITERlib/iterator.cpp	2014-02-14 07:58:43 UTC (rev 1610)
@@ -251,7 +251,6 @@
 
 		// Get the dimensions of the output the function of our choosing will be giving
 		pMethod(0, nrow, step, 0, ncol_multi, nrow_new, narg, argList);
-		Rprintf("ncol_multi,nrow_new=%d,%d\n",ncol_multi,nrow_new);
 		// Allocate vector
 		// Start output SEXP for passing to R
 		// Even when the output is put into a filevector, we still return an (empty) SEXP
@@ -337,7 +336,6 @@
 			 **/
 
 			// Apply function of choosing
-         Rprintf("Run analysis\n");
 			pMethod(internal_data, nrow, step, out_data, ncol_multi, nrow_new, narg,
 					argList);
 
@@ -352,7 +350,6 @@
 						//          = out_data[k];
 						//REAL(out)[(i_cs * ncol_multi + j) * nrow_new + k]
 						 //         = out_data[j*nrow_new+k];
-                  Rprintf("Write ouput\n");
 						REAL(out)[j*ncol_cs+i_cs+k]
 						          = out_data[j*nrow_new+k];
 		}

Modified: pkg/VariABEL/src/ITERlib/iterator_functions.cpp
===================================================================
--- pkg/VariABEL/src/ITERlib/iterator_functions.cpp	2014-02-13 20:42:52 UTC (rev 1609)
+++ pkg/VariABEL/src/ITERlib/iterator_functions.cpp	2014-02-14 07:58:43 UTC (rev 1610)
@@ -482,8 +482,17 @@
 			{
 			is_trait_na[id_counter]=1;
 			}
-		
 
+	   if(*analys_type == 4)
+         {
+		   if(ISNAN(indata[id_counter+indataHeight])) 
+	   		{
+	   		is_trait_na[id_counter]=1;
+	   		}
+         }
+
+
+
 		}
 	//_________________________________________________________
 
@@ -500,10 +509,8 @@
 
 //	delete[] design_matrix_;
 
-	outdata[0] = *chi2;
-	outdata[1] = double(*df);
 
-	int counter=2;
+	int counter=0;
 	for(int i=0 ; i<((*p)+step) ; i++)
 		{
 		outdata[counter] = betas[i];	
@@ -511,6 +518,9 @@
 		outdata[counter] = se[i];	
 		counter++;
 		}
+	
+	outdata[counter] = double(*df);
+   outdata[counter+1] = *chi2;
 
 //	for(int i=0 ; i<2 + (*INTEGER(argList[3]))*2 ; i++)
 //		{

Modified: pkg/VariABEL/src/VARlib/var_homogeneity_test_C.cpp
===================================================================
--- pkg/VariABEL/src/VARlib/var_homogeneity_test_C.cpp	2014-02-13 20:42:52 UTC (rev 1609)
+++ pkg/VariABEL/src/VARlib/var_homogeneity_test_C.cpp	2014-02-14 07:58:43 UTC (rev 1610)
@@ -106,20 +106,16 @@
       start_snp = nids*(p-2);
       }
    else
-     {
-     start_snp = nids*(p-1);
-     }
+      {
+      start_snp = nids*(p-1);
+      }
 
-//   Rprintf("start_snp=%d, stop_snp=%d", start_snp, stop_snp);
    for(int i=start_snp ; i<stop_snp; i++) 
 		{
-      Rprintf("snp[%d]=%f, ", i-start_snp, snp[i-start_snp]);
 		design_matrix_copy[i]=snp[i-start_snp];
 		}
 	
 
-
-
 	long unsigned int nids_nona=0;
 	for(int column=0 ; column<p ; column++)
 		{
@@ -204,9 +200,6 @@
    my_small_vector squared_residuals_adusted_on_snp(residuals, nids_nona, false);
 
 
-   Rprintf("\n\n*analys_type_=%d\n", *analys_type_);
-         Rprintf("betas_disp[%d]=%f\n", 0, betas_disp[0]);
-         Rprintf("se_disp[%d]=%f\n", 0, se_disp[0]);
 
       
 
@@ -219,18 +212,15 @@
     double SSRr = my_var(&squared_residuals)*nids_nona;
     double SSRu = my_var(&squared_residuals_adusted_on_snp)*nids_nona; 
     int q = step;
-     
-      Rprintf("SSRr=%f, SSRu=%f", SSRr, SSRu);
+    int k = step;
 
     static double Fstat;
-    Fstat = ((SSRr-SSRu)/q)/(SSRu/(nids_nona-1));
+    Fstat = ((SSRr-SSRu)/q)/(SSRu/(nids_nona - k - 1));
 
 //    if(*analys_type_ == 4)
 //      {
     for(unsigned i=0 ; i<step ; i++)
        {
-       Rprintf("betas_disp[%d]=%f\n", i+1, betas_disp[i+1]);
-       Rprintf("se_disp[%d]=%f\n", i+1, se_disp[i+1]);
        betas[p+i] = betas_disp[i+1];
        se[p+i]	   = se_disp[i+1];
        }
@@ -245,9 +235,8 @@
 //    }
 	  
      chi2[0]  = Fstat;
-	  df[0]    = nids_nona;
+	  df[0]    = nids_nona - k -1;
    
-   Rprintf("END!!!!!!!!!!!");
    }
 
 return;