[Genabel-commits] r653 - in pkg/VariABEL: R src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Feb 16 01:23:06 CET 2011


Author: maksim
Date: 2011-02-16 01:23:06 +0100 (Wed, 16 Feb 2011)
New Revision: 653

Modified:
   pkg/VariABEL/R/var.test.gwaa.R
   pkg/VariABEL/src/var_homogeneity_test_C.cpp
Log:
bug fixed: original R regression function changes input parameters. to protect from it the data is copied inside of C function

Modified: pkg/VariABEL/R/var.test.gwaa.R
===================================================================
--- pkg/VariABEL/R/var.test.gwaa.R	2011-02-11 01:56:53 UTC (rev 652)
+++ pkg/VariABEL/R/var.test.gwaa.R	2011-02-16 00:23:06 UTC (rev 653)
@@ -24,9 +24,10 @@
 #_____________________________________________________________________________________________________________________________
 #Available for user function for genome-wide testing variance homogeneity of trait's distribution.
 #
-"var.test.gwaa" <- function(formula, genodata, phenodata, genodata_info="", testname="sqlm", analysis_type="AAvsABvsBB")
+"var.test.gwaa" <- function(formula, genodata, phenodata, genodata_info=NULL, testname="sqlm", analysis_type="AAvsABvsBB")
 {
 
+
 if(testname == "bartlett") {testname <- 0}
 else if(testname == "levene") {testname <- 1}
 else if(testname == "likelihood") {testname <- 2}
@@ -41,36 +42,52 @@
 else if(analysis_type == "BBvsAAandAB") {analysis_type <- 3}
 else {stop(paste(analysis_type, "is unsupportive type of analysis. Only AAvsABvsBB, AAvsABandBB, ABvsAAandBB, and BBvsAAandAB are supported."))} 
 
-
 MAR <- 2
 OUT <- "R" 
 FUN <- "variance_homogeneity_test_C_wrapper"
 
 
-
-
-
-attach(phenodata, pos = 2, warn.conflicts = FALSE)
-
 idnum <- 0
 
 if(is(formula, "formula"))
 	{
 	design_matrix <- model.matrix(formula, data = phenodata)
+#	design_matrix <- design_matrix_
 	trat_name <- as.character(formula[[2]])
 	trait <- phenodata[,trat_name]
-	idnum <- length(trait)
+#	idnum <- length(trait)
 	design_matrix_df <- data.frame(design_matrix)
+
+#Put NAs back to the matrix
+	if(dim(design_matrix_df)[1] != dim(phenodata)[1])
+		{
+		design_matrix_df_with_nas <- matrix(rep(NA, dim(phenodata)[1]*dim(design_matrix_df)[2]), ncol=dim(design_matrix_df)[2], nrow=dim(phenodata)[1])
+		design_matrix_df_with_nas <- data.frame(design_matrix_df_with_nas)
+		formula_vars <- all.vars(formula)
+		na_bool <- rep(T, dim(phenodata)[1])
+		for(term in formula_vars)
+			{
+			na_bool <- na_bool & !is.na(phenodata[,term])
+			}
+		colnames(design_matrix_df_with_nas) <- colnames(design_matrix_df)
+		design_matrix_df_with_nas[na_bool,] <- design_matrix_df	
+		design_matrix_df <- design_matrix_df_with_nas
+		}
+			
 	design_matrix_df$snp <- 0 #will be filled into iterator
+	idnum <- dim(design_matrix_df)[1]
+	
+	
 	}
-	else if(is(formula, "numeric") || is(formula, "integer") || is(formula, "double"))
+#	else if(is(formula, "numeric") || is(formula, "integer") || is(formula, "double"))
+	else if(is.character(formula)) 
 	{
-	trait <- formula
+	trait <- phenodata[,formula]
 	idnum <- length(trait)
 	design_matrix_df <- data.frame(X.Intercept=rep(1,idnum), snp=rep(0,idnum))
 	}
 
-design_matrix_geno_means_df <- data.frame(X.Intercept=rep(1,idnum), snp=rep(0,idnum))
+#design_matrix_geno_means_df <- data.frame(X.Intercept=rep(1,idnum), snp=rep(0,idnum))
 
 
 p <- dim(design_matrix_df)[2] #covariates number + 1
@@ -81,7 +98,6 @@
 genodata_info_df <- data.frame()
 
 
-#Stolen from MixABEL:
 if(class(genodata) == "snp.data")
 	{
 	print("reading genotype info...")
@@ -103,21 +119,28 @@
 	gtNcol <- dim(genodata)[2]
 	genodata <- genodata at data
 	
-	if(genodata_info != "" & file.exists(genodata_info))
-		{
-		print("reading genotype's info...")
-		genodata_info_df <- read.table(genodata_info, strings=F, header=T)
-#		print(c(dim(genodata_info_df)[1], gtNcol))
-		if(dim(genodata_info_df)[1] != gtNcol) 
+	if(!is.null(genodata_info))
+		{	
+		if(file.exists(genodata_info))
 			{
-			print(paste("warning: File ", genodata_info, " contains information about ", dim(genodata_info_df)[1],
-																											 " SNPs but genotype data contains ", gtNcol, ". Ignoring of the info file.", sep=""))
-			}		
-		genodata_info_df <- data.frame(name=sub("^", "snp", as.character(1:gtNcol)))
+			print("reading genotype's info...")
+			genodata_info_df <- read.table(genodata_info, strings=F, header=T)
+#			print(c(dim(genodata_info_df)[1], gtNcol))
+			if(dim(genodata_info_df)[1] != gtNcol) 
+				{
+				print(paste("warning: File ", genodata_info, " contains information about ", dim(genodata_info_df)[1],
+																												 " SNPs but genotype data contains ", gtNcol, ". Ignoring of the info file.", sep=""))
+				genodata_info_df <- data.frame(name=sub("^", "snp", as.character(1:gtNcol)))
+				}		
+			}
+		else
+			{
+			print(paste("warning: File ", genodata_info, "does not exist. Trying to run analysis withouth SNP's information..."))
+			genodata_info_df <- data.frame(name=sub("^", "snp", as.character(1:gtNcol)))
+			}
 		}
 	else
 		{
-		print(paste("warning: File ", genodata_info, "does not exist. Trying to run analysis withouth SNP's information..."))
 		genodata_info_df <- data.frame(name=sub("^", "snp", as.character(1:gtNcol)))
 		}
 
@@ -137,6 +160,12 @@
 		}
 	}	
 
+
+#print(design_matrix_df)
+#print(dim(design_matrix_df))
+
+
+print(c(gtNrow, gtNcol))
 print("Start variance analysis...")
 results_C <- .Call("iterator", genodata,
 														 as.integer(gtNrow), as.integer(gtNcol),
@@ -147,7 +176,7 @@
 														 as.integer(18), #iterator additional inputa parameters number
 														 as.double(trait),
 														 as.double(data.matrix(design_matrix_df)),
-														 as.double(data.matrix(design_matrix_geno_means_df)),
+														 as.double(data.matrix(design_matrix_df)),
 														 as.integer(p),
 														 as.integer(analysis_type),
 														 as.integer(testname),
@@ -190,7 +219,8 @@
 
 colnames(results_df) <- output_column_names
 
-results_df
+
+return(results_df)
 }
 
 

Modified: pkg/VariABEL/src/var_homogeneity_test_C.cpp
===================================================================
--- pkg/VariABEL/src/var_homogeneity_test_C.cpp	2011-02-11 01:56:53 UTC (rev 652)
+++ pkg/VariABEL/src/var_homogeneity_test_C.cpp	2011-02-16 00:23:06 UTC (rev 653)
@@ -66,8 +66,10 @@
 //_________________________________________________________________________________________________
 
 
-void variance_homogeneity_test_C(double* snp, double *trait, double *design_matrix_geno_means, double *design_matrix, int *p_, long unsigned* nids_, double *betas, double *se, double * chi2, int * df, double * residuals, int *analys_type_, int * is_trait_na, int *testname, /*auxiliary variables:*/ double *qty, int *jpvt, double *qraux, double *work, double *v, double *x_for_ch2inv)
+void variance_homogeneity_test_C(double* snp, double *trait, double *design_matrix_copy, double *design_matrix, int *p_, long unsigned* nids_, double *betas, double *se, double * chi2, int * df, double * residuals, int *analys_type_, int * is_trait_na, int *testname, /*auxiliary variables:*/ double *qty, int *jpvt, double *qraux, double *work, double *v, double *x_for_ch2inv)
 {
+//std::cout<<"variance_homogeneity_test_C: START\n";
+		
 std::list<my_small_vector> trait_groups;
 static chisq_df chisq;
 
@@ -91,40 +93,119 @@
 
 	long int nids = *nids_;
 
-	int i_start;
-  i_start	= nids*(p-1);
-	int i_stop;
-  i_stop	= nids*p;
 
+
 	//fillfull the last covariates by the values from snp
-	for(int i=i_start ; i<i_stop ; i++) {design_matrix[i]=snp[i-i_start];}
+	for(int i=nids*(p-1) ; i<nids*p ; i++) 
+		{
+		//std::cout<<"design_matrix_copy["<<i<<"]="<<design_matrix_copy[i]<<", snp["<<i-nids*(p-1)<<"]="<<snp[i-nids*(p-1)]<<"\n";
+	
+//		std::cout<<"design_matrix_copy["<<i<<"]="<<design_matrix_copy[i]<<", snp["<<i-nids*(p-1)<<"]="<<snp[i-nids*(p-1)]<<"\n";
+		design_matrix_copy[i]=snp[i-nids*(p-1)];
+		}
+	
+//	for(int i=0 ; i<nids*p ; i++) 
+//		{
+//		std::cout<<"design_matrix_copy["<<i<<"]="<<design_matrix_copy[i]<<"\n";
+//		}
+//		std::cout<<"nids="<<nids<<", p="<<p;
+		
 
+//	for(int i=0 ; i<nids ; i++)
+//		{
+//		std::cout<<"is_trait_na["<<i<<"]="<<is_trait_na[i]<<"\n";
+//		}
+
+
+//	std::cout<<"\np="<<p<<"\n";
+
+
+
+	//drop out the NA values
+	//___________________________________________
+//	long unsigned int nids_nona=0;
+//	for(int i=0 ; i<nids ; i++)
+//		{
+//		nids_nona += is_trait_na[i];
+//		}
+//	nids_nona = nids - nids_nona;	
+
+	long unsigned int nids_nona=0;
+	for(int column=0 ; column<p ; column++)
+		{
+		for(int id=0; id<nids ; id++)
+			{
+//			std::cout<<"design_matrix_copy["<<id + column*nids<<"]="<<design_matrix_copy[id + column*nids]<<"\n";
+			if(is_trait_na[id] == 0) 
+				{
+				design_matrix[nids_nona] = design_matrix_copy[id + column*nids];
+	//			std::cout<<"design_matrix["<<nids_nona<<"]="<<design_matrix[nids_nona]<<"\n";
+				
+				nids_nona++;	
+				}
+			}	
+		}
+
+	nids_nona=0;
+	for(int id=0; id<nids ; id++)
+		{
+		if(is_trait_na[id] == 0)
+			{
+			trait[nids_nona] = trait[id];
+			nids_nona++;
+			}
+		}
+
 	
-	//Zerozing of genotypic means
+
+//	std::cout<<"nids_nona="<<nids_nona<<"\n";
 	//___________________________________________
+
 	
-	int p_geno_mean = 2;
-	for(int i=nids; i<2*nids ; i++) {design_matrix_geno_means[i]=snp[i-nids];}
+	//Get rid of major SNP effect
+	//___________________________________________
 	
+	int p_geno_mean = 2;
+	for(int i=0; i<nids_nona ; i++) design_matrix_copy[i] = design_matrix[i];
+	for(int i=0; i<nids_nona ; i++) design_matrix_copy[i+nids_nona] = design_matrix[i +(p-1)*nids_nona];
 
+//	for(int i=0; i<2*nids_nona ; i++)
+//		{
+//		std::cout<<"design_matrix_copy["<<i<<"]="<<design_matrix_copy[i]<<"\n";
+//		}
+
+//	for(int i=0; i<nids_nona ; i++)
+//		{
+//		std::cout<<"trait["<<i<<"]="<<trait[i]<<"\n";
+//		}
+
+
  
  linear_regression(
-				        /*input variables:*/     trait, design_matrix_geno_means, &p_geno_mean, nids_,
+				        /*input variables:*/     trait, design_matrix_copy, &p_geno_mean, &nids_nona,
 								/*return variables:*/    betas, se, residuals,
 								/*auxiliary variables:*/ qty, jpvt, qraux, work, v, x_for_ch2inv);
 
 	//___________________________________________
 
+
 	
+
+
+//	std::cout<<"main effect: beta0="<<betas[0]<<", se0="<<se[0]<<"\n";
+//	std::cout<<"main effect: beta1="<<betas[1]<<", se1="<<se[1]<<"\n";
+
+
 	//___________________________________________
 
 	for(int i=0; i<nids ; i++) {residuals[i] = residuals[i]*residuals[i];}
 	linear_regression(
-				        /*input variables:*/     residuals, design_matrix, p_, nids_,
+				        /*input variables:*/     residuals, design_matrix, p_, &nids_nona,
 								/*return variables:*/    betas, se, residuals,
 								/*auxiliary variables:*/ qty, jpvt, qraux, work, v, x_for_ch2inv);
 
 	//___________________________________________
+//	std::cout<<"dispersion effect: beta="<<betas[p-1]<<", se["<<p-1<<"]="<<se[p-1]<<"\n";
 
 
 //	for(int i=0 ; i<p ; i++) {std::cout<<"betas["<<i<<"]="<<betas[i]<<", se["<<i<<"]="<<se[i]<<"\n";}
@@ -134,98 +215,9 @@
 	*df = 1;
 	
 	
-//	//dqrls_(double *x, int *n, int *p, double *y, int *ny, double *tol, double *b, double *rsd, double *qty, int *k, int *jpvt, double *qraux, double *work);
-//	int n=*nids;
-//	int p = *p_;
-////	double *x = new double[n*p];
-////	for(int i=0 ; i<n ; i++) {x[i]=1.;}
-//	int i_start = n*(p-1);
-//	int i_stop  = n*p;
-//
-//
-////	int p = cov_num+1; //cov number + 1
-//	int ny = 1; //numebr of dependable variable
-//	double tol=1e-07;
-//	double *b  = new double[p];
-//	double *rsd = new double[n];
-//	double *qty = new double[n];
-//	int k;
-//	int *jpvt = new int[p];
-//	double *qraux = new double[p];
-//	double *work = new double[2*p];
-//	dqrls_(design_matrixt, &n, &p, trait, &ny, &tol, b, rsd, qty, &k, jpvt, qraux, work);
-//
-//	for(int i=0 ; i<p ; i++)
-//		{
-//		std::cout<<"b["<<i<<"]="<<b[i]<<"\n";
-//		}
-//
-//
-//	double *v = new double[2*p];
-//	double *x_for_ch2inv = new double[2*p];
-//
-//	for(int col=0 ; col<p ; col++)
-//		{
-//		for(int row=0 ; row<p ; row++)
-//			{
-//			x_for_ch2inv[row + p*col] = design_matrixt[row+n*col];
-//			}
-//		
-//		}
-//
-////	x_for_ch2inv[0] = -59.0339;
-////	x_for_ch2inv[1] = 0.0169394;
-////	x_for_ch2inv[2] = -2.06661;
-////	x_for_ch2inv[3] = 11.2129;
-//	int info;
-//	ch2inv_(x_for_ch2inv, &p, &p, v, &info);
-//
-//	
-//	for(int col=0 ; col<p ; col++)
-//		{
-//		for(int row=0 ; row<p ; row++)
-//			{
-//			std::cout<<"x_for_ch2inv["<<row + p*col<<"]="<<x_for_ch2inv[row + p*col]<<"\n";
-//			}
-//		}
-//	
-//	for(int col=0 ; col<p ; col++)
-//		{
-//		for(int row=0 ; row<p ; row++)
-//			{
-//			std::cout<<"v["<<row + p*col<<"]="<<v[row + p*col]<<"\n";
-//			}
-//		}
-//	
-//
-//
-////calculate rss (residuals square sum)
-//	double rss=0;
-//	for(int i=0 ; i<n ; i++)
-//		{
-//		rss += rsd[i]*rsd[i];
-//		}
-//	
-//	static double rdf;
-//	rdf = n - p;
-//	static double resvar;
-// 	resvar = rss/rdf;
-//
-//
-//	double *se = new double[p];
-//	for(int i=0 ; i<p ; i++)
-//		{
-//		se[i] = sqrt(v[i*n+i] * resvar); //diagonal ellements of v multiply by resvar
-//		std::cout<<"se["<<i<<"]="<<se[i]<<"\n";
-//		}
-//
-// 	delete[] b;
-//  delete[] rsd;
-//  delete[] qty;
-//  delete[] jpvt;
-//  delete[] work;
 	}
 
+//std::cout<<"variance_homogeneity_test_C: END\n";
 return;
 } // end of function bartlett_test
 //_________________________________________________________________________________________________



More information about the Genabel-commits mailing list