[Genabel-commits] r653 - in pkg/VariABEL: R src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Feb 16 01:23:06 CET 2011
Author: maksim
Date: 2011-02-16 01:23:06 +0100 (Wed, 16 Feb 2011)
New Revision: 653
Modified:
pkg/VariABEL/R/var.test.gwaa.R
pkg/VariABEL/src/var_homogeneity_test_C.cpp
Log:
bug fixed: original R regression function changes input parameters. to protect from it the data is copied inside of C function
Modified: pkg/VariABEL/R/var.test.gwaa.R
===================================================================
--- pkg/VariABEL/R/var.test.gwaa.R 2011-02-11 01:56:53 UTC (rev 652)
+++ pkg/VariABEL/R/var.test.gwaa.R 2011-02-16 00:23:06 UTC (rev 653)
@@ -24,9 +24,10 @@
#_____________________________________________________________________________________________________________________________
#Available for user function for genome-wide testing variance homogeneity of trait's distribution.
#
-"var.test.gwaa" <- function(formula, genodata, phenodata, genodata_info="", testname="sqlm", analysis_type="AAvsABvsBB")
+"var.test.gwaa" <- function(formula, genodata, phenodata, genodata_info=NULL, testname="sqlm", analysis_type="AAvsABvsBB")
{
+
if(testname == "bartlett") {testname <- 0}
else if(testname == "levene") {testname <- 1}
else if(testname == "likelihood") {testname <- 2}
@@ -41,36 +42,52 @@
else if(analysis_type == "BBvsAAandAB") {analysis_type <- 3}
else {stop(paste(analysis_type, "is unsupportive type of analysis. Only AAvsABvsBB, AAvsABandBB, ABvsAAandBB, and BBvsAAandAB are supported."))}
-
MAR <- 2
OUT <- "R"
FUN <- "variance_homogeneity_test_C_wrapper"
-
-
-
-attach(phenodata, pos = 2, warn.conflicts = FALSE)
-
idnum <- 0
if(is(formula, "formula"))
{
design_matrix <- model.matrix(formula, data = phenodata)
+# design_matrix <- design_matrix_
trat_name <- as.character(formula[[2]])
trait <- phenodata[,trat_name]
- idnum <- length(trait)
+# idnum <- length(trait)
design_matrix_df <- data.frame(design_matrix)
+
+#Put NAs back to the matrix
+ if(dim(design_matrix_df)[1] != dim(phenodata)[1])
+ {
+ design_matrix_df_with_nas <- matrix(rep(NA, dim(phenodata)[1]*dim(design_matrix_df)[2]), ncol=dim(design_matrix_df)[2], nrow=dim(phenodata)[1])
+ design_matrix_df_with_nas <- data.frame(design_matrix_df_with_nas)
+ formula_vars <- all.vars(formula)
+ na_bool <- rep(T, dim(phenodata)[1])
+ for(term in formula_vars)
+ {
+ na_bool <- na_bool & !is.na(phenodata[,term])
+ }
+ colnames(design_matrix_df_with_nas) <- colnames(design_matrix_df)
+ design_matrix_df_with_nas[na_bool,] <- design_matrix_df
+ design_matrix_df <- design_matrix_df_with_nas
+ }
+
design_matrix_df$snp <- 0 #will be filled into iterator
+ idnum <- dim(design_matrix_df)[1]
+
+
}
- else if(is(formula, "numeric") || is(formula, "integer") || is(formula, "double"))
+# else if(is(formula, "numeric") || is(formula, "integer") || is(formula, "double"))
+ else if(is.character(formula))
{
- trait <- formula
+ trait <- phenodata[,formula]
idnum <- length(trait)
design_matrix_df <- data.frame(X.Intercept=rep(1,idnum), snp=rep(0,idnum))
}
-design_matrix_geno_means_df <- data.frame(X.Intercept=rep(1,idnum), snp=rep(0,idnum))
+#design_matrix_geno_means_df <- data.frame(X.Intercept=rep(1,idnum), snp=rep(0,idnum))
p <- dim(design_matrix_df)[2] #covariates number + 1
@@ -81,7 +98,6 @@
genodata_info_df <- data.frame()
-#Stolen from MixABEL:
if(class(genodata) == "snp.data")
{
print("reading genotype info...")
@@ -103,21 +119,28 @@
gtNcol <- dim(genodata)[2]
genodata <- genodata at data
- if(genodata_info != "" & file.exists(genodata_info))
- {
- print("reading genotype's info...")
- genodata_info_df <- read.table(genodata_info, strings=F, header=T)
-# print(c(dim(genodata_info_df)[1], gtNcol))
- if(dim(genodata_info_df)[1] != gtNcol)
+ if(!is.null(genodata_info))
+ {
+ if(file.exists(genodata_info))
{
- print(paste("warning: File ", genodata_info, " contains information about ", dim(genodata_info_df)[1],
- " SNPs but genotype data contains ", gtNcol, ". Ignoring of the info file.", sep=""))
- }
- genodata_info_df <- data.frame(name=sub("^", "snp", as.character(1:gtNcol)))
+ print("reading genotype's info...")
+ genodata_info_df <- read.table(genodata_info, strings=F, header=T)
+# print(c(dim(genodata_info_df)[1], gtNcol))
+ if(dim(genodata_info_df)[1] != gtNcol)
+ {
+ print(paste("warning: File ", genodata_info, " contains information about ", dim(genodata_info_df)[1],
+ " SNPs but genotype data contains ", gtNcol, ". Ignoring of the info file.", sep=""))
+ genodata_info_df <- data.frame(name=sub("^", "snp", as.character(1:gtNcol)))
+ }
+ }
+ else
+ {
+ print(paste("warning: File ", genodata_info, "does not exist. Trying to run analysis withouth SNP's information..."))
+ genodata_info_df <- data.frame(name=sub("^", "snp", as.character(1:gtNcol)))
+ }
}
else
{
- print(paste("warning: File ", genodata_info, "does not exist. Trying to run analysis withouth SNP's information..."))
genodata_info_df <- data.frame(name=sub("^", "snp", as.character(1:gtNcol)))
}
@@ -137,6 +160,12 @@
}
}
+
+#print(design_matrix_df)
+#print(dim(design_matrix_df))
+
+
+print(c(gtNrow, gtNcol))
print("Start variance analysis...")
results_C <- .Call("iterator", genodata,
as.integer(gtNrow), as.integer(gtNcol),
@@ -147,7 +176,7 @@
as.integer(18), #iterator additional inputa parameters number
as.double(trait),
as.double(data.matrix(design_matrix_df)),
- as.double(data.matrix(design_matrix_geno_means_df)),
+ as.double(data.matrix(design_matrix_df)),
as.integer(p),
as.integer(analysis_type),
as.integer(testname),
@@ -190,7 +219,8 @@
colnames(results_df) <- output_column_names
-results_df
+
+return(results_df)
}
Modified: pkg/VariABEL/src/var_homogeneity_test_C.cpp
===================================================================
--- pkg/VariABEL/src/var_homogeneity_test_C.cpp 2011-02-11 01:56:53 UTC (rev 652)
+++ pkg/VariABEL/src/var_homogeneity_test_C.cpp 2011-02-16 00:23:06 UTC (rev 653)
@@ -66,8 +66,10 @@
//_________________________________________________________________________________________________
-void variance_homogeneity_test_C(double* snp, double *trait, double *design_matrix_geno_means, double *design_matrix, int *p_, long unsigned* nids_, double *betas, double *se, double * chi2, int * df, double * residuals, int *analys_type_, int * is_trait_na, int *testname, /*auxiliary variables:*/ double *qty, int *jpvt, double *qraux, double *work, double *v, double *x_for_ch2inv)
+void variance_homogeneity_test_C(double* snp, double *trait, double *design_matrix_copy, double *design_matrix, int *p_, long unsigned* nids_, double *betas, double *se, double * chi2, int * df, double * residuals, int *analys_type_, int * is_trait_na, int *testname, /*auxiliary variables:*/ double *qty, int *jpvt, double *qraux, double *work, double *v, double *x_for_ch2inv)
{
+//std::cout<<"variance_homogeneity_test_C: START\n";
+
std::list<my_small_vector> trait_groups;
static chisq_df chisq;
@@ -91,40 +93,119 @@
long int nids = *nids_;
- int i_start;
- i_start = nids*(p-1);
- int i_stop;
- i_stop = nids*p;
+
//fillfull the last covariates by the values from snp
- for(int i=i_start ; i<i_stop ; i++) {design_matrix[i]=snp[i-i_start];}
+ for(int i=nids*(p-1) ; i<nids*p ; i++)
+ {
+ //std::cout<<"design_matrix_copy["<<i<<"]="<<design_matrix_copy[i]<<", snp["<<i-nids*(p-1)<<"]="<<snp[i-nids*(p-1)]<<"\n";
+
+// std::cout<<"design_matrix_copy["<<i<<"]="<<design_matrix_copy[i]<<", snp["<<i-nids*(p-1)<<"]="<<snp[i-nids*(p-1)]<<"\n";
+ design_matrix_copy[i]=snp[i-nids*(p-1)];
+ }
+
+// for(int i=0 ; i<nids*p ; i++)
+// {
+// std::cout<<"design_matrix_copy["<<i<<"]="<<design_matrix_copy[i]<<"\n";
+// }
+// std::cout<<"nids="<<nids<<", p="<<p;
+
+// for(int i=0 ; i<nids ; i++)
+// {
+// std::cout<<"is_trait_na["<<i<<"]="<<is_trait_na[i]<<"\n";
+// }
+
+
+// std::cout<<"\np="<<p<<"\n";
+
+
+
+ //drop out the NA values
+ //___________________________________________
+// long unsigned int nids_nona=0;
+// for(int i=0 ; i<nids ; i++)
+// {
+// nids_nona += is_trait_na[i];
+// }
+// nids_nona = nids - nids_nona;
+
+ long unsigned int nids_nona=0;
+ for(int column=0 ; column<p ; column++)
+ {
+ for(int id=0; id<nids ; id++)
+ {
+// std::cout<<"design_matrix_copy["<<id + column*nids<<"]="<<design_matrix_copy[id + column*nids]<<"\n";
+ if(is_trait_na[id] == 0)
+ {
+ design_matrix[nids_nona] = design_matrix_copy[id + column*nids];
+ // std::cout<<"design_matrix["<<nids_nona<<"]="<<design_matrix[nids_nona]<<"\n";
+
+ nids_nona++;
+ }
+ }
+ }
+
+ nids_nona=0;
+ for(int id=0; id<nids ; id++)
+ {
+ if(is_trait_na[id] == 0)
+ {
+ trait[nids_nona] = trait[id];
+ nids_nona++;
+ }
+ }
+
- //Zerozing of genotypic means
+
+// std::cout<<"nids_nona="<<nids_nona<<"\n";
//___________________________________________
+
- int p_geno_mean = 2;
- for(int i=nids; i<2*nids ; i++) {design_matrix_geno_means[i]=snp[i-nids];}
+ //Get rid of major SNP effect
+ //___________________________________________
+ int p_geno_mean = 2;
+ for(int i=0; i<nids_nona ; i++) design_matrix_copy[i] = design_matrix[i];
+ for(int i=0; i<nids_nona ; i++) design_matrix_copy[i+nids_nona] = design_matrix[i +(p-1)*nids_nona];
+// for(int i=0; i<2*nids_nona ; i++)
+// {
+// std::cout<<"design_matrix_copy["<<i<<"]="<<design_matrix_copy[i]<<"\n";
+// }
+
+// for(int i=0; i<nids_nona ; i++)
+// {
+// std::cout<<"trait["<<i<<"]="<<trait[i]<<"\n";
+// }
+
+
linear_regression(
- /*input variables:*/ trait, design_matrix_geno_means, &p_geno_mean, nids_,
+ /*input variables:*/ trait, design_matrix_copy, &p_geno_mean, &nids_nona,
/*return variables:*/ betas, se, residuals,
/*auxiliary variables:*/ qty, jpvt, qraux, work, v, x_for_ch2inv);
//___________________________________________
+
+
+
+// std::cout<<"main effect: beta0="<<betas[0]<<", se0="<<se[0]<<"\n";
+// std::cout<<"main effect: beta1="<<betas[1]<<", se1="<<se[1]<<"\n";
+
+
//___________________________________________
for(int i=0; i<nids ; i++) {residuals[i] = residuals[i]*residuals[i];}
linear_regression(
- /*input variables:*/ residuals, design_matrix, p_, nids_,
+ /*input variables:*/ residuals, design_matrix, p_, &nids_nona,
/*return variables:*/ betas, se, residuals,
/*auxiliary variables:*/ qty, jpvt, qraux, work, v, x_for_ch2inv);
//___________________________________________
+// std::cout<<"dispersion effect: beta="<<betas[p-1]<<", se["<<p-1<<"]="<<se[p-1]<<"\n";
// for(int i=0 ; i<p ; i++) {std::cout<<"betas["<<i<<"]="<<betas[i]<<", se["<<i<<"]="<<se[i]<<"\n";}
@@ -134,98 +215,9 @@
*df = 1;
-// //dqrls_(double *x, int *n, int *p, double *y, int *ny, double *tol, double *b, double *rsd, double *qty, int *k, int *jpvt, double *qraux, double *work);
-// int n=*nids;
-// int p = *p_;
-//// double *x = new double[n*p];
-//// for(int i=0 ; i<n ; i++) {x[i]=1.;}
-// int i_start = n*(p-1);
-// int i_stop = n*p;
-//
-//
-//// int p = cov_num+1; //cov number + 1
-// int ny = 1; //numebr of dependable variable
-// double tol=1e-07;
-// double *b = new double[p];
-// double *rsd = new double[n];
-// double *qty = new double[n];
-// int k;
-// int *jpvt = new int[p];
-// double *qraux = new double[p];
-// double *work = new double[2*p];
-// dqrls_(design_matrixt, &n, &p, trait, &ny, &tol, b, rsd, qty, &k, jpvt, qraux, work);
-//
-// for(int i=0 ; i<p ; i++)
-// {
-// std::cout<<"b["<<i<<"]="<<b[i]<<"\n";
-// }
-//
-//
-// double *v = new double[2*p];
-// double *x_for_ch2inv = new double[2*p];
-//
-// for(int col=0 ; col<p ; col++)
-// {
-// for(int row=0 ; row<p ; row++)
-// {
-// x_for_ch2inv[row + p*col] = design_matrixt[row+n*col];
-// }
-//
-// }
-//
-//// x_for_ch2inv[0] = -59.0339;
-//// x_for_ch2inv[1] = 0.0169394;
-//// x_for_ch2inv[2] = -2.06661;
-//// x_for_ch2inv[3] = 11.2129;
-// int info;
-// ch2inv_(x_for_ch2inv, &p, &p, v, &info);
-//
-//
-// for(int col=0 ; col<p ; col++)
-// {
-// for(int row=0 ; row<p ; row++)
-// {
-// std::cout<<"x_for_ch2inv["<<row + p*col<<"]="<<x_for_ch2inv[row + p*col]<<"\n";
-// }
-// }
-//
-// for(int col=0 ; col<p ; col++)
-// {
-// for(int row=0 ; row<p ; row++)
-// {
-// std::cout<<"v["<<row + p*col<<"]="<<v[row + p*col]<<"\n";
-// }
-// }
-//
-//
-//
-////calculate rss (residuals square sum)
-// double rss=0;
-// for(int i=0 ; i<n ; i++)
-// {
-// rss += rsd[i]*rsd[i];
-// }
-//
-// static double rdf;
-// rdf = n - p;
-// static double resvar;
-// resvar = rss/rdf;
-//
-//
-// double *se = new double[p];
-// for(int i=0 ; i<p ; i++)
-// {
-// se[i] = sqrt(v[i*n+i] * resvar); //diagonal ellements of v multiply by resvar
-// std::cout<<"se["<<i<<"]="<<se[i]<<"\n";
-// }
-//
-// delete[] b;
-// delete[] rsd;
-// delete[] qty;
-// delete[] jpvt;
-// delete[] work;
}
+//std::cout<<"variance_homogeneity_test_C: END\n";
return;
} // end of function bartlett_test
//_________________________________________________________________________________________________
More information about the Genabel-commits
mailing list