[Genabel-commits] r1610 - in pkg/VariABEL: R man src/ITERlib src/VARlib
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Feb 14 08:58:44 CET 2014
Author: maksim
Date: 2014-02-14 08:58:43 +0100 (Fri, 14 Feb 2014)
New Revision: 1610
Modified:
pkg/VariABEL/R/var_test_gwaa.R
pkg/VariABEL/man/var_test_gwaa.Rd
pkg/VariABEL/src/ITERlib/iterator.cpp
pkg/VariABEL/src/ITERlib/iterator_functions.cpp
pkg/VariABEL/src/VARlib/var_homogeneity_test_C.cpp
Log:
The 2df test is implemented. Now, if analysis_type=2df, then the regression model includes P_AB and P_BB (probabilities of having genotypes AB and BB) and presence of association is tested jointly for P_AB and P_BB. We still need to test it on the possible bugs.
Modified: pkg/VariABEL/R/var_test_gwaa.R
===================================================================
--- pkg/VariABEL/R/var_test_gwaa.R 2014-02-13 20:42:52 UTC (rev 1609)
+++ pkg/VariABEL/R/var_test_gwaa.R 2014-02-14 07:58:43 UTC (rev 1610)
@@ -28,12 +28,13 @@
{
-if(testname == "bartlett") {testname <- 0}
-else if(testname == "levene") {testname <- 1}
-else if(testname == "likelihood") {testname <- 2}
-else if(testname == "kolmogorov_smirnov") {testname <- 3}
-else if(testname == "svlm") {testname <- 4}
-else {stop(paste(testname, "is unsupportive type of test. Only levene and svlm are supported."))}
+#if(testname == "bartlett") {testname <- 0}
+#else if(testname == "levene") {testname <- 1}
+#else if(testname == "likelihood") {testname <- 2}
+#else if(testname == "kolmogorov_smirnov") {testname <- 3}
+if(testname == "svlm") {testname <- 4}
+#else {stop(paste(testname, "is unsupportive type of test. Only levene and svlm are supported."))}
+else {stop(paste(testname, "is unsupportive type of test. Only svlm is supported."))}
if(analysis_type == "AAvsABvsBB") {analysis_type <- 0}
@@ -56,7 +57,6 @@
if(is(formula, "formula"))
{
- print("Formula=formula")
design_matrix <- model.matrix(formula, data = phenodata)
# design_matrix <- design_matrix_
trat_name <- as.character(formula[[2]])
@@ -96,16 +96,13 @@
# else if(is(formula, "numeric") || is(formula, "integer") || is(formula, "double"))
else if(is.character(formula))
{
- print("Formula=char")
trait <- phenodata[,formula]
idnum <- length(trait)
- print(c("analysis_type=", analysis_type))
if(analysis_type == 4)
{
design_matrix_df <- data.frame(X.Intercept=rep(1,idnum), snp_AB=rep(0,idnum), snp_BB=rep(0,idnum))
- print("design_matrix_df 2df")
}
else
{
@@ -127,6 +124,7 @@
if(class(genodata) == "snp.data")
{
+ if(analysis_type == 4) {stop("2df test is available only for imputed SNPs. You should use databel file format.")}
print("reading genotype info...")
genodata_info_df <- data.frame(name=genodata at snpnames,
coding=as.character(genodata at coding),
@@ -242,7 +240,7 @@
#print(cov_names)
-output_column_names <- c(colnames(genodata_info_df), "chisq", "df", "Intercept_effect", "Intercept_se")
+output_column_names <- c(colnames(genodata_info_df), "Intercept_effect", "Intercept_se")
#print(output_column_names)
@@ -269,17 +267,34 @@
output_column_names <- c(output_column_names, "snp_se_dispertion")
}
+if(testname == 4)
+ {
+ statistics_name <- "F"
+ df_name <- "df1_df2"
+ output_column_names <- c(output_column_names, df_name, statistics_name)
+ colnames(results_df) <- output_column_names
+
+ df1_ <- 1
+ if(analysis_type == 4) {df1_ <- 2}
+ results_df$P_value <- pf(results_df$F, df1=df1_, df2=results_df[,df_name], lower.tail=F)
+ results_df[,df_name] <- paste(df1_, results_df[,df_name],sep="_")
+ }
+else
+ {
+ statistics_name <- "chisq"
+ df_name <- "df"
+ output_column_names <- c(output_column_names, df_name, statistics_name)
+ colnames(results_df) <- output_column_names
+ results_df$P_value <- pchisq(results_df$chisq, df=results_df$df, lower.tail=F)
+ }
-colnames(results_df) <- output_column_names
+#if(testname != 4)
+# {
+# results_df <- results_df[,1:7]
+# }
-if(testname != 4)
- {
- results_df <- results_df[,1:7]
- }
+
+
return(results_df)
}
-
-
-
-
Modified: pkg/VariABEL/man/var_test_gwaa.Rd
===================================================================
--- pkg/VariABEL/man/var_test_gwaa.Rd 2014-02-13 20:42:52 UTC (rev 1609)
+++ pkg/VariABEL/man/var_test_gwaa.Rd 2014-02-14 07:58:43 UTC (rev 1610)
@@ -28,7 +28,7 @@
each henotypic group is testes against other two,
\code{AAvsABandBB} - group AA tested against AB and BB,
\code{ABvsAAandBB} - AB against AA and BB, \code{BBvsAAandAB} - BB
- against AA and AB. Only available for typed SNPs. Note that the input parameter analysis_type is ignored if testname="svlm"}
+ against AA and AB, \code{2df} - two degrees of freedom test for testing interaction without assuming any genetic model. In this case, probabilities P_ABB and P_BB of having genotype AB and BB respectively are tested on a presence of interaction.}
}
\details{
@@ -43,13 +43,13 @@
}
\value{
-The output is a \code{data.frame} object. The table contains the chi^2 of
-the variance heterogeneity test (the name is \code{chisq}) the effects
+The output is a \code{data.frame} object. The table contains the F-statistic (and its P_value) for 'svlm' test and chi^2 for other tests of
+the variance heterogeneity test (the name is \code{F} \code{chisq}) the effects
and standard errors of all covariates included into the regression
model, main SNP effect (the names are \code{snp_eff} and
\code{snp_se}). In the case of the svlm test the columns
\code{snp_eff_dispertion} and \code{snp_se_dispertion} contain effect of
-a SNP on the squared values of the trait.
+a SNP on the squared values of the trait. The column \code{df1_df2} contains numerator and denominator degrees of freedom for F-statistic.
}
Modified: pkg/VariABEL/src/ITERlib/iterator.cpp
===================================================================
--- pkg/VariABEL/src/ITERlib/iterator.cpp 2014-02-13 20:42:52 UTC (rev 1609)
+++ pkg/VariABEL/src/ITERlib/iterator.cpp 2014-02-14 07:58:43 UTC (rev 1610)
@@ -251,7 +251,6 @@
// Get the dimensions of the output the function of our choosing will be giving
pMethod(0, nrow, step, 0, ncol_multi, nrow_new, narg, argList);
- Rprintf("ncol_multi,nrow_new=%d,%d\n",ncol_multi,nrow_new);
// Allocate vector
// Start output SEXP for passing to R
// Even when the output is put into a filevector, we still return an (empty) SEXP
@@ -337,7 +336,6 @@
**/
// Apply function of choosing
- Rprintf("Run analysis\n");
pMethod(internal_data, nrow, step, out_data, ncol_multi, nrow_new, narg,
argList);
@@ -352,7 +350,6 @@
// = out_data[k];
//REAL(out)[(i_cs * ncol_multi + j) * nrow_new + k]
// = out_data[j*nrow_new+k];
- Rprintf("Write ouput\n");
REAL(out)[j*ncol_cs+i_cs+k]
= out_data[j*nrow_new+k];
}
Modified: pkg/VariABEL/src/ITERlib/iterator_functions.cpp
===================================================================
--- pkg/VariABEL/src/ITERlib/iterator_functions.cpp 2014-02-13 20:42:52 UTC (rev 1609)
+++ pkg/VariABEL/src/ITERlib/iterator_functions.cpp 2014-02-14 07:58:43 UTC (rev 1610)
@@ -482,8 +482,17 @@
{
is_trait_na[id_counter]=1;
}
-
+ if(*analys_type == 4)
+ {
+ if(ISNAN(indata[id_counter+indataHeight]))
+ {
+ is_trait_na[id_counter]=1;
+ }
+ }
+
+
+
}
//_________________________________________________________
@@ -500,10 +509,8 @@
// delete[] design_matrix_;
- outdata[0] = *chi2;
- outdata[1] = double(*df);
- int counter=2;
+ int counter=0;
for(int i=0 ; i<((*p)+step) ; i++)
{
outdata[counter] = betas[i];
@@ -511,6 +518,9 @@
outdata[counter] = se[i];
counter++;
}
+
+ outdata[counter] = double(*df);
+ outdata[counter+1] = *chi2;
// for(int i=0 ; i<2 + (*INTEGER(argList[3]))*2 ; i++)
// {
Modified: pkg/VariABEL/src/VARlib/var_homogeneity_test_C.cpp
===================================================================
--- pkg/VariABEL/src/VARlib/var_homogeneity_test_C.cpp 2014-02-13 20:42:52 UTC (rev 1609)
+++ pkg/VariABEL/src/VARlib/var_homogeneity_test_C.cpp 2014-02-14 07:58:43 UTC (rev 1610)
@@ -106,20 +106,16 @@
start_snp = nids*(p-2);
}
else
- {
- start_snp = nids*(p-1);
- }
+ {
+ start_snp = nids*(p-1);
+ }
-// Rprintf("start_snp=%d, stop_snp=%d", start_snp, stop_snp);
for(int i=start_snp ; i<stop_snp; i++)
{
- Rprintf("snp[%d]=%f, ", i-start_snp, snp[i-start_snp]);
design_matrix_copy[i]=snp[i-start_snp];
}
-
-
long unsigned int nids_nona=0;
for(int column=0 ; column<p ; column++)
{
@@ -204,9 +200,6 @@
my_small_vector squared_residuals_adusted_on_snp(residuals, nids_nona, false);
- Rprintf("\n\n*analys_type_=%d\n", *analys_type_);
- Rprintf("betas_disp[%d]=%f\n", 0, betas_disp[0]);
- Rprintf("se_disp[%d]=%f\n", 0, se_disp[0]);
@@ -219,18 +212,15 @@
double SSRr = my_var(&squared_residuals)*nids_nona;
double SSRu = my_var(&squared_residuals_adusted_on_snp)*nids_nona;
int q = step;
-
- Rprintf("SSRr=%f, SSRu=%f", SSRr, SSRu);
+ int k = step;
static double Fstat;
- Fstat = ((SSRr-SSRu)/q)/(SSRu/(nids_nona-1));
+ Fstat = ((SSRr-SSRu)/q)/(SSRu/(nids_nona - k - 1));
// if(*analys_type_ == 4)
// {
for(unsigned i=0 ; i<step ; i++)
{
- Rprintf("betas_disp[%d]=%f\n", i+1, betas_disp[i+1]);
- Rprintf("se_disp[%d]=%f\n", i+1, se_disp[i+1]);
betas[p+i] = betas_disp[i+1];
se[p+i] = se_disp[i+1];
}
@@ -245,9 +235,8 @@
// }
chi2[0] = Fstat;
- df[0] = nids_nona;
+ df[0] = nids_nona - k -1;
- Rprintf("END!!!!!!!!!!!");
}
return;
More information about the Genabel-commits
mailing list