[Genabel-commits] r996 - in pkg/ProbABEL: . src tests tests/verified_results

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Nov 1 22:38:52 CET 2012


Author: lckarssen
Date: 2012-11-01 22:38:51 +0100 (Thu, 01 Nov 2012)
New Revision: 996

Added:
   pkg/ProbABEL/tests/verified_results/
   pkg/ProbABEL/tests/verified_results/height_2df.out.txt
Removed:
   pkg/ProbABEL/tests/known_good_results/
Modified:
   pkg/ProbABEL/configure.ac
   pkg/ProbABEL/src/probabel.pl
   pkg/ProbABEL/src/probabel_config.cfg.example
   pkg/ProbABEL/tests/Makefile.am
   pkg/ProbABEL/tests/check_probabel.pl_chunk.sh
Log:
- Added a check for probabel.pl to also test chunks in combination with .prob files. 
- Renamed the known_good_results directory to verified_results



Modified: pkg/ProbABEL/configure.ac
===================================================================
--- pkg/ProbABEL/configure.ac	2012-11-01 20:46:32 UTC (rev 995)
+++ pkg/ProbABEL/configure.ac	2012-11-01 21:38:51 UTC (rev 996)
@@ -52,11 +52,8 @@
 # Check for presence of sed. Needed to replace path to /etc in probabel.pl
 AC_PROG_SED
 
-# Check for presence of pdfLaTeX
-AC_CHECK_PROG(CUT, cut, cut)
-if test -z "$CUT"; then
-  AC_MSG_WARN([Can't find the 'cut' binary. Some tests won't run correctly])
-fi
+# Check for the presence of awk. Needed in the test suite
+AC_PROG_AWK
 
 # Check for presence of pdfLaTeX
 AC_CHECK_PROG(PDFLATEX, pdflatex, pdflatex)

Modified: pkg/ProbABEL/src/probabel.pl
===================================================================
--- pkg/ProbABEL/src/probabel.pl	2012-11-01 20:46:32 UTC (rev 995)
+++ pkg/ProbABEL/src/probabel.pl	2012-11-01 21:38:51 UTC (rev 996)
@@ -38,7 +38,7 @@
 my %cohorts;
 my @mlinfos;
 my @mldoses;
-my @mlprobes;
+my @mlprobs;
 my @legends;
 
 
@@ -58,7 +58,7 @@
     $cohorts{$line_array[0]} = $i;
     $mlinfos[$i]  = $line_array[1];
     $mldoses[$i]  = $line_array[2];
-    $mlprobes[$i] = $line_array[3];
+    $mlprobs[$i] = $line_array[3];
     $legends[$i]  = $line_array[4];
 }
 close(CFG);
@@ -143,7 +143,7 @@
 
 my $mlinfo = $mlinfos[$cohort_position];
 my $mldose = $mldoses[$cohort_position];
-my $mlprobe = $mlprobes[$cohort_position];
+my $mlprob = $mlprobs[$cohort_position];
 my $legend = $legends[$cohort_position];
 
 
@@ -167,12 +167,12 @@
 
 
 my $model_option_num=0;
-
+my $mldose_prob;
 if($model eq "--additive") {
-    my $mldose_probe = $mldose;
+    $mldose_prob = $mldose;
     $model_option_num=1;
 } elsif($model eq "--allmodels") {
-    my $mldose_probe = $mlprobe;
+    $mldose_prob = $mlprob;
     $model_option_num=2;
 } else {
     die "error: Wrong key for model. You can use \"--additive\" or \"--allmodels\" only\n";
@@ -195,7 +195,7 @@
     $mlinfo_arg = $mlinfo;
     $mlinfo_arg =~ s/$chr_replacement/$chr/g;
 
-    $mldose_arg = $mldose;
+    $mldose_arg = $mldose_prob;
     $mldose_arg =~ s/$chr_replacement/$chr/g;
 
     $legend_arg = $legend;
@@ -208,7 +208,7 @@
 	my $head="--no-head";
     }
 
-    print `$prog -p $phename.PHE --ngpreds $model_option_num -i $mlinfo_arg -d $mldose_arg -m $legend_arg --chrom $chr -o $phename $head $keys`;
+    system "$prog -p $phename.PHE --ngpreds $model_option_num -i $mlinfo_arg -d $mldose_arg -m $legend_arg --chrom $chr -o $phename $head $keys";
 
     exit;
 }
@@ -237,7 +237,7 @@
 	$mlinfo_arg =~ s/$chr_replacement/$chr/g;
 	$mlinfo_arg =~ s/$chunk_replacement/$chunk/g;
 
-	$mldose_arg = $mldose;
+	$mldose_arg = $mldose_prob;
 	$mldose_arg =~ s/$chr_replacement/$chr/g;
 	$mldose_arg =~ s/$chunk_replacement/$chunk/g;
 

Modified: pkg/ProbABEL/src/probabel_config.cfg.example
===================================================================
--- pkg/ProbABEL/src/probabel_config.cfg.example	2012-11-01 20:46:32 UTC (rev 995)
+++ pkg/ProbABEL/src/probabel_config.cfg.example	2012-11-01 21:38:51 UTC (rev 996)
@@ -12,6 +12,9 @@
 # - mlprob_path: path to the file with imputed genotype probabilities
 # - legend_path: path to the legend file used for the imputations
 #
+# If the dosage and probability files are in DatABEL format (i.e. filenames
+# ending in .fvi and .fvd), either of these extensions can be used.
+#
 # Use "_._chr_._" to specify a chromosome number in the configured paths. This
 # can be used multiple times per line, e.g.:
 #

Modified: pkg/ProbABEL/tests/Makefile.am
===================================================================
--- pkg/ProbABEL/tests/Makefile.am	2012-11-01 20:46:32 UTC (rev 995)
+++ pkg/ProbABEL/tests/Makefile.am	2012-11-01 21:38:51 UTC (rev 996)
@@ -2,22 +2,30 @@
 
 AUTOMAKE_OPTIONS = foreign color-tests
 
-testsfiles = check_probabel.pl_chunk.sh
-known_results=known_good_results/height_add.out.txt
+tests_files = check_probabel.pl_chunk.sh
+verified_results = verified_results/height_add.out.txt	\
+ verified_results/height_2df.out.txt
+dose_files = chr1.chunk1.dose chr1.chunk2.dose chr2.chunk1.dose	\
+ chr2.chunk2.dose chr1.dose chr2.dose
+prob_files = chr1.chunk1.prob chr1.chunk2.prob chr2.chunk1.prob	\
+ chr2.chunk2.prob chr1.prob chr2.prob
+map_files = chr1.chunk1.map chr1.chunk2.map chr2.chunk1.map	\
+ chr2.chunk2.map chr1.map chr2.map
+info_files = chr1.chunk1.info chr1.chunk2.info chr2.chunk1.info	\
+ chr2.chunk2.info chr1.info chr2.info
+output_files = height_add.out.txt height_2df.out.txt			\
+ height_domin.out.txt height_over_domin.out.txt height_recess.out.txt
+other_files = probabel.pl probabel_config.cfg height.PHE
+
+
+
 testsdir = $(pkgdatadir)/tests
-dist_tests_DATA = $(testsfiles) $(known_results)
+dist_tests_DATA = $(tests_files) $(verified_results)
 
 TESTS_ENVIRONMENT = sh
 check_SCRIPTS = check_probabel.pl_chunk.sh
 
 TESTS = $(check_SCRIPTS)
 
-chunk_files=chunk1.chr1.dose chunk2.chr1.dose chunk1.chr2.dose		\
- chunk2.chr2.dose chunk1.chr1.map chunk2.chr1.map chunk1.chr2.map	\
- chunk2.chr2.map chunk1.chr1.info chunk2.chr1.info chunk1.chr2.info	\
- chunk2.chr2.info chr1.dose chr2.dose chr1.info chr2.info chr1.map	\
- chr2.map probabel.pl probabel_config.cfg height.PHE			\
- height_add.out.txt
-
-
-CLEANFILES = $(chunk_files)
+CLEANFILES = $(dose_files) $(prob_files) $(map_files) ${info_files} \
+ ${output_files} ${other_files}

Modified: pkg/ProbABEL/tests/check_probabel.pl_chunk.sh
===================================================================
--- pkg/ProbABEL/tests/check_probabel.pl_chunk.sh	2012-11-01 20:46:32 UTC (rev 995)
+++ pkg/ProbABEL/tests/check_probabel.pl_chunk.sh	2012-11-01 21:38:51 UTC (rev 996)
@@ -10,20 +10,20 @@
 fi
 exampledir="${srcdir}/../examples/"
 padir="${srcdir}/../src/"
-results="${srcdir}/known_good_results/"
+results="${srcdir}/verified_results/"
 
 dosefile="$exampledir/test.mldose"
+probfile="$exampledir/test.mlprob"
 infofile="$exampledir/test.mlinfo"
 mapfile="$exampledir/test.map"
 phenofile="$exampledir/height.txt"
-outfile="height_add.out.txt"
 
 probabel="${padir}/probabel.pl"
 probabelcfg="${padir}/probabel_config.cfg.example"
 chunksep="_._chunk_._"
 chrsep="_._chr_._"
 
-# Prepare probabel.pl and the config file
+# ------ Prepare probabel.pl and the config file ------
 sed 's;"./";"../src/";g' $probabel > probabel.pl
 chmod a+x probabel.pl
 cp $probabelcfg probabel_config.cfg
@@ -34,19 +34,20 @@
 echo "TestCohortNoChunk,$base.info,$base.dose,$base.prob,$base.map" \
     >> probabel_config.cfg
 
-base="chunk${chunksep}.chr${chrsep}"
+base="chr${chrsep}.chunk${chunksep}"
 echo "TestCohortChunk,$base.info,$base.dose,$base.prob,$base.map" \
     >> probabel_config.cfg
 
 
 # ------------------ No chunks test -------------------
-rm -f $outfile
-
-# Split the dose and info files up into two chromosomes with some
+# Split the dose, prob and info files up into two chromosomes with some
 # chunks
-cut -d" " -f1,2,3,4 $dosefile > chr1.dose
-cut -d" " -f1,2,5-7 $dosefile > chr2.dose
+awk '{print $1,$2,$3,$4}'    $dosefile > chr1.dose
+awk '{print $1,$2,$5,$6,$7}' $dosefile > chr2.dose
 
+awk '{print $1,$2,$3,$4,$5,$6}'          $probfile > chr1.prob
+awk '{print $1,$2,$7,$8,$9,$10,$11,$12}' $probfile > chr2.prob
+
 sed -n '1,3p' $infofile >  chr1.info
 sed -n '1p'   $infofile >  chr2.info
 sed -n '4,6p' $infofile >> chr2.info
@@ -55,43 +56,63 @@
 sed -n '1p'   $mapfile >  chr2.map
 sed -n '4,6p' $mapfile >> chr2.map
 
-# Run an analysis
+# Run an analysis on dosage data
+outfile="height_add.out.txt"
+rm -f $outfile
 ./probabel.pl 1 2 linear TestCohortNoChunk --additive height
+echo "Checking output using dosages without chunks..."
+diff $outfile $results/$outfile
 
-# Final check:
-echo "Checking output without chunks:"
+
+# Run an analysis on probabilities
+outfile="height_2df.out.txt"
+rm -f $outfile
+./probabel.pl 1 2 linear TestCohortNoChunk --allmodels height
+echo "Checking output using probabilities without chunks..."
 diff $outfile $results/$outfile
 
 
+
 # ------------------ Chunks test ----------------------
-rm -f $outfile
-
 # Split the dose and info files up into two chromosomes with some
 # chunks
-cut -d" " -f1,2,3   $dosefile > chunk1.chr1.dose
-cut -d" " -f1,2,4   $dosefile > chunk2.chr1.dose
-cut -d" " -f1,2,5,6 $dosefile > chunk1.chr2.dose
-cut -d" " -f1,2,7   $dosefile > chunk2.chr2.dose
+awk '{print $1,$2,$3}'    $dosefile > chr1.chunk1.dose
+awk '{print $1,$2,$4}'    $dosefile > chr1.chunk2.dose
+awk '{print $1,$2,$5,$6}' $dosefile > chr2.chunk1.dose
+awk '{print $1,$2,$7}'    $dosefile > chr2.chunk2.dose
 
-sed -n '1,2p' $infofile >  chunk1.chr1.info
-sed -n '1p'   $infofile >  chunk2.chr1.info
-sed -n '3p'   $infofile >> chunk2.chr1.info
-sed -n '1p'   $infofile >  chunk1.chr2.info
-sed -n '4,5p' $infofile >> chunk1.chr2.info
-sed -n '1p'   $infofile >  chunk2.chr2.info
-sed -n '6p'   $infofile >> chunk2.chr2.info
+awk '{print $1,$2,$3,$4}'        $probfile > chr1.chunk1.prob
+awk '{print $1,$2,$5,$6}'        $probfile > chr1.chunk2.prob
+awk '{print $1,$2,$7,$8,$9,$10}' $probfile > chr2.chunk1.prob
+awk '{print $1,$2,$11,$12}'      $probfile > chr2.chunk2.prob
 
-sed -n '1,2p' $mapfile >  chunk1.chr1.map
-sed -n '1p'   $mapfile >  chunk2.chr1.map
-sed -n '3p'   $mapfile >> chunk2.chr1.map
-sed -n '1p'   $mapfile >  chunk1.chr2.map
-sed -n '4,5p' $mapfile >> chunk1.chr2.map
-sed -n '1p'   $mapfile >  chunk2.chr2.map
-sed -n '6p'   $mapfile >> chunk2.chr2.map
+sed -n '1,2p' $infofile >  chr1.chunk1.info
+sed -n '1p'   $infofile >  chr1.chunk2.info
+sed -n '3p'   $infofile >> chr1.chunk2.info
+sed -n '1p'   $infofile >  chr2.chunk1.info
+sed -n '4,5p' $infofile >> chr2.chunk1.info
+sed -n '1p'   $infofile >  chr2.chunk2.info
+sed -n '6p'   $infofile >> chr2.chunk2.info
 
-# Run an analysis
+sed -n '1,2p' $mapfile >  chr1.chunk1.map
+sed -n '1p'   $mapfile >  chr1.chunk2.map
+sed -n '3p'   $mapfile >> chr1.chunk2.map
+sed -n '1p'   $mapfile >  chr2.chunk1.map
+sed -n '4,5p' $mapfile >> chr2.chunk1.map
+sed -n '1p'   $mapfile >  chr2.chunk2.map
+sed -n '6p'   $mapfile >> chr2.chunk2.map
+
+# Run an analysis on dosage data
+outfile="height_add.out.txt"
+rm -f $outfile
 ./probabel.pl 1 2 linear TestCohortChunk --additive height
+echo "Checking output using dosages with chunks..."
+diff $outfile $results/$outfile
 
-# Final check:
-echo "Checking output with chunks:"
+
+# Run an analysis on probabilities, using the chunked cohort (chr*.chunk*.prob)
+outfile="height_2df.out.txt"
+rm -f $outfile
+./probabel.pl 1 2 linear TestCohortChunk --allmodels height
+echo "Checking output using probabilities with chunks..."
+diff $outfile $results/$outfile

Added: pkg/ProbABEL/tests/verified_results/height_2df.out.txt
===================================================================
--- pkg/ProbABEL/tests/verified_results/height_2df.out.txt	                        (rev 0)
+++ pkg/ProbABEL/tests/verified_results/height_2df.out.txt	2012-11-01 21:38:51 UTC (rev 996)
@@ -0,0 +1,6 @@
+name A1 A2 Freq1 MAF Quality Rsq n Mean_predictor_allele chrom position beta_SNP_A1A2 beta_SNP_A1A1 sebeta_SNP_A1A2 sebeta_SNP_A1A1 loglik
+rs7247199 G A 0.5847 0.415 0.9299 0.8666 182 0.333668 1 204938 615.049 1057.49 -539.487 1040.54 -428.817
+rs8102643 C T 0.5847 0.415 0.9308 0.8685 181 0.808177 1 207859 -820.352 1392.51 -646.025 1054.18 -426.797
+rs8102615 T A 0.5006 0.4702 0.9375 0.8932 182 0.8665 2 211970 -239.797 1308.51 -246.735 1102.06 -428.924
+rs8105536 G A 0.5783 0.4213 0.9353 0.8832 182 0.791701 2 212033 782.576 1474.59 604.391 1064.24 -428.789
+rs2312724 T C 0.9122 0.0877 0.9841 0.9232 182 0.933464 2 217034 1180.53 1104.39 912.574 1003.85 -427.981



More information about the Genabel-commits mailing list