[Genabel-commits] r1492 - in pkg/ProbABEL: checks doc src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Dec 19 23:48:04 CET 2013


Author: lckarssen
Date: 2013-12-19 23:48:03 +0100 (Thu, 19 Dec 2013)
New Revision: 1492

Added:
   pkg/ProbABEL/checks/check_probabel_chunk.sh
   pkg/ProbABEL/doc/probabel.1
   pkg/ProbABEL/src/probabel
Removed:
   pkg/ProbABEL/checks/check_probabel.pl_chunk.sh
   pkg/ProbABEL/doc/probabel.pl.1
   pkg/ProbABEL/src/probabel.pl
Modified:
   pkg/ProbABEL/checks/Makefile.am
   pkg/ProbABEL/doc/ChangeLog
   pkg/ProbABEL/doc/INSTALL
   pkg/ProbABEL/doc/Makefile.am
   pkg/ProbABEL/doc/ProbABEL_manual.tex
   pkg/ProbABEL/src/Makefile.am
Log:
As discussed on the mailing list (and reported by the Debian packager), scripts that are used by the user should not have a filename extension. The user doesn't care whether we use Perl, Bash, Python, etc. to get the job done.

This commit replaces all occurrences of probabel.pl with probabel. When installing (make install) a symbolic link is made so that for now probabel.pl continues to exist. When the script is called as probabel.pl a warning message is displayed, informing the user that (s)he should upgrade their pipelines because the .pl script will be removed in a future release.



Modified: pkg/ProbABEL/checks/Makefile.am
===================================================================
--- pkg/ProbABEL/checks/Makefile.am	2013-12-19 22:38:27 UTC (rev 1491)
+++ pkg/ProbABEL/checks/Makefile.am	2013-12-19 22:48:03 UTC (rev 1492)
@@ -42,7 +42,7 @@
 
 
 if BUILD_palinear
-check_SCRIPTS += check_probabel.pl_chunk.sh check_dose_input.sh
+check_SCRIPTS += check_probabel_chunk.sh check_dose_input.sh
 check_SCRIPTS += test_qt.sh test_mms.sh
 endif
 if BUILD_palogist
@@ -139,7 +139,7 @@
  height_ngp2_over_domin.out.txt height_ngp2_2df.out.txt			\
  height_ngp2_add.out.txt height_base_add.out.txt
 
-other_files = probabel.pl probabel_config.cfg
+other_files = probabel probabel_config.cfg
 
 cleanfiles_probabel_check = height.PHE $(dose_files) $(prob_files)	\
  $(map_files) $(info_files) $(output_files) $(other_files)

Deleted: pkg/ProbABEL/checks/check_probabel.pl_chunk.sh
===================================================================
--- pkg/ProbABEL/checks/check_probabel.pl_chunk.sh	2013-12-19 22:38:27 UTC (rev 1491)
+++ pkg/ProbABEL/checks/check_probabel.pl_chunk.sh	2013-12-19 22:48:03 UTC (rev 1492)
@@ -1,152 +0,0 @@
-#!/bin/bash
-# L.C. Karssen
-# This script is used to test whether probabel.pl works correctly when
-# input is cut up in chunks
-
-echo "Testing probabel.pl..."
-
-# Exit with error when one of the steps in the script fails
-set -e
-
-# -------- Set some default paths and file names -------
-if [ -z ${srcdir} ]; then
-    srcdir="."
-fi
-inputdir="${srcdir}/inputfiles/"
-padir="${srcdir}/../src/"
-results="${srcdir}/verified_results/"
-
-dosefile="$inputdir/test.mldose"
-probfile="$inputdir/test.mlprob"
-infofile="$inputdir/test.mlinfo"
-mapfile="$inputdir/test.map"
-phenofile="$inputdir/height.txt"
-
-probabel="${padir}/probabel.pl"
-probabelcfg="${padir}/probabel_config.cfg.example"
-chunksep="_._chunk_._"
-chrsep="_._chr_._"
-
-# ------ Prepare probabel.pl and the config file ------
-sed 's;"./";"../src/";g' $probabel > probabel.pl
-chmod a+x probabel.pl
-cp $probabelcfg probabel_config.cfg
-chmod +w probabel_config.cfg # Need this for make distcheck to work
-cp $phenofile height.PHE
-
-base="chr${chrsep}"
-echo "TestCohortNoChunk,$base.info,$base.dose,$base.prob,$base.map" \
-    >> probabel_config.cfg
-
-base="chr${chrsep}.chunk${chunksep}"
-echo "TestCohortChunk,$base.info,$base.dose,$base.prob,$base.map" \
-    >> probabel_config.cfg
-
-
-# ---------- function definitions ----------
-prepare_input ()
-{
-    if [ "$1" = "nochunk" ]; then
-        # ------------------ No chunks test -------------------
-        # Split the dose, prob and info files up into two chromosomes
-        # with some  chunks
-        awk '{print $1,$2,$3,$4}'    $dosefile > chr1.dose
-        awk '{print $1,$2,$5,$6,$7}' $dosefile > chr2.dose
-
-        awk '{print $1,$2,$3,$4,$5,$6}'          $probfile > chr1.prob
-        awk '{print $1,$2,$7,$8,$9,$10,$11,$12}' $probfile > chr2.prob
-
-        sed -n '1,3p' $infofile >  chr1.info
-        sed -n '1p'   $infofile >  chr2.info
-        sed -n '4,6p' $infofile >> chr2.info
-
-        sed -n '1,3p' $mapfile > chr1.map
-        sed -n '1p'   $mapfile >  chr2.map
-        sed -n '4,6p' $mapfile >> chr2.map
-
-        WithOrWithout="without"
-    elif [ "$1" = "chunk" ]; then
-        # ------------------ Chunks test ----------------------
-        # Split the dose and info files up into two chromosomes with
-        # some chunks
-        awk '{print $1,$2,$3}'    $dosefile > chr1.chunk1.dose
-        awk '{print $1,$2,$4}'    $dosefile > chr1.chunk2.dose
-        awk '{print $1,$2,$5,$6}' $dosefile > chr2.chunk1.dose
-        awk '{print $1,$2,$7}'    $dosefile > chr2.chunk2.dose
-
-        awk '{print $1,$2,$3,$4}'        $probfile > chr1.chunk1.prob
-        awk '{print $1,$2,$5,$6}'        $probfile > chr1.chunk2.prob
-        awk '{print $1,$2,$7,$8,$9,$10}' $probfile > chr2.chunk1.prob
-        awk '{print $1,$2,$11,$12}'      $probfile > chr2.chunk2.prob
-
-        sed -n '1,2p' $infofile >  chr1.chunk1.info
-        sed -n '1p'   $infofile >  chr1.chunk2.info
-        sed -n '3p'   $infofile >> chr1.chunk2.info
-        sed -n '1p'   $infofile >  chr2.chunk1.info
-        sed -n '4,5p' $infofile >> chr2.chunk1.info
-        sed -n '1p'   $infofile >  chr2.chunk2.info
-        sed -n '6p'   $infofile >> chr2.chunk2.info
-
-        sed -n '1,2p' $mapfile >  chr1.chunk1.map
-        sed -n '1p'   $mapfile >  chr1.chunk2.map
-        sed -n '3p'   $mapfile >> chr1.chunk2.map
-        sed -n '1p'   $mapfile >  chr2.chunk1.map
-        sed -n '4,5p' $mapfile >> chr2.chunk1.map
-        sed -n '1p'   $mapfile >  chr2.chunk2.map
-        sed -n '6p'   $mapfile >> chr2.chunk2.map
-
-        WithOrWithout="with"
-    else
-        echo "Run this function with one of these arguments: 'chunk'
-        or 'nochunk'."
-        exit 1
-    fi
-
-}
-
-
-run_test ()
-{
-    # Run an analysis on dosage data
-    outfile="height_add.out.txt"
-
-    echo "Checking output using dosages $WithOrWithout chunks..."
-    ./probabel.pl 1 2 linear $1 --additive height > /dev/null
-
-    blanks="                                                          "
-    echo -n "  Verifying "
-    if diff $outfile $results/$outfile; then
-        echo -e "${outfile}${blanks:${#outfile}} OK"
-    else
-        echo -e "${outfile}${blanks:${#outfile}} FAILED"
-        exit 1
-    fi
-
-    # Run an analysis on probabilities
-    outfilelist="height_ngp2_2df.out.txt height_ngp2_recess.out.txt
-    height_ngp2_over_domin.out.txt height_ngp2_domin.out.txt
-    height_ngp2_add.out.txt"
-
-    echo "Checking output using probabilities $WithOrWithout chunks..."
-    ./probabel.pl 1 2 linear $1 --allmodels height -o height_ngp2 > /dev/null
-    for file in $outfilelist; do
-        echo -n "  Verifying "
-        if diff $file $results/$file; then
-            echo -e "${file}${blanks:${#file}} OK"
-        else
-            echo -e "${file}${blanks:${#file}} FAILED"
-            exit 1
-        fi
-    done
-}
-
-# ---------- Continuation of the main function ----------
-prepare_input nochunk
-
-run_test TestCohortNoChunk
-echo "-------------------- Finished check without chunks --------------------"
-
-prepare_input chunk
-
-run_test TestCohortChunk
-echo "-------------------- Finished check with chunks --------------------"

Copied: pkg/ProbABEL/checks/check_probabel_chunk.sh (from rev 1486, pkg/ProbABEL/checks/check_probabel.pl_chunk.sh)
===================================================================
--- pkg/ProbABEL/checks/check_probabel_chunk.sh	                        (rev 0)
+++ pkg/ProbABEL/checks/check_probabel_chunk.sh	2013-12-19 22:48:03 UTC (rev 1492)
@@ -0,0 +1,152 @@
+#!/bin/bash
+# L.C. Karssen
+# This script is used to test whether the probabel script works
+# correctly when input is cut up in chunks
+
+echo "Testing probabel..."
+
+# Exit with error when one of the steps in the script fails
+set -e
+
+# -------- Set some default paths and file names -------
+if [ -z ${srcdir} ]; then
+    srcdir="."
+fi
+inputdir="${srcdir}/inputfiles/"
+padir="${srcdir}/../src/"
+results="${srcdir}/verified_results/"
+
+dosefile="$inputdir/test.mldose"
+probfile="$inputdir/test.mlprob"
+infofile="$inputdir/test.mlinfo"
+mapfile="$inputdir/test.map"
+phenofile="$inputdir/height.txt"
+
+probabel="${padir}/probabel"
+probabelcfg="${padir}/probabel_config.cfg.example"
+chunksep="_._chunk_._"
+chrsep="_._chr_._"
+
+# ------ Prepare probabel and the config file ------
+sed 's;"./";"../src/";g' $probabel > probabel
+chmod a+x probabel
+cp $probabelcfg probabel_config.cfg
+chmod +w probabel_config.cfg # Need this for make distcheck to work
+cp $phenofile height.PHE
+
+base="chr${chrsep}"
+echo "TestCohortNoChunk,$base.info,$base.dose,$base.prob,$base.map" \
+    >> probabel_config.cfg
+
+base="chr${chrsep}.chunk${chunksep}"
+echo "TestCohortChunk,$base.info,$base.dose,$base.prob,$base.map" \
+    >> probabel_config.cfg
+
+
+# ---------- function definitions ----------
+prepare_input ()
+{
+    if [ "$1" = "nochunk" ]; then
+        # ------------------ No chunks test -------------------
+        # Split the dose, prob and info files up into two chromosomes
+        # with some  chunks
+        awk '{print $1,$2,$3,$4}'    $dosefile > chr1.dose
+        awk '{print $1,$2,$5,$6,$7}' $dosefile > chr2.dose
+
+        awk '{print $1,$2,$3,$4,$5,$6}'          $probfile > chr1.prob
+        awk '{print $1,$2,$7,$8,$9,$10,$11,$12}' $probfile > chr2.prob
+
+        sed -n '1,3p' $infofile >  chr1.info
+        sed -n '1p'   $infofile >  chr2.info
+        sed -n '4,6p' $infofile >> chr2.info
+
+        sed -n '1,3p' $mapfile > chr1.map
+        sed -n '1p'   $mapfile >  chr2.map
+        sed -n '4,6p' $mapfile >> chr2.map
+
+        WithOrWithout="without"
+    elif [ "$1" = "chunk" ]; then
+        # ------------------ Chunks test ----------------------
+        # Split the dose and info files up into two chromosomes with
+        # some chunks
+        awk '{print $1,$2,$3}'    $dosefile > chr1.chunk1.dose
+        awk '{print $1,$2,$4}'    $dosefile > chr1.chunk2.dose
+        awk '{print $1,$2,$5,$6}' $dosefile > chr2.chunk1.dose
+        awk '{print $1,$2,$7}'    $dosefile > chr2.chunk2.dose
+
+        awk '{print $1,$2,$3,$4}'        $probfile > chr1.chunk1.prob
+        awk '{print $1,$2,$5,$6}'        $probfile > chr1.chunk2.prob
+        awk '{print $1,$2,$7,$8,$9,$10}' $probfile > chr2.chunk1.prob
+        awk '{print $1,$2,$11,$12}'      $probfile > chr2.chunk2.prob
+
+        sed -n '1,2p' $infofile >  chr1.chunk1.info
+        sed -n '1p'   $infofile >  chr1.chunk2.info
+        sed -n '3p'   $infofile >> chr1.chunk2.info
+        sed -n '1p'   $infofile >  chr2.chunk1.info
+        sed -n '4,5p' $infofile >> chr2.chunk1.info
+        sed -n '1p'   $infofile >  chr2.chunk2.info
+        sed -n '6p'   $infofile >> chr2.chunk2.info
+
+        sed -n '1,2p' $mapfile >  chr1.chunk1.map
+        sed -n '1p'   $mapfile >  chr1.chunk2.map
+        sed -n '3p'   $mapfile >> chr1.chunk2.map
+        sed -n '1p'   $mapfile >  chr2.chunk1.map
+        sed -n '4,5p' $mapfile >> chr2.chunk1.map
+        sed -n '1p'   $mapfile >  chr2.chunk2.map
+        sed -n '6p'   $mapfile >> chr2.chunk2.map
+
+        WithOrWithout="with"
+    else
+        echo "Run this function with one of these arguments: 'chunk'
+        or 'nochunk'."
+        exit 1
+    fi
+
+}
+
+
+run_test ()
+{
+    # Run an analysis on dosage data
+    outfile="height_add.out.txt"
+
+    echo "Checking output using dosages $WithOrWithout chunks..."
+    ./probabel 1 2 linear $1 --additive height > /dev/null
+
+    blanks="                                                          "
+    echo -n "  Verifying "
+    if diff $outfile $results/$outfile; then
+        echo -e "${outfile}${blanks:${#outfile}} OK"
+    else
+        echo -e "${outfile}${blanks:${#outfile}} FAILED"
+        exit 1
+    fi
+
+    # Run an analysis on probabilities
+    outfilelist="height_ngp2_2df.out.txt height_ngp2_recess.out.txt
+    height_ngp2_over_domin.out.txt height_ngp2_domin.out.txt
+    height_ngp2_add.out.txt"
+
+    echo "Checking output using probabilities $WithOrWithout chunks..."
+    ./probabel 1 2 linear $1 --allmodels height -o height_ngp2 > /dev/null
+    for file in $outfilelist; do
+        echo -n "  Verifying "
+        if diff $file $results/$file; then
+            echo -e "${file}${blanks:${#file}} OK"
+        else
+            echo -e "${file}${blanks:${#file}} FAILED"
+            exit 1
+        fi
+    done
+}
+
+# ---------- Continuation of the main function ----------
+prepare_input nochunk
+
+run_test TestCohortNoChunk
+echo "-------------------- Finished check without chunks --------------------"
+
+prepare_input chunk
+
+run_test TestCohortChunk
+echo "-------------------- Finished check with chunks --------------------"

Modified: pkg/ProbABEL/doc/ChangeLog
===================================================================
--- pkg/ProbABEL/doc/ChangeLog	2013-12-19 22:38:27 UTC (rev 1491)
+++ pkg/ProbABEL/doc/ChangeLog	2013-12-19 22:48:03 UTC (rev 1492)
@@ -1,4 +1,9 @@
 ***** v.0.4.2
+* The 'probabel.pl' script is now simply renamed to 'probabel' (a user
+  shouldn't care what scripting language we use). For at least several
+  releases to come, the old script name will still exist (as a link to the
+  original) and a warning message is displayed when the user runs the
+  .pl script. This should give people time to adjust their pipelines.
 * Fix bug #4919: Too small reading buffers for long alleles in mach info
   and legend files. Thanks to Daniel Taliun for reporting the bug and
   providing the patch. Thanks to Xia Shen for testing.

Modified: pkg/ProbABEL/doc/INSTALL
===================================================================
--- pkg/ProbABEL/doc/INSTALL	2013-12-19 22:38:27 UTC (rev 1491)
+++ pkg/ProbABEL/doc/INSTALL	2013-12-19 22:48:03 UTC (rev 1492)
@@ -42,8 +42,8 @@
   PDF version of the documentation will not be built.
 
   NOTE: make install will OVERWRITE any file with the same name if it
-  already exists. By default the name of the probabel.pl config file is
-  probabel_config.cfg.example. For probabel.pl to work please rename
+  already exists. By default the name of the probabel config file is
+  probabel_config.cfg.example. For probabel to work please rename
   this file to probabel_config.cfg.
 
   To see options, run

Modified: pkg/ProbABEL/doc/Makefile.am
===================================================================
--- pkg/ProbABEL/doc/Makefile.am	2013-12-19 22:38:27 UTC (rev 1491)
+++ pkg/ProbABEL/doc/Makefile.am	2013-12-19 22:48:03 UTC (rev 1492)
@@ -6,7 +6,7 @@
  test.mlinfo test_regression.R COPYING LICENSE INSTALL ChangeLog	\
  TODO
 
-man_MANS = probabel.pl.1
+man_MANS = probabel.1
 if BUILD_palinear
 man_MANS += palinear.1
 endif

Modified: pkg/ProbABEL/doc/ProbABEL_manual.tex
===================================================================
--- pkg/ProbABEL/doc/ProbABEL_manual.tex	2013-12-19 22:38:27 UTC (rev 1491)
+++ pkg/ProbABEL/doc/ProbABEL_manual.tex	2013-12-19 22:48:03 UTC (rev 1492)
@@ -432,9 +432,9 @@
 %in which case the main effect should normally estimated in the polygenic
 %model and only the interaction term in the \PA{} analysis.
 
-\subsection{Running multiple analyses at once: \texttt{probabel.pl}}
-The Perl script \texttt{bin/probabel.pl} represents a handy wraper for
-\PA{} functions.  To start using it the configuration file
+\subsection{Running multiple analyses at once: \texttt{probabel}}
+The \texttt{bin/probabel} script is a handy wrapper for the \PA{}
+functions. To start using it the configuration file
 \texttt{etc/probabel\_config.cfg.example} needs to be edited and
 renamed to \texttt{etc/probabel\_config.cfg}. The configuration file
 consists of five columns, separated by commas. Each column except the
@@ -446,14 +446,14 @@
 \texttt{\_.\_chr\_.\_}. In case the imputations were run on chunks of
 chromosomes, the pattern \texttt{\_.\_chunk\_.\_} will be replaced
 with the corresponding chunk number. Chunk numbers should start at 1
-for each chromosome. The columns ``dose\_path'', ``prob\_path''
-and ``legend\_path'' are paths and patterns for ``dose'', ``prob'' and
+for each chromosome. The columns ``dose\_path'', ``prob\_path'' and
+``legend\_path'' are paths and patterns for ``dose'', ``prob'' and
 ``legend'' files, respectively. These also need to include the pattern
 for the chromosome as used in the column for the ``info'' files.
 Empty lines and lines starting with a \texttt{\#} are ignored.
 
 The \texttt{make install} installation procedure should have set all
-paths in the \texttt{probabel.pl} script correctly. If that is not the
+paths in the \texttt{probabel} script correctly. If that is not the
 case you will have to change the variable \texttt{\$config} in the
 script to point to the full path of the configuration file and the
 variables \texttt{\$base\_path} and \texttt{@anprog} to point the full

Copied: pkg/ProbABEL/doc/probabel.1 (from rev 1486, pkg/ProbABEL/doc/probabel.pl.1)
===================================================================
--- pkg/ProbABEL/doc/probabel.1	                        (rev 0)
+++ pkg/ProbABEL/doc/probabel.1	2013-12-19 22:48:03 UTC (rev 1492)
@@ -0,0 +1,73 @@
+.TH probabel 1 "12 December 2013"
+.SH NAME
+probabel \- Wrapper around the three ProbABEL binaries, simplifying their use
+.SH SYNOPSIS
+.B probabel
+.BI "" "CHROM-START CHROM-STOP METHOD COHORT MODEL PHENOTYPE [OTHER_OPTIONS]"
+.SH DESCRIPTION
+.B probabel
+is a wrapper around the three ProbABEL binaries: \fBpalinear\fR,
+\fBpalogist\fR, and \fBpacoxph\fR.
+.PP
+It is designed to simplify running of a Genome-Wide Association Study
+(GWAS) by saving the user's precious time in two ways:
+.PP
+1) it runs the regression analysis of all chromosomes from \fICHROM-START\fR to
+\fICHROM-STOP\fR.
+.PP
+2) In contrast to the three binaries mentioned above,
+with \fBprobabel\fR the user doesn't need to specify the locations
+of the required genetic data files. Their location is centrally
+managed in a configuration file (\fI/etc/probabel_config.cfg\fR by default).
+
+.SH OPTIONS
+.SS Required command line options
+.TP
+.I CHROM-START
+Number of the chromosome to start the analysis at.
+.TP
+.I CHROM-STOP
+Number of the chromosome to end the analysis at.
+.PP
+Note that analysis of a single chromosome can be run by specifying the
+same number for \fICHROM-START\fR and \fICHROM-STOP\fR. In fact, this
+is the way to run analysis for the X or Y chromosome.
+.TP
+.I METHOD
+Which regression method to use, can be one of \fIlinear\fR,
+\fIlogistic,\fR or \fIcoxph\fR.
+
+.TP
+.I COHORT
+The name of the cohort as defined in the
+\fI/etc/probabel_config.cfg\fR file. Using the cohort identifier from
+the config file the three actual ProbABEL binaries know where to find
+the genotypic information files (dosage files and/or probability
+files).
+.TP
+.I MODEL
+The genetic model to be used. Can be either \fB--additive\fR for the
+additive model (requires genotype data in dosage form), or
+\fB--allmodels\fR to run all genetic models: additive, recessive,
+dominant, over-dominant and 2df.
+.TP
+.I PHENOTYPE
+The name of the phenotype file, without its extension (which should be
+\fI.PHE\fR!).
+
+.SS Other options
+Any options listed after the (mandatory) previous ones will be passed
+on to the underlying binary: \fBpalinear\fR, \fBpalogist\fR or
+\fBpacoxph\fR, depending on the \fIMETHOD\fR.
+.PP
+The most commonly used option is the \fB\-o\fR option, followed by a
+file name, this specifies the beginning of the file name for the
+output files.
+
+.SS Additional options
+.B "\-h, \-\-help"
+Get additional help on how to run \fBprobabel\fR. In this case none of
+the other options needs to be specified.
+
+.SH EXAMPLES
+probabel 1 22 linear "My_Large_Cohort" \-\-additive my_phenotype

Deleted: pkg/ProbABEL/doc/probabel.pl.1
===================================================================
--- pkg/ProbABEL/doc/probabel.pl.1	2013-12-19 22:38:27 UTC (rev 1491)
+++ pkg/ProbABEL/doc/probabel.pl.1	2013-12-19 22:48:03 UTC (rev 1492)
@@ -1,73 +0,0 @@
-.TH probabel.pl 1 "12 December 2013"
-.SH NAME
-probabel.pl \- Wrapper around the three ProbABEL binaries, simplifying their use
-.SH SYNOPSIS
-.B probabel.pl
-.BI "" "CHROM-START CHROM-STOP METHOD COHORT MODEL PHENOTYPE [OTHER_OPTIONS]"
-.SH DESCRIPTION
-.B probabel.pl
-is a wrapper around the three ProbABEL binaries: \fBpalinear\fR,
-\fBpalogist\fR, and \fBpacoxph\fR.
-.PP
-It is designed to simplify runnig of a Genome-Wide Association Study
-(GWAS) by saving the user's precious time in two ways:
-.PP
-1) it runs the regression analysis of all chromosomes from \fICHROM-START\fR to
-\fICHROM-STOP\fR.
-.PP
-2) In contrast to the three binaries mentioned above,
-with \fBprobabel.pl\fR the user doesn't need to specify the locations
-of the required genetic data files. Their location is centrally
-managed in a configuration file (\fI/etc/probabel_config.cfg\fR by default).
-
-.SH OPTIONS
-.SS Required command line options
-.TP
-.I CHROM-START
-Number of the chromosome to start the analysis at.
-.TP
-.I CHROM-END
-Number of the chromosome to end the analysis at.
-.PP
-Note that analysis of a single chromosome can be run by specifying the
-same number for \fICHROM-START\fR and \fICHROM-STOP\fR. In fact, this
-is the way to run analysis for the X or Y chromosome
-.TP
-.I METHOD
-Which regression method to use, can be one of \fIlinear\fR,
-\fIlogistic,\fR or \fIcoxph\fR.
-
-.TP
-.I COHORT
-The name of the cohort as defined in the
-\fI/etc/probabel_config.cfg\fR file. Using the cohort identifier from
-the config file the three actual ProbABEL binaries know where to find
-the genotypic information files (dosage files and/or probability
-files).
-.TP
-.I MODEL
-The genetic model to be used. Can be either \fB--additive\fR for the
-additive model (requires genotype data in dosage form), or
-\fB--allmodels\fR to run all genetic models: additive, recessive,
-dominant, over-dominant and 2df.
-.TP
-.I PHENOTYPE
-The name of the phenotype file, without its extension (which should be
-\fI.PHE\fR!).
-
-.SS Other options
-Any options listed after the (mandatory) previous ones will be passed
-on to the underlying binary: \fBpalinear\fR, \fBpalogist\fR or
-\fBpacoxph\fR, depending on the \fIMETHOD\fR.
-.PP
-The most commonly used option is the \fB\-o\fR option, followed by a
-file name, this specifies the beginning of the file name for the
-output files.
-
-.SS Additional options
-.B "\-h, \-\-help"
-Get additional help on how to run \fBprobabel.pl\fR. In this case none of
-the other options needs to be specified.
-
-.SH EXAMPLES
-probabel.pl 1 22 linear "My_Large_Cohort" \-\-additive my_phenotype

Modified: pkg/ProbABEL/src/Makefile.am
===================================================================
--- pkg/ProbABEL/src/Makefile.am	2013-12-19 22:38:27 UTC (rev 1491)
+++ pkg/ProbABEL/src/Makefile.am	2013-12-19 22:48:03 UTC (rev 1492)
@@ -80,7 +80,7 @@
 extract_snp_SOURCES = extract-snp.cpp $(FVSRC) $(FVHEADERS)
 
 ## Install these scripts in the bin directory as well:
-dist_bin_SCRIPTS = probabel.pl
+dist_bin_SCRIPTS = probabel
 
 ## Install this R script in the examples directory
 scriptdir = $(pkgdatadir)/scripts
@@ -89,12 +89,14 @@
 ## Install the config file
 dist_sysconf_DATA = probabel_config.cfg.example
 
-## Insert the correct values of several variables in the installed probabel.pl
+## Fix up paths/version in the installed probabel script; link probabel.pl
+## to it relatively (-f: re-install safe; no $(DESTDIR) baked into the link)
 install-exec-hook:
 	$(SED) -i "s;probabel_config.cfg;@sysconfdir@/probabel_config.cfg;" \
-		$(DESTDIR)$(bindir)/probabel.pl
-	$(SED) -i "s;\./;@bindir@/;" $(DESTDIR)$(bindir)/probabel.pl
-	$(SED) -i "s;PROBABEL_VERSION;@VERSION@;" $(DESTDIR)$(bindir)/probabel.pl
+		$(DESTDIR)$(bindir)/probabel
+	$(SED) -i "s;\./;@bindir@/;" $(DESTDIR)$(bindir)/probabel
+	$(SED) -i "s;PROBABEL_VERSION;@VERSION@;" $(DESTDIR)$(bindir)/probabel
+	ln -sf probabel $(DESTDIR)$(bindir)/probabel.pl
 
 ## Make the script executable
 install-data-hook:

Copied: pkg/ProbABEL/src/probabel (from rev 1486, pkg/ProbABEL/src/probabel.pl)
===================================================================
--- pkg/ProbABEL/src/probabel	                        (rev 0)
+++ pkg/ProbABEL/src/probabel	2013-12-19 22:48:03 UTC (rev 1492)
@@ -0,0 +1,351 @@
+#! /usr/bin/perl -W
+#==========================================================================
+#
+#           Filename:  probabel
+#
+#        Description: Handy perl wrapper for ProbABEL functions
+#
+#==========================================================================
+use strict;
+
+## Warn when invoked via the legacy 'probabel.pl' name (kept as a symlink)
+use File::Basename;
+my $name = basename($0);
+if ($name eq 'probabel.pl') {
+    print "WARNING: You are running this script as 'probabel.pl'. " .
+        "The correct name of this script is 'probabel', please update " .
+        "pipelines accordingly. In a future release the use of " .
+        "'probabel.pl' will be deprecated.\n\n";
+}
+
+#==========================================================================
+# Set variables
+my $version="PROBABEL_VERSION";
+
+# Define some filename postfixes
+my $_2df_file_postfix = "_2df.out.txt";
+my $_add_file_postfix = "_add.out.txt";
+my $_domin_file_postfix = "_domin.out.txt";
+my $_recess_file_postfix = "_recess.out.txt";
+my $_over_domin_file_postfix = "_over_domin.out.txt";
+
+# Separators in the config file
+my $separator_cfg = ",";
+my $chr_replacement = "_._chr_._";
+my $chunk_replacement = "_._chunk_._";
+
+# Set file locations
+my $base_path = "./";
+my @anprog = ($base_path . "palinear",
+              $base_path . "palogist",
+              $base_path . "pacoxph");
+my $config = "probabel_config.cfg";
+
+# Define the regression methods that are implemented
+my @method = ("linear", "logistic", "coxph");
+
+my %cohorts;
+my @mlinfos;
+my @mldoses;
+my @mlprobs;
+my @legends;
+
+
+#==========================================================================
+# Read config file
+open(CFG, "$config") or die "Reading configuration file $config failed: $!" .
+    "\nDid you forget to edit and rename the probabel_config.cfg.example file?\n";
+
+<CFG>; #skip the first line (header)
+
+for(my $i=0 ; my $line = <CFG> ; $i++)
+{
+    chomp($line);
+    next if ($line =~ /^#/);
+    next if ($line =~ /^$/);
+    my @line_array = split(/$separator_cfg/, $line);
+    $cohorts{$line_array[0]} = $i;
+    $mlinfos[$i]  = $line_array[1];
+    $mldoses[$i]  = $line_array[2];
+    $mlprobs[$i] = $line_array[3];
+    $legends[$i]  = $line_array[4];
+}
+close(CFG);
+
+
+#==========================================================================
+# Print usage info if arguments are not correct
+if(@ARGV<6 || $ARGV[0] eq "--help" || $ARGV[0] eq "-h") {
+    print "Usage:
+        probabel chrom-start chrom-end method cohort <--allmodels OR --additive> trait <other available options of ProbABEL functions>\n";
+    print "\n	* chrom-start - the first chromosome number, chrom-end - the last one; X or Y have to be run separately (specify them twice, once as chrom-start and once as chrom-end)";
+    print "\n	* method can be ";
+    foreach my $me(@method) {print "\"".$me."\", "};
+    print "\n	* use --allmodels if you need dominant, recessive and heterozygous models
+          and --additive if additive only\n";
+    print "	* Available cohorts are ";
+    foreach my $coh(keys %cohorts) {
+        print "\"".$coh."\", "
+    };
+    print "\n	* example:
+          probabel 1 22 linear \"ERF\" --additive filename
+          (filename has to be saved as filename.PHE)\n\n";
+
+    if(@ARGV == 1 && ($ARGV[0] eq "--help" || $ARGV[0] eq "-h")) {
+        print "\nDetails:\n";
+        print " The probabel script is used for analysis of imputed data. First you have to create a file with the phenotype values that you are going to use. The first column contains ids in special order, the second one contains the trait which you are going	analyze, the others contain covariates.  For example:
+
+        id         phen1 covariate1  covariate2
+        1_2094  0     334         0
+        1_5060  1     56          1
+        1_4077  1     346         6
+        .
+        .
+        .
+
+        This implies the model:
+        phen1 ~ covariate1 + covariate2 + SNP
+
+
+        Then save it to folder where you are doing the analysis. The name of the file must be name_of_file.PHE, where name_of_file is any name.
+
+        Then run the following on the command line:
+          probabel 1 22 \"method\" \"cohort\" --model name_of_file
+        Change \"method\", \"cohort\" and --model to appropriate values\n";
+        print "\n	Version: $version";
+        print "\n\n	Authors: Lennart Karssen   - lennart\@karssen.org,
+                 Maarten Kooyman - m.kooijman\@erasmusmc.nl,
+                 Maksim Struchalin - m.struchalin\@erasmusmc.nl,
+                 Yurii Aulchenko   - yurii.aulchenko\@gmail.com.\n\n";
+    }
+    else {
+        print "Type probabel --help for more details.\n";
+    }
+    exit;
+}
+
+
+#==========================================================================
+# Put the command line arguments into variables and verify them
+my $startchr = $ARGV[0];
+my $endchr = $ARGV[1];
+my $method = $ARGV[2];
+my $chohort = $ARGV[3];
+my $model = $ARGV[4];
+
+die "error: chrom-start is > 22" if($startchr > 22 && $startchr != "X") ;
+die "error: chrom-end is > 22" if($endchr > 22 && $endchr != "X");
+die "error: chrom-start > chrom-end" if($startchr > $endchr);
+
+
+my $cohort_position = $cohorts{$chohort};
+
+
+if(!defined($cohort_position)) {
+    print "\nerror: Wrong cohort name, \"$chohort\" is not an available cohort.
+Available cohorts are ";
+    foreach my $coh(keys %cohorts) {
+        print "\"".$coh."\", ";
+    }
+    print "\n\n";
+    exit;
+}
+
+my $mlinfo = $mlinfos[$cohort_position];
+my $mldose = $mldoses[$cohort_position];
+my $mlprob = $mlprobs[$cohort_position];
+my $legend = $legends[$cohort_position];
+
+
+my $passed = 0;
+my $prog;
+for (my $i=0; $i<@method; $i++) {
+    if ($ARGV[2] eq $method[$i]) {
+        $passed = 1;
+        $prog = $anprog[$i];
+    }
+}
+die "error: Wrong method. method has to be one of: @method\n" if (!$passed);
+
+
+my $phename = $ARGV[5];
+# By default the output file prefix is the same as the name of the
+# phenotype file (minus the .PHE extension and any paths)
+use File::Basename;
+my $outfile_prefix = basename($phename);
+my $keys="";
+for (my $i=6; $i<@ARGV; $i++) {
+    if ($ARGV[$i] eq "-o")
+    {
+        # Apparently the user wants to change the output file name
+        $outfile_prefix = $ARGV[$i+1];
+
+        # Skip the next argument (supposedly the addition to the
+        # output file name).
+        $i++;
+    }
+    else
+    {
+        $keys = $keys.$ARGV[$i]." ";
+    }
+}
+chop($keys);
+
+my $model_option_num = 0;
+my $mldose_prob;
+if($model eq "--additive") {
+    $mldose_prob = $mldose;
+    $model_option_num = 1;
+} elsif($model eq "--allmodels") {
+    $mldose_prob = $mlprob;
+    $model_option_num = 2;
+} else {
+    die "error: Wrong key for model. You can use \"--additive\" or \"--allmodels\" only\n";
+}
+
+
+#==========================================================================
+# Start the analysis now that the input has been validated
+print "Start...\n";
+
+my $chr = $startchr;
+my $hadhead=0;
+my $head;
+my $mlinfo_arg;
+my $mldose_arg;
+my $legend_arg;
+my $outfile_arg;
+
+# Separate command for the sex chromosomes.
+if ($chr eq "X" || $chr eq "Y") {
+    $mlinfo_arg = $mlinfo;
+    $mlinfo_arg =~ s/$chr_replacement/$chr/g;
+
+    $mldose_arg = $mldose_prob;
+    $mldose_arg =~ s/$chr_replacement/$chr/g;
+
+    $legend_arg = $legend;
+    $legend_arg =~ s/$chr_replacement/$chr/g;
+
+    if($hadhead==0) {
+        $head="";
+        $hadhead=1;
+    } else {
+        my $head="--no-head";
+    }
+
+    system "$prog -p $phename.PHE --ngpreds $model_option_num -i $mlinfo_arg -d $mldose_arg -m $legend_arg --chrom $chr -o $outfile_prefix $head $keys";
+
+    exit;
+}
+
+# Clean up any existing output files
+if($model_option_num==2)
+{
+    system "rm ${outfile_prefix}${_2df_file_postfix} 2>/dev/null";
+    system "rm ${outfile_prefix}${_add_file_postfix} 2>/dev/null";
+    system "rm ${outfile_prefix}${_domin_file_postfix} 2>/dev/null";
+    system "rm ${outfile_prefix}${_recess_file_postfix} 2>/dev/null";
+    system "rm ${outfile_prefix}${_over_domin_file_postfix} 2>/dev/null";
+} else {
+    system "rm ${outfile_prefix}${_add_file_postfix} 2>/dev/null";
+}
+
+# Commands for the autosomes
+for($chr=$startchr; $chr<=$endchr; $chr++) {
+
+    my $nrchunks = 0;
+    # Find out the number of chunks for the current chromosome
+    my $infofiles = $mlinfo;
+    $infofiles =~ s/$chr_replacement/$chr/g;
+    $infofiles =~ s/$chunk_replacement/*/g;
+    $nrchunks = `ls $infofiles 2>/dev/null | wc -l`;
+    if ($nrchunks==0) {
+        # If no chunked info files exist the 'wc -l' command returns 0
+        # so that actually means 1 chunk containing all data.
+        $nrchunks = 1;
+    }
+    print "Nr. of chunks: $nrchunks";
+
+    # Loop over all chunks
+    for (my $chunk=1; $chunk <= $nrchunks; $chunk++)
+    {
+        if($hadhead==0) {
+            $head="";
+            $hadhead=1;
+        } else {
+            $head="--no-head";
+        }
+        $mlinfo_arg = $mlinfo;
+        $mlinfo_arg =~ s/$chr_replacement/$chr/g;
+        $mlinfo_arg =~ s/$chunk_replacement/$chunk/g;
+
+        $mldose_arg = $mldose_prob;
+        $mldose_arg =~ s/$chr_replacement/$chr/g;
+        $mldose_arg =~ s/$chunk_replacement/$chunk/g;
+
+        $legend_arg = $legend;
+        $legend_arg =~ s/$chr_replacement/$chr/g;
+        $legend_arg =~ s/$chunk_replacement/$chunk/g;
+
+        $outfile_arg = "$outfile_prefix.chunk${chunk}.chr${chr}";
+
+        my $command = "$prog -p $phename.PHE --ngpreds $model_option_num ";
+        $command = $command . "-i $mlinfo_arg -d $mldose_arg -m $legend_arg";
+        $command = $command . " --chrom $chr";
+        $command = $command . " -o $outfile_arg ";
+        $command = $command . "$head $keys";
+        print "$command \n";
+        system $command;
+
+        # Combine the output data for all chunks of this chromosome
+        if($model_option_num==2)
+        {
+            `cat $outfile_arg$_2df_file_postfix >> ${outfile_prefix}.${chr}${_2df_file_postfix}`;
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/genabel -r 1492


More information about the Genabel-commits mailing list