[Genabel-commits] r1080 - in branches/ProbABEL-pacox/v.0.3.0/ProbABEL: examples src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Jan 2 17:03:12 CET 2013
Author: lckarssen
Date: 2013-01-02 17:03:12 +0100 (Wed, 02 Jan 2013)
New Revision: 1080
Modified:
branches/ProbABEL-pacox/v.0.3.0/ProbABEL/examples/example_cox.sh
branches/ProbABEL-pacox/v.0.3.0/ProbABEL/src/coxph_data.cpp
branches/ProbABEL-pacox/v.0.3.0/ProbABEL/src/coxph_data.h
branches/ProbABEL-pacox/v.0.3.0/ProbABEL/src/eigen_mematrix.cpp
Log:
ProbABEL-pacox: First commit towards a fix of the pacoxph module in the refactored code of ProbABEL.
This version runs OK for dose files. Also, there is no more difference between data read from a text file (dose of prob) and the same data in filevector format.
The problem that remains is that the results from dose and prob input files don't match (even though the code that fixed this in v.0.1-3 has already been ported to this code base).
- ProbABEL/src/coxph_data.h: Minor code layout changes
- ProbABEL/src/eigen_mematrix.cpp: This was a bug introduced when implementing the Eigen library.
- ProbABEL/src/coxph_data.cpp: Various code layout changes, including removal of old (commented) print statements and the addition of various {} in if statements and for loops. The actual fix is in lines 257--262! Thanks to the checks in Eigen I found out that the data in these vectors was written to column 1 instead of 0. Our own checks in mematrix.cpp also contained an off-by-one error, which was why they never triggered (I already commited a fix for that in r.1056)
- ProbABEL/examples/example_cox.sh: The Cox examples/checks can now be run again. Now that the code is fixed (mostly), the Cox example doesn't take forever to run anymore. Because the dose vs. prob check still fails I'm not enabling this check in Makefile.am yet. We should consider moving these example_*.sh files to the tests directory since by now they are more consistency checks than illustrative examples...
Modified: branches/ProbABEL-pacox/v.0.3.0/ProbABEL/examples/example_cox.sh
===================================================================
--- branches/ProbABEL-pacox/v.0.3.0/ProbABEL/examples/example_cox.sh 2013-01-02 10:48:06 UTC (rev 1079)
+++ branches/ProbABEL-pacox/v.0.3.0/ProbABEL/examples/example_cox.sh 2013-01-02 16:03:12 UTC (rev 1080)
@@ -1,4 +1,22 @@
-echo "analysing Cox model"
+#!/bin/sh
+# This script runs checks on ProbABEL's pacoxph module
+
+run_diff()
+{
+ # This function is run after each check. It needs two arguments:
+ # $1: first file to compare
+ # $2: second file to compare
+ if diff "$1" "$2"; then
+ echo -e "\t\tOK"
+ else
+ echo -e "\t\tFAILED"
+// exit 1
+ fi
+}
+
+
+# ---- The checks start here ----
+echo "Analysing Cox model..."
if [ -z ${srcdir} ]; then
srcdir="."
fi
@@ -11,26 +29,39 @@
-c 19 \
-o coxph
-# ../src/pacoxph \
-# -p ${srcdir}/coxph_data.txt \
-# -d ${srcdir}/test.dose.fvi \
-# -i ${srcdir}/test.mlinfo \
-# -m ${srcdir}/test.map \
-# -c 19 \
-# -o coxph_fv
+../src/pacoxph \
+ -p ${srcdir}/coxph_data.txt \
+ -d ${srcdir}/test.dose.fvi \
+ -i ${srcdir}/test.mlinfo \
+ -m ${srcdir}/test.map \
+ -c 19 \
+ -o coxph_fv
-# echo "pacoxph check: dose vs. dose_fv"
-# diff coxph_add.out.txt coxph_fv_add.out.txt
+echo -n "pacoxph check: dose vs. dose_fv"
+run_diff coxph_add.out.txt coxph_fv_add.out.txt
-# ../src/pacoxph \
-# -p ${srcdir}/coxph_data.txt \
-# -d ${srcdir}/test.mlprob \
-# -i ${srcdir}/test.mlinfo \
-# -m ${srcdir}/test.map \
-# --ngpreds=2 \
-# -c 19 \
-# -o coxph_prob
+../src/pacoxph \
+ -p ${srcdir}/coxph_data.txt \
+ -d ${srcdir}/test.mlprob \
+ -i ${srcdir}/test.mlinfo \
+ -m ${srcdir}/test.map \
+ --ngpreds=2 \
+ -c 19 \
+ -o coxph_prob
-# echo "pacoxph check: dose vs. prob"
-# diff coxph_add.out.txt coxph_prob_add.out.txt
+echo -n "pacoxph check: dose vs. prob"
+run_diff coxph_add.out.txt coxph_prob_add.out.txt
+
+
+../src/pacoxph \
+ -p ${srcdir}/coxph_data.txt \
+ -d ${srcdir}/test.prob.fvi \
+ -i ${srcdir}/test.mlinfo \
+ -m ${srcdir}/test.map \
+ --ngpreds=2 \
+ -c 19 \
+ -o coxph_fv_prob
+
+echo -n "pacoxph check: prob vs. prob_fv"
+run_diff coxph_prob_add.out.txt coxph_fv_prob_add.out.txt
Modified: branches/ProbABEL-pacox/v.0.3.0/ProbABEL/src/coxph_data.cpp
===================================================================
--- branches/ProbABEL-pacox/v.0.3.0/ProbABEL/src/coxph_data.cpp 2013-01-02 10:48:06 UTC (rev 1079)
+++ branches/ProbABEL-pacox/v.0.3.0/ProbABEL/src/coxph_data.cpp 2013-01-02 16:03:12 UTC (rev 1080)
@@ -46,18 +46,21 @@
coxph_data::coxph_data(const coxph_data &obj) : sstat(obj.sstat)
{
- nids = obj.nids;
- ncov = obj.ncov;
- ngpreds = obj.ngpreds;
- weights = obj.weights;
- stime = obj.stime;
- offset = obj.offset;
- strata = obj.strata;
- X = obj.X;
- order = obj.order;
+ nids = obj.nids;
+ ncov = obj.ncov;
+ ngpreds = obj.ngpreds;
+ weights = obj.weights;
+ stime = obj.stime;
+ offset = obj.offset;
+ strata = obj.strata;
+ X = obj.X;
+ order = obj.order;
masked_data = new unsigned short int[nids];
+
for (int i = 0; i < nids; i++)
+ {
masked_data[i] = 0;
+ }
}
coxph_data::coxph_data(phedata &phed, gendata &gend, int snpnum)
@@ -80,7 +83,6 @@
exit(1);
}
- // X.reinit(nids,(ncov+1));
X.reinit(nids, ncov);
stime.reinit(nids, 1);
sstat.reinit(nids, 1);
@@ -103,10 +105,13 @@
}
for (int j = 0; j < phed.ncov; j++)
+ {
for (int i = 0; i < nids; i++)
X.put((phed.X).get(i, j), i, j);
+ }
if (snpnum > 0)
+ {
for (int j = 0; j < ngpreds; j++)
{
float snpdata[nids];
@@ -114,17 +119,15 @@
for (int i = 0; i < nids; i++)
X.put(snpdata[i], i, (ncov - ngpreds + j));
}
+ }
- // for (int i=0;i<nids;i++)
- // for (int j=0;j<ngpreds;j++)
- // X.put((gend.G).get(i,(snpnum*ngpreds+j)),i,(ncov-ngpreds+j));
-
for (int i = 0; i < nids; i++)
{
weights[i] = 1.0;
offset[i] = 0.0;
strata[i] = 0;
}
+
// sort by time
double tmptime[nids];
int passed_sorted[nids];
@@ -134,12 +137,16 @@
tmptime[i] = stime[i];
passed_sorted[i] = 0;
}
+
qsort(tmptime, nids, sizeof(double), cmpfun);
+
for (int i = 0; i < nids; i++)
{
int passed = 0;
for (int j = 0; j < nids; j++)
+ {
if (tmptime[j] == stime[i])
+ {
if (!passed_sorted[j])
{
order[i] = j;
@@ -147,25 +154,29 @@
passed = 1;
break;
}
+ }
+ }
if (passed != 1)
{
std::cerr << "cannot recover element " << i << "\n";
exit(1);
}
}
- stime = reorder(stime, order);
- sstat = reorder(sstat, order);
+ stime = reorder(stime, order);
+ sstat = reorder(sstat, order);
weights = reorder(weights, order);
- strata = reorder(strata, order);
- offset = reorder(offset, order);
- X = reorder(X, order);
- X = transpose(X);
- // X.print();
- // offset.print();
- // weights.print();
- // stime.print();
- // sstat.print();
+ strata = reorder(strata, order);
+ offset = reorder(offset, order);
+ X = reorder(X, order);
+ X = transpose(X);
+
+ // X.print();
+ // offset.print();
+ // weights.print();
+ // stime.print();
+ // sstat.print();
}
+
void coxph_data::update_snp(gendata &gend, int snpnum)
{
/**
@@ -186,7 +197,9 @@
{
float snpdata[nids];
for (int i = 0; i < nids; i++)
+ {
masked_data[i] = 0;
+ }
gend.get_var(snpnum * ngpreds + j, snpdata);
@@ -213,22 +226,21 @@
coxph_data coxph_data::get_unmasked_data()
{
-// std::cout << " !!! in get_unmasked_data !!! ";
coxph_data to; // = coxph_data(*this);
+
// filter missing data
-
int nmeasured = 0;
for (int i = 0; i < nids; i++)
+ {
if (masked_data[i] == 0)
+ {
nmeasured++;
+ }
+ }
to.nids = nmeasured;
-// std::cout << "nmeasured=" << nmeasured << "\n";
to.ncov = ncov;
-// std::cout << "ncov=" << ncov << "\n";
to.ngpreds = ngpreds;
int dim1X = X.nrow;
-// std::cout << "X.ncol=" << X.ncol << "\n";
-// std::cout << "X.nrow=" << X.nrow << "\n";
(to.weights).reinit(to.nids, 1);
(to.stime).reinit(to.nids, 1);
(to.sstat).reinit(to.nids, 1);
@@ -237,34 +249,32 @@
(to.order).reinit(to.nids, 1);
(to.X).reinit(dim1X, to.nids);
-// std::cout << "(to.X).ncol=" << (to.X).ncol << "\n";
-// std::cout << "(to.X).nrow=" << (to.X).nrow << "\n";
-// std::cout << " !!! just before cycle !!! ";
int j = 0;
for (int i = 0; i < nids; i++)
{
-// std::cout << nids << " " << i << " " << masked_data[i] << "\n";
if (masked_data[i] == 0)
{
- (to.weights).put(weights.get(i, 1), j, 1);
- (to.stime).put(stime.get(i, 1), j, 1);
- (to.sstat).put(sstat.get(i, 1), j, 1);
- (to.offset).put(offset.get(i, 1), j, 1);
- (to.strata).put(strata.get(i, 1), j, 1);
- (to.order).put(order.get(i, 1), j, 1);
+ (to.weights).put(weights.get(i, 0), j, 0);
+ (to.stime).put(stime.get(i, 0), j, 0);
+ (to.sstat).put(sstat.get(i, 0), j, 0);
+ (to.offset).put(offset.get(i, 0), j, 0);
+ (to.strata).put(strata.get(i, 0), j, 0);
+ (to.order).put(order.get(i, 0), j, 0);
for (int nc = 0; nc < dim1X; nc++)
+ {
(to.X).put(X.get(nc, i), nc, j);
+ }
j++;
}
}
-// std::cout << " !!! just after cycle !!! ";
//delete [] to.masked_data;
to.masked_data = new unsigned short int[to.nids];
for (int i = 0; i < to.nids; i++)
+ {
to.masked_data[i] = 0;
- // std::cout << "get_unmasked: " << to.nids << " "
- // << dim2X << " " << dim2Y << "\n";
+ }
+
return (to);
}
@@ -287,7 +297,7 @@
coxph_data cdata = cdatain.get_unmasked_data();
mematrix<double> X = t_apply_model(cdata.X, model, interaction, ngpreds,
iscox, nullmodel);
- // X.print();
+
int length_beta = X.nrow;
beta.reinit(length_beta, 1);
sebeta.reinit(length_beta, 1);
@@ -307,19 +317,29 @@
int flag;
double sctest = 1.0;
- //TODO(maarten): this function works only in combination with eigen
- // remove .data() from arguments to make is available under old matrix
+ // When using Eigen coxfit2 needs to be called in a slightly
+ // different way (i.e. the .data()-part needs to be added).
+#if EIGEN
coxfit2(&maxiter, &cdata.nids, &X.nrow, cdata.stime.data.data(),
cdata.sstat.data.data(), X.data.data(), newoffset.data.data(),
cdata.weights.data.data(), cdata.strata.data.data(),
means.data.data(), beta.data.data(), u.data.data(),
imat.data.data(), loglik_int, &flag, work, &eps, &tol_chol,
&sctest);
+#else
+ coxfit2(&maxiter, &cdata.nids, &X.nrow, cdata.stime.data,
+ cdata.sstat.data, X.data, newoffset.data,
+ cdata.weights.data, cdata.strata.data,
+ means.data, beta.data, u.data,
+ imat.data, loglik_int, &flag, work, &eps, &tol_chol,
+ &sctest);
+#endif
for (int i = 0; i < X.nrow; i++)
{
sebeta[i] = sqrt(imat.get(i, i));
}
+
loglik = loglik_int[1];
niter = maxiter;
}
Modified: branches/ProbABEL-pacox/v.0.3.0/ProbABEL/src/coxph_data.h
===================================================================
--- branches/ProbABEL-pacox/v.0.3.0/ProbABEL/src/coxph_data.h 2013-01-02 10:48:06 UTC (rev 1079)
+++ branches/ProbABEL-pacox/v.0.3.0/ProbABEL/src/coxph_data.h 2013-01-02 16:03:12 UTC (rev 1080)
@@ -23,6 +23,7 @@
class coxph_data {
public:
coxph_data get_unmasked_data();
+
coxph_data()
{
nids = 0;
@@ -30,6 +31,7 @@
ngpreds = 0;
masked_data = NULL;
}
+
coxph_data(const coxph_data &obj);
coxph_data(phedata &phed, gendata &gend, int snpnum);
void update_snp(gendata &gend, int snpnum);
@@ -40,11 +42,11 @@
int ngpreds;
mematrix<double> weights;
mematrix<double> stime;
- mematrix<int> sstat;
+ mematrix<int> sstat;
mematrix<double> offset;
- mematrix<int> strata;
+ mematrix<int> strata;
mematrix<double> X;
- mematrix<int> order;
+ mematrix<int> order;
unsigned short int * masked_data;
};
Modified: branches/ProbABEL-pacox/v.0.3.0/ProbABEL/src/eigen_mematrix.cpp
===================================================================
--- branches/ProbABEL-pacox/v.0.3.0/ProbABEL/src/eigen_mematrix.cpp 2013-01-02 10:48:06 UTC (rev 1079)
+++ branches/ProbABEL-pacox/v.0.3.0/ProbABEL/src/eigen_mematrix.cpp 2013-01-02 16:03:12 UTC (rev 1080)
@@ -311,7 +311,7 @@
for (int i = 0; i < temp.nrow; i++)
{
source = order.data(i, 0);
- temp.data.row(i) = M.data.row(source);
+ temp.data.row(source) = M.data.row(i);
}
return temp;
}
More information about the Genabel-commits
mailing list