[Genabel-commits] r868 - branches/ProbABEL-refactoring/ProbABEL/src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Mar 29 00:38:42 CEST 2012
Author: maartenk
Date: 2012-03-29 00:38:42 +0200 (Thu, 29 Mar 2012)
New Revision: 868
Added:
branches/ProbABEL-refactoring/ProbABEL/src/data.cpp
branches/ProbABEL-refactoring/ProbABEL/src/gendata.cpp
branches/ProbABEL-refactoring/ProbABEL/src/gendata.h
branches/ProbABEL-refactoring/ProbABEL/src/phedata.cpp
branches/ProbABEL-refactoring/ProbABEL/src/phedata.h
Log:
split data.h into smaller files (part1)
Added: branches/ProbABEL-refactoring/ProbABEL/src/data.cpp
===================================================================
--- branches/ProbABEL-refactoring/ProbABEL/src/data.cpp (rev 0)
+++ branches/ProbABEL-refactoring/ProbABEL/src/data.cpp 2012-03-28 22:38:42 UTC (rev 868)
@@ -0,0 +1,509 @@
+#include <string>
+#include <sstream>
+#include <fstream>
+
+
+
+#include "fvlib/AbstractMatrix.h"
+#include "fvlib/CastUtils.h"
+#include "fvlib/const.h"
+#include "fvlib/convert_util.h"
+#include "fvlib/FileVector.h"
+#include "fvlib/frutil.h"
+#include "fvlib/frversion.h"
+#include "fvlib/Logger.h"
+#include "fvlib/Transposer.h"
+#include "phedata.h"
+#include "gendata.h"
+#include "data.h"
+
+#include "mematrix.h"
+#include "mematri1.h"
+#include "utilities.h"
+using namespace std;
+
+extern bool is_interaction_excluded;
+
+
+unsigned int Nmeasured(char * fname, int nphenocols, int npeople) {
+ int ncov = nphenocols - 2;
+ int nids_all = npeople;
+
+ FILE * infile;
+ // first pass -- find unmeasured people
+ if ((infile = fopen(fname, "r")) == NULL) {
+ fprintf(stderr, "Nmeasured: cannot open file %s\n", fname);
+ }
+ char tmp[100];
+
+ for (int i = 0; i < nphenocols; i++) {
+ fscanf(infile, "%s", tmp);
+ // printf("%s ",tmp);
+ } //printf("\n");
+
+ unsigned short int * allmeasured = new unsigned short int[npeople];
+ int nids = 0;
+ for (int i = 0; i < npeople; i++) {
+ allmeasured[i] = 1;
+ fscanf(infile, "%s", tmp);
+ for (int j = 1; j < nphenocols; j++) {
+ fscanf(infile, "%s", tmp);
+ if (tmp[0] == 'N' || tmp[0] == 'n')
+ allmeasured[i] = 0;
+ }
+ if (allmeasured[i] == 1)
+ nids++;
+ }
+ fclose(infile);
+ return (nids);
+}
+
+regdata::regdata(const regdata &obj) {
+ nids = obj.nids;
+ ncov = obj.ncov;
+ ngpreds = obj.ngpreds;
+ noutcomes = obj.noutcomes;
+ X = obj.X;
+ Y = obj.Y;
+ masked_data = new unsigned short int[nids];
+ for (int i = 0; i < nids; i++) {
+ masked_data[i] = 0;
+ }
+}
+regdata::regdata(phedata &phed, gendata &gend, int snpnum) {
+ nids = gend.nids;
+ masked_data = new unsigned short int[nids];
+ for (int i = 0; i < nids; i++) {
+ masked_data[i] = 0;
+ }
+
+ ngpreds = gend.ngpreds;
+ if (snpnum >= 0) {
+ ncov = phed.ncov + ngpreds;
+ } else {
+ ncov = phed.ncov;
+ }
+ noutcomes = phed.noutcomes;
+ X.reinit(nids, (ncov + 1));
+ Y.reinit(nids, noutcomes);
+ for (int i = 0; i < nids; i++) {
+ X.put(1., i, 0);
+ Y.put((phed.Y).get(i, 0), i, 0);
+ }
+ for (int j = 1; j <= phed.ncov; j++)
+ for (int i = 0; i < nids; i++)
+ X.put((phed.X).get(i, j - 1), i, j);
+ if (snpnum > 0)
+ for (int j = 0; j < ngpreds; j++) {
+ float snpdata[nids];
+ gend.get_var(snpnum * ngpreds + j, snpdata);
+ for (int i = 0; i < nids; i++)
+ X.put(snpdata[i], i, (ncov - ngpreds + 1 + j));
+ }
+ // for (int i=0;i<nids;i++)
+ // for (int j=0;j<ngpreds;j++)
+ // X.put((gend.G).get(i,(snpnum*ngpreds+j)),i,(ncov-ngpreds+1+j));
+}
+void regdata::update_snp(gendata &gend, int snpnum) {
+ for (int j = 0; j < ngpreds; j++) {
+ float snpdata[nids];
+ for (int i = 0; i < nids; i++)
+ masked_data[i] = 0;
+ gend.get_var(snpnum * ngpreds + j, snpdata);
+ for (int i = 0; i < nids; i++) {
+ X.put(snpdata[i], i, (ncov + 1 - j - 1));
+ if (isnan(snpdata[i]))
+ masked_data[i] = 1;
+ }
+ }
+}
+regdata::~regdata() {
+ delete[] regdata::masked_data;
+ // delete X;
+ // delete Y;
+}
+
+regdata regdata::get_unmasked_data() {
+ regdata to; // = regdata(*this);
+ int nmeasured = 0;
+ for (int i = 0; i < nids; i++)
+ if (masked_data[i] == 0)
+ nmeasured++;
+ to.nids = nmeasured;
+ //cout << to.nids << " in get_unmasked_data\n";
+ to.ncov = ncov;
+ to.ngpreds = ngpreds;
+ to.noutcomes = noutcomes;
+ int dim2Y = Y.ncol;
+ int dim2X = X.ncol;
+ (to.X).reinit(to.nids, dim2X);
+ (to.Y).reinit(to.nids, dim2Y);
+
+ int j = 0;
+ for (int i = 0; i < nids; i++) {
+ if (masked_data[i] == 0) {
+ for (int nc = 0; nc < dim2X; nc++)
+ (to.X).put(X.get(i, nc), j, nc);
+ for (int nc = 0; nc < dim2Y; nc++)
+ (to.Y).put(Y.get(i, nc), j, nc);
+ j++;
+ }
+ }
+
+ //delete [] to.masked_data;
+ to.masked_data = new unsigned short int[to.nids];
+ for (int i = 0; i < to.nids; i++)
+ to.masked_data[i] = 0;
+ //fprintf(stdout,"get_unmasked: %i %i %i\n",to.nids,dim2X,dim2Y);
+ return (to);
+}
+
+mematrix<double> regdata::extract_genotypes(void) {
+ mematrix<double> out;
+ out.reinit(X.nrow, ngpreds);
+ for (int i = 0; i < X.nrow; i++)
+ for (int j = 0; j < ngpreds; j++)
+ out[i * ngpreds + j] = X.get(i, (ncov - ngpreds + 1 + j));
+ return out;
+}
+
+// compare for sort of times
+int cmpfun(const void *a, const void *b) {
+ double el1 = *(double*) a;
+ double el2 = *(double*) b;
+ int returnvalue;
+ if (el1 > el2)
+ returnvalue = 1;
+ if (el1 < el2)
+ returnvalue = -1;
+ if (el1 == el2)
+ returnvalue = 0;
+ return returnvalue;
+}
+
+coxph_data::coxph_data(const coxph_data &obj) {
+ nids = obj.nids;
+ ncov = obj.ncov;
+ ngpreds = obj.ngpreds;
+ weights = obj.weights;
+ stime = obj.stime;
+ sstat = obj.sstat;
+ offset = obj.offset;
+ strata = obj.strata;
+ X = obj.X;
+ order = obj.order;
+ masked_data = new unsigned short int[nids];
+ for (int i = 0; i < nids; i++)
+ masked_data[i] = 0;
+}
+coxph_data::coxph_data(phedata &phed, gendata &gend, int snpnum) {
+ nids = gend.nids;
+ masked_data = new unsigned short int[nids];
+ for (int i = 0; i < nids; i++)
+ masked_data[i] = 0;
+ ngpreds = gend.ngpreds;
+ if (snpnum >= 0)
+ ncov = phed.ncov + ngpreds;
+ else
+ ncov = phed.ncov;
+ if (phed.noutcomes != 2) {
+ fprintf(stderr,
+ "coxph_data: number of outcomes should be 2 (now: %d)\n",
+ phed.noutcomes);
+ exit(1);
+ }
+ // X.reinit(nids,(ncov+1));
+ X.reinit(nids, ncov);
+ stime.reinit(nids, 1);
+ sstat.reinit(nids, 1);
+ weights.reinit(nids, 1);
+ offset.reinit(nids, 1);
+ strata.reinit(nids, 1);
+ order.reinit(nids, 1);
+ for (int i = 0; i < nids; i++) {
+ // X.put(1.,i,0);
+ stime[i] = (phed.Y).get(i, 0);
+ sstat[i] = int((phed.Y).get(i, 1));
+ if (sstat[i] != 1 & sstat[i] != 0) {
+ fprintf(stderr,"coxph_data: status not 0/1 (right order: id, fuptime, status ...)\n", phed.noutcomes);
+ exit(1);
+ }
+ }
+ for (int j = 0; j < phed.ncov; j++)
+ for (int i = 0; i < nids; i++)
+ X.put((phed.X).get(i, j), i, j);
+
+ if (snpnum > 0)
+ for (int j = 0; j < ngpreds; j++) {
+ float snpdata[nids];
+ gend.get_var(snpnum * ngpreds + j, snpdata);
+ for (int i = 0; i < nids; i++)
+ X.put(snpdata[i], i, (ncov - ngpreds + j));
+ }
+
+ // for (int i=0;i<nids;i++)
+ // for (int j=0;j<ngpreds;j++)
+ // X.put((gend.G).get(i,(snpnum*ngpreds+j)),i,(ncov-ngpreds+j));
+
+ for (int i = 0; i < nids; i++) {
+ weights[i] = 1.0;
+ offset[i] = 0.0;
+ strata[i] = 0;
+ }
+ // sort by time
+ double tmptime[nids];
+ int passed_sorted[nids];
+ for (int i = 0; i < nids; i++) {
+ tmptime[i] = stime[i];
+ passed_sorted[i] = 0;
+ }
+ qsort(tmptime, nids, sizeof(double), cmpfun);
+ for (int i = 0; i < nids; i++) {
+ int passed = 0;
+ for (int j = 0; j < nids; j++)
+ if (tmptime[j] == stime[i])
+ if (!passed_sorted[j]) {
+ order[i] = j;
+ passed_sorted[j] = 1;
+ passed = 1;
+ break;
+ }
+ if (passed != 1) {
+ fprintf(stderr, "cannot recover element %d\n", i);
+ exit(1);
+ }
+ }
+ stime = reorder(stime, order);
+ sstat = reorder(sstat, order);
+ weights = reorder(weights, order);
+ strata = reorder(strata, order);
+ offset = reorder(offset, order);
+ X = reorder(X, order);
+ X = transpose(X);
+ // X.print();
+ // offset.print();
+ // weights.print();
+ // stime.print();
+ // sstat.print();
+}
+void coxph_data::update_snp(gendata &gend, int snpnum) {
+ // note this sorts by "order"!!!
+
+ for (int j = 0; j < ngpreds; j++) {
+ float snpdata[nids];
+ for (int i = 0; i < nids; i++)
+ masked_data[i] = 0;
+ gend.get_var(snpnum * ngpreds + j, snpdata);
+ for (int i = 0; i < nids; i++) {
+ X.put(snpdata[i], (ncov - ngpreds + j), order[i]);
+ if (isnan(snpdata[i]))
+ masked_data[order[i]] = 1;
+ }
+ }
+ // for (int i=0;i<nids;i++)
+ // for (int j=0;j<ngpreds;j++)
+ // X.put((gend.G).get(i,(snpnum*ngpreds+j)),(ncov-ngpreds+j),order[i]);
+}
+coxph_data::~coxph_data() {
+ delete[] coxph_data::masked_data;
+ // delete X;
+ // delete sstat;
+ // delete stime;
+ // delete weights;
+ // delete offset;
+ // delete strata;
+ // delete order;
+}
+
+coxph_data coxph_data::get_unmasked_data() {
+// std::cout << " !!! in get_unmasked_data !!! ";
+ coxph_data to; // = coxph_data(*this);
+ // filter missing data
+
+ int nmeasured = 0;
+ for (int i = 0; i < nids; i++)
+ if (masked_data[i] == 0)
+ nmeasured++;
+ to.nids = nmeasured;
+// std::cout << "nmeasured=" << nmeasured << "\n";
+ to.ncov = ncov;
+// std::cout << "ncov=" << ncov << "\n";
+ to.ngpreds = ngpreds;
+ int dim1X = X.nrow;
+// std::cout << "X.ncol=" << X.ncol << "\n";
+// std::cout << "X.nrow=" << X.nrow << "\n";
+ (to.weights).reinit(to.nids, 1);
+ (to.stime).reinit(to.nids, 1);
+ (to.sstat).reinit(to.nids, 1);
+ (to.offset).reinit(to.nids, 1);
+ (to.strata).reinit(to.nids, 1);
+ (to.order).reinit(to.nids, 1);
+ (to.X).reinit(dim1X, to.nids);
+
+// std::cout << "(to.X).ncol=" << (to.X).ncol << "\n";
+// std::cout << "(to.X).nrow=" << (to.X).nrow << "\n";
+// std::cout << " !!! just before cycle !!! ";
+ int j = 0;
+ for (int i = 0; i < nids; i++) {
+// std::cout << nids << " " << i << " " << masked_data[i] << "\n";
+ if (masked_data[i] == 0) {
+ (to.weights).put(weights.get(i, 1), j, 1);
+ (to.stime).put(stime.get(i, 1), j, 1);
+ (to.sstat).put(sstat.get(i, 1), j, 1);
+ (to.offset).put(offset.get(i, 1), j, 1);
+ (to.strata).put(strata.get(i, 1), j, 1);
+ (to.order).put(order.get(i, 1), j, 1);
+ for (int nc = 0; nc < dim1X; nc++)
+ (to.X).put(X.get(nc, i), nc, j);
+ j++;
+ }
+ }
+// std::cout << " !!! just after cycle !!! ";
+
+ //delete [] to.masked_data;
+ to.masked_data = new unsigned short int[to.nids];
+ for (int i = 0; i < to.nids; i++)
+ to.masked_data[i] = 0;
+ //fprintf(stdout,"get_unmasked: %i %i %i\n",to.nids,dim2X,dim2Y);
+ return (to);
+}
+
+mlinfo::mlinfo(char * filename, char * mapname) {
+ FILE * infile = fopen(filename, "r");
+ if (infile == NULL) {
+ fprintf(stderr, "mlinfo: cannot open file %s", filename);
+ exit(1);
+ }
+ char tmp[100];
+ unsigned int nlin = 0;
+ while (fscanf(infile, "%s", tmp) != EOF) {
+ nlin++;
+ }
+ fclose(infile);
+ if (nlin % 7) {
+ fprintf(stderr, "mlinfo: number of columns != 7 in %s", filename);
+ exit(1);
+ }
+ nsnps = int(nlin / 7) - 1;
+ printf("Number of SNPs = %d\n", nsnps);
+ name = new std::string[nsnps];
+ A1 = new std::string[nsnps];
+ A2 = new std::string[nsnps];
+ Freq1 = new double[nsnps];
+ MAF = new double[nsnps];
+ Quality = new double[nsnps];
+ Rsq = new double[nsnps];
+ map = new std::string[nsnps];
+ if ((infile = fopen(filename, "r")) == NULL) {
+ fprintf(stderr, "mlinfo: cannot open file %s", filename);
+ exit(1);
+ }
+ for (int i = 0; i < 7; i++)
+ fscanf(infile, "%s", tmp);
+ for (int i = 0; i < nsnps; i++) {
+ fscanf(infile, "%s", tmp);
+ name[i] = tmp;
+ fscanf(infile, "%s", tmp);
+ A1[i] = tmp;
+ fscanf(infile, "%s", tmp);
+ A2[i] = tmp;
+ fscanf(infile, "%s", tmp);
+ Freq1[i] = atof(tmp);
+ fscanf(infile, "%s", tmp);
+ MAF[i] = atof(tmp);
+ fscanf(infile, "%s", tmp);
+ Quality[i] = atof(tmp);
+ fscanf(infile, "%s", tmp);
+ Rsq[i] = atof(tmp);
+ map[i] = "-999";
+ }
+ fclose(infile);
+ if (mapname != NULL) {
+ std::ifstream instr(mapname);
+ int BFS = 1000;
+ char line[BFS], tmp[BFS];
+ if (!instr.is_open()) {
+ fprintf(stderr, "mlinfo: cannot open file %s", mapname);
+ exit(1);
+ }
+ instr.getline(line, BFS);
+ for (int i = 0; i < nsnps; i++) {
+ instr.getline(line, BFS);
+ std::stringstream line_stream(line);
+ line_stream >> tmp >> map[i];
+ }
+ instr.close();
+ }
+}
+mlinfo::~mlinfo() {
+ delete[] mlinfo::name;
+ delete[] mlinfo::A1;
+ delete[] mlinfo::A2;
+ delete[] mlinfo::Freq1;
+ delete[] mlinfo::MAF;
+ delete[] mlinfo::Quality;
+ delete[] mlinfo::Rsq;
+ delete[] mlinfo::map;
+}
+
+//_________________________________________Maksim_start
+
+InvSigma::InvSigma(const char * filename_, phedata * phe) {
+ filename = filename_;
+ npeople = phe->nids;
+ std::ifstream myfile(filename_);
+ char * line = new char[MAXIMUM_PEOPLE_AMOUNT];
+ double val;
+ std::string id;
+ unsigned row = 0, col = 0;
+
+ matrix.reinit(npeople, npeople);
+
+ //idnames[k], if (allmeasured[i]==1)
+
+ if (myfile.is_open()) {
+ while (myfile.getline(line, MAXIMUM_PEOPLE_AMOUNT)) {
+
+ std::stringstream line_stream(line);
+ line_stream >> id;
+
+ if (phe->idnames[row] != id) {
+ std::cerr << "error:in row " << row << " id="
+ << phe->idnames[row]
+ << " in inverse variance matrix but id=" << id
+ << " must be there. Wrong inverse variance matrix (only measured id must be there)\n";
+ exit(1);
+ }
+
+ while (line_stream >> val) {
+ matrix.put(val, row, col);
+ col++;
+ }
+
+ if (col != npeople) {
+ fprintf(stderr,
+ "error: inv file: Number of columns in row %d equals to %d but people amount is %d\n",
+ row, col, npeople);
+ myfile.close();
+ exit(1);
+ }
+ col = 0;
+ row++;
+ }
+ myfile.close();
+ } else {
+ fprintf(stderr, "error: inv file: cannot open file '%s'\n", filename_);
+ }
+
+}
+;
+
+InvSigma::~InvSigma() {
+ //af af
+}
+
+mematrix<double> & InvSigma::get_matrix(void) {
+ return matrix;
+}
+
+//________________________________________Maksim_end
Property changes on: branches/ProbABEL-refactoring/ProbABEL/src/data.cpp
___________________________________________________________________
Added: svn:executable
+ *
Added: svn:mime-type
+ text/plain
Added: branches/ProbABEL-refactoring/ProbABEL/src/gendata.cpp
===================================================================
--- branches/ProbABEL-refactoring/ProbABEL/src/gendata.cpp (rev 0)
+++ branches/ProbABEL-refactoring/ProbABEL/src/gendata.cpp 2012-03-28 22:38:42 UTC (rev 868)
@@ -0,0 +1,156 @@
+/*
+ * gendata.cpp
+ *
+ * Created on: Mar 8, 2012
+ * Author: mkooyman
+ */
+#include <string>
+#include "gendata.h"
+#include "fvlib/FileVector.h"
+#include "mematrix.h"
+#include "mematri1.h"
+#include "utilities.h"
+
+
+
+void gendata::get_var(int var, float * data) {
+ if (DAG == NULL)
+ for (int i = 0; i < G.nrow; i++)
+ data[i] = G.get(i, var);
+ else if (DAG != NULL) {
+ float tmpdata[DAG->getNumObservations()];
+ DAG->readVariableAs((unsigned long int) var, tmpdata);
+ unsigned int j = 0;
+ for (unsigned int i = 0; i < DAG->getNumObservations(); i++)
+ if (!DAGmask[i])
+ data[j++] = tmpdata[i];
+ //fprintf(stdout,"%i %i %i\n",j,DAG->get_nobservations(),nids);
+ } else
+ report_error("cannot get gendata");
+}
+
+gendata::gendata() {
+ nsnps = nids = ngpreds = 0;
+}
+
+void gendata::re_gendata(string filename, int insnps, int ingpreds, int npeople,
+ int nmeasured, unsigned short int * allmeasured,
+ std::string * idnames) {
+ nsnps = insnps;
+ ngpreds = ingpreds;
+ DAG = new FileVector(filename, 128, true);
+ DAGmask = new unsigned short int[DAG->getNumObservations()];
+ if (DAG->getNumObservations() != npeople)
+ report_error("dimension of fvf-data and phenotype data do not match\n");
+ if (DAG->getNumVariables() != insnps * ingpreds)
+ report_error("dimension of fvf-data and mlinfo data do not match\n");
+ long int j = -1;
+ for (unsigned int i = 0; i < npeople; i++) {
+ if (allmeasured[i] == 0)
+ DAGmask[i] = 1;
+ else {
+ DAGmask[i] = 0;
+ j++;
+ }
+ string DAGobsname = DAG->readObservationName(i).name;
+
+ if (DAGobsname.find("->") != string::npos)
+ DAGobsname = DAGobsname.substr(DAGobsname.find("->") + 2);
+
+//if (allmeasured[i] && idnames[j] != DAGobsname)
+ // error("names do not match for observation at phenofile line (phe/geno) %i/+1 (%s/%s)\n",
+ // i+1,idnames[i].c_str(),DAGobsname.c_str());
+ // fix thanks to Vadym Pinchuk
+ if (allmeasured[i] && idnames[j] != DAGobsname)
+ report_error(
+ "names do not match for observation at phenofile line(phe/geno) %i/+1 (%s/%s)\n",
+ i + 1, idnames[j].c_str(), DAGobsname.c_str());
+
+ }
+ nids = j + 1;
+ //fprintf(stdout,"in INI: %i %i\n",nids,npeople);
+ if (nids != nmeasured)
+ report_error("nids != mneasured (%i != %i)\n", nids, nmeasured);
+
+}
+void gendata::re_gendata(char * fname, int insnps, int ingpreds, int npeople,
+ int nmeasured, unsigned short int * allmeasured, int skipd,
+ std::string * idnames) {
+ nids = nmeasured;
+ nsnps = insnps;
+ ngpreds = ingpreds;
+ DAG = NULL;
+ // int nids_all = npeople;
+
+ G.reinit(nids, (nsnps * ngpreds));
+
+ FILE * infile;
+
+ if ((infile = fopen(fname, "r")) == NULL) {
+ fprintf(stderr, "gendata: cannot open file %s\n", fname);
+ }
+
+ char tmp[100], tmpn[100];
+ std::string tmpid, tmpstr;
+
+ int k = 0;
+ for (int i = 0; i < npeople; i++)
+ if (allmeasured[i] == 1) {
+ if (skipd > 0) {
+ // int ttt;
+ char ttt[100];
+ fscanf(infile, "%s", tmp);
+ // sscanf(tmp,"%d->%s",&ttt, tmpn);
+ // these changes are thanks to BMM & BP :)
+ // sscanf(tmp,"%s->%s",&ttt, tmpn);
+ // sscanf(tmp,"%[^->]->%[^->]",&ttt, tmpn);
+ tmpstr = tmp;
+ if (tmpstr.find("->") != string::npos) {
+ sscanf(tmp, "%[^->]->%s", ttt, tmpn);
+ tmpid = tmpn;
+ } else {
+ tmpid = tmpstr;
+ //fprintf(stdout,"%s;%s;%s;%s;%s\n",tmp,ttt,tmpn,tmpid.c_str(),idnames[k].c_str());
+ }
+ if (tmpid != idnames[k]) {
+ fprintf(stderr,
+ "phenofile and dosefile did not match at line %d ",
+ i + 2);
+ cerr << "(" << tmpid << " != " << idnames[k] << ")\n";
+ fclose(infile);
+ exit(1);
+ }
+ }
+ for (int j = 1; j < skipd; j++) {
+ fscanf(infile, "%s", tmp);
+ }
+ for (int j = 0; j < (nsnps * ngpreds); j++) {
+ int a = fscanf(infile, "%s", tmp);
+ if (!a || a == EOF) {
+ fprintf(stderr,
+ "cannot read dose-file: check skipd and ngpreds parameters\n");
+ fclose(infile);
+ exit(1);
+ }
+ G.put(atof(tmp), k, j);
+ }
+ k++;
+ } else {
+ for (int j = 0; j < skipd; j++)
+ fscanf(infile, "%s", tmp);
+ for (int j = 0; j < (nsnps * ngpreds); j++)
+ fscanf(infile, "%s", tmp);
+ }
+ fclose(infile);
+}
+// HERE NEED A NEW CONSTRUCTOR BASED ON DATABELBASECPP OBJECT
+gendata::~gendata() {
+
+ if (DAG != NULL) {
+ delete DAG;
+ delete[] DAGmask;
+ }
+
+ // delete G;
+}
+
Property changes on: branches/ProbABEL-refactoring/ProbABEL/src/gendata.cpp
___________________________________________________________________
Added: svn:executable
+ *
Added: svn:mime-type
+ text/plain
Added: branches/ProbABEL-refactoring/ProbABEL/src/gendata.h
===================================================================
--- branches/ProbABEL-refactoring/ProbABEL/src/gendata.h (rev 0)
+++ branches/ProbABEL-refactoring/ProbABEL/src/gendata.h 2012-03-28 22:38:42 UTC (rev 868)
@@ -0,0 +1,44 @@
+/*
+ * gendata.h
+ *
+ * Created on: Mar 8, 2012
+ * Author: mkooyman
+ */
+
+#ifndef GENDATA_H_
+#define GENDATA_H_
+#include <string>
+#include "fvlib/FileVector.h"
+#include "mematrix.h"
+
+
+class gendata
+{
+public:
+ int nsnps;
+ int nids;
+ int ngpreds;
+ gendata();
+ void re_gendata(char * fname, int insnps, int ingpreds,
+ int npeople, int nmeasured,
+ unsigned short int * allmeasured,
+ int skipd,
+ std::string * idnames);
+ void re_gendata(string filename, int insnps, int ingpreds,
+ int npeople, int nmeasured,
+ unsigned short int * allmeasured,
+ std::string * idnames);
+ void get_var(int var, float * data);
+ ~gendata();
+
+ // MAKE THAT PRIVATE, ACCESS THROUGH GET_SNP
+ // ANOTHER PRIVATE OBJECT IS A POINTER TO DATABELBASECPP
+ // UPDATE SNP, ALL REGRESSION METHODS: ACCOUNT FOR MISSING
+private:
+ mematrix <float> G;
+ AbstractMatrix * DAG;
+ unsigned short int * DAGmask;
+ // mematrix<double> G;
+};
+
+#endif /* GENDATA_H_ */
Property changes on: branches/ProbABEL-refactoring/ProbABEL/src/gendata.h
___________________________________________________________________
Added: svn:executable
+ *
Added: svn:mime-type
+ text/plain
Added: branches/ProbABEL-refactoring/ProbABEL/src/phedata.cpp
===================================================================
--- branches/ProbABEL-refactoring/ProbABEL/src/phedata.cpp (rev 0)
+++ branches/ProbABEL-refactoring/ProbABEL/src/phedata.cpp 2012-03-28 22:38:42 UTC (rev 868)
@@ -0,0 +1,195 @@
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <cstdarg>
+#include <cstdlib>
+#include <phedata.h>
+
+
+phedata::phedata(char * fname, int noutc, int npeople, int interaction,
+ bool iscox) {
+ setphedata(fname, noutc, npeople, interaction, iscox);
+}
+
+void phedata::set_is_interaction_excluded(bool int_exl){
+ is_interaction_excluded=int_exl;
+}
+void phedata::setphedata(char * fname, int noutc, int npeople, int interaction,
+ bool iscox) {
+ static const unsigned int BFS = 1000;
+ std::ifstream myfile(fname);
+ char line[BFS];
+ char tmp[100];
+ noutcomes = noutc;
+
+ int nphenocols = 0;
+ int savenpeople = npeople;
+ npeople = 0;
+ if (myfile.is_open()) {
+ myfile.getline(line, BFS);
+ std::stringstream line_stream(line);
+ // std::cout << line << "\n ";
+ while (line_stream >> tmp) {
+
+ nphenocols++;
+ // std::cout << tmp << " " << nphenocols << " ";
+ }
+ while (myfile.getline(line, BFS)) {
+ int tmplins = 0;
+ std::stringstream line_stream(line);
+ while (line_stream >> tmp)
+ tmplins++;
+ if (tmplins != nphenocols) {
+ fprintf(stderr,
+ "phenofile: number of variables different from %d in line %d\n",
+ nphenocols, tmplins);
+ myfile.close();
+ exit(1);
+ }
+ npeople++;
+ };
+ myfile.close();
+ } else {
+ fprintf(stderr, "Unable to open file %s\n", fname);
+ exit(1);
+ }
+ fprintf(stdout, "Actual number of people in phenofile = %d", npeople);
+ if (savenpeople > 0) {
+ npeople = savenpeople;
+ fprintf(stdout, "; using only %d first\n", npeople);
+ } else {
+ fprintf(stdout, "; using all of these\n");
+ }
+
+ ncov = nphenocols - 1 - noutcomes;
+ nids_all = npeople;
+ model_terms = new std::string[ncov + 2];
+
+ FILE * infile;
+ // first pass -- find unmeasured people
+ if ((infile = fopen(fname, "r")) == NULL) {
+ fprintf(stderr, "phedata: cannot open file %s\n", fname);
+ }
+
+ fscanf(infile, "%s", tmp);
+ model = "( ";
+ fscanf(infile, "%s", tmp);
+ model = model + tmp;
+ for (int i = 1; i < noutcomes; i++) {
+ fscanf(infile, "%s", tmp);
+ model = model + " , ";
+ model = model + tmp;
+ }
+ n_model_terms = 0;
+#if COXPH
+ model = model + " ) ~ ";
+#else
+ model = model + " ) ~ mu + ";
+ model_terms[n_model_terms++] = "mu";
+#endif
+
+ if (nphenocols > noutcomes + 1) {
+ fscanf(infile, "%s", tmp);
+ model = model + tmp;
+ model_terms[n_model_terms++] = tmp;
+ for (int i = (2 + noutcomes); i < nphenocols; i++) {
+ fscanf(infile, "%s", tmp);
+
+ // if(iscox && ) {if(n_model_terms+1 == interaction-1) {continue;} }
+ // else {if(n_model_terms+1 == interaction) {continue;} }
+ model = model + " + ";
+ model = model + tmp;
+ model_terms[n_model_terms++] = tmp;
+ }
+ }
+ model = model + " + SNP_A1";
+ if (interaction != 0) {
+ if (iscox) {
+ model = model + " + " + model_terms[interaction - 1] + "*SNP_A1";
+ } else {
+ model = model + " + " + model_terms[interaction] + "*SNP_A1";
+ }
+ }
+ model_terms[n_model_terms++] = "SNP_A1";
+
+ if (is_interaction_excluded) // exclude covariates from covariate names
+ {
+ if (iscox) {
+ std::cout << "model is running without "
+ << model_terms[interaction - 1] << ", term\n";
+ } else {
+ std::cout << "model is running without " << model_terms[interaction]
+ << ", term\n";
+ }
+ }
+
+#if LOGISTIC
+ std::cout << "Logistic ";
+#elif LINEAR
+ std::cout << "Linear ";
+#elif COXPH
+ std::cout << "Coxph ";
+#else
+ std::cout << "Unrecognised ";
+#endif
+ std::cout << "model: " << model << "\n";
+
+ allmeasured = new unsigned short int[npeople];
+ nids = 0;
+ for (int i = 0; i < npeople; i++) {
+ allmeasured[i] = 1;
+ for (int j = 0; j < nphenocols; j++) {
+ fscanf(infile, "%s", tmp);
+ if (j > 0 && (tmp[0] == 'N' || tmp[0] == 'n'))
+ allmeasured[i] = 0;
+ }
+ if (allmeasured[i] == 1)
+ nids++;
+ }
+ fclose(infile);
+ // printf("npeople = %d, no. all measured = %d\n",nids_all,nids);
+
+ // allocate objects
+ int ntmpcov = 1;
+ if (ncov > 0)
+ ntmpcov = ncov;
+ idnames = new std::string[nids];
+ X.reinit(nids, ntmpcov);
+ Y.reinit(nids, noutcomes);
+
+ // second pass -- read the data
+ if ((infile = fopen(fname, "r")) == NULL) {
+ fprintf(stderr, "phedata: cannot open file %s\n", fname);
+ exit(1);
+ }
+
+ for (int i = 0; i < nphenocols; i++) {
+ fscanf(infile, "%s", tmp);
+ }
+
+ int k = 0;
+ int m = 0;
+ for (int i = 0; i < npeople; i++)
+ if (allmeasured[i] == 1) {
+ fscanf(infile, "%s", tmp);
+ idnames[m] = tmp;
+ for (int j = 0; j < noutcomes; j++) {
+ fscanf(infile, "%s", tmp);
+ Y.put(atof(tmp), m, j);
+ }
+ for (int j = (1 + noutcomes); j < nphenocols; j++) {
+ fscanf(infile, "%s", tmp);
+ X.put(atof(tmp), m, (j - 1 - noutcomes));
+ }
+ m++;
+ } else
+ for (int j = 0; j < nphenocols; j++)
+ fscanf(infile, "%s", tmp);
+ fclose(infile);
+}
+phedata::~phedata() {
+ // delete X;
+ // delete Y;
+ // delete [] allmeasured;
+}
+
Property changes on: branches/ProbABEL-refactoring/ProbABEL/src/phedata.cpp
___________________________________________________________________
Added: svn:executable
+ *
Added: svn:mime-type
+ text/plain
Added: branches/ProbABEL-refactoring/ProbABEL/src/phedata.h
===================================================================
--- branches/ProbABEL-refactoring/ProbABEL/src/phedata.h (rev 0)
+++ branches/ProbABEL-refactoring/ProbABEL/src/phedata.h 2012-03-28 22:38:42 UTC (rev 868)
@@ -0,0 +1,41 @@
+/*
+ * phedata.h
+ *
+ * Created on: Mar 6, 2012
+ * Author: mkooyman
+ */
+
+#ifndef PHEDATA_H_
+#define PHEDATA_H_
+
+#include "mematrix.h"
+
+class phedata {
+private:
+
+ bool is_interaction_excluded;
+public:
+ phedata() {
+
+ }
+ phedata(char * fname, int noutc, int npeople, int interaction, bool iscox);
+
+ void setphedata(char * fname, int noutc, int npeople, int interaction,
+ bool iscox);
+
+ int nids_all;
+ int nids;
+ int noutcomes;
+ int ncov;
+ unsigned short int * allmeasured;
+ mematrix<double> X;
+ mematrix<double> Y;
+ std::string * idnames;
+ std::string model;
+ std::string * model_terms;
+ int n_model_terms;
+ void set_is_interaction_excluded(bool inter_excluded);
+ ~phedata();
+
+};
+#endif /* PHEDATA_H_ */
Property changes on: branches/ProbABEL-refactoring/ProbABEL/src/phedata.h
___________________________________________________________________
Added: svn:executable
+ *
Added: svn:mime-type
+ text/plain
More information about the Genabel-commits
mailing list