[Genabel-commits] r869 - branches/ProbABEL-refactoring/ProbABEL/src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Mar 29 00:39:46 CEST 2012


Author: maartenk
Date: 2012-03-29 00:39:45 +0200 (Thu, 29 Mar 2012)
New Revision: 869

Modified:
   branches/ProbABEL-refactoring/ProbABEL/src/data.h
Log:
split data.h into smaller files (part1)(addition to previous commit)

Modified: branches/ProbABEL-refactoring/ProbABEL/src/data.h
===================================================================
--- branches/ProbABEL-refactoring/ProbABEL/src/data.h	2012-03-28 22:38:42 UTC (rev 868)
+++ branches/ProbABEL-refactoring/ProbABEL/src/data.h	2012-03-28 22:39:45 UTC (rev 869)
@@ -1,944 +1,93 @@
-#include <string>
-#include <sstream>
-#include <fstream>
-#include <cstdarg>
-#include "mematrix.h"
+/*
+ * data.h
+ *
+ *  Created on: Mar 8, 2012
+ *      Author: mkooyman
+ */
 
-#include "fvlib/AbstractMatrix.h"
-#include "fvlib/CastUtils.h"
-#include "fvlib/const.h"
-#include "fvlib/convert_util.h"
-#include "fvlib/FileVector.h"
-#include "fvlib/frutil.h"
-#include "fvlib/frversion.h"
-#include "fvlib/Logger.h"
-#include "fvlib/Transposer.h"
+#ifndef DATA_H_
+#define DATA_H_
 
 extern bool is_interaction_excluded;
 
-void error(const char * format, ...) {
-    va_list args;
-    char buffer[256];
-    va_start(args, format);
-    vsprintf(buffer, format, args);
-    va_end(args);
+unsigned int Nmeasured(char * fname, int nphenocols, int npeople);
+#include "phedata.h"
+#include "gendata.h"
 
-    printf("ERROR: %s\n", buffer);
-    exit(EXIT_FAILURE);
-}
 
-unsigned int Nmeasured(char * fname, int nphenocols, int npeople) {
-    int ncov = nphenocols - 2;
-    int nids_all = npeople;
-
-    // first pass -- find unmeasured people
-    std::ifstream infile(fname);
-    if (!infile) {
-        std::cerr << "Nmeasured: cannot open file " << fname << endl;
-    }
-
-    char tmp[100];
-
-    for (int i = 0; i < nphenocols; i++) {
-        infile >> tmp;
-    }
-
-    unsigned short int * allmeasured = new unsigned short int[npeople];
-    int nids = 0;
-    for (int i = 0; i < npeople; i++) {
-        allmeasured[i] = 1;
-        infile >> tmp;
-        for (int j = 1; j < nphenocols; j++) {
-            infile >> tmp;
-            if (tmp[0] == 'N' || tmp[0] == 'n')
-                allmeasured[i] = 0;
-        }
-        if (allmeasured[i] == 1)
-            nids++;
-    }
-    infile.close();
-
-    delete[] allmeasured;
-
-    return (nids);
-}
-
-class phedata {
+class regdata
+{
 public:
-    int nids_all;
-    int nids;
-    int noutcomes;
-    int ncov;
-    unsigned short int * allmeasured;
-    mematrix<double> X;
-    mematrix<double> Y;
-    std::string * idnames;
-    std::string model;
-    std::string * model_terms;
-    int n_model_terms;
-    phedata(char * fname, int noutc, int npeople, int interaction, bool iscox) {
-        static const unsigned int BFS = 1000;
-        std::ifstream myfile(fname);
-        char line[BFS];
-        char tmp[100];
-        noutcomes = noutc;
-
-        int nphenocols = 0;
-        int savenpeople = npeople;
-        npeople = 0;
-        if (myfile.is_open()) {
-            myfile.getline(line, BFS);
-            std::stringstream line_stream(line);
-            //			std::cout << line << "\n ";
-            while (line_stream >> tmp) {
-
-                nphenocols++;
-                //				std::cout << tmp << " " << nphenocols << " ";
-            }
-            while (myfile.getline(line, BFS)) {
-                int tmplins = 0;
-                std::stringstream line_stream(line);
-                while (line_stream >> tmp)
-                    tmplins++;
-                if (tmplins != nphenocols) {
-                    std::cerr
-                            << "phenofile: number of variables different from "
-                            << nphenocols << " in line " << tmplins << endl;
-                    myfile.close();
-                    exit(1);
-                }
-                npeople++;
-            };
-            myfile.close();
-        } else {
-            std::cerr << "Unable to open file " << fname << endl;
-            exit(1);
-        }
-        std::cout << "Actual number of people in phenofile = " << npeople;
-        if (savenpeople > 0) {
-            npeople = savenpeople;
-            std::cout << "; using only " << npeople << " first\n";
-        } else {
-            std::cout << "; using all of these\n";
-        }
-
-        ncov = nphenocols - 1 - noutcomes;
-        nids_all = npeople;
-        model_terms = new std::string[ncov + 2];
-
-        // first pass -- find unmeasured people
-        std::ifstream infile(fname);
-        if (!infile) {
-            std::cerr << "phedata: cannot open file " << fname << endl;
-        }
-
-        infile >> tmp;
-        model = "( ";
-        infile >> tmp;
-        model = model + tmp;
-        for (int i = 1; i < noutcomes; i++) {
-            infile >> tmp;
-            model = model + " , ";
-            model = model + tmp;
-        }
-        n_model_terms = 0;
-#if COXPH
-        model = model + " ) ~ ";
-#else
-        model = model + " ) ~ mu + ";
-        model_terms[n_model_terms++] = "mu";
-#endif
-
-        if (nphenocols > noutcomes + 1) {
-            infile >> tmp;
-            model = model + tmp;
-            model_terms[n_model_terms++] = tmp;
-            for (int i = (2 + noutcomes); i < nphenocols; i++) {
-                infile >> tmp;
-
-                //				if(iscox && ) {if(n_model_terms+1 == interaction-1) {continue;} }
-                //				else      {if(n_model_terms+1 == interaction) {continue;} }
-                model = model + " + ";
-                model = model + tmp;
-                model_terms[n_model_terms++] = tmp;
-            }
-        }
-        model = model + " + SNP_A1";
-        if (interaction != 0) {
-            if (iscox) {
-                model = model + " + " + model_terms[interaction - 1]
-                        + "*SNP_A1";
-            } else {
-                model = model + " + " + model_terms[interaction] + "*SNP_A1";
-            }
-        }
-        model_terms[n_model_terms++] = "SNP_A1";
-
-        if (is_interaction_excluded) // exclude covariates from covariate names
-        {
-            if (iscox) {
-                std::cout << "model is running without "
-                        << model_terms[interaction - 1] << ", term\n";
-            } else {
-                std::cout << "model is running without "
-                        << model_terms[interaction] << ", term\n";
-            }
-        }
-
-#if LOGISTIC
-        std::cout << "Logistic ";
-#elif LINEAR
-        std::cout << "Linear ";
-#elif COXPH
-        std::cout << "Coxph ";
-#else
-        std::cout << "Unrecognised ";
-#endif
-        std::cout << "model: " << model << "\n";
-
-        allmeasured = new unsigned short int[npeople];
-        nids = 0;
-        for (int i = 0; i < npeople; i++) {
-            allmeasured[i] = 1;
-            for (int j = 0; j < nphenocols; j++) {
-                infile >> tmp;
-                if (j > 0 && (tmp[0] == 'N' || tmp[0] == 'n'))
-                    allmeasured[i] = 0;
-            }
-            if (allmeasured[i] == 1)
-                nids++;
-        }
-        infile.close();
-        //		printf("npeople = %d, no. all measured = %d\n",nids_all,nids);
-
-        // allocate objects
-        int ntmpcov = 1;
-        if (ncov > 0)
-            ntmpcov = ncov;
-        idnames = new std::string[nids];
-        X.reinit(nids, ntmpcov);
-        Y.reinit(nids, noutcomes);
-
-        // second pass -- read the data
-        infile.open(fname);
-        if (!infile) {
-            std::cerr << "phedata: cannot open file " << fname << endl;
-            exit(1);
-        }
-
-        for (int i = 0; i < nphenocols; i++) {
-            infile >> tmp;
-        }
-
-        int k = 0;
-        int m = 0;
-        for (int i = 0; i < npeople; i++)
-            if (allmeasured[i] == 1) {
-                infile >> tmp;
-                idnames[m] = tmp;
-                for (int j = 0; j < noutcomes; j++) {
-                    infile >> tmp;
-                    Y.put(atof(tmp), m, j);
-                }
-                for (int j = (1 + noutcomes); j < nphenocols; j++) {
-                    infile >> tmp;
-                    X.put(atof(tmp), m, (j - 1 - noutcomes));
-                }
-                m++;
-            } else
-                for (int j = 0; j < nphenocols; j++)
-                    infile >> tmp;
-        infile.close();
-    }
-    ~phedata() {
-        delete[] model_terms;
-        delete[] idnames;
-        delete[] allmeasured;
-        // delete X;
-        // delete Y;
-    }
-};
-
-class gendata {
-public:
-    int nsnps;
-    int nids;
-    int ngpreds;
-    gendata();
-    void re_gendata(char * fname, int insnps, int ingpreds, int npeople,
-            int nmeasured, unsigned short int * allmeasured, int skipd,
-            std::string * idnames);
-    void re_gendata(string filename, int insnps, int ingpreds, int npeople,
-            int nmeasured, unsigned short int * allmeasured,
-            std::string * idnames);
-    ~gendata();
-    void get_var(int var, float * data);
-    // MAKE THAT PRIVATE, ACCESS THROUGH GET_SNP
-    // ANOTHER PRIVATE OBJECT IS A POINTER TO DATABELBASECPP
-    // UPDATE SNP, ALL REGRESSION METHODS: ACCOUNT FOR MISSING
+	int nids;
+	int ncov;
+	int ngpreds;
+	int noutcomes;
+	unsigned short int * masked_data;
+	mematrix<double> X;
+	mematrix<double> Y;
+	regdata(){}
+	regdata(const regdata &obj) ;
+	regdata(phedata &phed, gendata &gend, int snpnum);
+	mematrix<double>  extract_genotypes();
+	void update_snp(gendata &gend, int snpnum);
+	regdata get_unmasked_data();
+	~regdata();
 private:
-    mematrix<float> G;
-    AbstractMatrix * DAG;
-    unsigned short int * DAGmask;
-    //	mematrix<double> G;
-};
 
-void gendata::get_var(int var, float * data) {
-    if (DAG == NULL)
-        for (int i = 0; i < G.nrow; i++)
-            data[i] = G.get(i, var);
-    else if (DAG != NULL) {
-        float tmpdata[DAG->getNumObservations()];
-        DAG->readVariableAs((unsigned long int) var, tmpdata);
-        unsigned int j = 0;
-        for (unsigned int i = 0; i < DAG->getNumObservations(); i++)
-            if (!DAGmask[i])
-                data[j++] = tmpdata[i];
-        //fprintf(stdout,"%i %i %i\n",j,DAG->get_nobservations(),nids);
-    } else
-        error("cannot get gendata");
-}
-
-gendata::gendata() {
-    nsnps = nids = ngpreds = 0;
-}
-
-void gendata::re_gendata(string filename, int insnps, int ingpreds, int npeople,
-        int nmeasured, unsigned short int * allmeasured,
-        std::string * idnames) {
-    nsnps = insnps;
-    ngpreds = ingpreds;
-    DAG = new FileVector(filename, 128, true);
-    DAGmask = new unsigned short int[DAG->getNumObservations()];
-    if (DAG->getNumObservations() != npeople)
-        error("dimension of fvf-data and phenotype data do not match\n");
-    if (DAG->getNumVariables() != insnps * ingpreds)
-        error("dimension of fvf-data and mlinfo data do not match\n");
-    long int j = -1;
-    for (unsigned int i = 0; i < npeople; i++) {
-        if (allmeasured[i] == 0)
-            DAGmask[i] = 1;
-        else {
-            DAGmask[i] = 0;
-            j++;
-        }
-        string DAGobsname = DAG->readObservationName(i).name;
-
-        if (DAGobsname.find("->") != string::npos)
-            DAGobsname = DAGobsname.substr(DAGobsname.find("->") + 2);
-
-//if (allmeasured[i] && idnames[j] != DAGobsname)
-        //		error("names do not match for observation at phenofile line (phe/geno) %i/+1 (%s/%s)\n",
-        //			i+1,idnames[i].c_str(),DAGobsname.c_str());
-        // fix thanks to Vadym Pinchuk
-        if (allmeasured[i] && idnames[j] != DAGobsname)
-            error(
-                    "names do not match for observation at phenofile line(phe/geno) %i/+1 (%s/%s)\n",
-                    i + 1, idnames[j].c_str(), DAGobsname.c_str());
-
-    }
-    nids = j + 1;
-    //fprintf(stdout,"in INI: %i %i\n",nids,npeople);
-    if (nids != nmeasured)
-        error("nids != mneasured (%i != %i)\n", nids, nmeasured);
-
-}
-
-void gendata::re_gendata(char * fname, int insnps, int ingpreds, int npeople,
-        int nmeasured, unsigned short int * allmeasured, int skipd,
-        std::string * idnames) {
-    nids = nmeasured;
-    nsnps = insnps;
-    ngpreds = ingpreds;
-    DAG = NULL;
-    //	int nids_all = npeople;
-
-    G.reinit(nids, (nsnps * ngpreds));
-
-    std::ifstream infile;
-
-    infile.open(fname);
-    if (!infile) {
-        std::cerr << "gendata: cannot open file " << fname << endl;
-    }
-
-    char tmp[100], tmpn[100];
-    std::string tmpid, tmpstr;
-
-    int k = 0;
-    for (int i = 0; i < npeople; i++)
-        if (allmeasured[i] == 1) {
-            if (skipd > 0) {
-                //				int ttt;
-                char ttt[100];
-                infile >> tmp;
-                //				sscanf(tmp,"%d->%s",&ttt, tmpn);
-                //		these changes are thanks to BMM & BAP :)
-                //				sscanf(tmp,"%s->%s",&ttt, tmpn);
-                //				sscanf(tmp,"%[^->]->%[^->]",&ttt, tmpn);
-                tmpstr = tmp;
-                if (tmpstr.find("->") != string::npos) {
-                    sscanf(tmp, "%[^->]->%s", ttt, tmpn);
-                    tmpid = tmpn;
-                } else {
-                    tmpid = tmpstr;
-                    //fprintf(stdout,"%s;%s;%s;%s;%s\n",tmp,ttt,tmpn,tmpid.c_str(),idnames[k].c_str());
-                }
-                if (tmpid != idnames[k]) {
-                    fprintf(stderr,
-                            "phenofile and dosefile did not match at line %d ",
-                            i + 2);
-                    cerr << "(" << tmpid << " != " << idnames[k] << ")\n";
-                    infile.close();
-                    exit(1);
-                }
-            }
-            for (int j = 1; j < skipd; j++) {
-                infile >> tmp;
-            }
-            for (int j = 0; j < (nsnps * ngpreds); j++) {
-                if (infile.good()) {
-                    infile >> tmp;
-                } else {
-                    std::cerr
-                            << "cannot read dose-file: check skipd and ngpreds parameters\n";
-                    infile.close();
-                    exit(1);
-                }
-                G.put(atof(tmp), k, j);
-            }
-            k++;
-        } else {
-            for (int j = 0; j < skipd; j++)
-                infile >> tmp;
-            for (int j = 0; j < (nsnps * ngpreds); j++)
-                infile >> tmp;
-        }
-    infile.close();
-}
-
-// HERE NEED A NEW CONSTRUCTOR BASED ON DATABELBASECPP OBJECT
-gendata::~gendata() {
-
-    if (DAG != NULL) {
-        delete DAG;
-        delete[] DAGmask;
-    }
-
-    //		delete G;
-}
-
-class regdata {
-public:
-    int nids;
-    int ncov;
-    int ngpreds;
-    int noutcomes;
-    unsigned short int * masked_data;
-    mematrix<double> X;
-    mematrix<double> Y;
-
-    regdata() {
-    }
-    regdata(const regdata &obj) {
-        nids = obj.nids;
-        ncov = obj.ncov;
-        ngpreds = obj.ngpreds;
-        noutcomes = obj.noutcomes;
-        X = obj.X;
-        Y = obj.Y;
-        masked_data = new unsigned short int[nids];
-        for (int i = 0; i < nids; i++)
-            masked_data[i] = 0;
-    }
-    regdata(phedata &phed, gendata &gend, int snpnum) {
-        nids = gend.nids;
-        masked_data = new unsigned short int[nids];
-        for (int i = 0; i < nids; i++)
-            masked_data[i] = 0;
-        ngpreds = gend.ngpreds;
-        if (snpnum >= 0)
-            ncov = phed.ncov + ngpreds;
-        else
-            ncov = phed.ncov;
-        noutcomes = phed.noutcomes;
-        X.reinit(nids, (ncov + 1));
-        Y.reinit(nids, noutcomes);
-        for (int i = 0; i < nids; i++) {
-            X.put(1., i, 0);
-            Y.put((phed.Y).get(i, 0), i, 0);
-        }
-        for (int j = 1; j <= phed.ncov; j++)
-            for (int i = 0; i < nids; i++)
-                X.put((phed.X).get(i, j - 1), i, j);
-        if (snpnum > 0)
-            for (int j = 0; j < ngpreds; j++) {
-                float snpdata[nids];
-                gend.get_var(snpnum * ngpreds + j, snpdata);
-                for (int i = 0; i < nids; i++)
-                    X.put(snpdata[i], i, (ncov - ngpreds + 1 + j));
-            }
-        //			for (int i=0;i<nids;i++)
-        //				for (int j=0;j<ngpreds;j++)
-        //					X.put((gend.G).get(i,(snpnum*ngpreds+j)),i,(ncov-ngpreds+1+j));
-    }
-    void update_snp(gendata &gend, int snpnum) {
-        for (int j = 0; j < ngpreds; j++) {
-            float snpdata[nids];
-            for (int i = 0; i < nids; i++)
-                masked_data[i] = 0;
-            gend.get_var(snpnum * ngpreds + j, snpdata);
-            for (int i = 0; i < nids; i++) {
-                X.put(snpdata[i], i, (ncov + 1 - j - 1));
-                if (isnan(snpdata[i]))
-                    masked_data[i] = 1;
-            }
-        }
-    }
-    ~regdata() {
-        delete[] masked_data;
-        //		delete X;
-        //		delete Y;
-    }
-
-    regdata get_unmasked_data() {
-        regdata to; // = regdata(*this);
-        int nmeasured = 0;
-        for (int i = 0; i < nids; i++)
-            if (masked_data[i] == 0)
-                nmeasured++;
-        to.nids = nmeasured;
-        //cout << to.nids << " in get_unmasked_data\n";
-        to.ncov = ncov;
-        to.ngpreds = ngpreds;
-        to.noutcomes = noutcomes;
-        int dim2Y = Y.ncol;
-        int dim2X = X.ncol;
-        (to.X).reinit(to.nids, dim2X);
-        (to.Y).reinit(to.nids, dim2Y);
-
-        int j = 0;
-        for (int i = 0; i < nids; i++) {
-            if (masked_data[i] == 0) {
-                for (int nc = 0; nc < dim2X; nc++)
-                    (to.X).put(X.get(i, nc), j, nc);
-                for (int nc = 0; nc < dim2Y; nc++)
-                    (to.Y).put(Y.get(i, nc), j, nc);
-                j++;
-            }
-        }
-
-        //delete [] to.masked_data;
-        to.masked_data = new unsigned short int[to.nids];
-        for (int i = 0; i < to.nids; i++)
-            to.masked_data[i] = 0;
-        //fprintf(stdout,"get_unmasked: %i %i %i\n",to.nids,dim2X,dim2Y);
-        return (to);
-    }
-
-    mematrix<double> extract_genotypes(void) {
-        mematrix<double> out;
-        out.reinit(X.nrow, ngpreds);
-        for (int i = 0; i < X.nrow; i++)
-            for (int j = 0; j < ngpreds; j++)
-                out[i * ngpreds + j] = X.get(i, (ncov - ngpreds + 1 + j));
-        return out;
-    }
 };
 
-// compare for sort of times
-int cmpfun(const void *a, const void *b) {
-    double el1 = *(double*) a;
-    double el2 = *(double*) b;
-    if (el1 > el2)
-        return 1;
-    if (el1 < el2)
-        return -1;
-    if (el1 == el2)
-        return 0;
-}
-
-class coxph_data {
+class coxph_data
+{
 public:
-    int nids;
-    int ncov;
-    int ngpreds;
-    mematrix<double> weights;
-    mematrix<double> stime;
-    mematrix<int> sstat;
-    mematrix<double> offset;
-    mematrix<int> strata;
-    mematrix<double> X;
-    mematrix<int> order;
-    unsigned short int * masked_data;
+	int nids;
+	int ncov;
+	int ngpreds;
+	mematrix<double> weights;
+	mematrix<double> stime;
+	mematrix<int>    sstat;
+	mematrix<double> offset;
+	mematrix<int>    strata;
+	mematrix<double> X;
+	mematrix<int>    order;
+	unsigned short int * masked_data;
+	coxph_data get_unmasked_data();
+	coxph_data(){}
+	coxph_data(const coxph_data &obj);
+	coxph_data(phedata &phed, gendata &gend, int snpnum);
+	void update_snp(gendata &gend, int snpnum);
+	~coxph_data();
 
-    coxph_data() {
-    }
-
-    coxph_data(const coxph_data &obj) {
-        nids = obj.nids;
-        ncov = obj.ncov;
-        ngpreds = obj.ngpreds;
-        weights = obj.weights;
-        stime = obj.stime;
-        sstat = obj.sstat;
-        offset = obj.offset;
-        strata = obj.strata;
-        X = obj.X;
-        order = obj.order;
-        masked_data = new unsigned short int[nids];
-        for (int i = 0; i < nids; i++)
-            masked_data[i] = 0;
-    }
-    coxph_data(phedata &phed, gendata &gend, int snpnum) {
-        nids = gend.nids;
-        masked_data = new unsigned short int[nids];
-        for (int i = 0; i < nids; i++)
-            masked_data[i] = 0;
-        ngpreds = gend.ngpreds;
-        if (snpnum >= 0)
-            ncov = phed.ncov + ngpreds;
-        else
-            ncov = phed.ncov;
-        if (phed.noutcomes != 2) {
-            fprintf(stderr,
-                    "coxph_data: number of outcomes should be 2 (now: %d)\n",
-                    phed.noutcomes);
-            exit(1);
-        }
-        //		X.reinit(nids,(ncov+1));
-        X.reinit(nids, ncov);
-        stime.reinit(nids, 1);
-        sstat.reinit(nids, 1);
-        weights.reinit(nids, 1);
-        offset.reinit(nids, 1);
-        strata.reinit(nids, 1);
-        order.reinit(nids, 1);
-        for (int i = 0; i < nids; i++) {
-            //			X.put(1.,i,0);
-            stime[i] = (phed.Y).get(i, 0);
-            sstat[i] = int((phed.Y).get(i, 1));
-            if (sstat[i] != 1 & sstat[i] != 0) {
-                fprintf(stderr,
-                        "coxph_data: status not 0/1 (right order: id, fuptime, status ...)\n",
-                        phed.noutcomes);
-                exit(1);
-            }
-        }
-        for (int j = 0; j < phed.ncov; j++)
-            for (int i = 0; i < nids; i++)
-                X.put((phed.X).get(i, j), i, j);
-
-        if (snpnum > 0)
-            for (int j = 0; j < ngpreds; j++) {
-                float snpdata[nids];
-                gend.get_var(snpnum * ngpreds + j, snpdata);
-                for (int i = 0; i < nids; i++)
-                    X.put(snpdata[i], i, (ncov - ngpreds + j));
-            }
-
-        //			for (int i=0;i<nids;i++)
-        //				for (int j=0;j<ngpreds;j++)
-        //					X.put((gend.G).get(i,(snpnum*ngpreds+j)),i,(ncov-ngpreds+j));
-
-        for (int i = 0; i < nids; i++) {
-            weights[i] = 1.0;
-            offset[i] = 0.0;
-            strata[i] = 0;
-        }
-        // sort by time
-        double tmptime[nids];
-        int passed_sorted[nids];
-        for (int i = 0; i < nids; i++) {
-            tmptime[i] = stime[i];
-            passed_sorted[i] = 0;
-        }
-        qsort(tmptime, nids, sizeof(double), cmpfun);
-        for (int i = 0; i < nids; i++) {
-            int passed = 0;
-            for (int j = 0; j < nids; j++)
-                if (tmptime[j] == stime[i])
-                    if (!passed_sorted[j]) {
-                        order[i] = j;
-                        passed_sorted[j] = 1;
-                        passed = 1;
-                        break;
-                    }
-            if (passed != 1) {
-                fprintf(stderr, "cannot recover element %d\n", i);
-                exit(1);
-            }
-        }
-        stime = reorder(stime, order);
-        sstat = reorder(sstat, order);
-        weights = reorder(weights, order);
-        strata = reorder(strata, order);
-        offset = reorder(offset, order);
-        X = reorder(X, order);
-        X = transpose(X);
-        //		X.print();
-        //		offset.print();
-        //		weights.print();
-        //		stime.print();
-        //		sstat.print();
-    }
-    void update_snp(gendata &gend, int snpnum) {
-        // note this sorts by "order"!!!
-
-        for (int j = 0; j < ngpreds; j++) {
-            float snpdata[nids];
-            for (int i = 0; i < nids; i++)
-                masked_data[i] = 0;
-            gend.get_var(snpnum * ngpreds + j, snpdata);
-            for (int i = 0; i < nids; i++) {
-                X.put(snpdata[i], (ncov - ngpreds + j), order[i]);
-                if (isnan(snpdata[i]))
-                    masked_data[order[i]] = 1;
-            }
-        }
-        //		for (int i=0;i<nids;i++)
-        //			for (int j=0;j<ngpreds;j++)
-        //				X.put((gend.G).get(i,(snpnum*ngpreds+j)),(ncov-ngpreds+j),order[i]);
-    }
-    ~coxph_data() {
-        delete[] masked_data;
-        //		delete X;
-        //		delete sstat;
-        //		delete stime;
-        //		delete weights;
-        //		delete offset;
-        //		delete strata;
-        //		delete order;
-    }
-
-    coxph_data get_unmasked_data() {
-//		std::cout << " !!! in get_unmasked_data !!! ";
-        coxph_data to; // = coxph_data(*this);
-        // filter missing data
-
-        int nmeasured = 0;
-        for (int i = 0; i < nids; i++)
-            if (masked_data[i] == 0)
-                nmeasured++;
-        to.nids = nmeasured;
-//		std::cout << "nmeasured=" << nmeasured << "\n";
-        to.ncov = ncov;
-//		std::cout << "ncov=" << ncov << "\n";
-        to.ngpreds = ngpreds;
-        int dim1X = X.nrow;
-//		std::cout << "X.ncol=" << X.ncol << "\n";
-//		std::cout << "X.nrow=" << X.nrow << "\n";
-        (to.weights).reinit(to.nids, 1);
-        (to.stime).reinit(to.nids, 1);
-        (to.sstat).reinit(to.nids, 1);
-        (to.offset).reinit(to.nids, 1);
-        (to.strata).reinit(to.nids, 1);
-        (to.order).reinit(to.nids, 1);
-        (to.X).reinit(dim1X, to.nids);
-
-//		std::cout << "(to.X).ncol=" << (to.X).ncol << "\n";
-//		std::cout << "(to.X).nrow=" << (to.X).nrow << "\n";
-//		std::cout << " !!! just before cycle !!! ";
-        int j = 0;
-        for (int i = 0; i < nids; i++) {
-//			std::cout << nids << " " << i << " " << masked_data[i] << "\n";
-            if (masked_data[i] == 0) {
-                (to.weights).put(weights.get(i, 1), j, 1);
-                (to.stime).put(stime.get(i, 1), j, 1);
-                (to.sstat).put(sstat.get(i, 1), j, 1);
-                (to.offset).put(offset.get(i, 1), j, 1);
-                (to.strata).put(strata.get(i, 1), j, 1);
-                (to.order).put(order.get(i, 1), j, 1);
-                for (int nc = 0; nc < dim1X; nc++)
-                    (to.X).put(X.get(nc, i), nc, j);
-                j++;
-            }
-        }
-//		std::cout << " !!! just after cycle !!! ";
-
-        //delete [] to.masked_data;
-        to.masked_data = new unsigned short int[to.nids];
-        for (int i = 0; i < to.nids; i++)
-            to.masked_data[i] = 0;
-        //fprintf(stdout,"get_unmasked: %i %i %i\n",to.nids,dim2X,dim2Y);
-        return (to);
-    }
 };
 
-class mlinfo {
+class mlinfo
+{
 public:
-    int nsnps;
-    std::string * name;
-    std::string * A1;
-    std::string * A2;
-    double * Freq1;
-    double * MAF;
-    double * Quality;
-    double * Rsq;
-    std::string * map;
-
-    mlinfo(char * filename, char * mapname) {
-        char tmp[100];
-        unsigned int nlin = 0;
-
-        std::ifstream infile(filename);
-        if (infile.is_open()) {
-            while (infile.good()) {
-                infile >> tmp;
-                nlin++;
-            }
-            nlin--; // Subtract one, the previous loop added 1 too much
-        } else {
-            std::cerr << "mlinfo: cannot open file " << filename << endl;
-            exit(1);
-        }
-        infile.close();
-
-        if (nlin % 7) {
-            std::cerr << "mlinfo: number of columns != 7 in " << filename
-                    << endl;
-            exit(1);
-        }
-        nsnps = int(nlin / 7) - 1;
-        std::cout << "Number of SNPs = " << nsnps << endl;
-        name = new std::string[nsnps];
-        A1 = new std::string[nsnps];
-        A2 = new std::string[nsnps];
-        Freq1 = new double[nsnps];
-        MAF = new double[nsnps];
-        Quality = new double[nsnps];
-        Rsq = new double[nsnps];
-        map = new std::string[nsnps];
-
-        infile.open(filename);
-        if (!infile) { // file couldn't be opened
-            std::cerr << "mlinfo: cannot open file " << filename << endl;
-            exit(1);
-        }
-        /* Read the header and discard it */
-        for (int i = 0; i < 7; i++)
-            infile >> tmp;
-
-        for (int i = 0; i < nsnps; i++) {
-            infile >> tmp;
-            name[i] = tmp;
-            infile >> tmp;
-            A1[i] = tmp;
-            infile >> tmp;
-            A2[i] = tmp;
-            infile >> tmp;
-            Freq1[i] = atof(tmp);
-            infile >> tmp;
-            MAF[i] = atof(tmp);
-            infile >> tmp;
-            Quality[i] = atof(tmp);
-            infile >> tmp;
-            Rsq[i] = atof(tmp);
-            map[i] = "-999";
-        }
-        infile.close();
-
-        if (mapname != NULL) {
-            std::ifstream instr(mapname);
-            int BFS = 1000;
-            char line[BFS], tmp[BFS];
-            if (!instr.is_open()) {
-                std::cerr << "mlinfo: cannot open file " << mapname << endl;
-                exit(1);
-            }
-            instr.getline(line, BFS);
-            for (int i = 0; i < nsnps; i++) {
-                instr.getline(line, BFS);
-                std::stringstream line_stream(line);
-                line_stream >> tmp >> map[i];
-            }
-            instr.close();
-        }
-    }
-    ~mlinfo() {
-        delete[] name;
-        delete[] A1;
-        delete[] A2;
-        delete[] Freq1;
-        delete[] MAF;
-        delete[] Quality;
-        delete[] Rsq;
-        delete[] map;
-    }
+	int nsnps;
+	std::string * name;
+	std::string * A1;
+	std::string * A2;
+	double * Freq1;
+	double * MAF;
+	double * Quality;
+	double * Rsq;
+	std::string * map;
+	mlinfo(){}
+	mlinfo(char * filename, char * mapname);
+	~mlinfo();
 };
 
-//_________________________________________Maksim_start
-
 class InvSigma {
 
-public:
-
-    InvSigma(const char * filename_, phedata * phe) {
-        filename = filename_;
-        npeople = phe->nids;
-        std::ifstream myfile(filename_);
-        char * line = new char[MAXIMUM_PEOPLE_AMOUNT];
-        double val;
-        std::string id;
-        unsigned row = 0, col = 0;
-
-        matrix.reinit(npeople, npeople);
-
-        //idnames[k], if (allmeasured[i]==1)
-
-        if (myfile.is_open()) {
-            while (myfile.getline(line, MAXIMUM_PEOPLE_AMOUNT)) {
-
-                std::stringstream line_stream(line);
-                line_stream >> id;
-
-                if (phe->idnames[row] != id) {
-                    std::cerr << "error:in row " << row << " id="
-                            << phe->idnames[row]
-                            << " in inverse variance matrix but id=" << id
-                            << " must be there. Wrong inverse variance matrix (only measured id must be there)\n";
-                    exit(1);
-                }
-
-                while (line_stream >> val) {
-                    matrix.put(val, row, col);
-                    col++;
-                }
-
-                if (col != npeople) {
-                    fprintf(stderr,
-                            "error: inv file: Number of columns in row %d equals to %d but people amount is %d\n",
-                            row, col, npeople);
-                    myfile.close();
-                    exit(1);
-                }
-                col = 0;
-                row++;
-            }
-            myfile.close();
-        } else {
-            fprintf(stderr, "error: inv file: cannot open file '%s'\n",
-                    filename_);
-        }
-
-        delete[] line;
-    }
-    ;
-
-    ~InvSigma() {
-        //af af
-    }
-
-    mematrix<double> & get_matrix(void) {
-        return matrix;
-    }
-
 private:
 
-    static const unsigned MAXIMUM_PEOPLE_AMOUNT = 1000000;
-    unsigned npeople; //amount of people
-    std::string filename;
-    mematrix<double> matrix; //file is stored here
-
+	static const unsigned MAXIMUM_PEOPLE_AMOUNT = 1000000;
+	unsigned npeople; //amount of people
+	std::string filename;
+	mematrix<double> matrix; //file is stored here
+public:
+	InvSigma(const char * filename_, phedata * phe);
+	mematrix<double> & get_matrix();
+	~InvSigma();
 };
-//________________________________________Maksim_end
+
+#endif /* DATA_H_ */



More information about the Genabel-commits mailing list