[Genabel-commits] r1818 - in pkg/OmicABELnoMM: . doc src tests

Mon Sep 8 16:36:26 CEST 2014

Author: afrank
Date: 2014-09-08 16:36:26 +0200 (Mon, 08 Sep 2014)
New Revision: 1818

Modified:
   pkg/OmicABELnoMM/ChangeLog
   pkg/OmicABELnoMM/configure.ac
   pkg/OmicABELnoMM/doc/howtocompile.txt
   pkg/OmicABELnoMM/src/AIOwrapper.cpp
   pkg/OmicABELnoMM/src/AIOwrapper.h
   pkg/OmicABELnoMM/src/Algorithm.cpp
   pkg/OmicABELnoMM/tests/test.cpp
Log:
Removed individuals with covariates missing. Fixed SST Bug. Fixed Individuals Exclude list Bug. Added faster SST calculation

Modified: pkg/OmicABELnoMM/ChangeLog
===================================================================

--- pkg/OmicABELnoMM/ChangeLog	2014-09-04 12:16:09 UTC (rev 1817)
+++ pkg/OmicABELnoMM/ChangeLog	2014-09-08 14:36:26 UTC (rev 1818)
@@ -8,7 +8,7 @@
 -Allow for runtime dosage models
 
 Optimizations:
--Remove individuals with covariates missing
+
 -Reduce memcpy overhead of XR and XR XL factors
 -Reduce computation time of XR and XR XL factors (do GEMMS)
 
@@ -17,8 +17,17 @@
 
 Changes
 -------------
-4 -9 - 2014
+-------------
 
+8-9-2014
+--------------
+Removed individuals with covariates missing
+Fixed SST Bug
+Fixed Individuals Exclude list Bug
+Added faster SST calculation
+
+4-9-2014
+--------------
 First user usable version.
 Several Bug-fixes.
 Added parameters to control thresholds of significance.
@@ -28,4 +37,4 @@
 Thresholds for text-files cannot include more results than the ones for binary files.
 Added bigger test files.
 Added program to transform binary result files  to text files with parametrized thresholds.
-Added HPC functions for sum of squares calculations.
\ No newline at end of file
+Added HPC functions for sum of squares calculations.

Modified: pkg/OmicABELnoMM/configure.ac
===================================================================
--- pkg/OmicABELnoMM/configure.ac	2014-09-04 12:16:09 UTC (rev 1817)
+++ pkg/OmicABELnoMM/configure.ac	2014-09-08 14:36:26 UTC (rev 1818)
@@ -18,7 +18,7 @@
 # Set some default compile flags
 if test -z "$CXXFLAGS"; then
    # User did not set CXXFLAGS, so we can put in our own defaults
-   CXXFLAGS=" -O3"
+   CXXFLAGS=" -O3  -march=corei7 -mfpmath=sse -mtune=corei7 -flto -funroll-loops"
   #CXXFLAGS="-g -ggdb"
 fi
 if test -z "$CPPFLAGS"; then
@@ -37,8 +37,8 @@
 AC_OPENMP
 AC_SUBST(AM_CXXFLAGS, "$OPENMP_CFLAGS")
 
-AM_CXXFLAGS="-static -O3 -I../libs/include/ -I./libs/include/ $AM_CXXFLAGS"
-#AM_CXXFLAGS="-static  -I../libs/include/ -I./libs/include/ $AM_CXXFLAGS"
+AM_CXXFLAGS="-static -O3  -march=corei7 -mfpmath=sse -mtune=corei7 -flto -funroll-loops -I../libs/include -I./libs/include $AM_CXXFLAGS"
+#AM_CXXFLAGS="-static  -I../libs/include -I./libs/include $AM_CXXFLAGS"
 # Checks for libraries.
 # pthread library
 AC_SEARCH_LIBS([pthread_mutex_init], [pthread], [], [
@@ -46,7 +46,7 @@
 ])
 
 if test -z "$LDFLAGS"; then
-	LDFLAGS="-L./libs/lib/ -L../libs/lib/"
+	LDFLAGS="-L./libs/lib -L../libs/lib"
 fi
 
 found_blas=0

Modified: pkg/OmicABELnoMM/doc/howtocompile.txt
===================================================================
--- pkg/OmicABELnoMM/doc/howtocompile.txt	2014-09-04 12:16:09 UTC (rev 1817)
+++ pkg/OmicABELnoMM/doc/howtocompile.txt	2014-09-08 14:36:26 UTC (rev 1818)
@@ -44,10 +44,10 @@
 
 ON the folder GWAS_PROJECT
 
-svn checkout svn+ssh://developername@svn.r-forge.r-project.org/svnroot/genabel/OmicABELnoMM
+svn checkout svn://svn.r-forge.r-project.org/svnroot/genabel/pkg/OmicABELnoMM/
 
 cd OmicABELnoMM
-
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:path_to_/OmicABELnoMM/libs/lib
 autoreconf -fi
 
 ./configure
@@ -114,7 +114,9 @@
 -e Minimum R2 to store in .bin will be above -10
 -s Significance to store in .bin will be P-val's below 0.2
 
+----------------------------------------getcpu info
 
+lscpu
 
 
 

Modified: pkg/OmicABELnoMM/src/AIOwrapper.cpp
===================================================================
--- pkg/OmicABELnoMM/src/AIOwrapper.cpp	2014-09-04 12:16:09 UTC (rev 1817)
+++ pkg/OmicABELnoMM/src/AIOwrapper.cpp	2014-09-08 14:36:26 UTC (rev 1818)
@@ -51,8 +51,12 @@
 
         Yfvi  = load_databel_fvi( (Fhandler->fnameY+".fvi").c_str() );
         ALfvi = load_databel_fvi( (Fhandler->fnameAL+".fvi").c_str() );
-        ARfvi = load_databel_fvi( (Fhandler->fnameAR+".fvi").c_str() );
-        params.n = ALfvi->fvi_header.numObservations;
+        ARfvi = load_databel_fvi( (Fhandler->fnameAR+".fvi").c_str() );
+
+
+
+        params.n = ALfvi->fvi_header.numObservations;
+        Fhandler->fileN = params.n;
         params.m = ARfvi->fvi_header.numVariables/params.r;
         params.t = Yfvi->fvi_header.numVariables;
         params.l = ALfvi->fvi_header.numVariables;
@@ -112,8 +116,11 @@
 
     //params.fname_excludelist = "exclfile.txt";
     int excl_count = 0;
+    int Almissings = 0;
     Fhandler->excl_List = new list< pair<int,int> >();
 
+
+
     if(params.fname_excludelist.size()==0)
     {
         (Fhandler->excl_List)->push_back( make_pair(0,params.n) );
@@ -123,7 +130,14 @@
         read_excludeList( Fhandler->excl_List,excl_count,params.n,params.fname_excludelist);
     }
 
-    params.n -= excl_count;
+    if(!Fhandler->fakefiles)
+    {
+
+        removeALmissings(Fhandler->excl_List,params,Almissings);
+
+    }
+
+    params.n -= (excl_count + Almissings);
 
     params.p = params.l + params.r;
 
@@ -373,7 +387,7 @@
                 {
                     for (list<  pair<int,int>  >::iterator it=excl_List->begin(); it != excl_List->end(); ++it)
                     {
-                        file_pos = Fhandler->n*i + it->first;
+                        file_pos = Fhandler->fileN*i + it->first;
                         fseek ( fp_Y , file_pos*sizeof(type_precision) , SEEK_SET );
                         chunk_size_buff = it->second;
 
@@ -445,7 +459,7 @@
                 {
                     for (list<  pair<int,int>  >::iterator it=excl_List->begin(); it != excl_List->end(); ++it)
                     {
-                        file_pos = Fhandler->n*i + it->first;
+                        file_pos = Fhandler->fileN*i + it->first;
                         fseek ( fp_Ar , file_pos*sizeof(type_precision) , SEEK_SET );
 
                         chunk_size_buff = it->second;
@@ -1032,10 +1046,121 @@
 {
 
 }
+
+void AIOwrapper::removeALmissings(list< pair<int,int> >* excl_List,struct Settings params, int &Almissings)
+{
+    float* tempAL = new float[params.n*params.l];
+
+    FILE *fp;
+    fp = fopen((Fhandler->fnameAL+".fvd").c_str(), "rb");
+    if(fp == 0)
+    {
+        cout << "Error Reading File " << Fhandler->fnameAL << endl;
+        exit(1);
+    }
+    size_t result = fread (tempAL,sizeof(type_precision),params.n*params.l,fp); result++;
+    fclose(fp);
+
+    int prev=0;
+    list <int> missings;
+
+    for (int h=0; h < params.l; h++)
+    {
+        for (int i=0; i < params.n; i++)
+        {
+            if(isnan(tempAL[h*params.n+i]))
+            {
+                splitpair(i,excl_List,params );
+                missings.push_back(i);
+            }
+        }
+    }
+
+    missings.sort();
+    missings.unique();
+    Almissings = missings.size();
+
+    delete tempAL;
+
+
+}
+
+void AIOwrapper::splitpair(int value, list< pair<int,int> >* excl_List,struct Settings params)
+{
+
+
+    int buffsize = 0;
+
+    for (list<  pair<int,int>  >::iterator it=excl_List->begin(); it != excl_List->end(); ++it)
+    {
+        int inipos = it->first;
+        int endpos = it->first+it->second-1;
+        int origbuffsize = it->second;
+
+        if(value <=  endpos)
+        {
+           // cout << "(" << inipos<< ":" << endpos << ") ";
+
+            if(value > inipos && value < endpos)
+            {
+                buffsize = value-inipos;
+                excl_List->insert (it,make_pair(inipos,buffsize));
+                buffsize = origbuffsize-buffsize-1;
+                excl_List->insert (it,make_pair(value+1,buffsize));
+//                cout << inipos<< ":" << endpos << " ";
+//                it--;it--;
+//                cout << it->first<< ":" << it->second << " ";
+//                 it++;
+//                cout << it->first<< ":" << it->second << " | ";
+//                it++;
+
+                excl_List->remove((*it));
+
+                it = excl_List->end();
+            }
+            else
+            {
+                if(value == inipos && inipos != endpos)
+                {
+                    excl_List->insert (it,make_pair(min(inipos+1,params.n),origbuffsize-1));
+//                    cout << inipos<< ":" << endpos << " ";
+//                    it--;
+//                    cout << it->first<< ":" << it->second << " | ";
+//                    it++;
+
+                    excl_List->remove((*it));
+                    it = excl_List->end();
+                }
+                else
+                {
+                     if(value == endpos && endpos != inipos)
+                     {
+                        excl_List->insert (it,make_pair(inipos,origbuffsize-1));
+//                        cout << inipos<< ":" << endpos << " ";
+//                        it--;
+//                        cout << it->first<< ":" << it->second << " | ";
+//                        it++;
+
+                        excl_List->remove((*it));
+                        it = excl_List->end();
+                     }
+                     else
+                     {
+                        if(value == inipos && value == endpos && endpos == inipos)
+                        {
+                            excl_List->remove((*it));
+                            it = excl_List->end();
+                        }
+                     }
+                }
+            }
+        }
+    }
+}
 
+void AIOwrapper::load_AL(type_precision** AL)
+{
 
-void AIOwrapper::load_AL(type_precision** AL)
-{
     if(Fhandler->fakefiles)
     {
         FILE *fp;
@@ -1074,7 +1199,8 @@
         {
             for (list<  pair<int,int>  >::iterator it=excl_List->begin(); it != excl_List->end(); ++it)
             {
-                file_pos = i*Fhandler->n+ it->first;
+
+                file_pos = i*Fhandler->fileN+ it->first;
                 fseek ( fp , file_pos*sizeof(type_precision) , SEEK_SET );
                 chunk_size_buff = it->second;
 
@@ -1082,6 +1208,8 @@
                 buff_pos += chunk_size_buff;
             }
         }
+
+        //cout << Fhandler->n;
 
 
 //        size_t result = fread (Fhandler->AL,sizeof(type_precision),Fhandler->l*Fhandler->n,fp);

Modified: pkg/OmicABELnoMM/src/AIOwrapper.h
===================================================================
--- pkg/OmicABELnoMM/src/AIOwrapper.h	2014-09-04 12:16:09 UTC (rev 1817)
+++ pkg/OmicABELnoMM/src/AIOwrapper.h	2014-09-08 14:36:26 UTC (rev 1818)
@@ -66,7 +66,7 @@
     queue<type_buffElement*> ar_full_buffers;
 
     int index;
-
+    int fileN;
     int n;
     int r;
     int l;
@@ -179,7 +179,11 @@
         void finalize_AL();
 
         void prepare_OutFiles(int max_b_blockSize, int p);
-        void finalize_OutFiles();
+        void finalize_OutFiles();
+
+
+        void removeALmissings(list< pair<int,int> >* excl_List,struct Settings params,int &Almissings);
+        void splitpair(int value, list< pair<int,int> >* excl_List,struct Settings params);
 
 
         static void* async_io(void *ptr );

Modified: pkg/OmicABELnoMM/src/Algorithm.cpp
===================================================================
--- pkg/OmicABELnoMM/src/Algorithm.cpp	2014-09-04 12:16:09 UTC (rev 1817)
+++ pkg/OmicABELnoMM/src/Algorithm.cpp	2014-09-08 14:36:26 UTC (rev 1818)
@@ -437,7 +437,7 @@
 
     AIOfile.initialize(params);//THIS HAS TO BE DONE FIRST! ALWAYS
 
-
+    //cout << params.n <<  "\n";
     //parameters read on AIOfile.initialize are then copied localy
     n = params.n; l = params.l; r = params.r; p = l+r;
     disp_cov = params.disp_cov;
@@ -515,7 +515,7 @@
     list<long int>* al_nan_idxs = new list<long int>[l];
     list<long int>* y_nan_idxs = new list<long int>[y_block_size];
     list<long int>* ar_nan_idxs = new list<long int>[a_block_size*r];
-    int* YxALmiss=new int[y_block_size];
+    int* Ymiss=new int[y_block_size];
 
 
     type_precision* A = new type_precision[n * p * 1];
@@ -548,14 +548,14 @@
 
     //matlab_print_matrix("AL", n,l,backupAL);
 
-    replace_nans(al_nan_idxs,1, backupAL, n, l);
-    for (int i = 1; i < l; i++)
-    {
-        al_nan_idxs[i]=al_nan_idxs[0];
-    }
+//    replace_nans(al_nan_idxs,1, backupAL, n, l);
+//    for (int i = 1; i < l; i++)
+//    {
+//        al_nan_idxs[i]=al_nan_idxs[0];
+//    }
 
 
-    sumSquares(backupAL,l,n,ssAL,al_nan_idxs);
+    sumSquares(backupAL,l,n,ssAL,0);
 
 
     //LAPACKE_dgesdd()
@@ -591,12 +591,12 @@
 
         for (int jj = 0; jj < y_block_size; jj++)
         {
-            list<long int> nans = al_nan_idxs[0];
-            list<long int> nans2 = y_nan_idxs[jj];
-            nans.merge(nans2);
-            nans.sort();//!might not be needed
-            nans.unique();
-            YxALmiss[jj] = nans.size();
+//            list<long int> nans = al_nan_idxs[0];
+//            list<long int> nans2 = y_nan_idxs[jj];
+//            nans.merge(nans2);
+//            nans.sort();//!might not be needed
+//            nans.unique();
+            Ymiss[jj] = y_nan_idxs[jj].size();
         }
 
 
@@ -639,7 +639,7 @@
 
             replace_nans_avgs(a_block_size, backupAR, n, r, ar_nan_idxs);
 
-            replace_with_zeros(al_nan_idxs, backupAR,  n, r, a_block_size);
+            //replace_with_zeros(al_nan_idxs, backupAR,  n, r, a_block_size);
 
 
 
@@ -793,7 +793,7 @@
 
 
                 get_ticks(start_tick2);
-                hpc_statistics( YxALmiss[jj],n,AL,AR,a_block_size,&Y[jj*n],jj,ssY[jj],B,p,l,r,ssAL,ssAR,j*params.tb, i*params.mb*r, sigResults);
+                hpc_statistics( Ymiss[jj],n,AL,AR,a_block_size,&Y[jj*n],jj,ssY[jj],B,p,l,r,ssAL,ssAR,j*params.tb, i*params.mb*r, sigResults);
                 get_ticks(end_tick);
                 out.acc_stats += ticks2sec(end_tick,start_tick2);
                 /**************************/
@@ -831,15 +831,16 @@
     out.duration = ticks2sec(end_tick ,start_tick);
 
 
-    out.gflops = y_iters * (gemm_flops(l, n, params.tb*1, 0) +
-                             a_iters * ( gemm_flops(params.mb*r, n, params.tb*1, 0) +
-                             params.tb *(  gemm_flops(l, 1*n, l, 0) + gemm_flops(l, n, params.mb *r, 0) +
-                                    params.mb * (
-                                         gemm_flops(r, 1*n, r, 0) +
-                                         (p * p * p / 3.0) /
-                                         1000.0/1000.0/1000.0 )) ))  ;
+    out.gflops = gemm_flops(l, n, params.t*1, 0) + gemm_flops(params.m*r, n, params.t*1, 0)+
+                             params.t *(  gemm_flops(l, 1*n, l, 0) + gemm_flops(l, n, params.m *r, 0) +
+                                    params.m * (
+                                         gemm_flops(r, 1*n, r, 0) +
+                                         (p * p * p / 3.0) /
+                                         1000.0/1000.0/1000.0 ))   ;
 
-     out.gflops += params.t/1000.0*params.m/1000.0*(params.n*(params.p*2+3)+12)/1000.0;//hpcstatistics
+     out.gflops += params.t/1000.0*params.m/1000.0*((params.n)*(params.p*2+2))/1000.0;//hpcstatistics
+
+     //cout << (params.t/1000.0*params.m/1000.0*(params.n*(params.p*2+2))/1000.0)/out.acc_stats;
 
 
     AIOfile.finalize();
@@ -868,134 +869,116 @@
 void Algorithm::hpc_SSY(int n,int p, int l,int r, type_precision* __restrict  AL, type_precision* __restrict  AR, int a_amount, type_precision* __restrict y, type_precision* __restrict  B )
 {
 
-    int a_amount_padded = 0;
 
-    if(a_amount > 4)
-    {
-        int blk_size_k = min(1000,n);
-        int k_n_iters = (n+blk_size_k-1)/blk_size_k;
+    float* __restrict xl;
 
-        int a_rest = a_amount%4;
-        a_amount_padded = a_amount-a_rest;
-        int blk_size_i = min(1000,a_amount_padded);
-        int i_a_iters = (a_amount_padded+blk_size_i-1)/blk_size_i;
-        int max_blk_size_i = blk_size_i;
-
-        for (int k=0; k < k_n_iters; k++)
+    float Xl_n[l*n];
+    float yred[n];
+    int n_idx = 0;
+    int xl_n_idx = 0;
+    for (int h=0; h < l; h++)
+    {
+        for (int k=0; k < n; k++)
         {
-            #pragma omp parallel for schedule(static) default(shared)
-            for (int i= 0; i < i_a_iters; i++)
+            xl = &AL[n*h];
+            if(AL[k]!=0 && y[k] !=0)
             {
-
-                for (int ii=0; ii < blk_size_i; ii+=4)//a_amount
+                if(h==0)
                 {
+                    yred[n_idx] =  y[k];
+                    n_idx++;
+                }
+                Xl_n[xl_n_idx] = xl[k];
+                xl_n_idx++;
+            }
+        }
+    }
 
-                    SYY[(i*max_blk_size_i)+ii+0]   = 0;
-                    SYY[(i*max_blk_size_i)+ii+1]   = 0;
-                    SYY[(i*max_blk_size_i)+ii+2]   = 0;
-                    SYY[(i*max_blk_size_i)+ii+3]   = 0;
+//        matlab_print_matrix("yred",1,n_idx,yred);
+//        matlab_print_matrix("Y",1,n,y);
 
-                    type_precision* b1 = &B[(i*max_blk_size_i*p)+(ii+0)*p];
-                    type_precision* b2 = &B[(i*max_blk_size_i*p)+(ii+1)*p];
-                    type_precision* b3 = &B[(i*max_blk_size_i*p)+(ii+2)*p];
-                    type_precision* b4 = &B[(i*max_blk_size_i*p)+(ii+3)*p];
+//    matlab_print_matrix("Xl",1,n-n/2,&AL[n/2*(l-1)]);
+//    matlab_print_matrix("Xl_n",1,n_idx-n_idx/2,&Xl_n[n_idx/2*(l-1)]);
 
-                    int ar_idx1 = (i*max_blk_size_i*n*r)+(ii+0)*(n*r)-l*n;
-                    int ar_idx2 = (i*max_blk_size_i*n*r)+(ii+1)*(n*r)-l*n;
-                    int ar_idx3 = (i*max_blk_size_i*n*r)+(ii+2)*(n*r)-l*n;
-                    int ar_idx4 = (i*max_blk_size_i*n*r)+(ii+3)*(n*r)-l*n;
+    //cout << n_idx << " ";
 
-                    int next_blk_size_k = min((k+1)*blk_size_k,n);
+    //#pragma omp parallel for schedule(static) default(shared)
+    for (int ii= 0; ii < a_amount; ii++)
+    {
+        int ar_idx = ii*(n*r);
 
+        float* __restrict b = &B[ii*p];
 
-                    for (int kk=k*blk_size_k; kk < next_blk_size_k; kk++)//n???????????????????
-                    {
-                        type_precision xl;
-                        type_precision xr;
-                        type_precision sy[4];
+        float* __restrict xr;
+        float syvec[n_idx];
+        float* __restrict sy = syvec;
 
-                        if(AL[kk]!=0 && y[kk] !=0)
-                        {
-                            sy[0]=y[kk]; sy[1]=y[kk]; sy[2]=y[kk]; sy[3]=y[kk];
 
 
-                            for (int h=0; h < l; h++)
-                            {
-                                xl = AL[n*h+kk];
-                                sy[0]-= xl*b1[h];
-                                sy[1]-= xl*b2[h];
-                                sy[2]-= xl*b3[h];
-                                sy[3]-= xl*b4[h];
-                            }
-                            for (int h=l; h < p; h++)
-                            {
-                                sy[0]-= AR[ar_idx1+n*h+kk]*b1[h];
-                                sy[1]-= AR[ar_idx2+n*h+kk]*b2[h];
-                                sy[2]-= AR[ar_idx3+n*h+kk]*b3[h];
-                                sy[3]-= AR[ar_idx4+n*h+kk]*b4[h];
-                            }
+        float XR_n[r*n_idx];
 
-                            SYY[(i*max_blk_size_i)+ii+0] += sy[0]*sy[0];
-                            SYY[(i*max_blk_size_i)+ii+1] += sy[1]*sy[1];
-                            SYY[(i*max_blk_size_i)+ii+2] += sy[2]*sy[2];
-                            SYY[(i*max_blk_size_i)+ii+3] += sy[3]*sy[3];
-
-                        }
-
-                    }
-
-
-
+        int xr_idx=0;
+        for (int h=0; h < r; h++)
+        {
+            xr = &AR[ar_idx+n*h];
+            for (int k=0; k < n; k++)
+            {
+                if(AL[k]!=0 && y[k] !=0)
+                {
+                    XR_n[xr_idx] = xr[k];
+                    xr_idx++;
                 }
-
-                blk_size_i = min(max_blk_size_i,a_amount_padded-(i+1)*blk_size_i);
             }
         }
 
+//
+//    matlab_print_matrix("XR",1,n,&AR[ar_idx+n*(r-1)]);
+//
+//    matlab_print_matrix("XR_n",1,n_idx,&XR_n[n_idx*(r-1)]);
 
-    }
+//        for (int k=0; k < n_idx; k++)
+//        {
+//            sy[k] = yred[k];
+//        }
+        memcpy(sy,yred,n_idx*sizeof(float));
+//        matlab_print_matrix("yred",1,n_idx,yred);
+//        matlab_print_matrix("SY",1,n_idx,sy);
 
 
-    for (int ii= a_amount_padded; ii < a_amount; ii++)
-    {
-        //cout << ii <<  ":" <<a_amount_padded << ":" << a_amount << " ";
-        type_precision* b = &B[ii*p];
-
-        int ar_idx = ii*(n*r)-l*n;
-
         SYY[ii] = 0;
 
-        for (int k=0;  k < n; k++)
+        for (int h=0; h < l; h++)
         {
-            type_precision* xl;
-            type_precision* xr;
-            type_precision sy;
+            xl = &Xl_n[n_idx*h];
 
-            if(AL[k]!=0 && y[k] !=0)
+            for (int k=0; k < n_idx; k++)
             {
-                sy=y[k];
+                sy[k] -= xl[k]*b[h];
+            }
+        }
 
-                for (int h=0; h < l; h++)
-                {
-                    xl = &AL[n*h];
-                    sy-= xl[k]*b[h];
-                }
-                for (int h=l; h < p; h++)
-                {
-                    xr = &AR[ar_idx+n*h];
-                    sy-= xr[k]*b[h];
-                }
+        for (int h=l; h < p; h++)
+        {
+            xr = &XR_n[n_idx*(h-l)];
 
-                SYY[ii] += sy*sy;
+            for (int k=0; k < n_idx; k++)
+            {
+                sy[k] -= xr[k]*b[h];
             }
         }
-    }
 
 
+        for (int k=0; k < n_idx; k++)
+        {
+            SYY[ii] += sy[k]*sy[k];
+        }
+
+    }
+
 }
 
 
-void Algorithm::hpc_statistics(int YxALmiss, int n,
+void Algorithm::hpc_statistics(int Ymiss, int n,
                 type_precision* __restrict  AL, type_precision* __restrict  AR, int a_amount, type_precision* __restrict y, int jj, float varY, type_precision* __restrict  B,
                 int p, int l,int r,type_precision* __restrict var_xL,type_precision* __restrict var_xR, int y_blck_offset, int A_blck_offset, list < resultH >* __restrict sigResults)
 {
@@ -1011,7 +994,7 @@
 
         sigResults->clear();
 
-        int n_not_nans = YxALmiss;
+        int n_not_nans = Ymiss;
         //cout << " " <<  nans.size() << "\t";
         int n_corrected = n-n_not_nans-p;
         float varFactor = (float)(n_corrected+p)/(float)n;
@@ -1051,9 +1034,10 @@
 
 
             Syy=SYY[ii];
+            //cout << Syy << " ";
 
             //R2[ay_idx] = 1-(Syy/(SST*varFactor));
-            res[thread_id].R2 = 1-((Syy/(n_corrected-1))/(SST*varFactor/(n-1)));
+            res[thread_id].R2 = 1-((Syy/(n_corrected-1))/(SST/(n-1)));
             //R2 = SReg/(SST*varFactor);
 
             //cout << n_corrected <<" "<< Syy/(n_corrected-p-1) << " " << SST/(n-1) << " " << R2[ay_idx] << endl;
@@ -1083,6 +1067,7 @@
 
 
                 t=fabs(b[h]/SE);
+                //cout << t << " ";
 
 
 
@@ -1155,8 +1140,8 @@
         type_precision* x = &Data[h*rows];
 
         int n = rows;
-//        if(indexs_Data)
-//            n =n-indexs_Data[h].size();
+        if(indexs_Data)
+            n =n-indexs_Data[h].size();
 
         for (int k=0; k < rows; k++)
         {

Modified: pkg/OmicABELnoMM/tests/test.cpp
===================================================================
--- pkg/OmicABELnoMM/tests/test.cpp	2014-09-04 12:16:09 UTC (rev 1817)
+++ pkg/OmicABELnoMM/tests/test.cpp	2014-09-08 14:36:26 UTC (rev 1818)
@@ -87,14 +87,14 @@
     cout << "Perfromance Test\n" << flush;
 
     double duration, gemmgflops, gemm_gflopsPsec;
-    cpu_benchmark((1*1024.0),3,duration,gemmgflops);
+    cpu_benchmark((1024.0),5,duration,gemmgflops);
     gemm_gflopsPsec = gemmgflops/duration;
     cout << "\nGEMM GFLOPS/s " << gemm_gflopsPsec << endl;
 
     struct Outputs out2 = {0};
-    int factor = 10;
-    params.n=4000; params.l=5;  params.r=1;
-    params.t=100*factor; params.tb=min(1000,100*factor); params.m=100*factor; params.mb=min(200,100*factor);
+    int factor = 0;
+    params.n=2000; params.l=3;  params.r=1;
+    params.t=800; params.tb=min(800,params.t); params.m=1600; params.mb=min(1600,params.m);
     alg.solve(params, out2, P_NEQ_B_OPT_MD);
 
     print_output(out2, gemm_gflopsPsec);