[Genabel-commits] r1850 - in pkg/OmicABELnoMM: src tests

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Oct 27 17:07:09 CET 2014


Author: lckarssen
Date: 2014-10-27 17:07:09 +0100 (Mon, 27 Oct 2014)
New Revision: 1850

Modified:
   pkg/OmicABELnoMM/src/Algorithm.cpp
   pkg/OmicABELnoMM/src/Algorithm.h
   pkg/OmicABELnoMM/src/Definitions.h
   pkg/OmicABELnoMM/src/Utility.cpp
   pkg/OmicABELnoMM/src/Utility.h
   pkg/OmicABELnoMM/src/main.cpp
   pkg/OmicABELnoMM/tests/test.cpp
Log:
Several OmicABELnoMM source files contained ^M characters (Windows EOL markers), in fact they appeared to contain a mix of Windows and Unix EOL charachters. For this commit I used dos2unix to convert Windows EOLs to Unix ones.

No functional changes, only changes in the code layout.


Modified: pkg/OmicABELnoMM/src/Algorithm.cpp
===================================================================
--- pkg/OmicABELnoMM/src/Algorithm.cpp	2014-10-26 21:51:40 UTC (rev 1849)
+++ pkg/OmicABELnoMM/src/Algorithm.cpp	2014-10-27 16:07:09 UTC (rev 1850)
@@ -1,269 +1,269 @@
 #include "Algorithm.h"
 
-
-
-Algorithm::Algorithm()
-{
+
+
+Algorithm::Algorithm()
+{
     // ctor
-
-}
-
-
-Algorithm::~Algorithm()
-{
-    // dtor
-}
-
-
-void Algorithm::solve(struct Settings params, struct Outputs &out, int type)
-{
-    switch (type)
-    {
-        case P_NEQ_B_OPT_MD:
-            partialNEQ_Blocked_STL_MD(params, out);
-            break;
-
-        default:
-            break;
-    }
-}
-
-
-void Algorithm::extract_subMatrix(float* source, float* dest,
-                                  int dim1_source, int dim2_source,
-                                  int dim1_ini, int dim1_end, int dim2_ini,
-                                  int dim2_end)
-{
-    int i, j, idx = 0;
-    int size, source_ini;
-    for (i = dim2_ini; i < dim2_end; i++)
-    {
-        j = dim1_ini;
-        source_ini = i * dim1_source+j;
-        size = dim1_end - dim1_ini;
-        memcpy( (float*)&dest[idx],
-                (float*)&source[source_ini],
-                size * sizeof(float) );
-//        for (j = dim1_ini; j<dim1_end; j++)
-//        {
-//            dest[idx] = source[i*dim1_source+j];
-//            idx++;
-//        }
-        idx += size;
-    }
-}
-
-
-void Algorithm::prepare_Bfinal(float* bfinal, float* bsource, int a_amount, int y_amount, int p)
-{
-//    // memcpy are faster version of the fors
-//    int i, k, w, top_idx, bot_idx;
-//    int size;
-//    top_idx = 0;
-//    bot_idx = 0;
-//    for (k = 0; k < dim2_b; k++)
-//    {
-//        size = k * dim1_b + (dim1_b - dim1_b_bot) - (k * dim1_b);
-//        memcpy( (float*)&bfinal[k * dim1_b],
-//                (float*)&top[top_idx],
-//                size * sizeof(float) );
-////        for (i = k*dim1_b; i < k*dim1_b+(dim1_b-dim1_b_bot); i++)
-////        {
-////            bfinal[i] = top[top_idx];
-////            top_idx++;
-////        }
-//        top_idx += size;
-//        i = k * dim1_b + size;
-//        w = i;
-//
-//        size = w + dim1_b_bot - w;
-//        memcpy( (float*)&bfinal[w],
-//                (float*)&bot[bot_idx],
-//                size * sizeof(float) );
-////        for (i = w; i < w+dim1_b_bot; i++)
-////        {
-////            bfinal[i] = bot[bot_idx];
-////            bot_idx++;
-////        }
-//        bot_idx += size;
-//    }
-}
-
-
-void Algorithm::prepare_QY(float* qy, float* top,
-                           float* bot, int dim1_QY,
-                           int dim2_QY, int dim1_qy_bot, int bot_blocks )
-{
-    int i, k, w, top_idx, bot_idx;
-    top_idx = 0;
-    bot_idx = 0;
-    for (k = 0; k < dim2_QY; k++)
-    {
-        for (i = k * dim1_QY; i < (k+1) * dim1_QY - dim1_qy_bot; i++)
-        {
-            qy[i] = top[top_idx];
-            top_idx++;
-        }
-        w = i;
-
-        for (i = w; i < w + dim1_qy_bot; i++)
-        {
-            qy[i] = bot[bot_idx];
-            bot_idx++;
-        }
-        bot_idx += (bot_blocks-1) * dim1_qy_bot;
-    }
-}
-
-
-float* Algorithm::extract_R(float* A, int dim1_A, int dim2_A)
-{
-    float* R = (float*)calloc(dim2_A * dim2_A,
-                                                sizeof(float));
-    int i, j;
-
-    int R_idx = 0;
-
-    for (i = 0; i < dim2_A; i++)
-    {
-        for (j = 0; j <= i; j++)
-        {
-            R[R_idx] = A[j + i * dim1_A];
-            R_idx++;
-        }
-        R_idx = dim2_A * (i+1);
-    }
-    return R;
-}
-
-
-float* Algorithm::prepare_R(float* RL, int dim1_A,
-                                     int dim2_AL, int dim2_AR)
-{
-    int R_dim = (dim2_AR + dim2_AL);
-    float* R = new float[R_dim * R_dim];
-
-    int i, j;
-
-    int RL_idx = 0;
-    int R_idx = 0;
-
-    for (i = 0; i < dim2_AL; i++)
-    {
-        for (j = 0; j <= i; j++)
-        {
-            RL_idx   = i * dim1_A + j;
-            R_idx    = i * R_dim + j;
-            R[R_idx] = RL[RL_idx];
-        }
-    }
-    return R;
-}
-
-
-void Algorithm::update_R(float* R, float* topRr,
-                         float* botRr, int dim1, int dim2, int r)
-{
-    int i, j, w;
-    int max = dim1 * dim2;
-    int rtr_idx = 0;
-    int rbr_idx = 0;
-    for (j = r; j > 0; j--)
-    {
-        for (i = max - dim1 * j; i < max - dim1 * j + dim2 - r; i++)
-        {
-            R[i] = topRr[rtr_idx];
-            rtr_idx++;
-        }
-        w = i;
-
-        for (i = w; i < w + r; i++)
-        {
-            R[i] = botRr[rbr_idx];
-            rbr_idx++;
-        }
-    }
-}
-
-
-void Algorithm::build_S(float* S, float* Stl,
-                        float* Str, float* Sbr, int l, int r)
-{
-    int Sidx;
-    int p = l + r;
-
-    for (int i = 0; i < l; i++)
-    {
-        Sidx = i * p;
-        for (int j = 0; j <= i; j++)
-        {
-            S[Sidx] = Stl[j+i*l];
-            Sidx++;
-        }
-    }
-
-    for (int i = 0; i < r; i++)
-    {
-        Sidx = l*p + p*i;
-        for (int j = 0; j < l; j++)
-        {
-            S[Sidx] = Str[j + i * l];
-            Sidx++;
-        }
-    }
-
-    for (int i = 0; i < r; i++)
-    {
-        Sidx = l*p + l + p*i;
-        for (int j= 0; j <= i; j++)
-        {
-            S[Sidx] = Sbr[j + i * r];
-            Sidx++;
-        }
-    }
-}
-
-
-void Algorithm::check_result(float* AL, float* AR,
-                             int rowsA, int colsA, int rhs, int colsAR,
-                             float* y, float* res,struct Settings params,int iX,int iiX, int jY, int jjY)
-{
-    float* A = (float*)malloc(rowsA * colsA *
-                                                sizeof(float));
-
-    int i, ar_idx = 0;
-    for (i = 0; i < rowsA * (colsA - colsAR); i++)
-    {
-        A[i] =  AL[i];
-    }
-
-    for (i = rowsA * (colsA - colsAR); i < rowsA * colsA; i++)
-    {
-        A[i] = AR[ar_idx];
-        ar_idx++;
+
+}
+
+
+Algorithm::~Algorithm()
+{
+    // dtor
+}
+
+
+void Algorithm::solve(struct Settings params, struct Outputs &out, int type)
+{
+    switch (type)
+    {
+        case P_NEQ_B_OPT_MD:
+            partialNEQ_Blocked_STL_MD(params, out);
+            break;
+
+        default:
+            break;
     }
+}
 
-    //matlab_print_matrix("A", rowsA, colsA, A);
-
-
-    float* ynew = replicate_vec(y, rowsA*rhs);
-    float* new_sol = (float*)malloc(colsA * rhs *
-                                                      sizeof(float));
-
-    lapack_int info = LAPACKE_sgels(STORAGE_TYPE, 'N', rowsA, colsA, rhs, A,
-                                    rowsA, ynew, rowsA);
+
+void Algorithm::extract_subMatrix(float* source, float* dest,
+                                  int dim1_source, int dim2_source,
+                                  int dim1_ini, int dim1_end, int dim2_ini,
+                                  int dim2_end)
+{
+    int i, j, idx = 0;
+    int size, source_ini;
+    for (i = dim2_ini; i < dim2_end; i++)
+    {
+        j = dim1_ini;
+        source_ini = i * dim1_source+j;
+        size = dim1_end - dim1_ini;
+        memcpy( (float*)&dest[idx],
+                (float*)&source[source_ini],
+                size * sizeof(float) );
+//        for (j = dim1_ini; j<dim1_end; j++)
+//        {
+//            dest[idx] = source[i*dim1_source+j];
+//            idx++;
+//        }
+        idx += size;
+    }
+}
+
+
+void Algorithm::prepare_Bfinal(float* bfinal, float* bsource, int a_amount, int y_amount, int p)
+{
+//    // memcpy are faster version of the fors
+//    int i, k, w, top_idx, bot_idx;
+//    int size;
+//    top_idx = 0;
+//    bot_idx = 0;
+//    for (k = 0; k < dim2_b; k++)
+//    {
+//        size = k * dim1_b + (dim1_b - dim1_b_bot) - (k * dim1_b);
+//        memcpy( (float*)&bfinal[k * dim1_b],
+//                (float*)&top[top_idx],
+//                size * sizeof(float) );
+////        for (i = k*dim1_b; i < k*dim1_b+(dim1_b-dim1_b_bot); i++)
+////        {
+////            bfinal[i] = top[top_idx];
+////            top_idx++;
+////        }
+//        top_idx += size;
+//        i = k * dim1_b + size;
+//        w = i;
+//
+//        size = w + dim1_b_bot - w;
+//        memcpy( (float*)&bfinal[w],
+//                (float*)&bot[bot_idx],
+//                size * sizeof(float) );
+////        for (i = w; i < w+dim1_b_bot; i++)
+////        {
+////            bfinal[i] = bot[bot_idx];
+////            bot_idx++;
+////        }
+//        bot_idx += size;
+//    }
+}
+
+
+void Algorithm::prepare_QY(float* qy, float* top,
+                           float* bot, int dim1_QY,
+                           int dim2_QY, int dim1_qy_bot, int bot_blocks )
+{
+    int i, k, w, top_idx, bot_idx;
+    top_idx = 0;
+    bot_idx = 0;
+    for (k = 0; k < dim2_QY; k++)
+    {
+        for (i = k * dim1_QY; i < (k+1) * dim1_QY - dim1_qy_bot; i++)
+        {
+            qy[i] = top[top_idx];
+            top_idx++;
+        }
+        w = i;
+
+        for (i = w; i < w + dim1_qy_bot; i++)
+        {
+            qy[i] = bot[bot_idx];
+            bot_idx++;
+        }
+        bot_idx += (bot_blocks-1) * dim1_qy_bot;
+    }
+}
+
+
+float* Algorithm::extract_R(float* A, int dim1_A, int dim2_A)
+{
+    float* R = (float*)calloc(dim2_A * dim2_A,
+                                                sizeof(float));
+    int i, j;
+
+    int R_idx = 0;
+
+    for (i = 0; i < dim2_A; i++)
+    {
+        for (j = 0; j <= i; j++)
+        {
+            R[R_idx] = A[j + i * dim1_A];
+            R_idx++;
+        }
+        R_idx = dim2_A * (i+1);
+    }
+    return R;
+}
+
+
+float* Algorithm::prepare_R(float* RL, int dim1_A,
+                                     int dim2_AL, int dim2_AR)
+{
+    int R_dim = (dim2_AR + dim2_AL);
+    float* R = new float[R_dim * R_dim];
+
+    int i, j;
+
+    int RL_idx = 0;
+    int R_idx = 0;
+
+    for (i = 0; i < dim2_AL; i++)
+    {
+        for (j = 0; j <= i; j++)
+        {
+            RL_idx   = i * dim1_A + j;
+            R_idx    = i * R_dim + j;
+            R[R_idx] = RL[RL_idx];
+        }
+    }
+    return R;
+}
+
+
+void Algorithm::update_R(float* R, float* topRr,
+                         float* botRr, int dim1, int dim2, int r)
+{
+    int i, j, w;
+    int max = dim1 * dim2;
+    int rtr_idx = 0;
+    int rbr_idx = 0;
+    for (j = r; j > 0; j--)
+    {
+        for (i = max - dim1 * j; i < max - dim1 * j + dim2 - r; i++)
+        {
+            R[i] = topRr[rtr_idx];
+            rtr_idx++;
+        }
+        w = i;
+
+        for (i = w; i < w + r; i++)
+        {
+            R[i] = botRr[rbr_idx];
+            rbr_idx++;
+        }
+    }
+}
+
+
+void Algorithm::build_S(float* S, float* Stl,
+                        float* Str, float* Sbr, int l, int r)
+{
+    int Sidx;
+    int p = l + r;
+
+    for (int i = 0; i < l; i++)
+    {
+        Sidx = i * p;
+        for (int j = 0; j <= i; j++)
+        {
+            S[Sidx] = Stl[j+i*l];
+            Sidx++;
+        }
+    }
+
+    for (int i = 0; i < r; i++)
+    {
+        Sidx = l*p + p*i;
+        for (int j = 0; j < l; j++)
+        {
+            S[Sidx] = Str[j + i * l];
+            Sidx++;
+        }
+    }
+
+    for (int i = 0; i < r; i++)
+    {
+        Sidx = l*p + l + p*i;
+        for (int j= 0; j <= i; j++)
+        {
+            S[Sidx] = Sbr[j + i * r];
+            Sidx++;
+        }
+    }
+}
+
+
+void Algorithm::check_result(float* AL, float* AR,
+                             int rowsA, int colsA, int rhs, int colsAR,
+                             float* y, float* res,struct Settings params,int iX,int iiX, int jY, int jjY)
+{
+    float* A = (float*)malloc(rowsA * colsA *
+                                                sizeof(float));
+
+    int i, ar_idx = 0;
+    for (i = 0; i < rowsA * (colsA - colsAR); i++)
+    {
+        A[i] =  AL[i];
+    }
+
+    for (i = rowsA * (colsA - colsAR); i < rowsA * colsA; i++)
+    {
+        A[i] = AR[ar_idx];
+        ar_idx++;
+    }
+
+    //matlab_print_matrix("A", rowsA, colsA, A);
+
+
+    float* ynew = replicate_vec(y, rowsA*rhs);
+    float* new_sol = (float*)malloc(colsA * rhs *
+                                                      sizeof(float));
+
+    lapack_int info = LAPACKE_sgels(STORAGE_TYPE, 'N', rowsA, colsA, rhs, A,
+                                    rowsA, ynew, rowsA);
     myassert(info == 0, "Error Check");
 
-
-
-
-    int index = 0;
-    int index_new = 0;
-    for (i = 0; i < rhs; i++)
-    {
-         copy_vec(&ynew[index], &new_sol[index_new], colsA);
-         index += rowsA;
-         index_new += colsA;
+
+
+
+    int index = 0;
+    int index_new = 0;
+    for (i = 0; i < rhs; i++)
+    {
+         copy_vec(&ynew[index], &new_sol[index_new], colsA);
+         index += rowsA;
+         index_new += colsA;
     }
 
     float* precomp_betas = new float[colsA];
@@ -271,10 +271,10 @@
     if(!params.use_fake_files)
     {
         //cout << ".";
-        FILE* fp_Bprecomputed = fopen("examples/bpre.fvd", "rb");
-        if(fp_Bprecomputed == 0)
-        {
-            cout << "Error Reading File of precomputed B values " << endl;
+        FILE* fp_Bprecomputed = fopen("examples/bpre.fvd", "rb");
+        if(fp_Bprecomputed == 0)
+        {
+            cout << "Error Reading File of precomputed B values " << endl;
         }
         else
         {
@@ -287,19 +287,19 @@
             size_t result = fread (precomp_betas,sizeof(float),colsA,fp_Bprecomputed); result++;
 
             //matlab_print_matrix("res", colsA, 1, res);
-            //matlab_print_matrix("new_sol", colsA, 1, new_sol);
+            //matlab_print_matrix("new_sol", colsA, 1, new_sol);
             //matlab_print_matrix("precomp_betas", colsA, 1, precomp_betas);
 
-            cblas_saxpy(colsA, -1.0, res, 1, precomp_betas, 1);
+            cblas_saxpy(colsA, -1.0, res, 1, precomp_betas, 1);
             float u_norm = cblas_snrm2(colsA, precomp_betas, 1);
             //cout << "pa:"<< u_norm ;
 
 
-            if (fabs(u_norm) > 0.001 || isnan(u_norm))
+            if (fabs(u_norm) > 0.001 || isnan(u_norm))
             {
                 fseek ( fp_Bprecomputed , file_pos*sizeof(float) , SEEK_SET );
                 result = fread (precomp_betas,sizeof(float),colsA,fp_Bprecomputed); result++;
-                cblas_saxpy(colsA, 1.0, res, 1, precomp_betas, 1);
+                cblas_saxpy(colsA, 1.0, res, 1, precomp_betas, 1);
                 float u_norm2 = cblas_snrm2(colsA, precomp_betas, 1);
                 //cout << u_norm2 << " ";
                 //cout << file_pos <<"c "<< u_norm2 << "\n";
@@ -307,25 +307,25 @@
                 if(fabs(u_norm2) > 0.001 || isnan(u_norm))
                 {
                     fseek ( fp_Bprecomputed , file_pos*sizeof(float) , SEEK_SET );
-                    result = fread (precomp_betas,sizeof(float),colsA,fp_Bprecomputed); result++;
+                    result = fread (precomp_betas,sizeof(float),colsA,fp_Bprecomputed); result++;
                     fflush(stdout);
 
-//                    matlab_print_matrix("AL", rowsA, colsA-colsAR, AL);
-//                    matlab_print_matrix("AR", rowsA, colsAR, AR);
-//                    matlab_print_matrix("Y", rowsA, rhs, y);
-                    matlab_print_matrix("res", colsA, 1, res);
+//                    matlab_print_matrix("AL", rowsA, colsA-colsAR, AL);
+//                    matlab_print_matrix("AR", rowsA, colsAR, AR);
+//                    matlab_print_matrix("Y", rowsA, rhs, y);
+                    matlab_print_matrix("res", colsA, 1, res);
                     matlab_print_matrix("precomp_betas", colsA, 1, precomp_betas);
                     cout << file_pos <<" "<< u_norm << "\n";
-                    cout << file_pos <<" "<< u_norm2 << "\n";
-                    //printf("\n%%\tBeta computed nrom: %0.2g\n", u_norm2);
+                    cout << file_pos <<" "<< u_norm2 << "\n";
+                    //printf("\n%%\tBeta computed nrom: %0.2g\n", u_norm2);
                     exit(1);
                 }
-
+
             }
 
             fseek ( fp_Bprecomputed , file_pos*sizeof(float) , SEEK_SET );
             result = fread (precomp_betas,sizeof(float),colsA,fp_Bprecomputed); result++;
-            cblas_saxpy(colsA, -1.0, new_sol, 1, precomp_betas, 1);
+            cblas_saxpy(colsA, -1.0, new_sol, 1, precomp_betas, 1);
             u_norm = cblas_snrm2(colsA, precomp_betas, 1);
             //cout << " pl:" << u_norm ;
             if (fabs(u_norm) > 0.001 || isnan(u_norm))
@@ -339,55 +339,55 @@
         fclose(fp_Bprecomputed);
     }
     //else
-    {
-
-
-    //    if (PRINT)
-    //        printf("\n Btop=(Rtl\\(Ql'*Y))-(Rtl\\Rtr)*(Rbr\\(Qr'*Y)); \n [Btop ; Rbr\\Qr'*Y] - bcomputed \n");
-
-        cblas_saxpy(rhs * colsA, -1.0, res, 1, new_sol, 1);
-        float u_norm = cblas_snrm2(rhs * colsA, new_sol, 1);
-        //
-        if (fabs(u_norm) >= 0.1 || isnan(u_norm))
-        {
-            fflush(stdout);
-           // matlab_print_matrix("AL", rowsA, colsA-colsAR, AL);
+    {
+
+
+    //    if (PRINT)
+    //        printf("\n Btop=(Rtl\\(Ql'*Y))-(Rtl\\Rtr)*(Rbr\\(Qr'*Y)); \n [Btop ; Rbr\\Qr'*Y] - bcomputed \n");
+
+        cblas_saxpy(rhs * colsA, -1.0, res, 1, new_sol, 1);
+        float u_norm = cblas_snrm2(rhs * colsA, new_sol, 1);
+        //
+        if (fabs(u_norm) >= 0.1 || isnan(u_norm))
+        {
+            fflush(stdout);
+           // matlab_print_matrix("AL", rowsA, colsA-colsAR, AL);
             //matlab_print_matrix("AR", rowsA, colsAR, AR);
-            //matlab_print_matrix("A", rowsA, colsA, A);
-            //matlab_print_matrix("Y", rowsA, rhs, y);
-//            printf("\nA = [AL AR]; [Q, R] = qr(A, 0); rr = R\\(Q'*Y)\n");
-            matlab_print_matrix("bcomputed", colsA, rhs, res);
-            matlab_print_matrix("newsol", colsA, rhs, ynew);
-            printf("\n%%\tnrom: %0.5g\n\n", u_norm);
-             exit(1);
-        }
-        else
-        {
-            //matlab_print_matrix("bcomputed", colsA, rhs, res);
-            //matlab_print_matrix("newsol", colsA, rhs, ynew);
+            //matlab_print_matrix("A", rowsA, colsA, A);
+            //matlab_print_matrix("Y", rowsA, rhs, y);
+//            printf("\nA = [AL AR]; [Q, R] = qr(A, 0); rr = R\\(Q'*Y)\n");
+            matlab_print_matrix("bcomputed", colsA, rhs, res);
+            matlab_print_matrix("newsol", colsA, rhs, ynew);
+            printf("\n%%\tnrom: %0.5g\n\n", u_norm);
+             exit(1);
+        }
+        else
+        {
+            //matlab_print_matrix("bcomputed", colsA, rhs, res);
+            //matlab_print_matrix("newsol", colsA, rhs, ynew);
            //printf("%%%0.3g \n", u_norm);
-           //matlab_print_matrix("AL", rowsA, colsA-colsAR, AL);
+           //matlab_print_matrix("AL", rowsA, colsA-colsAR, AL);
 //            matlab_print_matrix("AR", rowsA, colsAR, AR);
-
+
             //matlab_print_matrix("Y", rowsA, rhs, y);
 
-           //cout << " la:" << u_norm << "\n";
+           //cout << " la:" << u_norm << "\n";
         }
 
 
 
 
-    }
-
-
-    // cout << "\t**************";
-    free(ynew);
-    free(new_sol);
+    }
+
+
+    // cout << "\t**************";
+    free(ynew);
+    free(new_sol);
     free(A);
-    delete []precomp_betas;
-}
-
+    delete []precomp_betas;
+}
 
+
 void Algorithm::applyDefaultParams(struct Settings &params)
 {
 
@@ -424,22 +424,22 @@
 
 }
 
-///////////////////////////////
-
-
-void Algorithm::partialNEQ_Blocked_STL_MD(struct Settings params,
-                                          struct Outputs &out)
-{
-
-
-
-    srand(time(NULL));
-
+///////////////////////////////
 
-
-    //float *Ytemp;
-    lapack_int info, n, lda, l, r, p;
-
+
+void Algorithm::partialNEQ_Blocked_STL_MD(struct Settings params,
+                                          struct Outputs &out)
+{
+
+
+
+    srand(time(NULL));
+
+
+
+    //float *Ytemp;
+    lapack_int info, n, lda, l, r, p;
+
     cputime_type start_tick, start_tick2, start_tick3, end_tick;
 
 
@@ -453,10 +453,10 @@
     AIOfile.initialize(params);//THIS HAS TO BE DONE FIRST! ALWAYS
 
 
-    total_results = 0;
+    total_results = 0;
 
     //cout << params.n <<  "\n";
-    //parameters read on AIOfile.initialize are then copied localy
+    //parameters read on AIOfile.initialize are then copied localy
     n = params.n; l = params.l; r = params.r; p = l+r;
     disp_cov = params.disp_cov;
     storePInd = params.storePInd;
@@ -471,58 +471,58 @@
     max_threads = params.threads;
     blas_set_num_threads(max_threads);
     omp_set_num_threads(max_threads);
-
-
-    int y_amount = params.t;
-    int y_block_size = params.tb;
+
+
+    int y_amount = params.t;
+    int y_block_size = params.tb;
     //int orig_y_block_size = y_block_size;
-    //cout << "yt:"<< y_amount << " oybz:"<<y_block_size << flush;
-
-    int a_amount = params.m;
-    int a_block_size = params.mb;
+    //cout << "yt:"<< y_amount << " oybz:"<<y_block_size << flush;
+
+    int a_amount = params.m;
+    int a_block_size = params.mb;
     //int orig_a_block_size = a_block_size;
 
     //cout << r << endl;
-
-
-    int a_iters = (a_amount + a_block_size - 1) / a_block_size;
-
+
+
+    int a_iters = (a_amount + a_block_size - 1) / a_block_size;
+
     int y_iters = (y_amount + y_block_size - 1) / y_block_size;
 
-    //cout << "yiters:" <<  y_iters << " aiters:" << a_iters << endl;
-
-
-    lda = n;
+    //cout << "yiters:" <<  y_iters << " aiters:" << a_iters << endl;
+
+
+    lda = n;
     if(!params.ForceCheck && params.mpi_id == 0)
-    {
+    {
         cout << endl;
     }
-
-    for (int j = 0; j < y_iters && !params.ForceCheck && params.mpi_id == 0; j++)
-    {
-        if (!params.ForceCheck &&  ((y_iters < 10 && y_iters > 2 )|| (y_iters >= 10 && (j%(y_iters/10)) == 0 )) )
-        {
-            cout << "*" << flush;
-        }
-
-        for (int i = 0; i < a_iters; i++)
-        {
-
+
+    for (int j = 0; j < y_iters && !params.ForceCheck && params.mpi_id == 0; j++)
+    {
+        if (!params.ForceCheck &&  ((y_iters < 10 && y_iters > 2 )|| (y_iters >= 10 && (j%(y_iters/10)) == 0 )) )
+        {
+            cout << "*" << flush;
+        }
+
+        for (int i = 0; i < a_iters; i++)
+        {
+
                 if ( !params.ForceCheck &&  y_iters <= 2 &&
-                (  (a_iters >= 10 && (i%(a_iters/(10))) == 0) || (a_iters < (10)) ))
-                {
-                    cout << "*" << flush;
-                }
-
-        }
+                (  (a_iters >= 10 && (i%(a_iters/(10))) == 0) || (a_iters < (10)) ))
+                {
+                    cout << "*" << flush;
+                }
+
+        }
     }
 
-    if(!params.ForceCheck && params.mpi_id == 0)
-        cout << endl;
-
+    if(!params.ForceCheck && params.mpi_id == 0)
+        cout << endl;
+
     //add memalign
-
-    //float Stl[l*l];
+
+    //float Stl[l*l];
     //float Str[l*r*a_block_size];
     float* Stl = new float[l*l*1];
     float* Str = new float[l*r*a_block_size*1];
@@ -535,14 +535,14 @@
 
     float* Sbr = new float[r *  r * a_block_size];
     float* Ay = new float[p * a_block_size * y_block_size];
-    //float* B_resorted = new float[p * a_block_size*y_block_size];
+    //float* B_resorted = new float[p * a_block_size*y_block_size];
 
     //float* S = new float[p * p];
 
     float* S2global = new float[p*p*max_threads];
-
-
-    float* Ay_top = new float[l * y_amount];
+
+
+    float* Ay_top = new float[l * y_amount];
     float* Ay_bot = new float[y_block_size * a_block_size * r];
 
     float* y_residual = new float[n * y_block_size ];
@@ -554,9 +554,9 @@
     int* Ymiss=new int[y_block_size];
 
     std::map <int , list < int > > y_missings_jj;
-
 
-    float* A = new float[n * p * 1];
+
+    float* A = new float[n * p * 1];
     float* AR;// = new float[n * r * a_block_size * 1];
 
     //Sum of squares for A parts
@@ -573,18 +573,18 @@
     list < resultH >* sigResults;
 
 
-
+
 //  float* AL = new float[n * l * 1];
     float* AL = A;
 
     float* B = Ay;
-
-    float* backupAR;  // = new float[n*r*a_block_size];
-    float* backupAL;  // = new float[n*l];
-
-
-    AIOfile.load_AL(&backupAL);
 
+    float* backupAR;  // = new float[n*r*a_block_size];
+    float* backupAL;  // = new float[n*l];
+
+
+    AIOfile.load_AL(&backupAL);
+
     //pthread_barrier_wait(&(AIOfile.Fhandler->finalize_barrier));
 
     //matlab_print_matrix("AL", n,l,backupAL);
@@ -601,16 +601,16 @@
 
 
     //LAPACKE_dgesdd()
-
+
     copy_vec(backupAL, AL, n*l);
 
     get_ticks(start_tick2);
-
-    //! Generate Stl
-    cblas_ssyrk(CblasColMajor, CblasUpper, CblasTrans,
+
+    //! Generate Stl
+    cblas_ssyrk(CblasColMajor, CblasUpper, CblasTrans,
                 l, n, 1.0, AL, lda, 0.0, Stl, l);
 
-    get_ticks(end_tick);
+    get_ticks(end_tick);
     out.acc_stl += ticks2sec(end_tick,start_tick2);
 
 
@@ -646,39 +646,39 @@
     delete []vt;
     delete []superb;
 
-
-
-    float* Y;
-
+
+
+    float* Y;
+
     // printf("\n\n%%Computations\n%%");
 
-    copy_vec(backupAL, AL, n*l);
-
-
-    get_ticks(start_tick);
-
-    for (int j = 0; j < y_iters; j++)
-    {
-        if (!params.ForceCheck && params.mpi_id == 0 && ((y_iters < 10 && y_iters > 2 )|| (y_iters >= 10 && (j%(y_iters/10)) == 0 )) )
-        {
-            cout << AIOfile.io_overhead << flush;
-            AIOfile.io_overhead = "*";
-        }
+    copy_vec(backupAL, AL, n*l);
 
+
+    get_ticks(start_tick);
+
+    for (int j = 0; j < y_iters; j++)
+    {
+        if (!params.ForceCheck && params.mpi_id == 0 && ((y_iters < 10 && y_iters > 2 )|| (y_iters >= 10 && (j%(y_iters/10)) == 0 )) )
+        {
+            cout << AIOfile.io_overhead << flush;
+            AIOfile.io_overhead = "*";
+        }
+
         get_ticks(start_tick2);
-
+
         AIOfile.load_Yblock(&Y, y_block_size);
         //cout << "ybz:"<< y_block_size << " " << flush;
 
-        get_ticks(end_tick);
-        out.acc_loady += ticks2sec(end_tick,start_tick2);
-
+        get_ticks(end_tick);
+        out.acc_loady += ticks2sec(end_tick,start_tick2);
+
         get_ticks(start_tick2);
 
         replace_nans(&y_nan_idxs[0],y_block_size, Y, n,1);
         sumSquares(Y,y_block_size,n,ssY,y_nan_idxs);
 
-        for (int jj = 0; jj < y_block_size; jj++)
+        for (int jj = 0; jj < y_block_size; jj++)
         {
 //            for (list< long int  >::iterator it=y_nan_idxs[jj].begin(); it != y_nan_idxs[jj].end(); ++it)
 //            {
@@ -695,38 +695,38 @@
         //matlab_print_matrix("Y", n, y_block_size, Y);
         get_ticks(end_tick);
         out.acc_other += ticks2sec(end_tick,start_tick2);
-
-
+
+
         get_ticks(start_tick2);
-
-        //! Ay_top = AL'*Y
-        cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
-                    l, y_block_size, n, 1.0, AL, n, Y, n, 0.0,
+
+        //! Ay_top = AL'*Y
+        cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
+                    l, y_block_size, n, 1.0, AL, n, Y, n, 0.0,
                     &Ay_top[j * l * y_block_size], l);
 
-        get_ticks(end_tick);
+        get_ticks(end_tick);
         out.acc_gemm += ticks2sec(end_tick,start_tick2);
 
 
         get_ticks(start_tick2);
         generate_correction_missings_Stl(y_block_size,1,1,n,l,l,AL,AL,Stl_corr,y_nan_idxs);
 
-        get_ticks(end_tick);
-        out.acc_scorrect += ticks2sec(end_tick,start_tick2);
-
-
-        for (int i = 0; i < a_iters; i++)
+        get_ticks(end_tick);
+        out.acc_scorrect += ticks2sec(end_tick,start_tick2);
+
+
+        for (int i = 0; i < a_iters; i++)
         {
 
             if (!params.ForceCheck && y_iters <= 2 && params.mpi_id == 0 &&
                 (  (a_iters >= 10 && (i%(a_iters/(10))) == 0) || (a_iters < (10)) ))
-            {
-                cout << AIOfile.io_overhead << flush;
-                AIOfile.io_overhead = "*";
+            {
+                cout << AIOfile.io_overhead << flush;
+                AIOfile.io_overhead = "*";
             }
 
             get_ticks(start_tick2);
-            //cout << "^"  << flush;
+            //cout << "^"  << flush;
             AIOfile.load_ARblock(&backupAR, a_block_size);
            // cout << "^" << endl << flush;
 
@@ -741,43 +741,43 @@
 
             replace_nans_avgs(a_block_size, backupAR, n, r, ar_nan_idxs);
 
-            get_ticks(end_tick);
+            get_ticks(end_tick);
             out.acc_other += ticks2sec(end_tick,start_tick2);
 
             AR = backupAR;
 
 
-            get_ticks(start_tick2);
-
-            //! Generate Str
-            cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
-                                l, r * a_block_size, n, 1.0, AL,
+            get_ticks(start_tick2);
+
+            //! Generate Str
+            cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
+                                l, r * a_block_size, n, 1.0, AL,
                                             n, AR, n, 0.0, Str, l);//!
-
-            get_ticks(end_tick);
+
+            get_ticks(end_tick);
             out.acc_str += ticks2sec(end_tick,start_tick2);
 
 
             get_ticks(start_tick2);
 
-            blas_set_num_threads(1);
+            blas_set_num_threads(1);
             omp_set_num_threads(max_threads);
 
             #pragma omp parallel default(shared)
             {
-
+
             #pragma omp for nowait
-            for (int ii= 0; ii < a_block_size; ii++)
-            {
-                //! Generate Sbr
-                cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
-                            r, r, n, 1.0, &AR[ii*r*n], n,
-                            &AR[ii * r * n], n, 0.0,
+            for (int ii= 0; ii < a_block_size; ii++)
+            {
+                //! Generate Sbr
+                cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
+                            r, r, n, 1.0, &AR[ii*r*n], n,
+                            &AR[ii * r * n], n, 0.0,
                             &Sbr[ii * r * r], r);
 
                 float* S2 = &S2global[p*p*omp_get_thread_num()];
-
-                //! build S for X'X-1 for fake "variance of x"
+
+                //! build S for X'X-1 for fake "variance of x"
                 build_S(S2, Stl, &Str[ii*r*l], &Sbr[ii*r*r], l, r);
 
 
@@ -798,17 +798,17 @@
             }
 
             blas_set_num_threads(max_threads);
-
 
 
-            get_ticks(end_tick);
+
+            get_ticks(end_tick);
             out.acc_sbr += ticks2sec(end_tick,start_tick2 );
 
             get_ticks(start_tick2);
 
             generate_correction_missings_Str(y_block_size,a_block_size,n,l,r,AL,AR,Str_corr,Sbr_corr,y_nan_idxs);
 
-            get_ticks(end_tick);
+            get_ticks(end_tick);
             out.acc_scorrect += ticks2sec(end_tick,start_tick2);
 
 
@@ -819,53 +819,53 @@
 
             //matlab_print_matrix("ARb",n,a_block_size*r,backupAR);
 
-
-
+
+
             get_ticks(start_tick2);
-
-            //! Ay_bot = AR'*Y
-            cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
-                        r * a_block_size, y_block_size, n, 1.0, AR, n, Y, n,
-                        0.0, Ay_bot, r * a_block_size);
-
-            get_ticks(end_tick);
+
+            //! Ay_bot = AR'*Y
+            cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
+                        r * a_block_size, y_block_size, n, 1.0, AR, n, Y, n,
+                        0.0, Ay_bot, r * a_block_size);
+
+            get_ticks(end_tick);
             out.acc_gemm += ticks2sec(end_tick,start_tick2);
 
-
-            get_ticks(start_tick3);
-            for (int jj = 0; jj < y_block_size; jj++)
+
+            get_ticks(start_tick3);
+            for (int jj = 0; jj < y_block_size; jj++)
             {
 
                 //int thread_id = 0 * omp_get_thread_num();//so far singel thread version only
-                //cout << "y_block_size:" << y_block_size << " a_block_size:" << a_block_size <<" r:" << r << "\n";
+                //cout << "y_block_size:" << y_block_size << " a_block_size:" << a_block_size <<" r:" << r << "\n";
 
-                blas_set_num_threads(1);
+                blas_set_num_threads(1);
                 omp_set_num_threads(max_threads);
 
                 #pragma omp parallel default(shared)
                 {
-
-                #pragma omp for nowait
-                for (int ii= 0; ii < a_block_size; ii++)
+
+                #pragma omp for nowait
+                for (int ii= 0; ii < a_block_size; ii++)
                 {
 //                    //cout << omp_get_thread_num() << endl << flush;
                     cputime_type start_tick2;
 
                     get_ticks(start_tick2);
-
-                    copy_vec(&Ay_top[j*l*y_block_size+jj*l], &Ay[jj*p*a_block_size+ ii*p], l);
-                    copy_vec(&Ay_bot[ii*r + jj*r*a_block_size],
+
+                    copy_vec(&Ay_top[j*l*y_block_size+jj*l], &Ay[jj*p*a_block_size+ ii*p], l);
+                    copy_vec(&Ay_bot[ii*r + jj*r*a_block_size],
                              &Ay[jj*p*a_block_size+l+ii*p], r);
 
 
                     float* S2 = &S2global[p*p*omp_get_thread_num()];
-
-                    //! Rebuild S
-                    build_S(S2, Stl, &Str[ii*r*l], &Sbr[ii*r*r], l, r);
 
+                    //! Rebuild S
+                    build_S(S2, Stl, &Str[ii*r*l], &Sbr[ii*r*r], l, r);
+
                     if(omp_get_thread_num()==0)
                     {
-                    get_ticks(end_tick);//5%
+                    get_ticks(end_tick);//5%
                     out.acc_other += ticks2sec(end_tick,start_tick2);
                     }
 
@@ -875,41 +875,41 @@
 
                     if(omp_get_thread_num()==0)
                     {
-                    get_ticks(end_tick);
+                    get_ticks(end_tick);
                     out.acc_inner_scorrect += ticks2sec(end_tick,start_tick2);
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/genabel -r 1850


More information about the Genabel-commits mailing list