[Genabel-commits] r1850 - in pkg/OmicABELnoMM: src tests
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Oct 27 17:07:09 CET 2014
Author: lckarssen
Date: 2014-10-27 17:07:09 +0100 (Mon, 27 Oct 2014)
New Revision: 1850
Modified:
pkg/OmicABELnoMM/src/Algorithm.cpp
pkg/OmicABELnoMM/src/Algorithm.h
pkg/OmicABELnoMM/src/Definitions.h
pkg/OmicABELnoMM/src/Utility.cpp
pkg/OmicABELnoMM/src/Utility.h
pkg/OmicABELnoMM/src/main.cpp
pkg/OmicABELnoMM/tests/test.cpp
Log:
Several OmicABELnoMM source files contained ^M characters (Windows EOL markers), in fact they appeared to contain a mix of Windows and Unix EOL charachters. For this commit I used dos2unix to convert Windows EOLs to Unix ones.
No functional changes, only changes in the code layout.
Modified: pkg/OmicABELnoMM/src/Algorithm.cpp
===================================================================
--- pkg/OmicABELnoMM/src/Algorithm.cpp 2014-10-26 21:51:40 UTC (rev 1849)
+++ pkg/OmicABELnoMM/src/Algorithm.cpp 2014-10-27 16:07:09 UTC (rev 1850)
@@ -1,269 +1,269 @@
#include "Algorithm.h"
-
-
-Algorithm::Algorithm()
-{
+
+
+Algorithm::Algorithm()
+{
// ctor
-
-}
-
-
-Algorithm::~Algorithm()
-{
- // dtor
-}
-
-
-void Algorithm::solve(struct Settings params, struct Outputs &out, int type)
-{
- switch (type)
- {
- case P_NEQ_B_OPT_MD:
- partialNEQ_Blocked_STL_MD(params, out);
- break;
-
- default:
- break;
- }
-}
-
-
-void Algorithm::extract_subMatrix(float* source, float* dest,
- int dim1_source, int dim2_source,
- int dim1_ini, int dim1_end, int dim2_ini,
- int dim2_end)
-{
- int i, j, idx = 0;
- int size, source_ini;
- for (i = dim2_ini; i < dim2_end; i++)
- {
- j = dim1_ini;
- source_ini = i * dim1_source+j;
- size = dim1_end - dim1_ini;
- memcpy( (float*)&dest[idx],
- (float*)&source[source_ini],
- size * sizeof(float) );
-// for (j = dim1_ini; j<dim1_end; j++)
-// {
-// dest[idx] = source[i*dim1_source+j];
-// idx++;
-// }
- idx += size;
- }
-}
-
-
-void Algorithm::prepare_Bfinal(float* bfinal, float* bsource, int a_amount, int y_amount, int p)
-{
-// // memcpy are faster version of the fors
-// int i, k, w, top_idx, bot_idx;
-// int size;
-// top_idx = 0;
-// bot_idx = 0;
-// for (k = 0; k < dim2_b; k++)
-// {
-// size = k * dim1_b + (dim1_b - dim1_b_bot) - (k * dim1_b);
-// memcpy( (float*)&bfinal[k * dim1_b],
-// (float*)&top[top_idx],
-// size * sizeof(float) );
-//// for (i = k*dim1_b; i < k*dim1_b+(dim1_b-dim1_b_bot); i++)
-//// {
-//// bfinal[i] = top[top_idx];
-//// top_idx++;
-//// }
-// top_idx += size;
-// i = k * dim1_b + size;
-// w = i;
-//
-// size = w + dim1_b_bot - w;
-// memcpy( (float*)&bfinal[w],
-// (float*)&bot[bot_idx],
-// size * sizeof(float) );
-//// for (i = w; i < w+dim1_b_bot; i++)
-//// {
-//// bfinal[i] = bot[bot_idx];
-//// bot_idx++;
-//// }
-// bot_idx += size;
-// }
-}
-
-
-void Algorithm::prepare_QY(float* qy, float* top,
- float* bot, int dim1_QY,
- int dim2_QY, int dim1_qy_bot, int bot_blocks )
-{
- int i, k, w, top_idx, bot_idx;
- top_idx = 0;
- bot_idx = 0;
- for (k = 0; k < dim2_QY; k++)
- {
- for (i = k * dim1_QY; i < (k+1) * dim1_QY - dim1_qy_bot; i++)
- {
- qy[i] = top[top_idx];
- top_idx++;
- }
- w = i;
-
- for (i = w; i < w + dim1_qy_bot; i++)
- {
- qy[i] = bot[bot_idx];
- bot_idx++;
- }
- bot_idx += (bot_blocks-1) * dim1_qy_bot;
- }
-}
-
-
-float* Algorithm::extract_R(float* A, int dim1_A, int dim2_A)
-{
- float* R = (float*)calloc(dim2_A * dim2_A,
- sizeof(float));
- int i, j;
-
- int R_idx = 0;
-
- for (i = 0; i < dim2_A; i++)
- {
- for (j = 0; j <= i; j++)
- {
- R[R_idx] = A[j + i * dim1_A];
- R_idx++;
- }
- R_idx = dim2_A * (i+1);
- }
- return R;
-}
-
-
-float* Algorithm::prepare_R(float* RL, int dim1_A,
- int dim2_AL, int dim2_AR)
-{
- int R_dim = (dim2_AR + dim2_AL);
- float* R = new float[R_dim * R_dim];
-
- int i, j;
-
- int RL_idx = 0;
- int R_idx = 0;
-
- for (i = 0; i < dim2_AL; i++)
- {
- for (j = 0; j <= i; j++)
- {
- RL_idx = i * dim1_A + j;
- R_idx = i * R_dim + j;
- R[R_idx] = RL[RL_idx];
- }
- }
- return R;
-}
-
-
-void Algorithm::update_R(float* R, float* topRr,
- float* botRr, int dim1, int dim2, int r)
-{
- int i, j, w;
- int max = dim1 * dim2;
- int rtr_idx = 0;
- int rbr_idx = 0;
- for (j = r; j > 0; j--)
- {
- for (i = max - dim1 * j; i < max - dim1 * j + dim2 - r; i++)
- {
- R[i] = topRr[rtr_idx];
- rtr_idx++;
- }
- w = i;
-
- for (i = w; i < w + r; i++)
- {
- R[i] = botRr[rbr_idx];
- rbr_idx++;
- }
- }
-}
-
-
-void Algorithm::build_S(float* S, float* Stl,
- float* Str, float* Sbr, int l, int r)
-{
- int Sidx;
- int p = l + r;
-
- for (int i = 0; i < l; i++)
- {
- Sidx = i * p;
- for (int j = 0; j <= i; j++)
- {
- S[Sidx] = Stl[j+i*l];
- Sidx++;
- }
- }
-
- for (int i = 0; i < r; i++)
- {
- Sidx = l*p + p*i;
- for (int j = 0; j < l; j++)
- {
- S[Sidx] = Str[j + i * l];
- Sidx++;
- }
- }
-
- for (int i = 0; i < r; i++)
- {
- Sidx = l*p + l + p*i;
- for (int j= 0; j <= i; j++)
- {
- S[Sidx] = Sbr[j + i * r];
- Sidx++;
- }
- }
-}
-
-
-void Algorithm::check_result(float* AL, float* AR,
- int rowsA, int colsA, int rhs, int colsAR,
- float* y, float* res,struct Settings params,int iX,int iiX, int jY, int jjY)
-{
- float* A = (float*)malloc(rowsA * colsA *
- sizeof(float));
-
- int i, ar_idx = 0;
- for (i = 0; i < rowsA * (colsA - colsAR); i++)
- {
- A[i] = AL[i];
- }
-
- for (i = rowsA * (colsA - colsAR); i < rowsA * colsA; i++)
- {
- A[i] = AR[ar_idx];
- ar_idx++;
+
+}
+
+
+Algorithm::~Algorithm()
+{
+ // dtor
+}
+
+
+void Algorithm::solve(struct Settings params, struct Outputs &out, int type)
+{
+ switch (type)
+ {
+ case P_NEQ_B_OPT_MD:
+ partialNEQ_Blocked_STL_MD(params, out);
+ break;
+
+ default:
+ break;
}
+}
- //matlab_print_matrix("A", rowsA, colsA, A);
-
-
- float* ynew = replicate_vec(y, rowsA*rhs);
- float* new_sol = (float*)malloc(colsA * rhs *
- sizeof(float));
-
- lapack_int info = LAPACKE_sgels(STORAGE_TYPE, 'N', rowsA, colsA, rhs, A,
- rowsA, ynew, rowsA);
+
+void Algorithm::extract_subMatrix(float* source, float* dest,
+ int dim1_source, int dim2_source,
+ int dim1_ini, int dim1_end, int dim2_ini,
+ int dim2_end)
+{
+ int i, j, idx = 0;
+ int size, source_ini;
+ for (i = dim2_ini; i < dim2_end; i++)
+ {
+ j = dim1_ini;
+ source_ini = i * dim1_source+j;
+ size = dim1_end - dim1_ini;
+ memcpy( (float*)&dest[idx],
+ (float*)&source[source_ini],
+ size * sizeof(float) );
+// for (j = dim1_ini; j<dim1_end; j++)
+// {
+// dest[idx] = source[i*dim1_source+j];
+// idx++;
+// }
+ idx += size;
+ }
+}
+
+
+void Algorithm::prepare_Bfinal(float* bfinal, float* bsource, int a_amount, int y_amount, int p)
+{
+// // memcpy are faster version of the fors
+// int i, k, w, top_idx, bot_idx;
+// int size;
+// top_idx = 0;
+// bot_idx = 0;
+// for (k = 0; k < dim2_b; k++)
+// {
+// size = k * dim1_b + (dim1_b - dim1_b_bot) - (k * dim1_b);
+// memcpy( (float*)&bfinal[k * dim1_b],
+// (float*)&top[top_idx],
+// size * sizeof(float) );
+//// for (i = k*dim1_b; i < k*dim1_b+(dim1_b-dim1_b_bot); i++)
+//// {
+//// bfinal[i] = top[top_idx];
+//// top_idx++;
+//// }
+// top_idx += size;
+// i = k * dim1_b + size;
+// w = i;
+//
+// size = w + dim1_b_bot - w;
+// memcpy( (float*)&bfinal[w],
+// (float*)&bot[bot_idx],
+// size * sizeof(float) );
+//// for (i = w; i < w+dim1_b_bot; i++)
+//// {
+//// bfinal[i] = bot[bot_idx];
+//// bot_idx++;
+//// }
+// bot_idx += size;
+// }
+}
+
+
+void Algorithm::prepare_QY(float* qy, float* top,
+ float* bot, int dim1_QY,
+ int dim2_QY, int dim1_qy_bot, int bot_blocks )
+{
+ int i, k, w, top_idx, bot_idx;
+ top_idx = 0;
+ bot_idx = 0;
+ for (k = 0; k < dim2_QY; k++)
+ {
+ for (i = k * dim1_QY; i < (k+1) * dim1_QY - dim1_qy_bot; i++)
+ {
+ qy[i] = top[top_idx];
+ top_idx++;
+ }
+ w = i;
+
+ for (i = w; i < w + dim1_qy_bot; i++)
+ {
+ qy[i] = bot[bot_idx];
+ bot_idx++;
+ }
+ bot_idx += (bot_blocks-1) * dim1_qy_bot;
+ }
+}
+
+
+float* Algorithm::extract_R(float* A, int dim1_A, int dim2_A)
+{
+ float* R = (float*)calloc(dim2_A * dim2_A,
+ sizeof(float));
+ int i, j;
+
+ int R_idx = 0;
+
+ for (i = 0; i < dim2_A; i++)
+ {
+ for (j = 0; j <= i; j++)
+ {
+ R[R_idx] = A[j + i * dim1_A];
+ R_idx++;
+ }
+ R_idx = dim2_A * (i+1);
+ }
+ return R;
+}
+
+
+float* Algorithm::prepare_R(float* RL, int dim1_A,
+ int dim2_AL, int dim2_AR)
+{
+ int R_dim = (dim2_AR + dim2_AL);
+ float* R = new float[R_dim * R_dim];
+
+ int i, j;
+
+ int RL_idx = 0;
+ int R_idx = 0;
+
+ for (i = 0; i < dim2_AL; i++)
+ {
+ for (j = 0; j <= i; j++)
+ {
+ RL_idx = i * dim1_A + j;
+ R_idx = i * R_dim + j;
+ R[R_idx] = RL[RL_idx];
+ }
+ }
+ return R;
+}
+
+
+void Algorithm::update_R(float* R, float* topRr,
+ float* botRr, int dim1, int dim2, int r)
+{
+ int i, j, w;
+ int max = dim1 * dim2;
+ int rtr_idx = 0;
+ int rbr_idx = 0;
+ for (j = r; j > 0; j--)
+ {
+ for (i = max - dim1 * j; i < max - dim1 * j + dim2 - r; i++)
+ {
+ R[i] = topRr[rtr_idx];
+ rtr_idx++;
+ }
+ w = i;
+
+ for (i = w; i < w + r; i++)
+ {
+ R[i] = botRr[rbr_idx];
+ rbr_idx++;
+ }
+ }
+}
+
+
+void Algorithm::build_S(float* S, float* Stl,
+ float* Str, float* Sbr, int l, int r)
+{
+ int Sidx;
+ int p = l + r;
+
+ for (int i = 0; i < l; i++)
+ {
+ Sidx = i * p;
+ for (int j = 0; j <= i; j++)
+ {
+ S[Sidx] = Stl[j+i*l];
+ Sidx++;
+ }
+ }
+
+ for (int i = 0; i < r; i++)
+ {
+ Sidx = l*p + p*i;
+ for (int j = 0; j < l; j++)
+ {
+ S[Sidx] = Str[j + i * l];
+ Sidx++;
+ }
+ }
+
+ for (int i = 0; i < r; i++)
+ {
+ Sidx = l*p + l + p*i;
+ for (int j= 0; j <= i; j++)
+ {
+ S[Sidx] = Sbr[j + i * r];
+ Sidx++;
+ }
+ }
+}
+
+
+void Algorithm::check_result(float* AL, float* AR,
+ int rowsA, int colsA, int rhs, int colsAR,
+ float* y, float* res,struct Settings params,int iX,int iiX, int jY, int jjY)
+{
+ float* A = (float*)malloc(rowsA * colsA *
+ sizeof(float));
+
+ int i, ar_idx = 0;
+ for (i = 0; i < rowsA * (colsA - colsAR); i++)
+ {
+ A[i] = AL[i];
+ }
+
+ for (i = rowsA * (colsA - colsAR); i < rowsA * colsA; i++)
+ {
+ A[i] = AR[ar_idx];
+ ar_idx++;
+ }
+
+ //matlab_print_matrix("A", rowsA, colsA, A);
+
+
+ float* ynew = replicate_vec(y, rowsA*rhs);
+ float* new_sol = (float*)malloc(colsA * rhs *
+ sizeof(float));
+
+ lapack_int info = LAPACKE_sgels(STORAGE_TYPE, 'N', rowsA, colsA, rhs, A,
+ rowsA, ynew, rowsA);
myassert(info == 0, "Error Check");
-
-
-
- int index = 0;
- int index_new = 0;
- for (i = 0; i < rhs; i++)
- {
- copy_vec(&ynew[index], &new_sol[index_new], colsA);
- index += rowsA;
- index_new += colsA;
+
+
+
+ int index = 0;
+ int index_new = 0;
+ for (i = 0; i < rhs; i++)
+ {
+ copy_vec(&ynew[index], &new_sol[index_new], colsA);
+ index += rowsA;
+ index_new += colsA;
}
float* precomp_betas = new float[colsA];
@@ -271,10 +271,10 @@
if(!params.use_fake_files)
{
//cout << ".";
- FILE* fp_Bprecomputed = fopen("examples/bpre.fvd", "rb");
- if(fp_Bprecomputed == 0)
- {
- cout << "Error Reading File of precomputed B values " << endl;
+ FILE* fp_Bprecomputed = fopen("examples/bpre.fvd", "rb");
+ if(fp_Bprecomputed == 0)
+ {
+ cout << "Error Reading File of precomputed B values " << endl;
}
else
{
@@ -287,19 +287,19 @@
size_t result = fread (precomp_betas,sizeof(float),colsA,fp_Bprecomputed); result++;
//matlab_print_matrix("res", colsA, 1, res);
- //matlab_print_matrix("new_sol", colsA, 1, new_sol);
+ //matlab_print_matrix("new_sol", colsA, 1, new_sol);
//matlab_print_matrix("precomp_betas", colsA, 1, precomp_betas);
- cblas_saxpy(colsA, -1.0, res, 1, precomp_betas, 1);
+ cblas_saxpy(colsA, -1.0, res, 1, precomp_betas, 1);
float u_norm = cblas_snrm2(colsA, precomp_betas, 1);
//cout << "pa:"<< u_norm ;
- if (fabs(u_norm) > 0.001 || isnan(u_norm))
+ if (fabs(u_norm) > 0.001 || isnan(u_norm))
{
fseek ( fp_Bprecomputed , file_pos*sizeof(float) , SEEK_SET );
result = fread (precomp_betas,sizeof(float),colsA,fp_Bprecomputed); result++;
- cblas_saxpy(colsA, 1.0, res, 1, precomp_betas, 1);
+ cblas_saxpy(colsA, 1.0, res, 1, precomp_betas, 1);
float u_norm2 = cblas_snrm2(colsA, precomp_betas, 1);
//cout << u_norm2 << " ";
//cout << file_pos <<"c "<< u_norm2 << "\n";
@@ -307,25 +307,25 @@
if(fabs(u_norm2) > 0.001 || isnan(u_norm))
{
fseek ( fp_Bprecomputed , file_pos*sizeof(float) , SEEK_SET );
- result = fread (precomp_betas,sizeof(float),colsA,fp_Bprecomputed); result++;
+ result = fread (precomp_betas,sizeof(float),colsA,fp_Bprecomputed); result++;
fflush(stdout);
-// matlab_print_matrix("AL", rowsA, colsA-colsAR, AL);
-// matlab_print_matrix("AR", rowsA, colsAR, AR);
-// matlab_print_matrix("Y", rowsA, rhs, y);
- matlab_print_matrix("res", colsA, 1, res);
+// matlab_print_matrix("AL", rowsA, colsA-colsAR, AL);
+// matlab_print_matrix("AR", rowsA, colsAR, AR);
+// matlab_print_matrix("Y", rowsA, rhs, y);
+ matlab_print_matrix("res", colsA, 1, res);
matlab_print_matrix("precomp_betas", colsA, 1, precomp_betas);
cout << file_pos <<" "<< u_norm << "\n";
- cout << file_pos <<" "<< u_norm2 << "\n";
- //printf("\n%%\tBeta computed nrom: %0.2g\n", u_norm2);
+ cout << file_pos <<" "<< u_norm2 << "\n";
+ //printf("\n%%\tBeta computed nrom: %0.2g\n", u_norm2);
exit(1);
}
-
+
}
fseek ( fp_Bprecomputed , file_pos*sizeof(float) , SEEK_SET );
result = fread (precomp_betas,sizeof(float),colsA,fp_Bprecomputed); result++;
- cblas_saxpy(colsA, -1.0, new_sol, 1, precomp_betas, 1);
+ cblas_saxpy(colsA, -1.0, new_sol, 1, precomp_betas, 1);
u_norm = cblas_snrm2(colsA, precomp_betas, 1);
//cout << " pl:" << u_norm ;
if (fabs(u_norm) > 0.001 || isnan(u_norm))
@@ -339,55 +339,55 @@
fclose(fp_Bprecomputed);
}
//else
- {
-
-
- // if (PRINT)
- // printf("\n Btop=(Rtl\\(Ql'*Y))-(Rtl\\Rtr)*(Rbr\\(Qr'*Y)); \n [Btop ; Rbr\\Qr'*Y] - bcomputed \n");
-
- cblas_saxpy(rhs * colsA, -1.0, res, 1, new_sol, 1);
- float u_norm = cblas_snrm2(rhs * colsA, new_sol, 1);
- //
- if (fabs(u_norm) >= 0.1 || isnan(u_norm))
- {
- fflush(stdout);
- // matlab_print_matrix("AL", rowsA, colsA-colsAR, AL);
+ {
+
+
+ // if (PRINT)
+ // printf("\n Btop=(Rtl\\(Ql'*Y))-(Rtl\\Rtr)*(Rbr\\(Qr'*Y)); \n [Btop ; Rbr\\Qr'*Y] - bcomputed \n");
+
+ cblas_saxpy(rhs * colsA, -1.0, res, 1, new_sol, 1);
+ float u_norm = cblas_snrm2(rhs * colsA, new_sol, 1);
+ //
+ if (fabs(u_norm) >= 0.1 || isnan(u_norm))
+ {
+ fflush(stdout);
+ // matlab_print_matrix("AL", rowsA, colsA-colsAR, AL);
//matlab_print_matrix("AR", rowsA, colsAR, AR);
- //matlab_print_matrix("A", rowsA, colsA, A);
- //matlab_print_matrix("Y", rowsA, rhs, y);
-// printf("\nA = [AL AR]; [Q, R] = qr(A, 0); rr = R\\(Q'*Y)\n");
- matlab_print_matrix("bcomputed", colsA, rhs, res);
- matlab_print_matrix("newsol", colsA, rhs, ynew);
- printf("\n%%\tnrom: %0.5g\n\n", u_norm);
- exit(1);
- }
- else
- {
- //matlab_print_matrix("bcomputed", colsA, rhs, res);
- //matlab_print_matrix("newsol", colsA, rhs, ynew);
+ //matlab_print_matrix("A", rowsA, colsA, A);
+ //matlab_print_matrix("Y", rowsA, rhs, y);
+// printf("\nA = [AL AR]; [Q, R] = qr(A, 0); rr = R\\(Q'*Y)\n");
+ matlab_print_matrix("bcomputed", colsA, rhs, res);
+ matlab_print_matrix("newsol", colsA, rhs, ynew);
+ printf("\n%%\tnrom: %0.5g\n\n", u_norm);
+ exit(1);
+ }
+ else
+ {
+ //matlab_print_matrix("bcomputed", colsA, rhs, res);
+ //matlab_print_matrix("newsol", colsA, rhs, ynew);
//printf("%%%0.3g \n", u_norm);
- //matlab_print_matrix("AL", rowsA, colsA-colsAR, AL);
+ //matlab_print_matrix("AL", rowsA, colsA-colsAR, AL);
// matlab_print_matrix("AR", rowsA, colsAR, AR);
-
+
//matlab_print_matrix("Y", rowsA, rhs, y);
- //cout << " la:" << u_norm << "\n";
+ //cout << " la:" << u_norm << "\n";
}
- }
-
-
- // cout << "\t**************";
- free(ynew);
- free(new_sol);
+ }
+
+
+ // cout << "\t**************";
+ free(ynew);
+ free(new_sol);
free(A);
- delete []precomp_betas;
-}
-
+ delete []precomp_betas;
+}
+
void Algorithm::applyDefaultParams(struct Settings ¶ms)
{
@@ -424,22 +424,22 @@
}
-///////////////////////////////
-
-
-void Algorithm::partialNEQ_Blocked_STL_MD(struct Settings params,
- struct Outputs &out)
-{
-
-
-
- srand(time(NULL));
-
+///////////////////////////////
-
- //float *Ytemp;
- lapack_int info, n, lda, l, r, p;
-
+
+void Algorithm::partialNEQ_Blocked_STL_MD(struct Settings params,
+ struct Outputs &out)
+{
+
+
+
+ srand(time(NULL));
+
+
+
+ //float *Ytemp;
+ lapack_int info, n, lda, l, r, p;
+
cputime_type start_tick, start_tick2, start_tick3, end_tick;
@@ -453,10 +453,10 @@
AIOfile.initialize(params);//THIS HAS TO BE DONE FIRST! ALWAYS
- total_results = 0;
+ total_results = 0;
//cout << params.n << "\n";
- //parameters read on AIOfile.initialize are then copied localy
+ //parameters read on AIOfile.initialize are then copied localy
n = params.n; l = params.l; r = params.r; p = l+r;
disp_cov = params.disp_cov;
storePInd = params.storePInd;
@@ -471,58 +471,58 @@
max_threads = params.threads;
blas_set_num_threads(max_threads);
omp_set_num_threads(max_threads);
-
-
- int y_amount = params.t;
- int y_block_size = params.tb;
+
+
+ int y_amount = params.t;
+ int y_block_size = params.tb;
//int orig_y_block_size = y_block_size;
- //cout << "yt:"<< y_amount << " oybz:"<<y_block_size << flush;
-
- int a_amount = params.m;
- int a_block_size = params.mb;
+ //cout << "yt:"<< y_amount << " oybz:"<<y_block_size << flush;
+
+ int a_amount = params.m;
+ int a_block_size = params.mb;
//int orig_a_block_size = a_block_size;
//cout << r << endl;
-
-
- int a_iters = (a_amount + a_block_size - 1) / a_block_size;
-
+
+
+ int a_iters = (a_amount + a_block_size - 1) / a_block_size;
+
int y_iters = (y_amount + y_block_size - 1) / y_block_size;
- //cout << "yiters:" << y_iters << " aiters:" << a_iters << endl;
-
-
- lda = n;
+ //cout << "yiters:" << y_iters << " aiters:" << a_iters << endl;
+
+
+ lda = n;
if(!params.ForceCheck && params.mpi_id == 0)
- {
+ {
cout << endl;
}
-
- for (int j = 0; j < y_iters && !params.ForceCheck && params.mpi_id == 0; j++)
- {
- if (!params.ForceCheck && ((y_iters < 10 && y_iters > 2 )|| (y_iters >= 10 && (j%(y_iters/10)) == 0 )) )
- {
- cout << "*" << flush;
- }
-
- for (int i = 0; i < a_iters; i++)
- {
-
+
+ for (int j = 0; j < y_iters && !params.ForceCheck && params.mpi_id == 0; j++)
+ {
+ if (!params.ForceCheck && ((y_iters < 10 && y_iters > 2 )|| (y_iters >= 10 && (j%(y_iters/10)) == 0 )) )
+ {
+ cout << "*" << flush;
+ }
+
+ for (int i = 0; i < a_iters; i++)
+ {
+
if ( !params.ForceCheck && y_iters <= 2 &&
- ( (a_iters >= 10 && (i%(a_iters/(10))) == 0) || (a_iters < (10)) ))
- {
- cout << "*" << flush;
- }
-
- }
+ ( (a_iters >= 10 && (i%(a_iters/(10))) == 0) || (a_iters < (10)) ))
+ {
+ cout << "*" << flush;
+ }
+
+ }
}
- if(!params.ForceCheck && params.mpi_id == 0)
- cout << endl;
-
+ if(!params.ForceCheck && params.mpi_id == 0)
+ cout << endl;
+
//add memalign
-
- //float Stl[l*l];
+
+ //float Stl[l*l];
//float Str[l*r*a_block_size];
float* Stl = new float[l*l*1];
float* Str = new float[l*r*a_block_size*1];
@@ -535,14 +535,14 @@
float* Sbr = new float[r * r * a_block_size];
float* Ay = new float[p * a_block_size * y_block_size];
- //float* B_resorted = new float[p * a_block_size*y_block_size];
+ //float* B_resorted = new float[p * a_block_size*y_block_size];
//float* S = new float[p * p];
float* S2global = new float[p*p*max_threads];
-
-
- float* Ay_top = new float[l * y_amount];
+
+
+ float* Ay_top = new float[l * y_amount];
float* Ay_bot = new float[y_block_size * a_block_size * r];
float* y_residual = new float[n * y_block_size ];
@@ -554,9 +554,9 @@
int* Ymiss=new int[y_block_size];
std::map <int , list < int > > y_missings_jj;
-
- float* A = new float[n * p * 1];
+
+ float* A = new float[n * p * 1];
float* AR;// = new float[n * r * a_block_size * 1];
//Sum of squares for A parts
@@ -573,18 +573,18 @@
list < resultH >* sigResults;
-
+
// float* AL = new float[n * l * 1];
float* AL = A;
float* B = Ay;
-
- float* backupAR; // = new float[n*r*a_block_size];
- float* backupAL; // = new float[n*l];
-
-
- AIOfile.load_AL(&backupAL);
+ float* backupAR; // = new float[n*r*a_block_size];
+ float* backupAL; // = new float[n*l];
+
+
+ AIOfile.load_AL(&backupAL);
+
//pthread_barrier_wait(&(AIOfile.Fhandler->finalize_barrier));
//matlab_print_matrix("AL", n,l,backupAL);
@@ -601,16 +601,16 @@
//LAPACKE_dgesdd()
-
+
copy_vec(backupAL, AL, n*l);
get_ticks(start_tick2);
-
- //! Generate Stl
- cblas_ssyrk(CblasColMajor, CblasUpper, CblasTrans,
+
+ //! Generate Stl
+ cblas_ssyrk(CblasColMajor, CblasUpper, CblasTrans,
l, n, 1.0, AL, lda, 0.0, Stl, l);
- get_ticks(end_tick);
+ get_ticks(end_tick);
out.acc_stl += ticks2sec(end_tick,start_tick2);
@@ -646,39 +646,39 @@
delete []vt;
delete []superb;
-
-
- float* Y;
-
+
+
+ float* Y;
+
// printf("\n\n%%Computations\n%%");
- copy_vec(backupAL, AL, n*l);
-
-
- get_ticks(start_tick);
-
- for (int j = 0; j < y_iters; j++)
- {
- if (!params.ForceCheck && params.mpi_id == 0 && ((y_iters < 10 && y_iters > 2 )|| (y_iters >= 10 && (j%(y_iters/10)) == 0 )) )
- {
- cout << AIOfile.io_overhead << flush;
- AIOfile.io_overhead = "*";
- }
+ copy_vec(backupAL, AL, n*l);
+
+ get_ticks(start_tick);
+
+ for (int j = 0; j < y_iters; j++)
+ {
+ if (!params.ForceCheck && params.mpi_id == 0 && ((y_iters < 10 && y_iters > 2 )|| (y_iters >= 10 && (j%(y_iters/10)) == 0 )) )
+ {
+ cout << AIOfile.io_overhead << flush;
+ AIOfile.io_overhead = "*";
+ }
+
get_ticks(start_tick2);
-
+
AIOfile.load_Yblock(&Y, y_block_size);
//cout << "ybz:"<< y_block_size << " " << flush;
- get_ticks(end_tick);
- out.acc_loady += ticks2sec(end_tick,start_tick2);
-
+ get_ticks(end_tick);
+ out.acc_loady += ticks2sec(end_tick,start_tick2);
+
get_ticks(start_tick2);
replace_nans(&y_nan_idxs[0],y_block_size, Y, n,1);
sumSquares(Y,y_block_size,n,ssY,y_nan_idxs);
- for (int jj = 0; jj < y_block_size; jj++)
+ for (int jj = 0; jj < y_block_size; jj++)
{
// for (list< long int >::iterator it=y_nan_idxs[jj].begin(); it != y_nan_idxs[jj].end(); ++it)
// {
@@ -695,38 +695,38 @@
//matlab_print_matrix("Y", n, y_block_size, Y);
get_ticks(end_tick);
out.acc_other += ticks2sec(end_tick,start_tick2);
-
-
+
+
get_ticks(start_tick2);
-
- //! Ay_top = AL'*Y
- cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
- l, y_block_size, n, 1.0, AL, n, Y, n, 0.0,
+
+ //! Ay_top = AL'*Y
+ cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
+ l, y_block_size, n, 1.0, AL, n, Y, n, 0.0,
&Ay_top[j * l * y_block_size], l);
- get_ticks(end_tick);
+ get_ticks(end_tick);
out.acc_gemm += ticks2sec(end_tick,start_tick2);
get_ticks(start_tick2);
generate_correction_missings_Stl(y_block_size,1,1,n,l,l,AL,AL,Stl_corr,y_nan_idxs);
- get_ticks(end_tick);
- out.acc_scorrect += ticks2sec(end_tick,start_tick2);
-
-
- for (int i = 0; i < a_iters; i++)
+ get_ticks(end_tick);
+ out.acc_scorrect += ticks2sec(end_tick,start_tick2);
+
+
+ for (int i = 0; i < a_iters; i++)
{
if (!params.ForceCheck && y_iters <= 2 && params.mpi_id == 0 &&
( (a_iters >= 10 && (i%(a_iters/(10))) == 0) || (a_iters < (10)) ))
- {
- cout << AIOfile.io_overhead << flush;
- AIOfile.io_overhead = "*";
+ {
+ cout << AIOfile.io_overhead << flush;
+ AIOfile.io_overhead = "*";
}
get_ticks(start_tick2);
- //cout << "^" << flush;
+ //cout << "^" << flush;
AIOfile.load_ARblock(&backupAR, a_block_size);
// cout << "^" << endl << flush;
@@ -741,43 +741,43 @@
replace_nans_avgs(a_block_size, backupAR, n, r, ar_nan_idxs);
- get_ticks(end_tick);
+ get_ticks(end_tick);
out.acc_other += ticks2sec(end_tick,start_tick2);
AR = backupAR;
- get_ticks(start_tick2);
-
- //! Generate Str
- cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
- l, r * a_block_size, n, 1.0, AL,
+ get_ticks(start_tick2);
+
+ //! Generate Str
+ cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
+ l, r * a_block_size, n, 1.0, AL,
n, AR, n, 0.0, Str, l);//!
-
- get_ticks(end_tick);
+
+ get_ticks(end_tick);
out.acc_str += ticks2sec(end_tick,start_tick2);
get_ticks(start_tick2);
- blas_set_num_threads(1);
+ blas_set_num_threads(1);
omp_set_num_threads(max_threads);
#pragma omp parallel default(shared)
{
-
+
#pragma omp for nowait
- for (int ii= 0; ii < a_block_size; ii++)
- {
- //! Generate Sbr
- cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
- r, r, n, 1.0, &AR[ii*r*n], n,
- &AR[ii * r * n], n, 0.0,
+ for (int ii= 0; ii < a_block_size; ii++)
+ {
+ //! Generate Sbr
+ cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
+ r, r, n, 1.0, &AR[ii*r*n], n,
+ &AR[ii * r * n], n, 0.0,
&Sbr[ii * r * r], r);
float* S2 = &S2global[p*p*omp_get_thread_num()];
-
- //! build S for X'X-1 for fake "variance of x"
+
+ //! build S for X'X-1 for fake "variance of x"
build_S(S2, Stl, &Str[ii*r*l], &Sbr[ii*r*r], l, r);
@@ -798,17 +798,17 @@
}
blas_set_num_threads(max_threads);
-
- get_ticks(end_tick);
+
+ get_ticks(end_tick);
out.acc_sbr += ticks2sec(end_tick,start_tick2 );
get_ticks(start_tick2);
generate_correction_missings_Str(y_block_size,a_block_size,n,l,r,AL,AR,Str_corr,Sbr_corr,y_nan_idxs);
- get_ticks(end_tick);
+ get_ticks(end_tick);
out.acc_scorrect += ticks2sec(end_tick,start_tick2);
@@ -819,53 +819,53 @@
//matlab_print_matrix("ARb",n,a_block_size*r,backupAR);
-
-
+
+
get_ticks(start_tick2);
-
- //! Ay_bot = AR'*Y
- cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
- r * a_block_size, y_block_size, n, 1.0, AR, n, Y, n,
- 0.0, Ay_bot, r * a_block_size);
-
- get_ticks(end_tick);
+
+ //! Ay_bot = AR'*Y
+ cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
+ r * a_block_size, y_block_size, n, 1.0, AR, n, Y, n,
+ 0.0, Ay_bot, r * a_block_size);
+
+ get_ticks(end_tick);
out.acc_gemm += ticks2sec(end_tick,start_tick2);
-
- get_ticks(start_tick3);
- for (int jj = 0; jj < y_block_size; jj++)
+
+ get_ticks(start_tick3);
+ for (int jj = 0; jj < y_block_size; jj++)
{
//int thread_id = 0 * omp_get_thread_num();//so far singel thread version only
- //cout << "y_block_size:" << y_block_size << " a_block_size:" << a_block_size <<" r:" << r << "\n";
+ //cout << "y_block_size:" << y_block_size << " a_block_size:" << a_block_size <<" r:" << r << "\n";
- blas_set_num_threads(1);
+ blas_set_num_threads(1);
omp_set_num_threads(max_threads);
#pragma omp parallel default(shared)
{
-
- #pragma omp for nowait
- for (int ii= 0; ii < a_block_size; ii++)
+
+ #pragma omp for nowait
+ for (int ii= 0; ii < a_block_size; ii++)
{
// //cout << omp_get_thread_num() << endl << flush;
cputime_type start_tick2;
get_ticks(start_tick2);
-
- copy_vec(&Ay_top[j*l*y_block_size+jj*l], &Ay[jj*p*a_block_size+ ii*p], l);
- copy_vec(&Ay_bot[ii*r + jj*r*a_block_size],
+
+ copy_vec(&Ay_top[j*l*y_block_size+jj*l], &Ay[jj*p*a_block_size+ ii*p], l);
+ copy_vec(&Ay_bot[ii*r + jj*r*a_block_size],
&Ay[jj*p*a_block_size+l+ii*p], r);
float* S2 = &S2global[p*p*omp_get_thread_num()];
-
- //! Rebuild S
- build_S(S2, Stl, &Str[ii*r*l], &Sbr[ii*r*r], l, r);
+ //! Rebuild S
+ build_S(S2, Stl, &Str[ii*r*l], &Sbr[ii*r*r], l, r);
+
if(omp_get_thread_num()==0)
{
- get_ticks(end_tick);//5%
+ get_ticks(end_tick);//5%
out.acc_other += ticks2sec(end_tick,start_tick2);
}
@@ -875,41 +875,41 @@
if(omp_get_thread_num()==0)
{
- get_ticks(end_tick);
+ get_ticks(end_tick);
out.acc_inner_scorrect += ticks2sec(end_tick,start_tick2);
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/genabel -r 1850
More information about the Genabel-commits
mailing list