[Genabel-commits] r1479 - in pkg/OmicABELnoMM: . src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Dec 17 21:49:17 CET 2013
Author: lckarssen
Date: 2013-12-17 21:49:16 +0100 (Tue, 17 Dec 2013)
New Revision: 1479
Modified:
pkg/OmicABELnoMM/
pkg/OmicABELnoMM/configure.ac
pkg/OmicABELnoMM/src/
pkg/OmicABELnoMM/src/Algorithm.cpp
Log:
In OmicABELnoMM:
- Set some more svn:ignore properties for files/dirs in ./ and ./src
- src/Algorithm.cpp: more whitespace clean up; no functional changes
- configure.ac:
+ Added check for openMP (and add -lopenmp to the CXXFLAGS)
+ Check for pthread lib
+ Lower minimum version of autoconf from 2.69 to 2.67, to make sure autoreconf runs on the current Jenkins server as well (Ubuntu 12.04)
+ Set C++ as the default language
+ Set some default CXXFLAGS and CPPFLAGS
Property changes on: pkg/OmicABELnoMM
___________________________________________________________________
Modified: svn:ignore
- Makefile.in
configure
aclocal.m4
autom4te.cache
autoscan.log
config.h.in
.deps
install-sh
missing
depcomp
+ Makefile.in
Makefile
configure
aclocal.m4
autom4te.cache
autoscan.log
config.h.in
.deps
install-sh
missing
depcomp
config.log
config.status
Modified: pkg/OmicABELnoMM/configure.ac
===================================================================
--- pkg/OmicABELnoMM/configure.ac 2013-12-17 16:41:04 UTC (rev 1478)
+++ pkg/OmicABELnoMM/configure.ac 2013-12-17 20:49:16 UTC (rev 1479)
@@ -1,7 +1,7 @@
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.
-AC_PREREQ([2.69])
+AC_PREREQ([2.67])
AC_INIT([OmicABELnoMM], [0.1.0], [genabel-devel at r-forge.wu-wien.ac.at])
AM_INIT_AUTOMAKE([silent-rules subdir-objects])
AM_SILENT_RULES([yes])
@@ -14,12 +14,37 @@
# change it here to keep the old behaviour by default.
AM_MAINTAINER_MODE([enable])
+# Set some default compile flags
+if test -z "$CXXFLAGS"; then
+ # User did not set CXXFLAGS, so we can put in our own defaults
+ CXXFLAGS="-g -O2"
+fi
+if test -z "$CPPFLAGS"; then
+ # User did not set CPPFLAGS, so we can put in our own defaults
+ CPPFLAGS="-Wall"
+fi
+# If CXXFLAGS/CPPFLAGS are already set AC_PROG_CXX will not overwrite them
+# with its own defaults
+
+
# Checks for programs.
+AC_PROG_CC
AC_PROG_CXX
-AC_PROG_CC
+# Since our code is in C++, set that language as the default for the
+# subsequent checks
+AC_LANG_PUSH([C++])
+
# Checks for libraries.
+# pthread library
+AC_SEARCH_LIBS([pthread_mutex_init], [pthread], [], [
+ AC_MSG_ERROR([Unable to find the pthread_mutex_init() function])
+])
+# Check for openMP. If found the OPENMP_CXXFLAGS is set automatically
+AC_OPENMP
+AC_SUBST(AM_CXXFLAGS, "$OPENMP_CXXFLAGS")
+
# Checks for header files.
AC_CHECK_HEADERS([limits.h stdlib.h string.h sys/time.h unistd.h])
Property changes on: pkg/OmicABELnoMM/src
___________________________________________________________________
Modified: svn:ignore
- config.h.in
+ config.h.in
.deps
.dirstamp
stamp-h1
Modified: pkg/OmicABELnoMM/src/Algorithm.cpp
===================================================================
--- pkg/OmicABELnoMM/src/Algorithm.cpp 2013-12-17 16:41:04 UTC (rev 1478)
+++ pkg/OmicABELnoMM/src/Algorithm.cpp 2013-12-17 20:49:16 UTC (rev 1479)
@@ -2,13 +2,13 @@
Algorithm::Algorithm()
{
- //ctor
+ // ctor
}
Algorithm::~Algorithm()
{
- //dtor
+ // dtor
}
@@ -16,32 +16,31 @@
{
switch (type)
{
- case FULL_NEQ:
- fullNEQ(params, out);
- break;
- case P_NEQ:
- partialNEQ(params, out);
- break;
- case P_NEQ_B_OPT:
- partialNEQ_Blocked_STL(params, out);
- break;
- case FULL_QR:
- fullQR(params, out);
- break;
- case P_QR:
- partialQR(params, out);
- break;
- case P_QR_B_OPT:
- partialQR_Blocked_Rtl(params, out);
- break;
- case P_NEQ_B_OPT_MD:
- partialNEQ_Blocked_STL_MD(params, out);
- break;
+ case FULL_NEQ:
+ fullNEQ(params, out);
+ break;
+ case P_NEQ:
+ partialNEQ(params, out);
+ break;
+ case P_NEQ_B_OPT:
+ partialNEQ_Blocked_STL(params, out);
+ break;
+ case FULL_QR:
+ fullQR(params, out);
+ break;
+ case P_QR:
+ partialQR(params, out);
+ break;
+ case P_QR_B_OPT:
+ partialQR_Blocked_Rtl(params, out);
+ break;
+ case P_NEQ_B_OPT_MD:
+ partialNEQ_Blocked_STL_MD(params, out);
+ break;
- default:
- break;
+ default:
+ break;
}
-
}
@@ -51,13 +50,13 @@
int dim2_end)
{
- int i, j, idx=0;
+ int i, j, idx = 0;
int size, source_ini;
- for (i = dim2_ini; i<dim2_end; i++)
+ for (i = dim2_ini; i < dim2_end; i++)
{
j = dim1_ini;
- source_ini = i*dim1_source+j;
- size = dim1_end-dim1_ini;
+ source_ini = i * dim1_source+j;
+ size = dim1_end - dim1_ini;
memcpy( (type_precision*)&dest[idx],
(type_precision*)&source[source_ini],
size * sizeof(type_precision) );
@@ -75,15 +74,15 @@
type_precision* bot, int dim1_b,
int dim2_b, int dim1_b_bot)
{
- //memcpy are faster version of the fors
+ // memcpy are faster version of the fors
int i, k, w, top_idx, bot_idx, max = dim1_b*dim2_b;
int size;
top_idx = 0;
bot_idx = 0;
for (k = 0; k < dim2_b; k++)
{
- size = k*dim1_b+(dim1_b-dim1_b_bot)-(k*dim1_b);
- memcpy( (type_precision*)&bfinal[k*dim1_b],
+ size = k * dim1_b + (dim1_b - dim1_b_bot) - (k * dim1_b);
+ memcpy( (type_precision*)&bfinal[k * dim1_b],
(type_precision*)&top[top_idx],
size * sizeof(type_precision) );
// for (i = k*dim1_b; i < k*dim1_b+(dim1_b-dim1_b_bot); i++)
@@ -92,7 +91,7 @@
// top_idx++;
// }
top_idx += size;
- i = k*dim1_b + size;
+ i = k * dim1_b + size;
w = i;
size = w + dim1_b_bot - w;
@@ -119,14 +118,14 @@
bot_idx = 0;
for (k = 0; k < dim2_QY; k++)
{
- for (i = k*dim1_QY; i < (k+1)*dim1_QY-dim1_qy_bot; i++)
+ for (i = k * dim1_QY; i < (k+1) * dim1_QY - dim1_qy_bot; i++)
{
qy[i] = top[top_idx];
top_idx++;
}
w = i;
- for (i = w; i < w+dim1_qy_bot; i++)
+ for (i = w; i < w + dim1_qy_bot; i++)
{
qy[i] = bot[bot_idx];
bot_idx++;
@@ -190,14 +189,14 @@
int rbr_idx = 0;
for (j = r; j > 0; j--)
{
- for (i = max-dim1*j; i < max-dim1*j+dim2-r; i++)
+ for (i = max - dim1 * j; i < max - dim1 * j + dim2 - r; i++)
{
R[i] = topRr[rtr_idx];
rtr_idx++;
}
w = i;
- for (i = w; i < w+r; i++)
+ for (i = w; i < w + r; i++)
{
R[i] = botRr[rbr_idx];
rbr_idx++;
@@ -214,7 +213,7 @@
for (int i = 0; i < l; i++)
{
- Sidx = i*p;
+ Sidx = i * p;
for (int j = 0; j <= i; j++)
{
S[Sidx] = Stl[j+i*l];
@@ -227,7 +226,7 @@
Sidx = l*p + p*i;
for (int j = 0; j < l; j++)
{
- S[Sidx] = Str[j+i*l];
+ S[Sidx] = Str[j + i * l];
Sidx++;
}
}
@@ -237,7 +236,7 @@
Sidx = l*p + l + p*i;
for (int j= 0; j <= i; j++)
{
- S[Sidx] = Sbr[j+i*r];
+ S[Sidx] = Sbr[j + i * r];
Sidx++;
}
}
@@ -252,12 +251,12 @@
sizeof(type_precision));
int i, ar_idx = 0;
- for (i = 0; i < rowsA*(colsA-colsAR); i++)
+ for (i = 0; i < rowsA * (colsA - colsAR); i++)
{
A[i] = AL[i];
}
- for (i = rowsA*(colsA-colsAR); i < rowsA*colsA; i++)
+ for (i = rowsA * (colsA - colsAR); i < rowsA * colsA; i++)
{
A[i] = AR[ar_idx];
ar_idx++;
@@ -295,7 +294,7 @@
matlab_print_matrix("AL", rowsA, colsA-colsAR, AL);
matlab_print_matrix("AR", rowsA, colsAR, AR);
matlab_print_matrix("Y", rowsA, rhs, y);
- printf("\nA = [AL AR]; [Q, R] = qr(A, 0); rr=R\\(Q'*Y)\n");
+ printf("\nA = [AL AR]; [Q, R] = qr(A, 0); rr = R\\(Q'*Y)\n");
matlab_print_matrix("bcomputed", colsA, rhs, res);
matlab_print_matrix("newsol", colsA, rhs, ynew);
printf("\n%%\tnrom: %0.2g", u_norm);
@@ -309,7 +308,7 @@
}
- //cout << "\t**************";
+ // cout << "\t**************";
free(ynew);
free(new_sol);
free(A);
@@ -339,10 +338,10 @@
AIOfile.initialize(params);
- n = params.n; l=params.l; r=params.r; p = l+r;
+ n = params.n; l = params.l; r = params.r; p = l+r;
int y_amount = params.t;
- int y_block_size = params.tb;//kk
+ int y_block_size = params.tb; // kk
int a_amount = params.m;
int a_block_size = params.mb;
@@ -398,7 +397,7 @@
type_precision* Y;
- //printf("\n\n%%Computations\n%%");
+ // printf("\n\n%%Computations\n%%");
get_ticks(start_tick);
@@ -464,7 +463,7 @@
copy_vec(backupAL,&AL[aL_idx], n * l);
replace_with_zeros(&y_nan_idxs[jj], &AL[aL_idx], n, l, 1);
- //!Generate Stl
+ //! Generate Stl
cblas_ssyrk(CblasColMajor, CblasUpper, CblasTrans,
l, n, 1.0,&AL[aL_idx], lda, 0.0, Stl, l);
@@ -473,7 +472,7 @@
n, r, a_block_size);
get_ticks(start_tick2);
- //!Generate Str
+ //! Generate Str
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
l, r * a_block_size, n, 1.0, &AL[aL_idx],
n,&AR[aR_idx], n, 0.0, Str, l);
@@ -489,7 +488,7 @@
for (int ii= 0; ii < a_block_size; ii++)
{
get_ticks(start_tick2);
- //!Generate Sbr
+ //! Generate Sbr
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
r, r, n, 1.0, &AR[aR_idx+ii*r*n], n,
&AR[aR_idx + ii * r * n], n, 0.0,
@@ -508,9 +507,9 @@
type_precision* B = Ay;
type_precision S[p * p];
- //!Rebuild S
+ //! Rebuild S
build_S(S, Stl, &Str[ii*r*l], &Sbr[ii*r*r], l, r);
- //matlab_print_matrix("S", p, p, S);
+ // matlab_print_matrix("S", p, p, S);
get_ticks(end_tick);
out.acc_b += ticks2sec(end_tick - start_tick2,
@@ -518,7 +517,7 @@
get_ticks(start_tick2);
- //!b=S\Ay
+ //! b = S\Ay
info = LAPACKE_sposv(STORAGE_TYPE,'U', p, 1, S, p,
&Ay[ii*p], p);
@@ -546,7 +545,7 @@
get_ticks(end_tick);
out.duration = ticks2sec(end_tick - start_tick, cpu_freq);
- //out.gflops = a_amount/1000.0*(n*l*r+n*r*r+y_amount*(n*r+p*p*p)))/1000.0/1000.0;
+ // out.gflops = a_amount/1000.0*(n*l*r+n*r*r+y_amount*(n*r+p*p*p)))/1000.0/1000.0;
out.gflops = y_amount * (gemm_flops(l, n, 1, 0) +
a_amount * (gemm_flops(r, n, 1, 0) +
gemm_flops(l, n, l, 0) +
@@ -561,8 +560,8 @@
delete []Ay_bot;
delete []AR;
delete []AL;
- //delete []backupAL;
- //delete []backupAR;
+ // delete []backupAL;
+ // delete []backupAR;
}
@@ -589,10 +588,10 @@
AIOfile.initialize(params);
- n = params.n; l=params.l; r=params.r; p = l+r;
+ n = params.n; l = params.l; r = params.r; p = l+r;
int y_amount = params.m;
- int y_block_size = params.mb; //kk
+ int y_block_size = params.mb; // kk
int a_amount = params.t;
int a_block_size = params.tb;
@@ -628,13 +627,13 @@
type_precision* Y;
type_precision* backupAR;
- //printf("\n\n%%Computations\n%%");
+ // printf("\n\n%%Computations\n%%");
get_ticks(start_tick);
- //!Generate S
+ //! Generate S
cblas_ssyrk(CblasColMajor, CblasUpper, CblasTrans,
l, n, 1.0, backupAL, lda, 0.0, Stl, l);
@@ -642,7 +641,7 @@
{
AIOfile.load_Yblock(&Y, y_block_size);
- //! Ay_top=AL'*Y
+ //! Ay_top = AL'*Y
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
l, y_block_size, n, 1.0, AL, n, Y, n, 0.0,
&Ay_top[j * l * y_block_size], l);
@@ -661,8 +660,8 @@
AIOfile.reset_Y();
copy_vec(backupAR, AR, n*r*a_block_size);
- //matlab_print_matrix("A", n, p, A);
- //!Generate Str
+ // matlab_print_matrix("A", n, p, A);
+ //! Generate Str
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
l, r * a_block_size, n, 1.0, AL, n, AR, n, 0.0, Str, l);
@@ -671,7 +670,7 @@
#pragma omp parallel for default(shared) schedule(static)
for (int ii= 0; ii < a_block_size; ii++)
{
- //!Generate Sbr
+ //! Generate Sbr
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
r, r, n, 1.0, &AR[ii*r*n], n, &AR[ii*r*n],
n, 0.0, &Sbr[ii*r*r], r);
@@ -691,7 +690,7 @@
AIOfile.load_Yblock(&Y, y_block_size);
- //!Ay_bot=AR'*Y
+ //! Ay_bot = AR'*Y
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
r * a_block_size, y_block_size, n, 1.0, AR, n, Y, n,
0.0, Ay_bot, r * a_block_size);
@@ -699,9 +698,9 @@
#pragma omp parallel for private(S, Ay) default(shared) schedule(static)
for (int ii= 0; ii < a_block_size; ii++)
{
- //matlab_print_matrix("Y", n, y_block_size, Y);
+ // matlab_print_matrix("Y", n, y_block_size, Y);
- //!Rebuild AY
+ //! Rebuild AY
for (int jj = 0; jj < y_block_size; jj++)
{
@@ -710,18 +709,18 @@
}
type_precision* B = Ay;
- //matlab_print_matrix("Ay_top", l, y_block_size,&Ay_top[j*l*y_block_size]);
- //matlab_print_matrix("Ay_bot", r, y_block_size, Ay_bot);
- //matlab_print_matrix("Ay", p, y_block_size, Ay);
+ // matlab_print_matrix("Ay_top", l, y_block_size,&Ay_top[j*l*y_block_size]);
+ // matlab_print_matrix("Ay_bot", r, y_block_size, Ay_bot);
+ // matlab_print_matrix("Ay", p, y_block_size, Ay);
- //!Rebuild S
+ //! Rebuild S
build_S(S, Stl, &Str[ii*r*l], &Sbr[ii*r*r], l, r);
- //matlab_print_matrix("S", p, p, S);
+ // matlab_print_matrix("S", p, p, S);
- //!b=S\Ay
+ //! b = S\Ay
info = LAPACKE_sposv(STORAGE_TYPE, 'U', p, y_block_size,
S, p, Ay, p);
- //assert(info == 0,"POSV");
+ // assert(info == 0,"POSV");
if (ForceCheck)
@@ -734,8 +733,8 @@
}
get_ticks(end_tick);
- out.duration = ticks2sec(end_tick-start_tick, cpu_freq);
- //out.gflops = a_amount/1000.0*(n*l*r+n*r*r+y_amount*(n*r+p*p*p)))/1000.0/1000.0;
+ out.duration = ticks2sec(end_tick - start_tick, cpu_freq);
+ // out.gflops = a_amount/1000.0*(n*l*r+n*r*r+y_amount*(n*r+p*p*p)))/1000.0/1000.0;
out.gflops = gemm_flops(l, n, l, 0) + gemm_flops(l, n, y_amount, 0) +
a_amount * (gemm_flops(l, n, r, 0) +
gemm_flops(r, n, r, 0) +
@@ -774,10 +773,10 @@
cputime_type start_tick, start_tick2, end_tick;
AIOfile.initialize(params);
- n = params.n; l=params.l; r=params.r; p = l+r;
+ n = params.n; l = params.l; r = params.r; p = l+r;
int y_amount = params.m;
- int y_block_size = params.mb;//kk
+ int y_block_size = params.mb;// kk
int a_amount = params.t;
int a_block_size = 1;
@@ -793,33 +792,33 @@
AIOfile.load_AL(&backupAL);
- type_precision* S = new type_precision[p*p];
- type_precision Stl[l*l];
- type_precision Str[l*r];
- type_precision Sbr[r*r];
- type_precision* A = new type_precision[n*p];
+ type_precision* S = new type_precision[p * p];
+ type_precision Stl[l * l];
+ type_precision Str[l * r];
+ type_precision Sbr[r * r];
+ type_precision* A = new type_precision[n * p];
type_precision* AL = A;
- type_precision* AR = &A[l*n];
+ type_precision* AR = &A[l * n];
copy_vec(backupAL, AL, n*l);
- int y_iters = y_amount/y_block_size;
+ int y_iters = y_amount / y_block_size;
type_precision* Y;
type_precision* backupAR;
- //printf("\n\n%%Computations\n%%");
+ // printf("\n\n%%Computations\n%%");
get_ticks(start_tick);
- //!Generate S
+ //! Generate S
cblas_ssyrk(CblasColMajor, CblasUpper, CblasTrans,
l, n, 1.0, backupAL, lda, 0.0, Stl, l);
for (i = 0; i < a_iters; i++)
{
- if (a_iters < 10 || (i%(a_iters/10))==0)
+ if (a_iters < 10 || (i%(a_iters/10)) == 0)
{
cout << "%" << flush;
}
@@ -828,42 +827,42 @@
AIOfile.reset_Y();
copy_vec(backupAR, AR, n*r);
- //matlab_print_matrix("A", n, p, A);
- //!Generate Str
+ // matlab_print_matrix("A", n, p, A);
+ //! Generate Str
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
l, r, n, 1.0, AL, n, AR, n, 0.0, Str, l);
- //!Generate Sbr
+ //! Generate Sbr
cblas_ssyrk(CblasColMajor, CblasUpper, CblasTrans,
r, n, 1.0, backupAR, lda, 0.0, Sbr, r);
- //matlab_print_matrix("S", p, p, S);
+ // matlab_print_matrix("S", p, p, S);
for (j = 0; j < y_iters; j++)
{
build_S(S, Stl, Str, Sbr, l, r);
AIOfile.load_Yblock(&Y, y_block_size);
- type_precision Ay[y_block_size*p];
+ type_precision Ay[y_block_size * p];
- //matlab_print_matrix("Y", n, y_block_size, Y);
+ // matlab_print_matrix("Y", n, y_block_size, Y);
- //!Ay=A'*Y
+ //! Ay = A'*Y
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
p, y_block_size, n, 1.0, A, n, Y, n, 0.0, Ay, p);
type_precision* B = Ay;
- //matlab_print_matrix("Ay", p, y_block_size, Ay);
+ // matlab_print_matrix("Ay", p, y_block_size, Ay);
- //!b=S\qy
+ //! b = S\qy
info = LAPACKE_sposv(STORAGE_TYPE, 'U', p, y_block_size,
S, p, Ay, p);
- assert(info == 0,"POSV");
+ assert(info == 0, "POSV");
- //matlab_print_matrix("B", p, y_block_size, B);
- //exit(0);
+ // matlab_print_matrix("B", p, y_block_size, B);
+ // exit(0);
if ( ForceCheck)
{
check_result(backupAL, backupAR, n, p, y_block_size, r, Y, B);
@@ -907,10 +906,10 @@
AIOfile.initialize(params);
- n = params.n; l=params.l; r=params.r; p = l+r;
+ n = params.n; l = params.l; r = params.r; p = l+r;
int y_amount = params.m;
- int y_block_size = params.mb;//kk
+ int y_block_size = params.mb;// kk
int a_amount = params.t;
int a_block_size = 1;
@@ -926,24 +925,24 @@
AIOfile.load_AL(&backupAL);
- type_precision* S = new type_precision[p*p];
- type_precision* Stemp = new type_precision[p*p];
- type_precision* A = new type_precision[n*p];
+ type_precision* S = new type_precision[p * p];
+ type_precision* Stemp = new type_precision[p * p];
+ type_precision* A = new type_precision[n * p];
type_precision* AL = A;
- type_precision* AR = &A[l*n];
+ type_precision* AR = &A[l * n];
- int y_iters = y_amount/y_block_size;
+ int y_iters = y_amount / y_block_size;
type_precision* Y;
type_precision* backupAR;
- //printf("\n\n%%Computations\n%%");
+ // printf("\n\n%%Computations\n%%");
get_ticks(start_tick);
for (i = 0; i < a_iters; i++)
{
- if (a_iters < 10 || (i%(a_iters/10))==0)
+ if (a_iters < 10 || (i%(a_iters/10)) == 0)
{
cout << "%" << flush;
}
@@ -954,34 +953,34 @@
AIOfile.reset_Y();
copy_vec(backupAR, AR, n*r);
- //matlab_print_matrix("A", n, p, A);
- //!Generate S
+ // matlab_print_matrix("A", n, p, A);
+ //! Generate S
cblas_ssyrk(CblasColMajor, CblasUpper, CblasTrans,
p, n, 1.0, A, lda, 0.0, S, p);
- //matlab_print_matrix("S", p, p, S);
+ // matlab_print_matrix("S", p, p, S);
for (j = 0; j < y_iters; j++)
{
AIOfile.load_Yblock(&Y, y_block_size);
- type_precision Ay[y_block_size*p];
+ type_precision Ay[y_block_size * p];
copy_vec(S, Stemp, p*p);
- //matlab_print_matrix("Y", n, y_block_size, Y);
+ // matlab_print_matrix("Y", n, y_block_size, Y);
- //!Ay=A'*Y
+ //! Ay = A'*Y
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
p, y_block_size, n, 1.0, A, n, Y, n, 0.0, Ay, p);
type_precision* B = Ay;
- //matlab_print_matrix("Ay", p, y_block_size, Ay);
+ // matlab_print_matrix("Ay", p, y_block_size, Ay);
- //!b=S\qy
+ //! b = S\qy
info = LAPACKE_sposv(STORAGE_TYPE, 'U', p, y_block_size,
Stemp, p, Ay, p);
assert(info == 0, "POSV");
- //matlab_print_matrix("B", p, y_block_size, B);
- //exit(0);
+ // matlab_print_matrix("B", p, y_block_size, B);
+ // exit(0);
if ( ForceCheck)
{
check_result(backupAL, backupAR, n, p, y_block_size, r, Y, B);
@@ -1018,12 +1017,12 @@
cputime_type start_tick, start_tick2, end_tick;
- double acc_gemm=0;
- double acc_trsm=0;
+ double acc_gemm = 0;
+ double acc_trsm = 0;
double acc_other = 0;
- double acc_atemp=0;
- double acc_rtr=0;
- double acc_rqr=0;
+ double acc_atemp = 0;
+ double acc_rtr = 0;
+ double acc_rqr = 0;
type_precision *Ytemp;
@@ -1034,10 +1033,10 @@
AIOfile.initialize(params);
- n = params.n; l=params.l; r=params.r; p = l+r;
+ n = params.n; l = params.l; r = params.r; p = l+r;
int y_amount = params.m;
- int y_block_size = 1;//kk
+ int y_block_size = 1;// kk
int a_amount = params.t;
int a_block_size = 1;
@@ -1069,7 +1068,7 @@
- //printf("\n\n%%Computations\n%%");
+ // printf("\n\n%%Computations\n%%");
get_ticks(start_tick);
for (i = 0; i < a_iters; i++)
@@ -1085,12 +1084,12 @@
AIOfile.reset_Y();
copy_vec(backupAR, AR, n*r);
- //!Generate R
+ //! Generate R
info = LAPACKE_sgeqrf(STORAGE_TYPE, n, p, A, lda, tau);
assert(info == 0,"QR decomp");
type_precision* R = extract_R(A, n, p);
- //!generate Q
+ //! generate Q
info = LAPACKE_sorgqr(STORAGE_TYPE, n, p, k, Q, lda, tau);
assert(info == 0,"Q form");
@@ -1102,19 +1101,19 @@
for (j = 0; j < y_iters; j++)
{
- //matlab_print_matrix("RR", p, p, R);
- //matlab_print_matrix("QQ", n, p, Q);
+ // matlab_print_matrix("RR", p, p, R);
+ // matlab_print_matrix("QQ", n, p, Q);
AIOfile.load_Yblock(&Y, y_block_size);
type_precision Qy[y_block_size*p];
- //!qy=Q'*Y
+ //! qy = Q'*Y
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
p, y_block_size, n, 1.0, Q, n, Y, n, 0.0, Qy, p);
type_precision* B = Qy;
- //!b=R\qy
+ //! b = R\qy
cblas_strsm(CblasColMajor, CblasLeft, CblasUpper,
CblasNoTrans, CblasNonUnit,
p, y_block_size, 1.0, R, p, Qy, p);
@@ -1155,12 +1154,12 @@
cputime_type start_tick, start_tick2, end_tick;
- double acc_gemm=0;
- double acc_trsm=0;
+ double acc_gemm = 0;
+ double acc_trsm = 0;
double acc_other = 0;
- double acc_atemp=0;
- double acc_rtr=0;
- double acc_rqr=0;
+ double acc_atemp = 0;
+ double acc_rtr = 0;
+ double acc_rqr = 0;
type_precision *RL, *Ytemp, *RtlRtr;
@@ -1171,10 +1170,10 @@
AIOfile.initialize(params);
- n = params.n; l=params.l; r=params.r; p = l+r;
+ n = params.n; l = params.l; r = params.r; p = l+r;
int y_amount = params.m;
- int y_block_size = params.mb;//kk
+ int y_block_size = params.mb;// kk
int a_amount = params.t;
int a_block_size = 1;
@@ -1210,21 +1209,21 @@
type_precision Rbr[r*r];
get_ticks(start_tick);
- //!Generate RTL
+ //! Generate RTL
info = LAPACKE_sgeqrf(STORAGE_TYPE, n, l, AL, lda, tau);
assert(info == 0,"QR decomp");
type_precision* R =prepare_R(AL, n, l, r);
type_precision* Rtl = R;
- QL=AL;//same as Q
- AL=backupAL;
- //!generate QL
+ QL = AL;// same as Q
+ AL = backupAL;
+ //! generate QL
info = LAPACKE_sorgqr(STORAGE_TYPE, n, l, k, QL, lda, tau);
assert(info == 0,"Q form");
- //printf("\n\n%%Computations\n%%");
+ // printf("\n\n%%Computations\n%%");
for (i = 0; i < a_iters; i++)
@@ -1237,13 +1236,13 @@
AIOfile.load_ARblock(&Ar, a_block_size);
AIOfile.reset_Y();
- //!Rtr=Q1'*AR
+ //! Rtr = Q1'*AR
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
l, r*a_block_size, n, 1.0, QL, n, Ar, n, 0.0, Rtr, l);
type_precision* Atemp = replicate_vec(Ar, n*r*a_block_size);
- //! Atemp=AR-Q1*Rtr
+ //! Atemp = AR-Q1*Rtr
cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
n, r*a_block_size, l, -1.0, QL, n, Rtr, l, 1.0, Atemp, n);
@@ -1252,7 +1251,7 @@
info = LAPACKE_sgeqrf(STORAGE_TYPE, n, r, QrRbr, lda, tau);
assert(info == 0,"QR decomp of QrRbr");
- //reuse of old function
+ // reuse of old function
type_precision* Rbrtemp = prepare_R(QrRbr, n, r, 0);
copy_vec(Rbrtemp, Rbr, r*r);
@@ -1267,10 +1266,10 @@
// matlab_print_matrix("Rbr", r, r, Rbr);
// matlab_print_matrix("RR", p, p, R);
// matlab_print_matrix("QQ", n, p, Q);
- //matlab_print_matrix("ALL", n, l, AL);
- //matlab_print_matrix("ARR", n, r, Ar);
- //matlab_print_matrix("R", p, p, R);
- int top_idx=0;
+ // matlab_print_matrix("ALL", n, l, AL);
+ // matlab_print_matrix("ARR", n, r, Ar);
+ // matlab_print_matrix("R", p, p, R);
+ int top_idx = 0;
for (j = 0; j < y_iters; j++)
{
@@ -1278,18 +1277,18 @@
type_precision Qy[y_block_size*p];
- //!qy=Q'*Y
+ //! qy = Q'*Y
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
p, y_block_size, n, 1.0, Q, n, Y, n, 0.0, Qy, p);
type_precision* B = Qy;
- //!b=R\qy
+ //! b = R\qy
cblas_strsm(CblasColMajor, CblasLeft, CblasUpper,
CblasNoTrans, CblasNonUnit,
p, y_block_size, 1.0, R, p, Qy, p);
- //matlab_print_matrix("bcomp", p, y_block_size, B);
+ // matlab_print_matrix("bcomp", p, y_block_size, B);
if ( ForceCheck)
@@ -1325,7 +1324,7 @@
{
int max_threads = params.threads;
- //srand (time(NULL));
+ // srand (time(NULL));
int cpu_cores = max_threads;
@@ -1337,16 +1336,16 @@
cputime_type start_tick, start_tick2, end_tick;
- double acc_pre=0;
- double acc_trsm=0;
- double acc_other = 0;
- double acc_atemp=0;
- double acc_rtr=0;
- double acc_rqr=0;
- double acc_RTL_QLY=0;
+ double acc_pre = 0;
+ double acc_trsm = 0;
+ double acc_other = 0;
+ double acc_atemp = 0;
+ double acc_rtr = 0;
+ double acc_rqr = 0;
+ double acc_RTL_QLY = 0;
- type_precision *AL,*RL, *RQy_top, *Ytemp, *RtlRtr;
+ type_precision *AL, *RL, *RQy_top, *Ytemp, *RtlRtr;
lapack_int info, n, lda, ldy, l, r, k, p;
int i, j, w;
@@ -1355,7 +1354,7 @@
AIOfile.initialize(params);
- n = params.n; l=params.l; r=params.r; p = l+r;
+ n = params.n; l = params.l; r = params.r; p = l+r;
@@ -1363,7 +1362,7 @@
int y_amount = params.t;
- int y_block_size = params.tb;//kk
+ int y_block_size = params.tb;// kk
int a_amount = params.m;
int a_block_size = params.mb;
@@ -1372,7 +1371,7 @@
int y_iters = (y_amount+ y_block_size - 1)/y_block_size;
- int qy_idx=0;
+ int qy_idx = 0;
out.gflops = n/1000.0*l/1000.0*l/1000.0 +
gemm_flops(l, n, y_amount, 0) +
@@ -1390,7 +1389,7 @@
(1.0*1000*1000) /
(double)((sizeof(type_precision) * n * r))));
- //cout << endl<< "taskChunk "<< sch_block_size <<endl;
+ // cout << endl<< "taskChunk "<< sch_block_size <<endl;
lda = n; ldy = n;
k = l;
@@ -1425,26 +1424,26 @@
- //printf("\n\n%%Computations\n%%");
+ // printf("\n\n%%Computations\n%%");
for (i = 0; i < a_iters; i++)
{
if (a_iters >= 10 && (i%(a_iters/10))==0)
{
- //cout << "*" << flush;
+ // cout << "*" << flush;
}
for (j = 0; j < y_iters; j++)
{
if (a_iters < 10 && (y_iters < 10 || (j%(y_iters/10))==0))
{
- //cout << "*" << flush;
+ // cout << "*" << flush;
}
}
}
cout << endl;
- //!Start of Computations
+ //! Start of Computations
get_ticks(start_tick);
@@ -1452,9 +1451,9 @@
info = LAPACKE_sgeqrf(STORAGE_TYPE, n, l, AL, lda, tau);
assert(info == 0,"QR decomp");
- type_precision* Rtl=prepare_R(AL, n, l, 0);
+ type_precision* Rtl = prepare_R(AL, n, l, 0);
- QL=AL;
+ QL = AL;
AL = backupAL;
info = LAPACKE_sorgqr(STORAGE_TYPE, n, l, k, QL, lda, tau);
@@ -1463,28 +1462,28 @@
matlab_print_matrix("QL", n, l, QL);
- qy_idx=0;
+ qy_idx = 0;
- //printf("\n%%Preparing IO\n");
- //!Prepare File for the Y
+ // printf("\n%%Preparing IO\n");
+ //! Prepare File for the Y
type_precision* Y;
- //#pragma omp parallel default(shared)
+ // #pragma omp parallel default(shared)
// {
-// //#pragma omp for private(Y, y_block_size, qy_idx) nowait schedule(static, 1)
+// // #pragma omp for private(Y, y_block_size, qy_idx) nowait schedule(static, 1)
// for (int j = 0; j < y_iters; j++)
// {
-// //qy_idx = j*y_block_size*l;
-// //#pragma omp critical
+// // qy_idx = j*y_block_size*l;
+// // #pragma omp critical
// {AIOfile.load_Yblock(&Y, y_block_size);}
//
-// //!qy_top=QL'*Y
+// //! qy_top = QL'*Y
// cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
// l, y_block_size, n, 1.0, QL, n, Y, n, 0.0,&RQy_top[qy_idx], l);
//
-// //!K | RtlQlY =RTL\qy_top
+// //! K | RtlQlY =RTL\qy_top
// cblas_strsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit,
// l, y_block_size, 1.0, Rtl, l,&RQy_top[qy_idx], l);
// qy_idx += y_block_size*l;
@@ -1495,7 +1494,7 @@
out.acc_pre = ticks2sec(end_tick-start_tick, cpu_freq);
- //cout << "\npre " << out.acc_pre << endl;
+ // cout << "\npre " << out.acc_pre << endl;
y_block_size = params.tb;
@@ -1532,11 +1531,11 @@
#pragma omp sections
{
{
- //!Rtr=Q1'*AR
+ //! Rtr = Q1'*AR
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
l, r * a_block_size, n, 1.0, QL, n,
Ar, n, 0.0, Rtr, l);
- //matlab_print_matrix("QL'*Ar", l, r*a_block_size, Rtr);
+ // matlab_print_matrix("QL'*Ar", l, r*a_block_size, Rtr);
}
#pragma omp section
@@ -1554,11 +1553,11 @@
- //!Atemp=AR-Q1*Rtr
+ //! Atemp = AR-Q1*Rtr
cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
n, r*a_block_size, l,-1.0, QL, n, Rtr, l, 1.0, Atemp, n);
- //!H=RTL\Rtr
+ //! H = RTL\Rtr
cblas_strsm(CblasColMajor, CblasLeft, CblasUpper,
CblasNoTrans, CblasNonUnit,
l, r*a_block_size, 1.0, Rtl, l, Rtr, l);
@@ -1573,20 +1572,22 @@
info = LAPACKE_sgeqrf(STORAGE_TYPE, n, r, QrRbr, lda, tau);
- //assert(info == 0,"QR decomp of QrRbr");
- type_precision* Rbrtemp = prepare_R(QrRbr, n, r, 0);//reuse of old function
+ // assert(info == 0,"QR decomp of QrRbr");
+
+ // reuse of old function
+ type_precision* Rbrtemp = prepare_R(QrRbr, n, r, 0);
copy_vec(Rbrtemp,&Rbr[r*r*ii], r*r);
info = LAPACKE_sorgqr(STORAGE_TYPE, n, r, r, QrRbr, lda, tau);
- //assert(info == 0,"QR form");
+ // assert(info == 0,"QR form");
free(Rbrtemp);
}
}
- //matlab_print_matrix("RTR", l, r, Rtr);
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/genabel -r 1479
More information about the Genabel-commits
mailing list