[GenABEL-dev] [Genabel-commits] r1768 - in pkg/OmicABELnoMM: . doc src tests
L.C. Karssen
lennart at karssen.org
Fri Jul 25 15:47:12 CEST 2014
Hi Alvaro,
Please find a few other comments below.
On 25-07-14 12:48, noreply at r-forge.r-project.org wrote:
> Author: afrank
> Date: 2014-07-25 12:48:03 +0200 (Fri, 25 Jul 2014)
> New Revision: 1768
>
> Modified:
> pkg/OmicABELnoMM/configure.ac
> pkg/OmicABELnoMM/doc/howtocompile.txt
> pkg/OmicABELnoMM/src/AIOwrapper.cpp
> pkg/OmicABELnoMM/src/AIOwrapper.h
> pkg/OmicABELnoMM/src/Algorithm.cpp
> pkg/OmicABELnoMM/src/Definitions.h
> pkg/OmicABELnoMM/src/Utility.cpp
> pkg/OmicABELnoMM/src/main.cpp
> pkg/OmicABELnoMM/test-driver
> pkg/OmicABELnoMM/tests/test.cpp
> Log:
> Added AMD ACML 6 support for heteregenous systems (CPU + GPU). ACML fixes unusable performance issues under older Opteron (non AVX) systems. Several Bug fixes of IO. Added incomplete functionalities.
>
> Modified: pkg/OmicABELnoMM/configure.ac
> ===================================================================
> --- pkg/OmicABELnoMM/configure.ac 2014-07-23 13:50:12 UTC (rev 1767)
> +++ pkg/OmicABELnoMM/configure.ac 2014-07-25 10:48:03 UTC (rev 1768)
> @@ -3,8 +3,9 @@
>
> AC_PREREQ([2.67])
> AC_INIT([OmicABELnoMM], [0.1.0], [genabel-devel at r-forge.wu-wien.ac.at])
> -AM_INIT_AUTOMAKE([silent-rules subdir-objects])
> -AM_SILENT_RULES([yes])
> +AM_INIT_AUTOMAKE([])
> +#AM_INIT_AUTOMAKE([silent-rules subdir-objects])
> +#AM_SILENT_RULES([yes])
> AC_CONFIG_SRCDIR([src/Utility.h])
> AC_CONFIG_HEADERS([src/config.h])
>
> @@ -17,12 +18,12 @@
> # Set some default compile flags
> if test -z "$CXXFLAGS"; then
> # User did not set CXXFLAGS, so we can put in our own defaults
> - CXXFLAGS="-O3"
> + CXXFLAGS=""
Is there a special reason why you remove optimisations here and set -O0
below? You seem to later enable -O3 with AM_CXXFLAGS.
> fi
> if test -z "$CPPFLAGS"; then
> # User did not set CPPFLAGS, so we can put in our own defaults
> - CPPFLAGS="-Wall -g -pedantic -Wunused-result -Wmaybe-uninitialized -Wformat"
> - #CPPFLAGS="-Wall"
> + #CPPFLAGS="-Wall -pedantic -Wunused-result -Wmaybe-uninitialized -Wformat"
> + CPPFLAGS="-O0"
> fi
> # If CXXFLAGS/CPPFLAGS are already set AC_PROG_CXX will not overwrite them
> # with its own defaults
> @@ -31,33 +32,49 @@
> AC_PROG_CC
> AC_PROG_CXX
>
> +# Check for openMP. If found the OPENMP_CXXFLAGS is set automatically
> +AC_OPENMP
> +AC_SUBST(AM_CXXFLAGS, "$OPENMP_CFLAGS")
>
> +AM_CXXFLAGS="-static -O3 -I../libs/include/ -I./libs/include/ $AM_CXXFLAGS"
> +
> # Checks for libraries.
> # pthread library
> AC_SEARCH_LIBS([pthread_mutex_init], [pthread], [], [
> - AC_MSG_ERROR([Unable to find the pthread_mutex_init() function])
> + AC_MSG_ERROR([Make sure pthread is available on the system])
> ])
> -# Openblas
> -AC_SEARCH_LIBS([cblas_sgemm], [openblas], [], [
> - AC_MSG_ERROR([Unable to find the openblas library])
> -])
> -# Lapack
> -AC_SEARCH_LIBS([LAPACKE_sgeqrf], [lapack, lapacke], [], [
> - AC_MSG_ERROR([Unable to find a Lapack library])
> -])
>
> -#Boost
> +if test -z "$LDFLAGS"; then
> + LDFLAGS="-L./libs/lib/ -L../libs/lib/"
> +fi
>
> +found_blas=0
>
> +AC_SEARCH_LIBS([__iso_c_binding_c_f_pointer_l4],[gfortran])
>
> -# Check for openMP. If found the OPENMP_CXXFLAGS is set automatically
> -AC_OPENMP
> -AC_SUBST(AM_CXXFLAGS, "$OPENMP_CFLAGS")
> +# ACML
> +AC_SEARCH_LIBS(dgemm, acml_mp,[found_blas=1 AM_CXXFLAGS="-D_acml_ $AM_CXXFLAGS"], [
> + AC_MSG_NOTICE([NOT using AMD the ACML library],[-lgfortran])
> +])
>
>
>
> +if test "$found_blas" -eq 0
> + then
>
> +# Openblas
> +AC_SEARCH_LIBS([cblas_sgemm], [openblas], [AM_CXXFLAGS="$AM_CXXFLAGS -D_openblas_"], [
> + AC_MSG_ERROR([OpenBLAS library NOT found])
> +])
> + #Lapack
> +AC_SEARCH_LIBS([LAPACKE_sgeqrf], [lapack, lapacke], [], [
> + AC_MSG_ERROR([Unable to find a Lapack library])
> +])
> +AC_MSG_NOTICE([Using OpenBLAS library])
> +fi
>
Could you add a bit of indentation in the lines above? It took me a
while to see that the if was closed by the fi a few lines below it.
Thanks,
Lennart.
> +
> +
> # Checks for header files.
> AC_CHECK_HEADERS([limits.h stdlib.h string.h sys/time.h unistd.h])
>
> @@ -73,6 +90,7 @@
> # Files to be generated by autotools
> AC_CONFIG_FILES([
> Makefile
> +
> ])
>
> AC_OUTPUT
>
> Modified: pkg/OmicABELnoMM/doc/howtocompile.txt
> ===================================================================
> --- pkg/OmicABELnoMM/doc/howtocompile.txt 2014-07-23 13:50:12 UTC (rev 1767)
> +++ pkg/OmicABELnoMM/doc/howtocompile.txt 2014-07-25 10:48:03 UTC (rev 1768)
> @@ -1,4 +1,7 @@
> -
> +
> +-------------------------Install BLAS----------------------------
> +#STEP 1:
> +#a) Install OPENBLAS:
> mkdir GWAS_PROJECT
> cd GWAS_PROJECT
>
> @@ -10,21 +13,52 @@
> #make sure g++ its 4.8 or above!on rwth cluster module load gcc/4.8, choose 32 or 64 also
> make all HOSTCC=g++ FC=gfortran USE_OPENMP=1
>
> -sudo make install PREFIX="/usr"
> +make install PREFIX="path_to_/OmicABELnoMM/libs/"
>
> -sudo ldconfig
> +ldconfig
>
> -cd ..
> -#BLAS END
>
> +cd ..
> +
> +-------------------------AMD ACML Alternative Version-------------------------
> +#STEP 1:
> +# b)
> +mkdir OmicABELnoMM/libs/
> +
> +cd OmicABELnoMM/libs/
> +
> +You can use AMD ACML. Download from:
> +http://developer.amd.com/tools-and-sdks/cpu-development/amd-core-math-library-acml/acml-downloads-resources/
> +
> + and copy the supplied binary
> + libraries to "OmicABELnoMM/libs/"
> +
> +IF both libraries are present (Openblas + ACML), the system will use ACML.
> +
> +cd ../../
> +
> +---------------------------------#BLAS END----------------------------------------------------
> +
> +#STEP 2:
> +
> +
> +ON the folder GWAS_PROJECT
> +
> svn checkout svn+ssh://developername@svn.r-forge.r-project.org/svnroot/genabel/OmicABELnoMM
>
> cd OmicABELnoMM
> +
> +autoreconf -fi
>
> -./configure LDFLAGS="-L/usr/lib/"
> +./configure
>
> -make
> +make
>
> +make check
> +
> +#DONE
> +
> +------------------------------Example--------------------------------
> #test it
> ./omicabelnomm -c examples/XL --geno examples/XR -p examples/Y -o examples/B -n 2 -t 2
>
> @@ -42,12 +76,19 @@
>
>
>
> --------------------------Alternative Version-------------------------
> +-------------------------Alternative Version of BLAS-------------------------
>
> -Install all required libraries for your system:
> +#Make sure autoconf is installed:
>
> --Iinux ubuntu:
> +sudo apt-get install autoconf
> +autoreconf -fi
> +autoconf
>
> +#Install all required libraries for your system,
> +#but they will not work, 99% of the time, due to not having openmp support:
> +
> +#Iinux ubuntu:
> +
> sudo apt-get install libopenblas-dev
> sudo apt-get install libopenblas-base
> sudo apt-get install liblapack3gf
> @@ -56,11 +97,7 @@
> sudo apt-get install liblapacke
> sudo apt-get install liblapacke-dev
>
> --Make sure autoconf is installed:
>
> -sudo apt-get install autoconf
> -autoreconf -fi
> -autoconf
>
>
>
>
> Modified: pkg/OmicABELnoMM/src/AIOwrapper.cpp
> ===================================================================
> --- pkg/OmicABELnoMM/src/AIOwrapper.cpp 2014-07-23 13:50:12 UTC (rev 1767)
> +++ pkg/OmicABELnoMM/src/AIOwrapper.cpp 2014-07-25 10:48:03 UTC (rev 1768)
> @@ -40,7 +40,7 @@
> Fhandler->fnameAL = params.fnameAL;
> Fhandler->fnameAR = params.fnameAR;
> Fhandler->fnameY = params.fnameY;
> - Fhandler->fnameOutB = params.fnameOutB;
> + Fhandler->fnameOutFiles = params.fnameOutFiles;
>
>
> Yfvi = load_databel_fvi( (Fhandler->fnameY+".fvi").c_str() );
> @@ -91,7 +91,7 @@
>
> prepare_AL(params.l,params.n);
> prepare_AR( params.mb, params.n, params.m, params.r);
> - prepare_B(params.tb, params.l+params.r);
> + prepare_OutFiles(params.mb, params.l+params.r);
> prepare_Y(params.tb, params.n, params.t);
>
>
> @@ -118,7 +118,7 @@
> finalize_Y();
> finalize_AR();
> finalize_AL();
> - finalize_B();
> + finalize_OutFiles();
>
> pthread_attr_destroy(&(Fhandler->attr));
>
> @@ -135,7 +135,7 @@
>
>
>
> -void AIOwrapper::finalize_B()
> +void AIOwrapper::finalize_OutFiles()
> {
>
> }
> @@ -149,7 +149,10 @@
>
> struct timespec timeToWait;
> FILE* fp_Y;
> - FILE* fp_B;
> + FILE* fp_B;
> + FILE* fp_R;
> + FILE* fp_SD2;
> + FILE* fp_P;
> FILE* fp_Ar;
> if(!Fhandler->fakefiles)
> {
> @@ -167,11 +170,29 @@
> exit(1);
> }
>
> - fp_B = fopen((Fhandler->fnameOutB+".fvd").c_str(), "w+b");
> + fp_B = fopen((Fhandler->fnameOutFiles+"_B.fvd").c_str(), "w+b");
> if(fp_B == 0)
> {
> - cout << "Error Opening File B " << Fhandler->fnameOutB << endl;
> + cout << "Error Opening File B " << Fhandler->fnameOutFiles << "_B" << endl;
> exit(1);
> + }
> + fp_R = fopen((Fhandler->fnameOutFiles+"_R.fvd").c_str(), "w+b");
> + if(fp_R == 0)
> + {
> + cout << "Error Opening File R " << Fhandler->fnameOutFiles << "_R" << endl;
> + exit(1);
> + }
> + fp_SD2 = fopen((Fhandler->fnameOutFiles+"_SD2.fvd").c_str(), "w+b");
> + if(fp_SD2 == 0)
> + {
> + cout << "Error Opening File SD2 " << Fhandler->fnameOutFiles << "_SD2" << endl;
> + exit(1);
> + }
> + fp_P = fopen((Fhandler->fnameOutFiles+"_P.fvd").c_str(), "w+b");
> + if(fp_P == 0)
> + {
> + cout << "Error Opening File P " << Fhandler->fnameOutFiles << "_P" << endl;
> + exit(1);
> }
> }
> else
> @@ -205,6 +226,24 @@
> {
> cout << "Error setting up temp File B " << endl;
> exit(1);
> + }
> + fp_R = fopen("tempR.bin", "w+b");
> + if(fp_R == 0)
> + {
> + cout << "Error setting up temp File R " << endl;
> + exit(1);
> + }
> + fp_SD2 = fopen("tempSD2.bin", "w+b");
> + if(fp_SD2 == 0)
> + {
> + cout << "Error setting up temp File SD2 " << endl;
> + exit(1);
> + }
> + fp_P = fopen("tempP.bin", "w+b");
> + if(fp_P == 0)
> + {
> + cout << "Error setting up temp File P " << endl;
> + exit(1);
> }
> //cout << "\nEnd preping files\n" << flush;
>
> @@ -229,14 +268,16 @@
>
> Fhandler->y_to_readSize -= tmp_y_blockSize;
> size_buff = Fhandler->n * tmp_y_blockSize;
> - //cout << Fhandler->y_to_readSize << endl;
> +
>
> +
> pthread_mutex_lock(&(Fhandler->m_buff_upd));
> - //cout << " pre;" << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
> +
> +
> type_buffElement* tobeFilled = Fhandler->empty_buffers.front();
> Fhandler->empty_buffers.pop();
> - //pthread_mutex_unlock(&(Fhandler->m_buff_upd));
>
> +
> tobeFilled->size = tmp_y_blockSize;
>
> if(Fhandler->fakefiles)
> @@ -273,8 +314,8 @@
>
> }
> }
> -// size_t result = fread (tobeFilled->buff,sizeof(type_precision),size_buff,fp_Y);
> -// result++;
> +
> +
> if(Fhandler->y_to_readSize <= 0)
> {
> fseek ( fp_Y , 0 , SEEK_SET );
> @@ -282,10 +323,9 @@
> }
>
>
> - //pthread_mutex_lock(&(Fhandler->m_buff_upd));
> +
> Fhandler->full_buffers.push(tobeFilled);
> - // cout << "\nStoring " << tobeFilled << endl;
> - //cout << " post;" << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
> +
> pthread_mutex_unlock(&(Fhandler->m_buff_upd));
>
> pthread_mutex_lock(&(Fhandler->m_read));
> @@ -294,8 +334,12 @@
>
> }
>
> - while(!Fhandler->ar_empty_buffers.empty() && Fhandler->Ar_to_readSize)
> - {
> + while(!Fhandler->ar_empty_buffers.empty() && Fhandler->Ar_to_readSize )
> + {
> +
> +
> +
> +
> tmp_ar_blockSize = Fhandler->Ar_blockSize;
> if(Fhandler->Ar_to_readSize < Fhandler->Ar_blockSize)
> tmp_ar_blockSize = Fhandler->Ar_to_readSize;
> @@ -343,12 +387,13 @@
> }
> }
>
> -// size_t result = fread(tobeFilled->buff,sizeof(type_precision),size_buff,fp_Ar);
> -// result++;
> - if (Fhandler->Ar_to_readSize <= 0)
> - {
> - fseek ( fp_Ar , 0 , SEEK_SET );
> - }
> +
> + }
> +
> + if(Fhandler->Ar_to_readSize <= 0)
> + {
> + Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
> + fseek ( fp_Ar , 0 , SEEK_SET );
> }
>
> Fhandler->ar_full_buffers.push(tobeFilled);
> @@ -362,23 +407,29 @@
> }
> //B write
>
> - while(!Fhandler->b_full_buffers.empty())
> + while(!Fhandler->write_full_buffers.empty())
> {
>
>
> pthread_mutex_lock(&(Fhandler->m_buff_upd));
> - type_buffElement* tobeWritten = Fhandler->b_full_buffers.front();
> - Fhandler->b_full_buffers.pop();
> + type_buffElement* tobeWritten = Fhandler->write_full_buffers.front();
> + Fhandler->write_full_buffers.pop();
> int size = Fhandler->p*Fhandler->b_blockSize;
>
> if(Fhandler->fakefiles)
> {
> - fseek ( fp_B , 0 , SEEK_SET );
> + fseek ( fp_B , 0 , SEEK_SET );
> + fseek ( fp_R , 0 , SEEK_SET );
> + fseek ( fp_SD2 , 0 , SEEK_SET );
> + fseek ( fp_P , 0 , SEEK_SET );
> }
> - fwrite (tobeWritten->buff,sizeof(type_precision),size,fp_B);
> + fwrite (&(tobeWritten->buff[0]),sizeof(type_precision),size,fp_B);
> + fwrite (&(tobeWritten->buff[Fhandler->max_b_blockSize*Fhandler->p]),sizeof(type_precision),Fhandler->b_blockSize,fp_R);
> + fwrite (&(tobeWritten->buff[Fhandler->max_b_blockSize*(Fhandler->p+1)]),sizeof(type_precision),Fhandler->b_blockSize,fp_SD2);
> + fwrite (&(tobeWritten->buff[Fhandler->max_b_blockSize*(Fhandler->p+2)]),sizeof(type_precision),size,fp_P);
>
>
> - Fhandler->b_empty_buffers.push(tobeWritten);
> + Fhandler->write_empty_buffers.push(tobeWritten);
> // cout << "\nStoring " << tobeWritten << endl;
> pthread_mutex_unlock(&(Fhandler->m_buff_upd));
>
> @@ -411,12 +462,31 @@
> pthread_cond_signal( &(Fhandler->condition_read ));
> pthread_mutex_unlock(&(Fhandler->m_read));
>
> - if(Fhandler->reset_wait)
> - {
> - pthread_barrier_wait(&(Fhandler->finalize_barrier));
> - //wait for main thread to reset everything
> - pthread_barrier_wait(&(Fhandler->finalize_barrier));
> - }
> +// if(Fhandler->reset_wait)
> +// {
> +// pthread_barrier_wait(&(Fhandler->finalize_barrier));
> +// //wait for main thread to reset everything
> +//
> +// pthread_mutex_lock(&(Fhandler->m_buff_upd));
> +// Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
> +//
> +// if(Fhandler->Ar_currentReadBuff)
> +// {
> +// Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
> +// Fhandler->Ar_currentReadBuff=0;
> +// }
> +// while(!Fhandler->ar_full_buffers.empty())
> +// {
> +// Fhandler->ar_empty_buffers.push(Fhandler->ar_full_buffers.front());
> +// Fhandler->ar_full_buffers.pop();
> +// }
> +// pthread_mutex_unlock(&(Fhandler->m_buff_upd));
> +//
> +// Fhandler->reset_wait = false;
> +//
> +//
> +// pthread_barrier_wait(&(Fhandler->finalize_barrier));
> +// }
>
>
> }
> @@ -425,7 +495,14 @@
> pthread_barrier_wait(&(Fhandler->finalize_barrier));
>
> {
> - type_buffElement* tmp;
> + type_buffElement* tmp;
> +
> + if(Fhandler->currentReadBuff)
> + {
> + Fhandler->full_buffers.push(Fhandler->currentReadBuff);
> + Fhandler->currentReadBuff=0;
> + }
> +
> while(!Fhandler->full_buffers.empty())
> {
> tmp= Fhandler->full_buffers.front();
> @@ -438,8 +515,15 @@
> {
> tmp= Fhandler->empty_buffers.front();
> Fhandler->empty_buffers.pop();
> - delete []tmp->buff;
> - delete tmp;
> + delete []tmp->buff;
> + delete tmp;
> +
> + }
> +
> + if(Fhandler->Ar_currentReadBuff)
> + {
> + Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
> + Fhandler->Ar_currentReadBuff=0;
> }
>
> while(!Fhandler->ar_full_buffers.empty())
> @@ -458,18 +542,18 @@
> delete tmp;
> }
>
> - while(!Fhandler->b_full_buffers.empty())
> + while(!Fhandler->write_full_buffers.empty())
> {
> - tmp= Fhandler->b_full_buffers.front();
> - Fhandler->b_full_buffers.pop();
> + tmp= Fhandler->write_full_buffers.front();
> + Fhandler->write_full_buffers.pop();
> delete []tmp->buff;
> delete tmp;
> }
>
> - while(!Fhandler->b_empty_buffers.empty())
> + while(!Fhandler->write_empty_buffers.empty())
> {
> - tmp= Fhandler->b_empty_buffers.front();
> - Fhandler->b_empty_buffers.pop();
> + tmp= Fhandler->write_empty_buffers.front();
> + Fhandler->write_empty_buffers.pop();
> delete []tmp->buff;
> delete tmp;
> }
> @@ -482,6 +566,9 @@
> fclose(fp_Y);
> fclose(fp_Ar);
> fclose(fp_B);
> + fclose(fp_R);
> + fclose(fp_SD2);
> + fclose(fp_P);
>
> //cout << "\nexited io\n";
>
> @@ -514,15 +601,16 @@
>
>
> //!read new rdy buffer
> - pthread_mutex_lock(&(Fhandler->m_buff_upd));
> - if(Fhandler->Ar_currentReadBuff)
> - {
> - Fhandler->ar_empty_buffers.push(Fhandler->Ar_currentReadBuff);
> - }
> + pthread_mutex_lock(&(Fhandler->m_buff_upd));
>
> - Fhandler->Ar_currentReadBuff = Fhandler->ar_full_buffers.front();
> - Fhandler->ar_full_buffers.pop();
> + if(Fhandler->Ar_currentReadBuff)
> + {
> + Fhandler->ar_empty_buffers.push(Fhandler->Ar_currentReadBuff);
> + }
>
> + Fhandler->Ar_currentReadBuff = Fhandler->ar_full_buffers.front();
> + Fhandler->ar_full_buffers.pop();
> +
> //cout << "\nReading " << Fhandler->Ar_currentReadBuff << endl;
> Fhandler->Ar = Fhandler->Ar_currentReadBuff->buff;
> Ar_blockSize = Fhandler->Ar_currentReadBuff->size;
> @@ -580,24 +668,23 @@
>
> //!read new rdy buffer
> pthread_mutex_lock(&(Fhandler->m_buff_upd));
> - //cout << " pre," << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
>
> +
> if(Fhandler->currentReadBuff)
> {
> - //memset(Fhandler->currentReadBuff->buff,0,y_blockSize);
> Fhandler->empty_buffers.push(Fhandler->currentReadBuff);
> }
> Fhandler->currentReadBuff = Fhandler->full_buffers.front();
> Fhandler->full_buffers.pop();
>
> - //cout << "\nReading " << Fhandler->currentReadBuff << endl;
> +
> Fhandler->Yb = Fhandler->currentReadBuff->buff;
> y_blockSize = Fhandler->currentReadBuff->size;
>
> (*Y) = Fhandler->Yb;
>
> - //cout << " post," << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
>
> +
> pthread_mutex_unlock(&(Fhandler->m_buff_upd));
>
>
> @@ -611,50 +698,7 @@
>
> }
>
> -void AIOwrapper::write_B(type_precision* B, int p, int blockSize)
> -{
> -
> - while(Fhandler->b_empty_buffers.empty())
> - {
> - pthread_mutex_lock(&(Fhandler->m_more));
> - pthread_cond_signal( &(Fhandler->condition_more ));
> - pthread_mutex_unlock(&(Fhandler->m_more));
> -
> - io_overhead = "b";
> -
> - pthread_mutex_lock(&(Fhandler->m_read));
> - pthread_cond_wait( &(Fhandler->condition_read), &(Fhandler->m_read ));
> - pthread_mutex_unlock(&(Fhandler->m_read));
> -
> - }
> -
> -
> - pthread_mutex_lock(&(Fhandler->m_buff_upd));
> -
> -
> -
> - //cout << Fhandler->b_empty_buffers.size() << flush;
> - Fhandler->currentWriteBuff = Fhandler->b_empty_buffers.front();
> - Fhandler->b_empty_buffers.pop();
> -
> -
> -
> - Fhandler->B = Fhandler->currentWriteBuff->buff;
> - Fhandler->b_blockSize = blockSize;
> - copy_vec(B,Fhandler->B,p*blockSize);
> -
> - Fhandler->b_full_buffers.push(Fhandler->currentWriteBuff);
> -
> -
> -
> - pthread_mutex_unlock(&(Fhandler->m_buff_upd));
> -
> -
> - pthread_mutex_lock(&(Fhandler->m_more));
> - pthread_cond_signal( &(Fhandler->condition_more ));
> - pthread_mutex_unlock(&(Fhandler->m_more));
> -}
> -
> +
> void AIOwrapper::prepare_Y(int y_blockSize, int n, int totalY)
> {
> //for fake files
> @@ -679,10 +723,6 @@
> tmp = new type_buffElement();
> tmp->buff = new type_precision[Fhandler->n*Fhandler->y_blockSize];
> tmp->size = y_blockSize;
> -// for( int i = 0; i < Fhandler->n*Fhandler->y_blockSize; i++)
> -// {
> -// (tmp->buff)[i] = 0;
> -// }
> Fhandler->empty_buffers.push(tmp);
> Fhandler->Yb = tmp->buff;
> }
> @@ -701,52 +741,98 @@
>
>
>
> -}
> -
> -void AIOwrapper::prepare_B(int b_blockSize, int p)
> +}
> +
> +void AIOwrapper::getCurrentWriteBuffers(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P)
> +{
> + B = &(Fhandler->currentWriteBuff->buff[0]);
> + R = &(Fhandler->currentWriteBuff->buff[Fhandler->max_b_blockSize*Fhandler->p]);
> + SD2 = &(Fhandler->currentWriteBuff->buff[Fhandler->max_b_blockSize*(Fhandler->p+1)]);
> + P = &(Fhandler->currentWriteBuff->buff[Fhandler->max_b_blockSize*(Fhandler->p+2)]);
> +}
> +
> +void AIOwrapper::write_OutFiles(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P, int blockSize)
> {
> - //for fake files
>
> + while(Fhandler->write_empty_buffers.empty())
> + {
> + pthread_mutex_lock(&(Fhandler->m_more));
> + pthread_cond_signal( &(Fhandler->condition_more ));
> + pthread_mutex_unlock(&(Fhandler->m_more));
>
> - Fhandler->b_blockSize = b_blockSize;
> + io_overhead = "W";
>
> - Fhandler->p=p;
> + pthread_mutex_lock(&(Fhandler->m_read));
> + pthread_cond_wait( &(Fhandler->condition_read), &(Fhandler->m_read ));
> + pthread_mutex_unlock(&(Fhandler->m_read));
> + }
>
>
> - int buff_count = 4;
> + pthread_mutex_lock(&(Fhandler->m_buff_upd));
>
> - Fhandler->currentWriteBuff = 0;
>
> - type_buffElement* tmp;
> + Fhandler->write_full_buffers.push(Fhandler->currentWriteBuff);
> + Fhandler->b_blockSize = blockSize;
> +
> +
> + Fhandler->currentWriteBuff = Fhandler->write_empty_buffers.front();
> + Fhandler->write_empty_buffers.pop();
> +
> + B = &(Fhandler->currentWriteBuff->buff[0]);
> + R = &(Fhandler->currentWriteBuff->buff[Fhandler->b_blockSize*Fhandler->p]);
> + SD2 = &(Fhandler->currentWriteBuff->buff[Fhandler->b_blockSize*(Fhandler->p+1)]);
> + P = &(Fhandler->currentWriteBuff->buff[Fhandler->b_blockSize*(Fhandler->p+2)]);
>
>
> + pthread_mutex_unlock(&(Fhandler->m_buff_upd));
> +
> +
> + pthread_mutex_lock(&(Fhandler->m_more));
> + pthread_cond_signal( &(Fhandler->condition_more ));
> + pthread_mutex_unlock(&(Fhandler->m_more));
> +}
> +
> +
> +
> +
> +
> +void AIOwrapper::prepare_OutFiles(int max_b_blockSize, int p)
> +{
> +
> + Fhandler->max_b_blockSize = max_b_blockSize;
> + Fhandler->p=p;
> + int buff_count = 4;
> +
> + type_buffElement* tmp;
> +
> +
> for(int i = 0; i< buff_count ; i++)
> {
> -
> tmp = new type_buffElement();
> - tmp->buff = new type_precision[Fhandler->p*Fhandler->b_blockSize];
> - tmp->size = b_blockSize;
> -// for( int i = 0; i < Fhandler->n*Fhandler->b_blockSize; i++)
> -// {
> -// (tmp->buff)[i] = 0;
> -// }
> - Fhandler->b_empty_buffers.push(tmp);
> -
> -// Fhandler->currentWriteBuff = Fhandler->b_empty_buffers.front();
> -// Fhandler->b_empty_buffers.pop();
> + tmp->buff = new type_precision[Fhandler->max_b_blockSize*(2*Fhandler->p+2)];
> + tmp->size = max_b_blockSize;
> + Fhandler->write_empty_buffers.push(tmp);
> + }
> + Fhandler->currentWriteBuff = Fhandler->write_empty_buffers.front();
> + Fhandler->write_empty_buffers.pop();
>
>
> - }
> -
> }
>
> +
> + void AIOwrapper::write_significantValues(int Y, int X_R, float R, float SD2, float P)
> + {
> +
> + }
> +
> +
> void AIOwrapper::reset_Y()
> {
> //void *status;
>
> Fhandler->seed = 1337;
>
> - //cout << "ry" << flush;
> + cout << "ry" << flush;
>
> Fhandler->reset_wait = true;
> pthread_barrier_wait(&(Fhandler->finalize_barrier));
> @@ -789,38 +875,38 @@
>
> //cout << "ra" << flush;
>
> - Fhandler->reset_wait = true;
> - pthread_barrier_wait(&(Fhandler->finalize_barrier));
> +// Fhandler->reset_wait = true;
> +// pthread_barrier_wait(&(Fhandler->finalize_barrier));
> +//
> +//// pthread_mutex_lock(&(Fhandler->m_buff_upd));
> +//// Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
> +////
> +//// if(Fhandler->Ar_currentReadBuff)
> +//// {
> +//// Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
> +//// Fhandler->Ar_currentReadBuff=0;
> +//// }
> +////
> +//// while(!Fhandler->ar_full_buffers.empty())
> +//// {
> +//// Fhandler->ar_empty_buffers.push(Fhandler->ar_full_buffers.front());
> +////// for( int i = 0; i < Fhandler->n*Fhandler->r*Fhandler->Ar_blockSize; i++)
> +////// {
> +////// ((Fhandler->ar_full_buffers.front())->buff)[i] = 0;
> +////// }
> +//// Fhandler->ar_full_buffers.pop();
> +//// }
> +//// pthread_mutex_unlock(&(Fhandler->m_buff_upd));
> +////
> +//// Fhandler->reset_wait = false;
> +//
> +// pthread_barrier_wait(&(Fhandler->finalize_barrier));
> +//
> +// pthread_mutex_lock(&(Fhandler->m_more));
> +// pthread_cond_signal( &(Fhandler->condition_more ));
> +// pthread_mutex_unlock(&(Fhandler->m_more));
>
> - pthread_mutex_lock(&(Fhandler->m_buff_upd));
> - Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
>
> - if(Fhandler->Ar_currentReadBuff)
> - {
> - Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
> - Fhandler->Ar_currentReadBuff=0;
> - }
> -
> - while(!Fhandler->ar_full_buffers.empty())
> - {
> - Fhandler->ar_empty_buffers.push(Fhandler->ar_full_buffers.front());
> -// for( int i = 0; i < Fhandler->n*Fhandler->r*Fhandler->Ar_blockSize; i++)
> -// {
> -// ((Fhandler->ar_full_buffers.front())->buff)[i] = 0;
> -// }
> - Fhandler->ar_full_buffers.pop();
> - }
> - pthread_mutex_unlock(&(Fhandler->m_buff_upd));
> -
> - Fhandler->reset_wait = false;
> -
> - pthread_barrier_wait(&(Fhandler->finalize_barrier));
> -
> - pthread_mutex_lock(&(Fhandler->m_more));
> - pthread_cond_signal( &(Fhandler->condition_more ));
> - pthread_mutex_unlock(&(Fhandler->m_more));
> -
> -
> }
>
> void AIOwrapper::finalize_Y()
> @@ -838,7 +924,7 @@
> Fhandler->Ar_Amount = totalR;
> Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
>
> - int buff_count = min(3,(totalR+ desired_blockSize - 1)/desired_blockSize);
> + int buff_count = 4;
>
> Fhandler->Ar_currentReadBuff = 0;
> type_buffElement* tmp;
>
> Modified: pkg/OmicABELnoMM/src/AIOwrapper.h
> ===================================================================
> --- pkg/OmicABELnoMM/src/AIOwrapper.h 2014-07-23 13:50:12 UTC (rev 1767)
> +++ pkg/OmicABELnoMM/src/AIOwrapper.h 2014-07-25 10:48:03 UTC (rev 1768)
> @@ -25,7 +25,7 @@
> string fnameY;
>
>
> - string fnameOutB;
> + string fnameOutFiles;
>
>
> list< pair<int,int> >* excl_List;
> @@ -46,8 +46,8 @@
> queue<type_buffElement*> empty_buffers;
> queue<type_buffElement*> full_buffers;
>
> - queue<type_buffElement*> b_empty_buffers;
> - queue<type_buffElement*> b_full_buffers;
> + queue<type_buffElement*> write_empty_buffers;
> + queue<type_buffElement*> write_full_buffers;
>
> queue<type_buffElement*> ar_empty_buffers;
> queue<type_buffElement*> ar_full_buffers;
> @@ -67,7 +67,8 @@
> int y_blockSize;
> int y_to_readSize;
>
> - int b_blockSize;
> + int b_blockSize;
> + int max_b_blockSize;
>
> bool not_done;
> bool reset_wait;
> @@ -139,7 +140,11 @@
> void reset_Y();
> void reset_AR();
>
> - void write_B(type_precision* B, int p, int blockSize);
> + void getCurrentWriteBuffers(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P);
> +
> + void write_OutFiles(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P, int blockSize);
> +
> + void write_significantValues(int Y, int X_R, float R, float SD2, float P);
>
> string io_overhead;
>
> @@ -161,8 +166,8 @@
> void prepare_AL( int columns, int n);
> void finalize_AL();
>
> - void prepare_B(int b_blockSize, int p);
> - void finalize_B();
> + void prepare_OutFiles(int max_b_blockSize, int p);
> + void finalize_OutFiles();
>
>
> static void* async_io(void *ptr );
>
> Modified: pkg/OmicABELnoMM/src/Algorithm.cpp
> ===================================================================
> --- pkg/OmicABELnoMM/src/Algorithm.cpp 2014-07-23 13:50:12 UTC (rev 1767)
> +++ pkg/OmicABELnoMM/src/Algorithm.cpp 2014-07-25 10:48:03 UTC (rev 1768)
> @@ -621,7 +621,7 @@
>
> get_ticks(start_tick2);
>
> - AIOfile.write_B(B, p, a_block_size);
> + //AIOfile.write_B(B, p, a_block_size);
>
> get_ticks(end_tick);
> out.acc_storeb += ticks2sec(end_tick,start_tick2);
>
> Modified: pkg/OmicABELnoMM/src/Definitions.h
> ===================================================================
> --- pkg/OmicABELnoMM/src/Definitions.h 2014-07-23 13:50:12 UTC (rev 1767)
> +++ pkg/OmicABELnoMM/src/Definitions.h 2014-07-25 10:48:03 UTC (rev 1768)
> @@ -5,41 +5,110 @@
> #define LINUX
> #else
> #define WINDOWS
> -#endif
> +#endif
>
> +#include <unistd.h>
> +#include <limits.h>
> +#include <queue>
> +#include <iostream>
> #include <stdio.h>
> #include <stdlib.h>
> #include <time.h> /* time */
> #include <cstring>
> #include <math.h>
> -#include <omp.h>
> +#include <omp.h>
> +#include <pthread.h>
>
> -
> -
> +
> #ifdef WINDOWS
> #include <windows.h>
> - #include <cblas.h>
> #else
> - //#include "mpi.h"
> - //#define cpu_freq 3.0
> - #define cpu_freq 3.2
> - #include "cblas.h"
> +
> +#endif
> +
> +//!For intel use propetary MKL, it will be preferred over others
> +#ifdef __INTEL_MKL__
> + #pragma message("MKL will Probably NOT compile")
> + #include "mkl.h"
> + #include "cblas.h"
> + #include <lapacke.h>
> + #define blas_set_num_threads(n) mkl_set_num_threads(n)
> + #define STORAGE_TYPE LAPACK_COL_MAJOR
> +#else
> +
> + //!For AMD systems use the proper ACML library, preferred over openblas ON AMD
> + #ifdef _acml_
> + #pragma message("Compiled with AMD ACML")
> + #define blas_set_num_threads(n) omp_set_num_threads(n)
> +
> + #include <acml.h>
> +
> + #define lapack_int int
> +
> + #define CblasTrans 'T'
> + #define CblasNoTrans 'N'
> + #define CblasUpper 'U'
> + #define CblasColMajor 1
> +
> +
> + #define STORAGE_TYPE CblasColMajor
> +
> +
> + #define cblas_snrm2 snrm2
> + #define cblas_saxpy saxpy
> +
> + #ifndef BLASdefs_H_INCLUDED
> + #define BLASdefs_H_INCLUDED
> +
> + inline void cblas_sgemm(int storage, char transa, char transb, int m, int n, int k, float alpha, float *a, int lda, float *b, int ldb, float beta, float *c, int ldc)
> + {
> + sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
> + }
> +
> + inline lapack_int LAPACKE_sposv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb )
> + {
> + int info;
> + sposv( uplo, n, nrhs, a, lda, b, ldb, &info);
> + return info;
> + }
> +
> + inline void cblas_ssyrk(int Order, char uplo, char Trans,
> + int N, int K, float alpha, float *A, int lda, float beta, float *C, int ldc)
> + {
> + ssyrk(uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
> + }
> +
> + inline lapack_int LAPACKE_sgels( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb )
> + {
> + int info;
> + sgels(trans, m, n, nrhs, a, lda, b, ldb,&info);
> + return info;
> + }
> +
> + #endif
> +
> +
> +
> +
> +
> + #else
> +
> + //!IF MKL is not present on INTEL, use openblas
> + #ifdef _openblas_
> + #pragma message("Compiled with OPENBLAS")
> + #define STORAGE_TYPE LAPACK_COL_MAJOR
> + #include "cblas.h"
> + #include <lapacke.h>
> + extern "C" void openblas_set_num_threads(int num_threads);
> + #define blas_set_num_threads(n) openblas_set_num_threads(n)
> + #endif
> +
> + #endif
> +
> #endif
> -#ifdef __INTEL_MKL__
> - #include "mkl.h"
> - #define blas_set_num_threads(n) mkl_set_num_threads(n)
> -#else
> - extern "C" void openblas_set_num_threads(int num_threads);
> - #define blas_set_num_threads(n) openblas_set_num_threads(n)
> -#endif
>
> -#include <unistd.h>
> -#include <pthread.h>
> -#include <limits.h>
> -#include <queue>
> -#include <iostream>
> -#include <lapacke.h>
>
> +
> //!SETTINGS
>
> #define EXTENDEDTEST 0
> @@ -47,7 +116,7 @@
>
> #define OUTPUT 0
>
> -#define STORAGE_TYPE LAPACK_COL_MAJOR
> +
> #define type_precision float
>
> #define MIN(a,b) (((a)<(b))?(a):(b))
> @@ -56,86 +125,8 @@
> #define _10MB 10*_1MB
> #define _1GB 1024*1024*1024
>
> -//!for CPU speed!
>
> -//#ifdef WIN32
> -//#define WIN32_LEAN_AND_MEAN
> -//#include <windows.h>
> -//typedef unsigned __int64 usCount;
> -//static usCount GetUsCount()
> -//{
> -// static LARGE_INTEGER ticksPerSec;
> -// static double scalefactor;
> -// LARGE_INTEGER val;
> -// if (!scalefactor)
> -// {
> -// if (QueryPerformanceFrequency(&ticksPerSec))
> -// scalefactor=ticksPerSec.QuadPart/1000000000000.0;
> -// else
> -// scalefactor=1;
> -// }
> -// if (!QueryPerformanceCounter(&val))
> -// return (usCount) GetTickCount() * 1000000000;
> -// return (usCount) (val.QuadPart/scalefactor);
> -//}
> -//#else
> -//#include <sys/time.h>
> -//#include <time.h>
> -//#include <sched.h>
> -//typedef unsigned long long usCount;
> -//static usCount GetUsCount()
> -//{
> -//#ifdef CLOCK_MONOTONIC
> -// struct timespec ts;
> -// clock_gettime(CLOCK_MONOTONIC, &ts);
> -// return ((usCount) ts.tv_sec*1000000000000LL)+ts.tv_nsec*1000LL;
> -//#else
> -// struct timeval tv;
> -// gettimeofday(&tv, 0);
> -// return ((usCount) tv.tv_sec*1000000000000LL)+tv.tv_usec*1000000LL;
> -//#endif
> -//}
> -//#endif
> -//static usCount usCountOverhead;
> -//#ifdef __GNUC__
> -//#include "x86intrin.h"
> -//#define __rdtsc() __builtin_ia32_rdtsc()
> -//#endif
>
> -//static usCount GetClockSpeed()
> -//{
> -// int n;
> -// usCount start, end, start_tsc, end_tsc;
> -// if (!usCountOverhead)
> -// {
> -// usCount foo = 0;
> -// start=GetUsCount();
> -// for (n = 0; n < 1000000; n++)
> -// {
> -// foo += GetUsCount();
> -// }
> -// end = GetUsCount();
> -// usCountOverhead = (end - start)/n;
> -// }
> -//
> -// start = GetUsCount();
> -// start_tsc = __rdtsc();
> -// for (n = 0; n <1000; n++)
> -// {
> -//#ifdef WIN32
> -// Sleep(0);
> -//#else
> -// sched_yield();
> -//#endif
> -// }
> -//
> -// end_tsc = __rdtsc();
> -// end = GetUsCount();
> -// return(usCount)((1000000000000.0 * (end_tsc - start_tsc)) /
> -// (end - start - usCountOverhead));
> -//}
> -
> -
> using namespace std;
>
>
> @@ -149,7 +140,9 @@
> int p;
> int tb;
> int mb;
> - int id;
> + int id;
> +
> + float sig_threshold;
>
> int threads;
>
> @@ -158,7 +151,7 @@
> string fnameAL;
> string fnameAR;
> string fnameY;
> - string fnameOutB;
> + string fnameOutFiles;
> string fname_excludelist;
>
> [TRUNCATED]
>
> To get the complete diff run:
> svnlook diff /svnroot/genabel -r 1768
> _______________________________________________
> Genabel-commits mailing list
> Genabel-commits at lists.r-forge.r-project.org
> https://lists.r-forge.r-project.org/cgi-bin/mailman/listinfo/genabel-commits
>
--
*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
L.C. Karssen
Utrecht
The Netherlands
lennart at karssen.org
http://blog.karssen.org
GPG key ID: A88F554A
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 213 bytes
Desc: OpenPGP digital signature
URL: <http://lists.r-forge.r-project.org/pipermail/genabel-devel/attachments/20140725/caf54e2e/attachment-0001.sig>
More information about the genabel-devel
mailing list