[GenABEL-dev] [Genabel-commits] r1768 - in pkg/OmicABELnoMM: . doc src tests

L.C. Karssen lennart at karssen.org
Fri Jul 25 15:47:12 CEST 2014


Hi Alvaro,

Please find a few other comments below.

On 25-07-14 12:48, noreply at r-forge.r-project.org wrote:
> Author: afrank
> Date: 2014-07-25 12:48:03 +0200 (Fri, 25 Jul 2014)
> New Revision: 1768
> 
> Modified:
>    pkg/OmicABELnoMM/configure.ac
>    pkg/OmicABELnoMM/doc/howtocompile.txt
>    pkg/OmicABELnoMM/src/AIOwrapper.cpp
>    pkg/OmicABELnoMM/src/AIOwrapper.h
>    pkg/OmicABELnoMM/src/Algorithm.cpp
>    pkg/OmicABELnoMM/src/Definitions.h
>    pkg/OmicABELnoMM/src/Utility.cpp
>    pkg/OmicABELnoMM/src/main.cpp
>    pkg/OmicABELnoMM/test-driver
>    pkg/OmicABELnoMM/tests/test.cpp
> Log:
> Added AMD ACML 6 support for heteregenous systems (CPU + GPU). ACML fixes unusable performance issues under older Opteron (non AVX) systems. Several Bug fixes of IO. Added incomplete functionalities.
> 
> Modified: pkg/OmicABELnoMM/configure.ac
> ===================================================================
> --- pkg/OmicABELnoMM/configure.ac	2014-07-23 13:50:12 UTC (rev 1767)
> +++ pkg/OmicABELnoMM/configure.ac	2014-07-25 10:48:03 UTC (rev 1768)
> @@ -3,8 +3,9 @@
>  
>  AC_PREREQ([2.67])
>  AC_INIT([OmicABELnoMM], [0.1.0], [genabel-devel at r-forge.wu-wien.ac.at])
> -AM_INIT_AUTOMAKE([silent-rules subdir-objects])
> -AM_SILENT_RULES([yes])
> +AM_INIT_AUTOMAKE([])
> +#AM_INIT_AUTOMAKE([silent-rules subdir-objects])
> +#AM_SILENT_RULES([yes])
>  AC_CONFIG_SRCDIR([src/Utility.h])
>  AC_CONFIG_HEADERS([src/config.h])
>  
> @@ -17,12 +18,12 @@
>  # Set some default compile flags
>  if test -z "$CXXFLAGS"; then
>     # User did not set CXXFLAGS, so we can put in our own defaults
> -    CXXFLAGS="-O3"
> +    CXXFLAGS=""

Is there a special reason why you remove the optimisation flag here and set
-O0 (in CPPFLAGS) below? You seem to enable -O3 later via AM_CXXFLAGS.

>  fi
>  if test -z "$CPPFLAGS"; then
>     # User did not set CPPFLAGS, so we can put in our own defaults
> -    CPPFLAGS="-Wall -g -pedantic -Wunused-result -Wmaybe-uninitialized -Wformat"
> -    #CPPFLAGS="-Wall"
> +    #CPPFLAGS="-Wall -pedantic -Wunused-result -Wmaybe-uninitialized -Wformat"
> +    CPPFLAGS="-O0"
>  fi
>  # If CXXFLAGS/CPPFLAGS are already set AC_PROG_CXX will not overwrite them
>  # with its own defaults
> @@ -31,33 +32,49 @@
>  AC_PROG_CC
>  AC_PROG_CXX
>  
> +# Check for openMP. If found the OPENMP_CXXFLAGS is set automatically
> +AC_OPENMP
> +AC_SUBST(AM_CXXFLAGS, "$OPENMP_CFLAGS")
>  
> +AM_CXXFLAGS="-static -O3 -I../libs/include/ -I./libs/include/ $AM_CXXFLAGS"
> +
>  # Checks for libraries.
>  # pthread library
>  AC_SEARCH_LIBS([pthread_mutex_init], [pthread], [], [
> -   AC_MSG_ERROR([Unable to find the pthread_mutex_init() function])
> +   AC_MSG_ERROR([Make sure pthread is available on the system])
>  ])
> -# Openblas
> -AC_SEARCH_LIBS([cblas_sgemm], [openblas], [], [
> -   AC_MSG_ERROR([Unable to find the openblas library])
> -])
> -# Lapack
> -AC_SEARCH_LIBS([LAPACKE_sgeqrf], [lapack, lapacke], [], [
> -   AC_MSG_ERROR([Unable to find a Lapack library])
> -])
>  
> -#Boost
> +if test -z "$LDFLAGS"; then
> +	LDFLAGS="-L./libs/lib/ -L../libs/lib/"
> +fi
>  
> +found_blas=0
>  
> +AC_SEARCH_LIBS([__iso_c_binding_c_f_pointer_l4],[gfortran])
>  
> -# Check for openMP. If found the OPENMP_CXXFLAGS is set automatically
> -AC_OPENMP
> -AC_SUBST(AM_CXXFLAGS, "$OPENMP_CFLAGS")
> +# ACML
> +AC_SEARCH_LIBS(dgemm, acml_mp,[found_blas=1 AM_CXXFLAGS="-D_acml_ $AM_CXXFLAGS"], [
> +   AC_MSG_NOTICE([NOT using AMD the ACML  library],[-lgfortran])
> +])
>  
>  
>  
> +if test "$found_blas" -eq 0
> +	then
>  
> +# Openblas
> +AC_SEARCH_LIBS([cblas_sgemm], [openblas], [AM_CXXFLAGS="$AM_CXXFLAGS -D_openblas_"], [
> +   AC_MSG_ERROR([OpenBLAS  library NOT found])
> +])
> + #Lapack
> +AC_SEARCH_LIBS([LAPACKE_sgeqrf], [lapack, lapacke], [], [
> +   AC_MSG_ERROR([Unable to find a Lapack library])
> +])
> +AC_MSG_NOTICE([Using OpenBLAS  library])
> +fi
>  

Could you add a bit of indentation to the lines above? It took me a
while to see that the `if` was closed by the `fi` a few lines below it.



Thanks,

Lennart.

> +
> +
>  # Checks for header files.
>  AC_CHECK_HEADERS([limits.h stdlib.h string.h sys/time.h unistd.h])
>  
> @@ -73,6 +90,7 @@
>  # Files to be generated by autotools
>  AC_CONFIG_FILES([
>          Makefile
> +
>  ])
>  
>  AC_OUTPUT
> 
> Modified: pkg/OmicABELnoMM/doc/howtocompile.txt
> ===================================================================
> --- pkg/OmicABELnoMM/doc/howtocompile.txt	2014-07-23 13:50:12 UTC (rev 1767)
> +++ pkg/OmicABELnoMM/doc/howtocompile.txt	2014-07-25 10:48:03 UTC (rev 1768)
> @@ -1,4 +1,7 @@
> -
> +
> +-------------------------Install BLAS----------------------------
> +#STEP 1:
> +#a) Install OPENBLAS:
>  mkdir GWAS_PROJECT
>  cd GWAS_PROJECT
>  
> @@ -10,21 +13,52 @@
>  #make sure g++ its 4.8 or above!on rwth cluster module load gcc/4.8, choose 32 or 64 also
>  make all HOSTCC=g++ FC=gfortran USE_OPENMP=1
>  
> -sudo make install PREFIX="/usr"
> +make install PREFIX="path_to_/OmicABELnoMM/libs/"
>  
> -sudo ldconfig
> +ldconfig
>  
> -cd ..
> -#BLAS END
>  
> +cd ..
> +
> +-------------------------AMD ACML Alternative Version-------------------------
> +#STEP 1:
> +# b)
> +mkdir OmicABELnoMM/libs/
> +
> +cd OmicABELnoMM/libs/
> +
> +You can use AMD ACML. Download from:
> +http://developer.amd.com/tools-and-sdks/cpu-development/amd-core-math-library-acml/acml-downloads-resources/
> +
> + and copy the supplied binary 
> + libraries to "OmicABELnoMM/libs/"
> +
> +IF both libraries are present (Openblas + ACML), the system will use ACML.
> +
> +cd ../../
> +
> +---------------------------------#BLAS END----------------------------------------------------
> +
> +#STEP 2:
> +
> +
> +ON the folder GWAS_PROJECT
> +
>  svn checkout svn+ssh://developername@svn.r-forge.r-project.org/svnroot/genabel/OmicABELnoMM
>  
>  cd OmicABELnoMM
> +
> +autoreconf -fi
>  
> -./configure LDFLAGS="-L/usr/lib/"
> +./configure
>  
> -make
> +make
>  
> +make check
> +
> +#DONE
> +
> +------------------------------Example--------------------------------
>  #test it
>  ./omicabelnomm -c examples/XL --geno examples/XR -p examples/Y -o examples/B -n 2 -t 2
>  
> @@ -42,12 +76,19 @@
>  
>  
>  
> --------------------------Alternative Version-------------------------
> +-------------------------Alternative Version of BLAS-------------------------
>  
> -Install all required libraries for your system: 
> +#Make sure autoconf is installed:
>  
> --Iinux ubuntu:
> +sudo apt-get install autoconf
> +autoreconf -fi
> +autoconf
>  
> +#Install all required libraries for your system, 
> +#but they will not work, 99% of the time, due to not having openmp support: 
> +
> +#Iinux ubuntu:
> +
>  sudo apt-get install libopenblas-dev
>  sudo apt-get install libopenblas-base
>  sudo apt-get install liblapack3gf
> @@ -56,11 +97,7 @@
>  sudo apt-get install liblapacke
>  sudo apt-get install liblapacke-dev
>  
> --Make sure autoconf is installed:
>  
> -sudo apt-get install autoconf
> -autoreconf -fi
> -autoconf
>  
>  
>  
> 
> Modified: pkg/OmicABELnoMM/src/AIOwrapper.cpp
> ===================================================================
> --- pkg/OmicABELnoMM/src/AIOwrapper.cpp	2014-07-23 13:50:12 UTC (rev 1767)
> +++ pkg/OmicABELnoMM/src/AIOwrapper.cpp	2014-07-25 10:48:03 UTC (rev 1768)
> @@ -40,7 +40,7 @@
>          Fhandler->fnameAL = params.fnameAL;
>          Fhandler->fnameAR = params.fnameAR;
>          Fhandler->fnameY = params.fnameY;
> -        Fhandler->fnameOutB = params.fnameOutB;
> +        Fhandler->fnameOutFiles = params.fnameOutFiles;
>  
>  
>          Yfvi  = load_databel_fvi( (Fhandler->fnameY+".fvi").c_str() );
> @@ -91,7 +91,7 @@
>  
>      prepare_AL(params.l,params.n);
>      prepare_AR(  params.mb,  params.n,  params.m,  params.r);
> -    prepare_B(params.tb, params.l+params.r);
> +    prepare_OutFiles(params.mb, params.l+params.r);
>      prepare_Y(params.tb, params.n, params.t);
>  
>  
> @@ -118,7 +118,7 @@
>      finalize_Y();
>      finalize_AR();
>      finalize_AL();
> -    finalize_B();
> +    finalize_OutFiles();
>  
>      pthread_attr_destroy(&(Fhandler->attr));
>  
> @@ -135,7 +135,7 @@
>  
>  
>  
> -void AIOwrapper::finalize_B()
> +void AIOwrapper::finalize_OutFiles()
>  {
>  
>  }
> @@ -149,7 +149,10 @@
>  
>      struct timespec timeToWait;
>      FILE*  fp_Y;
> -    FILE*  fp_B;
> +    FILE*  fp_B;
> +    FILE*  fp_R;
> +    FILE*  fp_SD2;
> +    FILE*  fp_P;
>      FILE*  fp_Ar;
>      if(!Fhandler->fakefiles)
>      {
> @@ -167,11 +170,29 @@
>              exit(1);
>          }
>  
> -        fp_B = fopen((Fhandler->fnameOutB+".fvd").c_str(), "w+b");
> +        fp_B = fopen((Fhandler->fnameOutFiles+"_B.fvd").c_str(), "w+b");
>          if(fp_B == 0)
>          {
> -            cout << "Error Opening File B " << Fhandler->fnameOutB << endl;
> +            cout << "Error Opening File B " << Fhandler->fnameOutFiles << "_B" << endl;
>              exit(1);
> +        }
> +        fp_R = fopen((Fhandler->fnameOutFiles+"_R.fvd").c_str(), "w+b");
> +        if(fp_R == 0)
> +        {
> +            cout << "Error Opening File R " << Fhandler->fnameOutFiles << "_R" << endl;
> +            exit(1);
> +        }
> +        fp_SD2 = fopen((Fhandler->fnameOutFiles+"_SD2.fvd").c_str(), "w+b");
> +        if(fp_SD2 == 0)
> +        {
> +            cout << "Error Opening File SD2 " << Fhandler->fnameOutFiles << "_SD2" << endl;
> +            exit(1);
> +        }
> +        fp_P = fopen((Fhandler->fnameOutFiles+"_P.fvd").c_str(), "w+b");
> +        if(fp_P == 0)
> +        {
> +            cout << "Error Opening File P " << Fhandler->fnameOutFiles << "_P" << endl;
> +            exit(1);
>          }
>      }
>      else
> @@ -205,6 +226,24 @@
>          {
>              cout << "Error setting up temp File B " << endl;
>              exit(1);
> +        }
> +        fp_R = fopen("tempR.bin", "w+b");
> +        if(fp_R == 0)
> +        {
> +            cout << "Error setting up temp File R " << endl;
> +            exit(1);
> +        }
> +        fp_SD2 = fopen("tempSD2.bin", "w+b");
> +        if(fp_SD2 == 0)
> +        {
> +            cout << "Error setting up temp File SD2 " << endl;
> +            exit(1);
> +        }
> +        fp_P = fopen("tempP.bin", "w+b");
> +        if(fp_P == 0)
> +        {
> +            cout << "Error setting up temp File P " << endl;
> +            exit(1);
>          }
>          //cout << "\nEnd preping files\n" << flush;
>  
> @@ -229,14 +268,16 @@
>  
>              Fhandler->y_to_readSize -= tmp_y_blockSize;
>              size_buff = Fhandler->n * tmp_y_blockSize;
> -            //cout << Fhandler->y_to_readSize << endl;
> +
>  
> +
>              pthread_mutex_lock(&(Fhandler->m_buff_upd));
> -            //cout << " pre;" << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
> +
> +
>              type_buffElement* tobeFilled = Fhandler->empty_buffers.front();
>              Fhandler->empty_buffers.pop();
> -            //pthread_mutex_unlock(&(Fhandler->m_buff_upd));
>  
> +
>              tobeFilled->size = tmp_y_blockSize;
>  
>              if(Fhandler->fakefiles)
> @@ -273,8 +314,8 @@
>  
>                      }
>                  }
> -//                size_t result = fread (tobeFilled->buff,sizeof(type_precision),size_buff,fp_Y);
> -//                result++;
> +
> +
>                  if(Fhandler->y_to_readSize <= 0)
>                  {
>                      fseek ( fp_Y , 0 , SEEK_SET );
> @@ -282,10 +323,9 @@
>              }
>  
>  
> -            //pthread_mutex_lock(&(Fhandler->m_buff_upd));
> +
>              Fhandler->full_buffers.push(tobeFilled);
> -            //  cout << "\nStoring " << tobeFilled << endl;
> -            //cout << " post;" << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
> +
>              pthread_mutex_unlock(&(Fhandler->m_buff_upd));
>  
>              pthread_mutex_lock(&(Fhandler->m_read));
> @@ -294,8 +334,12 @@
>  
>          }
>  
> -        while(!Fhandler->ar_empty_buffers.empty() && Fhandler->Ar_to_readSize)
> -        {
> +        while(!Fhandler->ar_empty_buffers.empty() && Fhandler->Ar_to_readSize )
> +        {
> +
> +
> +
> +
>              tmp_ar_blockSize = Fhandler->Ar_blockSize;
>              if(Fhandler->Ar_to_readSize < Fhandler->Ar_blockSize)
>                  tmp_ar_blockSize = Fhandler->Ar_to_readSize;
> @@ -343,12 +387,13 @@
>                      }
>                  }
>  
> -//                size_t result = fread(tobeFilled->buff,sizeof(type_precision),size_buff,fp_Ar);
> -//                result++;
> -                if (Fhandler->Ar_to_readSize <= 0)
> -                {
> -                    fseek ( fp_Ar , 0 , SEEK_SET );
> -                }
> +
> +            }
> +
> +            if(Fhandler->Ar_to_readSize <= 0)
> +            {
> +                Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
> +                fseek ( fp_Ar , 0 , SEEK_SET );
>              }
>  
>              Fhandler->ar_full_buffers.push(tobeFilled);
> @@ -362,23 +407,29 @@
>          }
>          //B write
>  
> -        while(!Fhandler->b_full_buffers.empty())
> +        while(!Fhandler->write_full_buffers.empty())
>          {
>  
>  
>              pthread_mutex_lock(&(Fhandler->m_buff_upd));
> -            type_buffElement* tobeWritten = Fhandler->b_full_buffers.front();
> -            Fhandler->b_full_buffers.pop();
> +            type_buffElement* tobeWritten = Fhandler->write_full_buffers.front();
> +            Fhandler->write_full_buffers.pop();
>              int size = Fhandler->p*Fhandler->b_blockSize;
>  
>              if(Fhandler->fakefiles)
>              {
> -                fseek ( fp_B , 0 , SEEK_SET );
> +                fseek ( fp_B , 0 , SEEK_SET );
> +                fseek ( fp_R , 0 , SEEK_SET );
> +                fseek ( fp_SD2 , 0 , SEEK_SET );
> +                fseek ( fp_P , 0 , SEEK_SET );
>              }
> -            fwrite (tobeWritten->buff,sizeof(type_precision),size,fp_B);
> +            fwrite (&(tobeWritten->buff[0]),sizeof(type_precision),size,fp_B);
> +            fwrite (&(tobeWritten->buff[Fhandler->max_b_blockSize*Fhandler->p]),sizeof(type_precision),Fhandler->b_blockSize,fp_R);
> +            fwrite (&(tobeWritten->buff[Fhandler->max_b_blockSize*(Fhandler->p+1)]),sizeof(type_precision),Fhandler->b_blockSize,fp_SD2);
> +            fwrite (&(tobeWritten->buff[Fhandler->max_b_blockSize*(Fhandler->p+2)]),sizeof(type_precision),size,fp_P);
>  
>  
> -            Fhandler->b_empty_buffers.push(tobeWritten);
> +            Fhandler->write_empty_buffers.push(tobeWritten);
>              //  cout << "\nStoring " << tobeWritten << endl;
>              pthread_mutex_unlock(&(Fhandler->m_buff_upd));
>  
> @@ -411,12 +462,31 @@
>          pthread_cond_signal( &(Fhandler->condition_read ));
>          pthread_mutex_unlock(&(Fhandler->m_read));
>  
> -        if(Fhandler->reset_wait)
> -        {
> -            pthread_barrier_wait(&(Fhandler->finalize_barrier));
> -            //wait for main thread to reset everything
> -            pthread_barrier_wait(&(Fhandler->finalize_barrier));
> -        }
> +//        if(Fhandler->reset_wait)
> +//        {
> +//            pthread_barrier_wait(&(Fhandler->finalize_barrier));
> +//            //wait for main thread to reset everything
> +//
> +//            pthread_mutex_lock(&(Fhandler->m_buff_upd));
> +//            Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
> +//
> +//            if(Fhandler->Ar_currentReadBuff)
> +//            {
> +//                Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
> +//                Fhandler->Ar_currentReadBuff=0;
> +//            }
> +//            while(!Fhandler->ar_full_buffers.empty())
> +//            {
> +//                Fhandler->ar_empty_buffers.push(Fhandler->ar_full_buffers.front());
> +//                Fhandler->ar_full_buffers.pop();
> +//            }
> +//            pthread_mutex_unlock(&(Fhandler->m_buff_upd));
> +//
> +//            Fhandler->reset_wait = false;
> +//
> +//
> +//            pthread_barrier_wait(&(Fhandler->finalize_barrier));
> +//        }
>  
>  
>      }
> @@ -425,7 +495,14 @@
>      pthread_barrier_wait(&(Fhandler->finalize_barrier));
>  
>      {
> -    type_buffElement* tmp;
> +    type_buffElement* tmp;
> +
> +    if(Fhandler->currentReadBuff)
> +    {
> +        Fhandler->full_buffers.push(Fhandler->currentReadBuff);
> +        Fhandler->currentReadBuff=0;
> +    }
> +
>      while(!Fhandler->full_buffers.empty())
>      {
>         tmp= Fhandler->full_buffers.front();
> @@ -438,8 +515,15 @@
>      {
>         tmp= Fhandler->empty_buffers.front();
>         Fhandler->empty_buffers.pop();
> -       delete []tmp->buff;
> -       delete tmp;
> +        delete []tmp->buff;
> +        delete tmp;
> +
> +    }
> +
> +    if(Fhandler->Ar_currentReadBuff)
> +    {
> +        Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
> +        Fhandler->Ar_currentReadBuff=0;
>      }
>  
>      while(!Fhandler->ar_full_buffers.empty())
> @@ -458,18 +542,18 @@
>         delete tmp;
>      }
>  
> -    while(!Fhandler->b_full_buffers.empty())
> +    while(!Fhandler->write_full_buffers.empty())
>      {
> -       tmp= Fhandler->b_full_buffers.front();
> -       Fhandler->b_full_buffers.pop();
> +       tmp= Fhandler->write_full_buffers.front();
> +       Fhandler->write_full_buffers.pop();
>         delete []tmp->buff;
>         delete tmp;
>      }
>  
> -    while(!Fhandler->b_empty_buffers.empty())
> +    while(!Fhandler->write_empty_buffers.empty())
>      {
> -       tmp= Fhandler->b_empty_buffers.front();
> -       Fhandler->b_empty_buffers.pop();
> +       tmp= Fhandler->write_empty_buffers.front();
> +       Fhandler->write_empty_buffers.pop();
>         delete []tmp->buff;
>         delete tmp;
>      }
> @@ -482,6 +566,9 @@
>          fclose(fp_Y);
>          fclose(fp_Ar);
>          fclose(fp_B);
> +        fclose(fp_R);
> +        fclose(fp_SD2);
> +        fclose(fp_P);
>  
>          //cout << "\nexited io\n";
>  
> @@ -514,15 +601,16 @@
>  
>  
>      //!read new rdy buffer
> -    pthread_mutex_lock(&(Fhandler->m_buff_upd));
> -        if(Fhandler->Ar_currentReadBuff)
> -        {
> -            Fhandler->ar_empty_buffers.push(Fhandler->Ar_currentReadBuff);
> -        }
> +    pthread_mutex_lock(&(Fhandler->m_buff_upd));
>  
> -        Fhandler->Ar_currentReadBuff = Fhandler->ar_full_buffers.front();
> -        Fhandler->ar_full_buffers.pop();
> +    if(Fhandler->Ar_currentReadBuff)
> +    {
> +        Fhandler->ar_empty_buffers.push(Fhandler->Ar_currentReadBuff);
> +    }
>  
> +    Fhandler->Ar_currentReadBuff = Fhandler->ar_full_buffers.front();
> +    Fhandler->ar_full_buffers.pop();
> +
>      //cout << "\nReading " << Fhandler->Ar_currentReadBuff << endl;
>      Fhandler->Ar = Fhandler->Ar_currentReadBuff->buff;
>      Ar_blockSize = Fhandler->Ar_currentReadBuff->size;
> @@ -580,24 +668,23 @@
>  
>      //!read new rdy buffer
>      pthread_mutex_lock(&(Fhandler->m_buff_upd));
> -    //cout << " pre," << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
>  
> +
>          if(Fhandler->currentReadBuff)
>          {
> -            //memset(Fhandler->currentReadBuff->buff,0,y_blockSize);
>              Fhandler->empty_buffers.push(Fhandler->currentReadBuff);
>          }
>          Fhandler->currentReadBuff = Fhandler->full_buffers.front();
>          Fhandler->full_buffers.pop();
>  
> -    //cout << "\nReading " << Fhandler->currentReadBuff << endl;
> +
>      Fhandler->Yb = Fhandler->currentReadBuff->buff;
>      y_blockSize = Fhandler->currentReadBuff->size;
>  
>      (*Y) = Fhandler->Yb;
>  
> -     //cout << " post," << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
>  
> +
>      pthread_mutex_unlock(&(Fhandler->m_buff_upd));
>  
>  
> @@ -611,50 +698,7 @@
>  
>  }
>  
> -void AIOwrapper::write_B(type_precision* B, int p, int blockSize)
> -{
> -
> -    while(Fhandler->b_empty_buffers.empty())
> -    {
> -        pthread_mutex_lock(&(Fhandler->m_more));
> -        pthread_cond_signal( &(Fhandler->condition_more ));
> -        pthread_mutex_unlock(&(Fhandler->m_more));
> -
> -        io_overhead = "b";
> -
> -        pthread_mutex_lock(&(Fhandler->m_read));
> -        pthread_cond_wait( &(Fhandler->condition_read), &(Fhandler->m_read ));
> -        pthread_mutex_unlock(&(Fhandler->m_read));
> -
> -    }
> -
> -
> -    pthread_mutex_lock(&(Fhandler->m_buff_upd));
> -
> -
> -
> -        //cout << Fhandler->b_empty_buffers.size() << flush;
> -        Fhandler->currentWriteBuff = Fhandler->b_empty_buffers.front();
> -        Fhandler->b_empty_buffers.pop();
> -
> -
> -
> -    Fhandler->B = Fhandler->currentWriteBuff->buff;
> -    Fhandler->b_blockSize = blockSize;
> -    copy_vec(B,Fhandler->B,p*blockSize);
> -
> -    Fhandler->b_full_buffers.push(Fhandler->currentWriteBuff);
> -
> -
> -
> -    pthread_mutex_unlock(&(Fhandler->m_buff_upd));
> -
> -
> -    pthread_mutex_lock(&(Fhandler->m_more));
> -    pthread_cond_signal( &(Fhandler->condition_more ));
> -    pthread_mutex_unlock(&(Fhandler->m_more));
> -}
> -
> +
>  void AIOwrapper::prepare_Y(int y_blockSize, int n, int totalY)
>  {
>      //for fake files
> @@ -679,10 +723,6 @@
>          tmp = new type_buffElement();
>          tmp->buff = new type_precision[Fhandler->n*Fhandler->y_blockSize];
>          tmp->size = y_blockSize;
> -//        for( int i = 0; i < Fhandler->n*Fhandler->y_blockSize; i++)
> -//        {
> -//            (tmp->buff)[i] = 0;
> -//        }
>          Fhandler->empty_buffers.push(tmp);
>          Fhandler->Yb = tmp->buff;
>      }
> @@ -701,52 +741,98 @@
>  
>  
>  
> -}
> -
> -void AIOwrapper::prepare_B(int b_blockSize, int p)
> +}
> +
> +void AIOwrapper::getCurrentWriteBuffers(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P)
> +{
> +    B = &(Fhandler->currentWriteBuff->buff[0]);
> +    R = &(Fhandler->currentWriteBuff->buff[Fhandler->max_b_blockSize*Fhandler->p]);
> +    SD2 = &(Fhandler->currentWriteBuff->buff[Fhandler->max_b_blockSize*(Fhandler->p+1)]);
> +    P = &(Fhandler->currentWriteBuff->buff[Fhandler->max_b_blockSize*(Fhandler->p+2)]);
> +}
> +
> +void AIOwrapper::write_OutFiles(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P,  int blockSize)
>  {
> -    //for fake files
>  
> +    while(Fhandler->write_empty_buffers.empty())
> +    {
> +        pthread_mutex_lock(&(Fhandler->m_more));
> +        pthread_cond_signal( &(Fhandler->condition_more ));
> +        pthread_mutex_unlock(&(Fhandler->m_more));
>  
> -    Fhandler->b_blockSize = b_blockSize;
> +        io_overhead = "W";
>  
> -    Fhandler->p=p;
> +        pthread_mutex_lock(&(Fhandler->m_read));
> +        pthread_cond_wait( &(Fhandler->condition_read), &(Fhandler->m_read ));
> +        pthread_mutex_unlock(&(Fhandler->m_read));
> +    }
>  
>  
> -    int buff_count = 4;
> +    pthread_mutex_lock(&(Fhandler->m_buff_upd));
>  
> -    Fhandler->currentWriteBuff = 0;
>  
> -    type_buffElement* tmp;
> +    Fhandler->write_full_buffers.push(Fhandler->currentWriteBuff);
> +    Fhandler->b_blockSize = blockSize;
> +
> +
> +    Fhandler->currentWriteBuff = Fhandler->write_empty_buffers.front();
> +    Fhandler->write_empty_buffers.pop();
> +
> +    B = &(Fhandler->currentWriteBuff->buff[0]);
> +    R = &(Fhandler->currentWriteBuff->buff[Fhandler->b_blockSize*Fhandler->p]);
> +    SD2 = &(Fhandler->currentWriteBuff->buff[Fhandler->b_blockSize*(Fhandler->p+1)]);
> +    P = &(Fhandler->currentWriteBuff->buff[Fhandler->b_blockSize*(Fhandler->p+2)]);
>  
>  
> +    pthread_mutex_unlock(&(Fhandler->m_buff_upd));
> +
> +
> +    pthread_mutex_lock(&(Fhandler->m_more));
> +    pthread_cond_signal( &(Fhandler->condition_more ));
> +    pthread_mutex_unlock(&(Fhandler->m_more));
> +}
> +
> +
> +
> +
> +
> +void AIOwrapper::prepare_OutFiles(int max_b_blockSize, int p)
> +{
> +
> +    Fhandler->max_b_blockSize = max_b_blockSize;
> +    Fhandler->p=p;
> +    int buff_count = 4;
> +
> +    type_buffElement* tmp;
> +
> +
>      for(int i = 0; i< buff_count  ; i++)
>      {
> -
>          tmp = new type_buffElement();
> -        tmp->buff = new type_precision[Fhandler->p*Fhandler->b_blockSize];
> -        tmp->size = b_blockSize;
> -//        for( int i = 0; i < Fhandler->n*Fhandler->b_blockSize; i++)
> -//        {
> -//            (tmp->buff)[i] = 0;
> -//        }
> -        Fhandler->b_empty_buffers.push(tmp);
> -
> -//        Fhandler->currentWriteBuff = Fhandler->b_empty_buffers.front();
> -//        Fhandler->b_empty_buffers.pop();
> +        tmp->buff = new type_precision[Fhandler->max_b_blockSize*(2*Fhandler->p+2)];
> +        tmp->size = max_b_blockSize;
> +        Fhandler->write_empty_buffers.push(tmp);
> +    }
> +    Fhandler->currentWriteBuff = Fhandler->write_empty_buffers.front();
> +    Fhandler->write_empty_buffers.pop();
>  
>  
> -    }
> -
>  }
>  
> +
> + void AIOwrapper::write_significantValues(int Y, int X_R, float R, float SD2, float P)
> + {
> +
> + }
> +
> +
>  void AIOwrapper::reset_Y()
>  {
>      //void *status;
>  
>      Fhandler->seed = 1337;
>  
> -    //cout << "ry" << flush;
> +    cout << "ry" << flush;
>  
>      Fhandler->reset_wait = true;
>      pthread_barrier_wait(&(Fhandler->finalize_barrier));
> @@ -789,38 +875,38 @@
>  
>      //cout << "ra" << flush;
>  
> -    Fhandler->reset_wait = true;
> -    pthread_barrier_wait(&(Fhandler->finalize_barrier));
> +//    Fhandler->reset_wait = true;
> +//    pthread_barrier_wait(&(Fhandler->finalize_barrier));
> +//
> +////    pthread_mutex_lock(&(Fhandler->m_buff_upd));
> +////    Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
> +////
> +////    if(Fhandler->Ar_currentReadBuff)
> +////    {
> +////        Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
> +////        Fhandler->Ar_currentReadBuff=0;
> +////    }
> +////
> +////    while(!Fhandler->ar_full_buffers.empty())
> +////    {
> +////        Fhandler->ar_empty_buffers.push(Fhandler->ar_full_buffers.front());
> +//////        for( int i = 0; i < Fhandler->n*Fhandler->r*Fhandler->Ar_blockSize; i++)
> +//////        {
> +//////            ((Fhandler->ar_full_buffers.front())->buff)[i] = 0;
> +//////        }
> +////        Fhandler->ar_full_buffers.pop();
> +////    }
> +////    pthread_mutex_unlock(&(Fhandler->m_buff_upd));
> +////
> +////    Fhandler->reset_wait = false;
> +//
> +//    pthread_barrier_wait(&(Fhandler->finalize_barrier));
> +//
> +//    pthread_mutex_lock(&(Fhandler->m_more));
> +//    pthread_cond_signal( &(Fhandler->condition_more ));
> +//    pthread_mutex_unlock(&(Fhandler->m_more));
>  
> -    pthread_mutex_lock(&(Fhandler->m_buff_upd));
> -    Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
>  
> -    if(Fhandler->Ar_currentReadBuff)
> -    {
> -        Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
> -        Fhandler->Ar_currentReadBuff=0;
> -    }
> -
> -    while(!Fhandler->ar_full_buffers.empty())
> -    {
> -        Fhandler->ar_empty_buffers.push(Fhandler->ar_full_buffers.front());
> -//        for( int i = 0; i < Fhandler->n*Fhandler->r*Fhandler->Ar_blockSize; i++)
> -//        {
> -//            ((Fhandler->ar_full_buffers.front())->buff)[i] = 0;
> -//        }
> -        Fhandler->ar_full_buffers.pop();
> -    }
> -    pthread_mutex_unlock(&(Fhandler->m_buff_upd));
> -
> -    Fhandler->reset_wait = false;
> -
> -    pthread_barrier_wait(&(Fhandler->finalize_barrier));
> -
> -    pthread_mutex_lock(&(Fhandler->m_more));
> -    pthread_cond_signal( &(Fhandler->condition_more ));
> -    pthread_mutex_unlock(&(Fhandler->m_more));
> -
> -
>  }
>  
>  void AIOwrapper::finalize_Y()
> @@ -838,7 +924,7 @@
>      Fhandler->Ar_Amount = totalR;
>      Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
>  
> -    int buff_count = min(3,(totalR+ desired_blockSize - 1)/desired_blockSize);
> +    int buff_count = 4;
>  
>      Fhandler->Ar_currentReadBuff = 0;
>      type_buffElement* tmp;
> 
> Modified: pkg/OmicABELnoMM/src/AIOwrapper.h
> ===================================================================
> --- pkg/OmicABELnoMM/src/AIOwrapper.h	2014-07-23 13:50:12 UTC (rev 1767)
> +++ pkg/OmicABELnoMM/src/AIOwrapper.h	2014-07-25 10:48:03 UTC (rev 1768)
> @@ -25,7 +25,7 @@
>      string fnameY;
>  
>  
> -    string fnameOutB;
> +    string fnameOutFiles;
>  
>  
>      list< pair<int,int> >* excl_List;
> @@ -46,8 +46,8 @@
>      queue<type_buffElement*> empty_buffers;
>      queue<type_buffElement*> full_buffers;
>  
> -    queue<type_buffElement*> b_empty_buffers;
> -    queue<type_buffElement*> b_full_buffers;
> +    queue<type_buffElement*> write_empty_buffers;
> +    queue<type_buffElement*> write_full_buffers;
>  
>      queue<type_buffElement*> ar_empty_buffers;
>      queue<type_buffElement*> ar_full_buffers;
> @@ -67,7 +67,8 @@
>      int y_blockSize;
>      int y_to_readSize;
>  
> -    int b_blockSize;
> +    int b_blockSize;
> +    int max_b_blockSize;
>  
>      bool not_done;
>      bool reset_wait;
> @@ -139,7 +140,11 @@
>          void reset_Y();
>          void reset_AR();
>  
> -        void write_B(type_precision* B, int p, int blockSize);
> +        void getCurrentWriteBuffers(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P);
> +
> +        void write_OutFiles(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P,  int blockSize);
> +
> +        void write_significantValues(int Y, int X_R, float R, float SD2, float P);
>  
>          string io_overhead;
>  
> @@ -161,8 +166,8 @@
>          void prepare_AL( int columns, int n);
>          void finalize_AL();
>  
> -        void prepare_B(int b_blockSize, int p);
> -        void finalize_B();
> +        void prepare_OutFiles(int max_b_blockSize, int p);
> +        void finalize_OutFiles();
>  
>  
>          static void* async_io(void *ptr );
> 
> Modified: pkg/OmicABELnoMM/src/Algorithm.cpp
> ===================================================================
> --- pkg/OmicABELnoMM/src/Algorithm.cpp	2014-07-23 13:50:12 UTC (rev 1767)
> +++ pkg/OmicABELnoMM/src/Algorithm.cpp	2014-07-25 10:48:03 UTC (rev 1768)
> @@ -621,7 +621,7 @@
>  
>                  get_ticks(start_tick2);
>  
> -                AIOfile.write_B(B, p, a_block_size);
> +                //AIOfile.write_B(B, p, a_block_size);
>  
>                  get_ticks(end_tick);
>                  out.acc_storeb += ticks2sec(end_tick,start_tick2);
> 
> Modified: pkg/OmicABELnoMM/src/Definitions.h
> ===================================================================
> --- pkg/OmicABELnoMM/src/Definitions.h	2014-07-23 13:50:12 UTC (rev 1767)
> +++ pkg/OmicABELnoMM/src/Definitions.h	2014-07-25 10:48:03 UTC (rev 1768)
> @@ -5,41 +5,110 @@
>      #define LINUX
>  #else
>      #define WINDOWS
> -#endif
> +#endif
>  
> +#include <unistd.h>
> +#include <limits.h>
> +#include <queue>
> +#include <iostream>
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <time.h>       /* time */
>  #include <cstring>
>  #include <math.h>
> -#include <omp.h>
> +#include <omp.h>
> +#include <pthread.h>
>  
> -
> -
> +
>  #ifdef WINDOWS
>      #include <windows.h>
> -    #include <cblas.h>
>  #else
> -    //#include "mpi.h"
> -    //#define cpu_freq 3.0
> -    #define cpu_freq 3.2
> -    #include "cblas.h"
> +
> +#endif
> +
> +//!For intel use propetary MKL, it will be preferred over others
> +#ifdef __INTEL_MKL__
> +    #pragma message("MKL will Probably NOT compile")
> +    #include "mkl.h"
> +    #include "cblas.h"
> +    #include <lapacke.h>
> +    #define blas_set_num_threads(n) mkl_set_num_threads(n)
> +    #define STORAGE_TYPE LAPACK_COL_MAJOR
> +#else
> +
> +    //!For AMD systems use the proper ACML library, preferred over openblas ON AMD
> +    #ifdef _acml_
> +        #pragma message("Compiled with AMD ACML")
> +        #define blas_set_num_threads(n) omp_set_num_threads(n)
> +
> +        #include <acml.h>
> +
> +        #define lapack_int int
> +
> +        #define CblasTrans 'T'
> +        #define CblasNoTrans 'N'
> +        #define CblasUpper 'U'
> +        #define CblasColMajor 1
> +
> +
> +        #define STORAGE_TYPE CblasColMajor
> +
> +
> +        #define cblas_snrm2 snrm2
> +        #define cblas_saxpy saxpy
> +
> +        #ifndef BLASdefs_H_INCLUDED
> +        #define BLASdefs_H_INCLUDED
> +
> +        inline  void cblas_sgemm(int storage, char transa, char transb, int m, int n, int k, float alpha, float *a, int lda, float *b, int ldb, float beta, float *c, int ldc)
> +        {
> +           sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
> +        }
> +
> +        inline  lapack_int LAPACKE_sposv( int matrix_order, char uplo, lapack_int n,  lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb )
> +        {
> +            int info;
> +            sposv( uplo, n,  nrhs, a, lda, b,  ldb, &info);
> +            return info;
> +        }
> +
> +        inline  void cblas_ssyrk(int Order, char uplo, char Trans,
> +		 int N, int K, float alpha, float *A, int lda,  float beta, float *C, int ldc)
> +        {
> +            ssyrk(uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
> +        }
> +
> +        inline  lapack_int LAPACKE_sgels( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, float* a,  lapack_int lda, float* b, lapack_int ldb )
> +        {
> +            int info;
> +            sgels(trans, m, n, nrhs, a, lda, b, ldb,&info);
> +            return info;
> +        }
> +
> +        #endif
> +
> +
> +
> +
> +
> +    #else
> +
> +        //!IF MKL is not present on INTEL, use openblas
> +        #ifdef _openblas_
> +            #pragma message("Compiled with OPENBLAS")
> +            #define STORAGE_TYPE LAPACK_COL_MAJOR
> +            #include "cblas.h"
> +            #include <lapacke.h>
> +            extern "C" void openblas_set_num_threads(int num_threads);
> +            #define blas_set_num_threads(n) openblas_set_num_threads(n)
> +        #endif
> +
> +    #endif
> +
>  #endif
> -#ifdef __INTEL_MKL__
> -    #include "mkl.h"
> -    #define blas_set_num_threads(n) mkl_set_num_threads(n)
> -#else
> -    extern "C" void openblas_set_num_threads(int num_threads);
> -    #define blas_set_num_threads(n) openblas_set_num_threads(n)
> -#endif
>  
> -#include <unistd.h>
> -#include <pthread.h>
> -#include <limits.h>
> -#include <queue>
> -#include <iostream>
> -#include <lapacke.h>
>  
> +
>  //!SETTINGS
>  
>  #define EXTENDEDTEST 0
> @@ -47,7 +116,7 @@
>  
>  #define OUTPUT 0
>  
> -#define STORAGE_TYPE LAPACK_COL_MAJOR
> +
>  #define type_precision float
>  
>  #define MIN(a,b) (((a)<(b))?(a):(b))
> @@ -56,86 +125,8 @@
>  #define _10MB 10*_1MB
>  #define _1GB 1024*1024*1024
>  
> -//!for CPU speed!
>  
> -//#ifdef WIN32
> -//#define WIN32_LEAN_AND_MEAN
> -//#include <windows.h>
> -//typedef unsigned __int64 usCount;
> -//static usCount GetUsCount()
> -//{
> -//    static LARGE_INTEGER ticksPerSec;
> -//    static double scalefactor;
> -//    LARGE_INTEGER val;
> -//    if (!scalefactor)
> -//    {
> -//        if (QueryPerformanceFrequency(&ticksPerSec))
> -//            scalefactor=ticksPerSec.QuadPart/1000000000000.0;
> -//        else
> -//            scalefactor=1;
> -//    }
> -//    if (!QueryPerformanceCounter(&val))
> -//        return (usCount) GetTickCount() * 1000000000;
> -//    return (usCount) (val.QuadPart/scalefactor);
> -//}
> -//#else
> -//#include <sys/time.h>
> -//#include <time.h>
> -//#include <sched.h>
> -//typedef unsigned long long usCount;
> -//static usCount GetUsCount()
> -//{
> -//#ifdef CLOCK_MONOTONIC
> -//    struct timespec ts;
> -//    clock_gettime(CLOCK_MONOTONIC, &ts);
> -//    return ((usCount) ts.tv_sec*1000000000000LL)+ts.tv_nsec*1000LL;
> -//#else
> -//    struct timeval tv;
> -//    gettimeofday(&tv, 0);
> -//    return ((usCount) tv.tv_sec*1000000000000LL)+tv.tv_usec*1000000LL;
> -//#endif
> -//}
> -//#endif
> -//static usCount usCountOverhead;
> -//#ifdef __GNUC__
> -//#include "x86intrin.h"
> -//#define __rdtsc() __builtin_ia32_rdtsc()
> -//#endif
>  
> -//static usCount GetClockSpeed()
> -//{
> -//    int n;
> -//    usCount start, end, start_tsc, end_tsc;
> -//    if (!usCountOverhead)
> -//    {
> -//        usCount foo = 0;
> -//        start=GetUsCount();
> -//        for (n = 0; n < 1000000; n++)
> -//        {
> -//            foo += GetUsCount();
> -//        }
> -//        end = GetUsCount();
> -//        usCountOverhead = (end - start)/n;
> -//    }
> -//
> -//    start = GetUsCount();
> -//    start_tsc = __rdtsc();
> -//    for (n = 0; n <1000; n++)
> -//    {
> -//#ifdef WIN32
> -//        Sleep(0);
> -//#else
> -//        sched_yield();
> -//#endif
> -//    }
> -//
> -//    end_tsc = __rdtsc();
> -//    end = GetUsCount();
> -//    return(usCount)((1000000000000.0 * (end_tsc - start_tsc)) /
> -//                    (end - start - usCountOverhead));
> -//}
> -
> -
>  using namespace std;
>  
>  
> @@ -149,7 +140,9 @@
>      int p;
>      int tb;
>      int mb;
> -    int id;
> +    int id;
> +
> +    float sig_threshold;
>  
>      int threads;
>  
> @@ -158,7 +151,7 @@
>      string fnameAL;
>      string fnameAR;
>      string fnameY;
> -    string fnameOutB;
> +    string fnameOutFiles;
>      string fname_excludelist;
>  
> [TRUNCATED]
> 
> To get the complete diff run:
>     svnlook diff /svnroot/genabel -r 1768
> _______________________________________________
> Genabel-commits mailing list
> Genabel-commits at lists.r-forge.r-project.org
> https://lists.r-forge.r-project.org/cgi-bin/mailman/listinfo/genabel-commits
> 

-- 
*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
L.C. Karssen
Utrecht
The Netherlands

lennart at karssen.org
http://blog.karssen.org
GPG key ID: A88F554A
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 213 bytes
Desc: OpenPGP digital signature
URL: <http://lists.r-forge.r-project.org/pipermail/genabel-devel/attachments/20140725/caf54e2e/attachment-0001.sig>


More information about the genabel-devel mailing list