[Genabel-commits] r1768 - in pkg/OmicABELnoMM: . doc src tests

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Jul 25 12:48:04 CEST 2014


Author: afrank
Date: 2014-07-25 12:48:03 +0200 (Fri, 25 Jul 2014)
New Revision: 1768

Modified:
   pkg/OmicABELnoMM/configure.ac
   pkg/OmicABELnoMM/doc/howtocompile.txt
   pkg/OmicABELnoMM/src/AIOwrapper.cpp
   pkg/OmicABELnoMM/src/AIOwrapper.h
   pkg/OmicABELnoMM/src/Algorithm.cpp
   pkg/OmicABELnoMM/src/Definitions.h
   pkg/OmicABELnoMM/src/Utility.cpp
   pkg/OmicABELnoMM/src/main.cpp
   pkg/OmicABELnoMM/test-driver
   pkg/OmicABELnoMM/tests/test.cpp
Log:
Added AMD ACML 6 support for heterogeneous systems (CPU + GPU). ACML fixes unusable performance issues on older (non-AVX) Opteron systems. Several bug fixes for I/O. Added some not-yet-complete functionality.

Modified: pkg/OmicABELnoMM/configure.ac
===================================================================
--- pkg/OmicABELnoMM/configure.ac	2014-07-23 13:50:12 UTC (rev 1767)
+++ pkg/OmicABELnoMM/configure.ac	2014-07-25 10:48:03 UTC (rev 1768)
@@ -3,8 +3,9 @@
 
 AC_PREREQ([2.67])
 AC_INIT([OmicABELnoMM], [0.1.0], [genabel-devel at r-forge.wu-wien.ac.at])
-AM_INIT_AUTOMAKE([silent-rules subdir-objects])
-AM_SILENT_RULES([yes])
+AM_INIT_AUTOMAKE([])
+#AM_INIT_AUTOMAKE([silent-rules subdir-objects])
+#AM_SILENT_RULES([yes])
 AC_CONFIG_SRCDIR([src/Utility.h])
 AC_CONFIG_HEADERS([src/config.h])
 
@@ -17,12 +18,12 @@
 # Set some default compile flags
 if test -z "$CXXFLAGS"; then
    # User did not set CXXFLAGS, so we can put in our own defaults
-    CXXFLAGS="-O3"
+    CXXFLAGS=""
 fi
 if test -z "$CPPFLAGS"; then
    # User did not set CPPFLAGS, so we can put in our own defaults
-    CPPFLAGS="-Wall -g -pedantic -Wunused-result -Wmaybe-uninitialized -Wformat"
-    #CPPFLAGS="-Wall"
+    #CPPFLAGS="-Wall -pedantic -Wunused-result -Wmaybe-uninitialized -Wformat"
+    CPPFLAGS="-O0"
 fi
 # If CXXFLAGS/CPPFLAGS are already set AC_PROG_CXX will not overwrite them
 # with its own defaults
@@ -31,33 +32,49 @@
 AC_PROG_CC
 AC_PROG_CXX
 
+# Check for openMP. If found the OPENMP_CXXFLAGS is set automatically
+AC_OPENMP
+AC_SUBST(AM_CXXFLAGS, "$OPENMP_CFLAGS")
 
+AM_CXXFLAGS="-static -O3 -I../libs/include/ -I./libs/include/ $AM_CXXFLAGS"
+
 # Checks for libraries.
 # pthread library
 AC_SEARCH_LIBS([pthread_mutex_init], [pthread], [], [
-   AC_MSG_ERROR([Unable to find the pthread_mutex_init() function])
+   AC_MSG_ERROR([Make sure pthread is available on the system])
 ])
-# Openblas
-AC_SEARCH_LIBS([cblas_sgemm], [openblas], [], [
-   AC_MSG_ERROR([Unable to find the openblas library])
-])
-# Lapack
-AC_SEARCH_LIBS([LAPACKE_sgeqrf], [lapack, lapacke], [], [
-   AC_MSG_ERROR([Unable to find a Lapack library])
-])
 
-#Boost
+if test -z "$LDFLAGS"; then
+	LDFLAGS="-L./libs/lib/ -L../libs/lib/"
+fi
 
+found_blas=0
 
+AC_SEARCH_LIBS([__iso_c_binding_c_f_pointer_l4],[gfortran])
 
-# Check for openMP. If found the OPENMP_CXXFLAGS is set automatically
-AC_OPENMP
-AC_SUBST(AM_CXXFLAGS, "$OPENMP_CFLAGS")
+# ACML
+AC_SEARCH_LIBS(dgemm, acml_mp,[found_blas=1 AM_CXXFLAGS="-D_acml_ $AM_CXXFLAGS"], [
+   AC_MSG_NOTICE([NOT using AMD the ACML  library],[-lgfortran])
+])
 
 
 
+if test "$found_blas" -eq 0
+	then
 
+# Openblas
+AC_SEARCH_LIBS([cblas_sgemm], [openblas], [AM_CXXFLAGS="$AM_CXXFLAGS -D_openblas_"], [
+   AC_MSG_ERROR([OpenBLAS  library NOT found])
+])
+ #Lapack
+AC_SEARCH_LIBS([LAPACKE_sgeqrf], [lapack, lapacke], [], [
+   AC_MSG_ERROR([Unable to find a Lapack library])
+])
+AC_MSG_NOTICE([Using OpenBLAS  library])
+fi
 
+
+
 # Checks for header files.
 AC_CHECK_HEADERS([limits.h stdlib.h string.h sys/time.h unistd.h])
 
@@ -73,6 +90,7 @@
 # Files to be generated by autotools
 AC_CONFIG_FILES([
         Makefile
+
 ])
 
 AC_OUTPUT

Modified: pkg/OmicABELnoMM/doc/howtocompile.txt
===================================================================
--- pkg/OmicABELnoMM/doc/howtocompile.txt	2014-07-23 13:50:12 UTC (rev 1767)
+++ pkg/OmicABELnoMM/doc/howtocompile.txt	2014-07-25 10:48:03 UTC (rev 1768)
@@ -1,4 +1,7 @@
-
+
+-------------------------Install BLAS----------------------------
+#STEP 1:
+#a) Install OPENBLAS:
 mkdir GWAS_PROJECT
 cd GWAS_PROJECT
 
@@ -10,21 +13,52 @@
 #make sure g++ its 4.8 or above!on rwth cluster module load gcc/4.8, choose 32 or 64 also
 make all HOSTCC=g++ FC=gfortran USE_OPENMP=1
 
-sudo make install PREFIX="/usr"
+make install PREFIX="path_to_/OmicABELnoMM/libs/"
 
-sudo ldconfig
+ldconfig
 
-cd ..
-#BLAS END
 
+cd ..
+
+-------------------------AMD ACML Alternative Version-------------------------
+#STEP 1:
+# b)
+mkdir OmicABELnoMM/libs/
+
+cd OmicABELnoMM/libs/
+
+You can use AMD ACML. Download from:
+http://developer.amd.com/tools-and-sdks/cpu-development/amd-core-math-library-acml/acml-downloads-resources/
+
+ and copy the supplied binary 
+ libraries to "OmicABELnoMM/libs/"
+
+IF both libraries are present (Openblas + ACML), the system will use ACML.
+
+cd ../../
+
+---------------------------------#BLAS END----------------------------------------------------
+
+#STEP 2:
+
+
+ON the folder GWAS_PROJECT
+
 svn checkout svn+ssh://developername@svn.r-forge.r-project.org/svnroot/genabel/OmicABELnoMM
 
 cd OmicABELnoMM
+
+autoreconf -fi
 
-./configure LDFLAGS="-L/usr/lib/"
+./configure
 
-make
+make
 
+make check
+
+#DONE
+
+------------------------------Example--------------------------------
 #test it
 ./omicabelnomm -c examples/XL --geno examples/XR -p examples/Y -o examples/B -n 2 -t 2
 
@@ -42,12 +76,19 @@
 
 
 
--------------------------Alternative Version-------------------------
+-------------------------Alternative Version of BLAS-------------------------
 
-Install all required libraries for your system: 
+#Make sure autoconf is installed:
 
--Iinux ubuntu:
+sudo apt-get install autoconf
+autoreconf -fi
+autoconf
 
+#Install all required libraries for your system, 
+#but they will not work, 99% of the time, due to not having openmp support: 
+
+#Iinux ubuntu:
+
 sudo apt-get install libopenblas-dev
 sudo apt-get install libopenblas-base
 sudo apt-get install liblapack3gf
@@ -56,11 +97,7 @@
 sudo apt-get install liblapacke
 sudo apt-get install liblapacke-dev
 
--Make sure autoconf is installed:
 
-sudo apt-get install autoconf
-autoreconf -fi
-autoconf
 
 
 

Modified: pkg/OmicABELnoMM/src/AIOwrapper.cpp
===================================================================
--- pkg/OmicABELnoMM/src/AIOwrapper.cpp	2014-07-23 13:50:12 UTC (rev 1767)
+++ pkg/OmicABELnoMM/src/AIOwrapper.cpp	2014-07-25 10:48:03 UTC (rev 1768)
@@ -40,7 +40,7 @@
         Fhandler->fnameAL = params.fnameAL;
         Fhandler->fnameAR = params.fnameAR;
         Fhandler->fnameY = params.fnameY;
-        Fhandler->fnameOutB = params.fnameOutB;
+        Fhandler->fnameOutFiles = params.fnameOutFiles;
 
 
         Yfvi  = load_databel_fvi( (Fhandler->fnameY+".fvi").c_str() );
@@ -91,7 +91,7 @@
 
     prepare_AL(params.l,params.n);
     prepare_AR(  params.mb,  params.n,  params.m,  params.r);
-    prepare_B(params.tb, params.l+params.r);
+    prepare_OutFiles(params.mb, params.l+params.r);
     prepare_Y(params.tb, params.n, params.t);
 
 
@@ -118,7 +118,7 @@
     finalize_Y();
     finalize_AR();
     finalize_AL();
-    finalize_B();
+    finalize_OutFiles();
 
     pthread_attr_destroy(&(Fhandler->attr));
 
@@ -135,7 +135,7 @@
 
 
 
-void AIOwrapper::finalize_B()
+void AIOwrapper::finalize_OutFiles()
 {
 
 }
@@ -149,7 +149,10 @@
 
     struct timespec timeToWait;
     FILE*  fp_Y;
-    FILE*  fp_B;
+    FILE*  fp_B;
+    FILE*  fp_R;
+    FILE*  fp_SD2;
+    FILE*  fp_P;
     FILE*  fp_Ar;
     if(!Fhandler->fakefiles)
     {
@@ -167,11 +170,29 @@
             exit(1);
         }
 
-        fp_B = fopen((Fhandler->fnameOutB+".fvd").c_str(), "w+b");
+        fp_B = fopen((Fhandler->fnameOutFiles+"_B.fvd").c_str(), "w+b");
         if(fp_B == 0)
         {
-            cout << "Error Opening File B " << Fhandler->fnameOutB << endl;
+            cout << "Error Opening File B " << Fhandler->fnameOutFiles << "_B" << endl;
             exit(1);
+        }
+        fp_R = fopen((Fhandler->fnameOutFiles+"_R.fvd").c_str(), "w+b");
+        if(fp_R == 0)
+        {
+            cout << "Error Opening File R " << Fhandler->fnameOutFiles << "_R" << endl;
+            exit(1);
+        }
+        fp_SD2 = fopen((Fhandler->fnameOutFiles+"_SD2.fvd").c_str(), "w+b");
+        if(fp_SD2 == 0)
+        {
+            cout << "Error Opening File SD2 " << Fhandler->fnameOutFiles << "_SD2" << endl;
+            exit(1);
+        }
+        fp_P = fopen((Fhandler->fnameOutFiles+"_P.fvd").c_str(), "w+b");
+        if(fp_P == 0)
+        {
+            cout << "Error Opening File P " << Fhandler->fnameOutFiles << "_P" << endl;
+            exit(1);
         }
     }
     else
@@ -205,6 +226,24 @@
         {
             cout << "Error setting up temp File B " << endl;
             exit(1);
+        }
+        fp_R = fopen("tempR.bin", "w+b");
+        if(fp_R == 0)
+        {
+            cout << "Error setting up temp File R " << endl;
+            exit(1);
+        }
+        fp_SD2 = fopen("tempSD2.bin", "w+b");
+        if(fp_SD2 == 0)
+        {
+            cout << "Error setting up temp File SD2 " << endl;
+            exit(1);
+        }
+        fp_P = fopen("tempP.bin", "w+b");
+        if(fp_P == 0)
+        {
+            cout << "Error setting up temp File P " << endl;
+            exit(1);
         }
         //cout << "\nEnd preping files\n" << flush;
 
@@ -229,14 +268,16 @@
 
             Fhandler->y_to_readSize -= tmp_y_blockSize;
             size_buff = Fhandler->n * tmp_y_blockSize;
-            //cout << Fhandler->y_to_readSize << endl;
+
 
+
             pthread_mutex_lock(&(Fhandler->m_buff_upd));
-            //cout << " pre;" << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
+
+
             type_buffElement* tobeFilled = Fhandler->empty_buffers.front();
             Fhandler->empty_buffers.pop();
-            //pthread_mutex_unlock(&(Fhandler->m_buff_upd));
 
+
             tobeFilled->size = tmp_y_blockSize;
 
             if(Fhandler->fakefiles)
@@ -273,8 +314,8 @@
 
                     }
                 }
-//                size_t result = fread (tobeFilled->buff,sizeof(type_precision),size_buff,fp_Y);
-//                result++;
+
+
                 if(Fhandler->y_to_readSize <= 0)
                 {
                     fseek ( fp_Y , 0 , SEEK_SET );
@@ -282,10 +323,9 @@
             }
 
 
-            //pthread_mutex_lock(&(Fhandler->m_buff_upd));
+
             Fhandler->full_buffers.push(tobeFilled);
-            //  cout << "\nStoring " << tobeFilled << endl;
-            //cout << " post;" << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
+
             pthread_mutex_unlock(&(Fhandler->m_buff_upd));
 
             pthread_mutex_lock(&(Fhandler->m_read));
@@ -294,8 +334,12 @@
 
         }
 
-        while(!Fhandler->ar_empty_buffers.empty() && Fhandler->Ar_to_readSize)
-        {
+        while(!Fhandler->ar_empty_buffers.empty() && Fhandler->Ar_to_readSize )
+        {
+
+
+
+
             tmp_ar_blockSize = Fhandler->Ar_blockSize;
             if(Fhandler->Ar_to_readSize < Fhandler->Ar_blockSize)
                 tmp_ar_blockSize = Fhandler->Ar_to_readSize;
@@ -343,12 +387,13 @@
                     }
                 }
 
-//                size_t result = fread(tobeFilled->buff,sizeof(type_precision),size_buff,fp_Ar);
-//                result++;
-                if (Fhandler->Ar_to_readSize <= 0)
-                {
-                    fseek ( fp_Ar , 0 , SEEK_SET );
-                }
+
+            }
+
+            if(Fhandler->Ar_to_readSize <= 0)
+            {
+                Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
+                fseek ( fp_Ar , 0 , SEEK_SET );
             }
 
             Fhandler->ar_full_buffers.push(tobeFilled);
@@ -362,23 +407,29 @@
         }
         //B write
 
-        while(!Fhandler->b_full_buffers.empty())
+        while(!Fhandler->write_full_buffers.empty())
         {
 
 
             pthread_mutex_lock(&(Fhandler->m_buff_upd));
-            type_buffElement* tobeWritten = Fhandler->b_full_buffers.front();
-            Fhandler->b_full_buffers.pop();
+            type_buffElement* tobeWritten = Fhandler->write_full_buffers.front();
+            Fhandler->write_full_buffers.pop();
             int size = Fhandler->p*Fhandler->b_blockSize;
 
             if(Fhandler->fakefiles)
             {
-                fseek ( fp_B , 0 , SEEK_SET );
+                fseek ( fp_B , 0 , SEEK_SET );
+                fseek ( fp_R , 0 , SEEK_SET );
+                fseek ( fp_SD2 , 0 , SEEK_SET );
+                fseek ( fp_P , 0 , SEEK_SET );
             }
-            fwrite (tobeWritten->buff,sizeof(type_precision),size,fp_B);
+            fwrite (&(tobeWritten->buff[0]),sizeof(type_precision),size,fp_B);
+            fwrite (&(tobeWritten->buff[Fhandler->max_b_blockSize*Fhandler->p]),sizeof(type_precision),Fhandler->b_blockSize,fp_R);
+            fwrite (&(tobeWritten->buff[Fhandler->max_b_blockSize*(Fhandler->p+1)]),sizeof(type_precision),Fhandler->b_blockSize,fp_SD2);
+            fwrite (&(tobeWritten->buff[Fhandler->max_b_blockSize*(Fhandler->p+2)]),sizeof(type_precision),size,fp_P);
 
 
-            Fhandler->b_empty_buffers.push(tobeWritten);
+            Fhandler->write_empty_buffers.push(tobeWritten);
             //  cout << "\nStoring " << tobeWritten << endl;
             pthread_mutex_unlock(&(Fhandler->m_buff_upd));
 
@@ -411,12 +462,31 @@
         pthread_cond_signal( &(Fhandler->condition_read ));
         pthread_mutex_unlock(&(Fhandler->m_read));
 
-        if(Fhandler->reset_wait)
-        {
-            pthread_barrier_wait(&(Fhandler->finalize_barrier));
-            //wait for main thread to reset everything
-            pthread_barrier_wait(&(Fhandler->finalize_barrier));
-        }
+//        if(Fhandler->reset_wait)
+//        {
+//            pthread_barrier_wait(&(Fhandler->finalize_barrier));
+//            //wait for main thread to reset everything
+//
+//            pthread_mutex_lock(&(Fhandler->m_buff_upd));
+//            Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
+//
+//            if(Fhandler->Ar_currentReadBuff)
+//            {
+//                Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
+//                Fhandler->Ar_currentReadBuff=0;
+//            }
+//            while(!Fhandler->ar_full_buffers.empty())
+//            {
+//                Fhandler->ar_empty_buffers.push(Fhandler->ar_full_buffers.front());
+//                Fhandler->ar_full_buffers.pop();
+//            }
+//            pthread_mutex_unlock(&(Fhandler->m_buff_upd));
+//
+//            Fhandler->reset_wait = false;
+//
+//
+//            pthread_barrier_wait(&(Fhandler->finalize_barrier));
+//        }
 
 
     }
@@ -425,7 +495,14 @@
     pthread_barrier_wait(&(Fhandler->finalize_barrier));
 
     {
-    type_buffElement* tmp;
+    type_buffElement* tmp;
+
+    if(Fhandler->currentReadBuff)
+    {
+        Fhandler->full_buffers.push(Fhandler->currentReadBuff);
+        Fhandler->currentReadBuff=0;
+    }
+
     while(!Fhandler->full_buffers.empty())
     {
        tmp= Fhandler->full_buffers.front();
@@ -438,8 +515,15 @@
     {
        tmp= Fhandler->empty_buffers.front();
        Fhandler->empty_buffers.pop();
-       delete []tmp->buff;
-       delete tmp;
+        delete []tmp->buff;
+        delete tmp;
+
+    }
+
+    if(Fhandler->Ar_currentReadBuff)
+    {
+        Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
+        Fhandler->Ar_currentReadBuff=0;
     }
 
     while(!Fhandler->ar_full_buffers.empty())
@@ -458,18 +542,18 @@
        delete tmp;
     }
 
-    while(!Fhandler->b_full_buffers.empty())
+    while(!Fhandler->write_full_buffers.empty())
     {
-       tmp= Fhandler->b_full_buffers.front();
-       Fhandler->b_full_buffers.pop();
+       tmp= Fhandler->write_full_buffers.front();
+       Fhandler->write_full_buffers.pop();
        delete []tmp->buff;
        delete tmp;
     }
 
-    while(!Fhandler->b_empty_buffers.empty())
+    while(!Fhandler->write_empty_buffers.empty())
     {
-       tmp= Fhandler->b_empty_buffers.front();
-       Fhandler->b_empty_buffers.pop();
+       tmp= Fhandler->write_empty_buffers.front();
+       Fhandler->write_empty_buffers.pop();
        delete []tmp->buff;
        delete tmp;
     }
@@ -482,6 +566,9 @@
         fclose(fp_Y);
         fclose(fp_Ar);
         fclose(fp_B);
+        fclose(fp_R);
+        fclose(fp_SD2);
+        fclose(fp_P);
 
         //cout << "\nexited io\n";
 
@@ -514,15 +601,16 @@
 
 
     //!read new rdy buffer
-    pthread_mutex_lock(&(Fhandler->m_buff_upd));
-        if(Fhandler->Ar_currentReadBuff)
-        {
-            Fhandler->ar_empty_buffers.push(Fhandler->Ar_currentReadBuff);
-        }
+    pthread_mutex_lock(&(Fhandler->m_buff_upd));
 
-        Fhandler->Ar_currentReadBuff = Fhandler->ar_full_buffers.front();
-        Fhandler->ar_full_buffers.pop();
+    if(Fhandler->Ar_currentReadBuff)
+    {
+        Fhandler->ar_empty_buffers.push(Fhandler->Ar_currentReadBuff);
+    }
 
+    Fhandler->Ar_currentReadBuff = Fhandler->ar_full_buffers.front();
+    Fhandler->ar_full_buffers.pop();
+
     //cout << "\nReading " << Fhandler->Ar_currentReadBuff << endl;
     Fhandler->Ar = Fhandler->Ar_currentReadBuff->buff;
     Ar_blockSize = Fhandler->Ar_currentReadBuff->size;
@@ -580,24 +668,23 @@
 
     //!read new rdy buffer
     pthread_mutex_lock(&(Fhandler->m_buff_upd));
-    //cout << " pre," << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
 
+
         if(Fhandler->currentReadBuff)
         {
-            //memset(Fhandler->currentReadBuff->buff,0,y_blockSize);
             Fhandler->empty_buffers.push(Fhandler->currentReadBuff);
         }
         Fhandler->currentReadBuff = Fhandler->full_buffers.front();
         Fhandler->full_buffers.pop();
 
-    //cout << "\nReading " << Fhandler->currentReadBuff << endl;
+
     Fhandler->Yb = Fhandler->currentReadBuff->buff;
     y_blockSize = Fhandler->currentReadBuff->size;
 
     (*Y) = Fhandler->Yb;
 
-     //cout << " post," << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
 
+
     pthread_mutex_unlock(&(Fhandler->m_buff_upd));
 
 
@@ -611,50 +698,7 @@
 
 }
 
-void AIOwrapper::write_B(type_precision* B, int p, int blockSize)
-{
-
-    while(Fhandler->b_empty_buffers.empty())
-    {
-        pthread_mutex_lock(&(Fhandler->m_more));
-        pthread_cond_signal( &(Fhandler->condition_more ));
-        pthread_mutex_unlock(&(Fhandler->m_more));
-
-        io_overhead = "b";
-
-        pthread_mutex_lock(&(Fhandler->m_read));
-        pthread_cond_wait( &(Fhandler->condition_read), &(Fhandler->m_read ));
-        pthread_mutex_unlock(&(Fhandler->m_read));
-
-    }
-
-
-    pthread_mutex_lock(&(Fhandler->m_buff_upd));
-
-
-
-        //cout << Fhandler->b_empty_buffers.size() << flush;
-        Fhandler->currentWriteBuff = Fhandler->b_empty_buffers.front();
-        Fhandler->b_empty_buffers.pop();
-
-
-
-    Fhandler->B = Fhandler->currentWriteBuff->buff;
-    Fhandler->b_blockSize = blockSize;
-    copy_vec(B,Fhandler->B,p*blockSize);
-
-    Fhandler->b_full_buffers.push(Fhandler->currentWriteBuff);
-
-
-
-    pthread_mutex_unlock(&(Fhandler->m_buff_upd));
-
-
-    pthread_mutex_lock(&(Fhandler->m_more));
-    pthread_cond_signal( &(Fhandler->condition_more ));
-    pthread_mutex_unlock(&(Fhandler->m_more));
-}
-
+
 void AIOwrapper::prepare_Y(int y_blockSize, int n, int totalY)
 {
     //for fake files
@@ -679,10 +723,6 @@
         tmp = new type_buffElement();
         tmp->buff = new type_precision[Fhandler->n*Fhandler->y_blockSize];
         tmp->size = y_blockSize;
-//        for( int i = 0; i < Fhandler->n*Fhandler->y_blockSize; i++)
-//        {
-//            (tmp->buff)[i] = 0;
-//        }
         Fhandler->empty_buffers.push(tmp);
         Fhandler->Yb = tmp->buff;
     }
@@ -701,52 +741,98 @@
 
 
 
-}
-
-void AIOwrapper::prepare_B(int b_blockSize, int p)
+}
+
+void AIOwrapper::getCurrentWriteBuffers(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P)
+{
+    B = &(Fhandler->currentWriteBuff->buff[0]);
+    R = &(Fhandler->currentWriteBuff->buff[Fhandler->max_b_blockSize*Fhandler->p]);
+    SD2 = &(Fhandler->currentWriteBuff->buff[Fhandler->max_b_blockSize*(Fhandler->p+1)]);
+    P = &(Fhandler->currentWriteBuff->buff[Fhandler->max_b_blockSize*(Fhandler->p+2)]);
+}
+
+void AIOwrapper::write_OutFiles(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P,  int blockSize)
 {
-    //for fake files
 
+    while(Fhandler->write_empty_buffers.empty())
+    {
+        pthread_mutex_lock(&(Fhandler->m_more));
+        pthread_cond_signal( &(Fhandler->condition_more ));
+        pthread_mutex_unlock(&(Fhandler->m_more));
 
-    Fhandler->b_blockSize = b_blockSize;
+        io_overhead = "W";
 
-    Fhandler->p=p;
+        pthread_mutex_lock(&(Fhandler->m_read));
+        pthread_cond_wait( &(Fhandler->condition_read), &(Fhandler->m_read ));
+        pthread_mutex_unlock(&(Fhandler->m_read));
+    }
 
 
-    int buff_count = 4;
+    pthread_mutex_lock(&(Fhandler->m_buff_upd));
 
-    Fhandler->currentWriteBuff = 0;
 
-    type_buffElement* tmp;
+    Fhandler->write_full_buffers.push(Fhandler->currentWriteBuff);
+    Fhandler->b_blockSize = blockSize;
+
+
+    Fhandler->currentWriteBuff = Fhandler->write_empty_buffers.front();
+    Fhandler->write_empty_buffers.pop();
+
+    B = &(Fhandler->currentWriteBuff->buff[0]);
+    R = &(Fhandler->currentWriteBuff->buff[Fhandler->b_blockSize*Fhandler->p]);
+    SD2 = &(Fhandler->currentWriteBuff->buff[Fhandler->b_blockSize*(Fhandler->p+1)]);
+    P = &(Fhandler->currentWriteBuff->buff[Fhandler->b_blockSize*(Fhandler->p+2)]);
 
 
+    pthread_mutex_unlock(&(Fhandler->m_buff_upd));
+
+
+    pthread_mutex_lock(&(Fhandler->m_more));
+    pthread_cond_signal( &(Fhandler->condition_more ));
+    pthread_mutex_unlock(&(Fhandler->m_more));
+}
+
+
+
+
+
+void AIOwrapper::prepare_OutFiles(int max_b_blockSize, int p)
+{
+
+    Fhandler->max_b_blockSize = max_b_blockSize;
+    Fhandler->p=p;
+    int buff_count = 4;
+
+    type_buffElement* tmp;
+
+
     for(int i = 0; i< buff_count  ; i++)
     {
-
         tmp = new type_buffElement();
-        tmp->buff = new type_precision[Fhandler->p*Fhandler->b_blockSize];
-        tmp->size = b_blockSize;
-//        for( int i = 0; i < Fhandler->n*Fhandler->b_blockSize; i++)
-//        {
-//            (tmp->buff)[i] = 0;
-//        }
-        Fhandler->b_empty_buffers.push(tmp);
-
-//        Fhandler->currentWriteBuff = Fhandler->b_empty_buffers.front();
-//        Fhandler->b_empty_buffers.pop();
+        tmp->buff = new type_precision[Fhandler->max_b_blockSize*(2*Fhandler->p+2)];
+        tmp->size = max_b_blockSize;
+        Fhandler->write_empty_buffers.push(tmp);
+    }
+    Fhandler->currentWriteBuff = Fhandler->write_empty_buffers.front();
+    Fhandler->write_empty_buffers.pop();
 
 
-    }
-
 }
 
+
+ void AIOwrapper::write_significantValues(int Y, int X_R, float R, float SD2, float P)
+ {
+
+ }
+
+
 void AIOwrapper::reset_Y()
 {
     //void *status;
 
     Fhandler->seed = 1337;
 
-    //cout << "ry" << flush;
+    cout << "ry" << flush;
 
     Fhandler->reset_wait = true;
     pthread_barrier_wait(&(Fhandler->finalize_barrier));
@@ -789,38 +875,38 @@
 
     //cout << "ra" << flush;
 
-    Fhandler->reset_wait = true;
-    pthread_barrier_wait(&(Fhandler->finalize_barrier));
+//    Fhandler->reset_wait = true;
+//    pthread_barrier_wait(&(Fhandler->finalize_barrier));
+//
+////    pthread_mutex_lock(&(Fhandler->m_buff_upd));
+////    Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
+////
+////    if(Fhandler->Ar_currentReadBuff)
+////    {
+////        Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
+////        Fhandler->Ar_currentReadBuff=0;
+////    }
+////
+////    while(!Fhandler->ar_full_buffers.empty())
+////    {
+////        Fhandler->ar_empty_buffers.push(Fhandler->ar_full_buffers.front());
+//////        for( int i = 0; i < Fhandler->n*Fhandler->r*Fhandler->Ar_blockSize; i++)
+//////        {
+//////            ((Fhandler->ar_full_buffers.front())->buff)[i] = 0;
+//////        }
+////        Fhandler->ar_full_buffers.pop();
+////    }
+////    pthread_mutex_unlock(&(Fhandler->m_buff_upd));
+////
+////    Fhandler->reset_wait = false;
+//
+//    pthread_barrier_wait(&(Fhandler->finalize_barrier));
+//
+//    pthread_mutex_lock(&(Fhandler->m_more));
+//    pthread_cond_signal( &(Fhandler->condition_more ));
+//    pthread_mutex_unlock(&(Fhandler->m_more));
 
-    pthread_mutex_lock(&(Fhandler->m_buff_upd));
-    Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
 
-    if(Fhandler->Ar_currentReadBuff)
-    {
-        Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
-        Fhandler->Ar_currentReadBuff=0;
-    }
-
-    while(!Fhandler->ar_full_buffers.empty())
-    {
-        Fhandler->ar_empty_buffers.push(Fhandler->ar_full_buffers.front());
-//        for( int i = 0; i < Fhandler->n*Fhandler->r*Fhandler->Ar_blockSize; i++)
-//        {
-//            ((Fhandler->ar_full_buffers.front())->buff)[i] = 0;
-//        }
-        Fhandler->ar_full_buffers.pop();
-    }
-    pthread_mutex_unlock(&(Fhandler->m_buff_upd));
-
-    Fhandler->reset_wait = false;
-
-    pthread_barrier_wait(&(Fhandler->finalize_barrier));
-
-    pthread_mutex_lock(&(Fhandler->m_more));
-    pthread_cond_signal( &(Fhandler->condition_more ));
-    pthread_mutex_unlock(&(Fhandler->m_more));
-
-
 }
 
 void AIOwrapper::finalize_Y()
@@ -838,7 +924,7 @@
     Fhandler->Ar_Amount = totalR;
     Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
 
-    int buff_count = min(3,(totalR+ desired_blockSize - 1)/desired_blockSize);
+    int buff_count = 4;
 
     Fhandler->Ar_currentReadBuff = 0;
     type_buffElement* tmp;

Modified: pkg/OmicABELnoMM/src/AIOwrapper.h
===================================================================
--- pkg/OmicABELnoMM/src/AIOwrapper.h	2014-07-23 13:50:12 UTC (rev 1767)
+++ pkg/OmicABELnoMM/src/AIOwrapper.h	2014-07-25 10:48:03 UTC (rev 1768)
@@ -25,7 +25,7 @@
     string fnameY;
 
 
-    string fnameOutB;
+    string fnameOutFiles;
 
 
     list< pair<int,int> >* excl_List;
@@ -46,8 +46,8 @@
     queue<type_buffElement*> empty_buffers;
     queue<type_buffElement*> full_buffers;
 
-    queue<type_buffElement*> b_empty_buffers;
-    queue<type_buffElement*> b_full_buffers;
+    queue<type_buffElement*> write_empty_buffers;
+    queue<type_buffElement*> write_full_buffers;
 
     queue<type_buffElement*> ar_empty_buffers;
     queue<type_buffElement*> ar_full_buffers;
@@ -67,7 +67,8 @@
     int y_blockSize;
     int y_to_readSize;
 
-    int b_blockSize;
+    int b_blockSize;
+    int max_b_blockSize;
 
     bool not_done;
     bool reset_wait;
@@ -139,7 +140,11 @@
         void reset_Y();
         void reset_AR();
 
-        void write_B(type_precision* B, int p, int blockSize);
+        void getCurrentWriteBuffers(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P);
+
+        void write_OutFiles(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P,  int blockSize);
+
+        void write_significantValues(int Y, int X_R, float R, float SD2, float P);
 
         string io_overhead;
 
@@ -161,8 +166,8 @@
         void prepare_AL( int columns, int n);
         void finalize_AL();
 
-        void prepare_B(int b_blockSize, int p);
-        void finalize_B();
+        void prepare_OutFiles(int max_b_blockSize, int p);
+        void finalize_OutFiles();
 
 
         static void* async_io(void *ptr );

Modified: pkg/OmicABELnoMM/src/Algorithm.cpp
===================================================================
--- pkg/OmicABELnoMM/src/Algorithm.cpp	2014-07-23 13:50:12 UTC (rev 1767)
+++ pkg/OmicABELnoMM/src/Algorithm.cpp	2014-07-25 10:48:03 UTC (rev 1768)
@@ -621,7 +621,7 @@
 
                 get_ticks(start_tick2);
 
-                AIOfile.write_B(B, p, a_block_size);
+                //AIOfile.write_B(B, p, a_block_size);
 
                 get_ticks(end_tick);
                 out.acc_storeb += ticks2sec(end_tick,start_tick2);

Modified: pkg/OmicABELnoMM/src/Definitions.h
===================================================================
--- pkg/OmicABELnoMM/src/Definitions.h	2014-07-23 13:50:12 UTC (rev 1767)
+++ pkg/OmicABELnoMM/src/Definitions.h	2014-07-25 10:48:03 UTC (rev 1768)
@@ -5,41 +5,110 @@
     #define LINUX
 #else
     #define WINDOWS
-#endif
+#endif
 
+#include <unistd.h>
+#include <limits.h>
+#include <queue>
+#include <iostream>
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>       /* time */
 #include <cstring>
 #include <math.h>
-#include <omp.h>
+#include <omp.h>
+#include <pthread.h>
 
-
-
+
 #ifdef WINDOWS
     #include <windows.h>
-    #include <cblas.h>
 #else
-    //#include "mpi.h"
-    //#define cpu_freq 3.0
-    #define cpu_freq 3.2
-    #include "cblas.h"
+
+#endif
+
+//!For intel use propetary MKL, it will be preferred over others
+#ifdef __INTEL_MKL__
+    #pragma message("MKL will Probably NOT compile")
+    #include "mkl.h"
+    #include "cblas.h"
+    #include <lapacke.h>
+    #define blas_set_num_threads(n) mkl_set_num_threads(n)
+    #define STORAGE_TYPE LAPACK_COL_MAJOR
+#else
+
+    //!For AMD systems use the proper ACML library, preferred over openblas ON AMD
+    #ifdef _acml_
+        #pragma message("Compiled with AMD ACML")
+        #define blas_set_num_threads(n) omp_set_num_threads(n)
+
+        #include <acml.h>
+
+        #define lapack_int int
+
+        #define CblasTrans 'T'
+        #define CblasNoTrans 'N'
+        #define CblasUpper 'U'
+        #define CblasColMajor 1
+
+
+        #define STORAGE_TYPE CblasColMajor
+
+
+        #define cblas_snrm2 snrm2
+        #define cblas_saxpy saxpy
+
+        #ifndef BLASdefs_H_INCLUDED
+        #define BLASdefs_H_INCLUDED
+
+        inline  void cblas_sgemm(int storage, char transa, char transb, int m, int n, int k, float alpha, float *a, int lda, float *b, int ldb, float beta, float *c, int ldc)
+        {
+           sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+        }
+
+        inline  lapack_int LAPACKE_sposv( int matrix_order, char uplo, lapack_int n,  lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb )
+        {
+            int info;
+            sposv( uplo, n,  nrhs, a, lda, b,  ldb, &info);
+            return info;
+        }
+
+        inline  void cblas_ssyrk(int Order, char uplo, char Trans,
+		 int N, int K, float alpha, float *A, int lda,  float beta, float *C, int ldc)
+        {
+            ssyrk(uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
+        }
+
+        inline  lapack_int LAPACKE_sgels( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, float* a,  lapack_int lda, float* b, lapack_int ldb )
+        {
+            int info;
+            sgels(trans, m, n, nrhs, a, lda, b, ldb,&info);
+            return info;
+        }
+
+        #endif
+
+
+
+
+
+    #else
+
+        //!IF MKL is not present on INTEL, use openblas
+        #ifdef _openblas_
+            #pragma message("Compiled with OPENBLAS")
+            #define STORAGE_TYPE LAPACK_COL_MAJOR
+            #include "cblas.h"
+            #include <lapacke.h>
+            extern "C" void openblas_set_num_threads(int num_threads);
+            #define blas_set_num_threads(n) openblas_set_num_threads(n)
+        #endif
+
+    #endif
+
 #endif
-#ifdef __INTEL_MKL__
-    #include "mkl.h"
-    #define blas_set_num_threads(n) mkl_set_num_threads(n)
-#else
-    extern "C" void openblas_set_num_threads(int num_threads);
-    #define blas_set_num_threads(n) openblas_set_num_threads(n)
-#endif
 
-#include <unistd.h>
-#include <pthread.h>
-#include <limits.h>
-#include <queue>
-#include <iostream>
-#include <lapacke.h>
 
+
 //!SETTINGS
 
 #define EXTENDEDTEST 0
@@ -47,7 +116,7 @@
 
 #define OUTPUT 0
 
-#define STORAGE_TYPE LAPACK_COL_MAJOR
+
 #define type_precision float
 
 #define MIN(a,b) (((a)<(b))?(a):(b))
@@ -56,86 +125,8 @@
 #define _10MB 10*_1MB
 #define _1GB 1024*1024*1024
 
-//!for CPU speed!
 
-//#ifdef WIN32
-//#define WIN32_LEAN_AND_MEAN
-//#include <windows.h>
-//typedef unsigned __int64 usCount;
-//static usCount GetUsCount()
-//{
-//    static LARGE_INTEGER ticksPerSec;
-//    static double scalefactor;
-//    LARGE_INTEGER val;
-//    if (!scalefactor)
-//    {
-//        if (QueryPerformanceFrequency(&ticksPerSec))
-//            scalefactor=ticksPerSec.QuadPart/1000000000000.0;
-//        else
-//            scalefactor=1;
-//    }
-//    if (!QueryPerformanceCounter(&val))
-//        return (usCount) GetTickCount() * 1000000000;
-//    return (usCount) (val.QuadPart/scalefactor);
-//}
-//#else
-//#include <sys/time.h>
-//#include <time.h>
-//#include <sched.h>
-//typedef unsigned long long usCount;
-//static usCount GetUsCount()
-//{
-//#ifdef CLOCK_MONOTONIC
-//    struct timespec ts;
-//    clock_gettime(CLOCK_MONOTONIC, &ts);
-//    return ((usCount) ts.tv_sec*1000000000000LL)+ts.tv_nsec*1000LL;
-//#else
-//    struct timeval tv;
-//    gettimeofday(&tv, 0);
-//    return ((usCount) tv.tv_sec*1000000000000LL)+tv.tv_usec*1000000LL;
-//#endif
-//}
-//#endif
-//static usCount usCountOverhead;
-//#ifdef __GNUC__
-//#include "x86intrin.h"
-//#define __rdtsc() __builtin_ia32_rdtsc()
-//#endif
 
-//static usCount GetClockSpeed()
-//{
-//    int n;
-//    usCount start, end, start_tsc, end_tsc;
-//    if (!usCountOverhead)
-//    {
-//        usCount foo = 0;
-//        start=GetUsCount();
-//        for (n = 0; n < 1000000; n++)
-//        {
-//            foo += GetUsCount();
-//        }
-//        end = GetUsCount();
-//        usCountOverhead = (end - start)/n;
-//    }
-//
-//    start = GetUsCount();
-//    start_tsc = __rdtsc();
-//    for (n = 0; n <1000; n++)
-//    {
-//#ifdef WIN32
-//        Sleep(0);
-//#else
-//        sched_yield();
-//#endif
-//    }
-//
-//    end_tsc = __rdtsc();
-//    end = GetUsCount();
-//    return(usCount)((1000000000000.0 * (end_tsc - start_tsc)) /
-//                    (end - start - usCountOverhead));
-//}
-
-
 using namespace std;
 
 
@@ -149,7 +140,9 @@
     int p;
     int tb;
     int mb;
-    int id;
+    int id;
+
+    float sig_threshold;
 
     int threads;
 
@@ -158,7 +151,7 @@
     string fnameAL;
     string fnameAR;
     string fnameY;
-    string fnameOutB;
+    string fnameOutFiles;
     string fname_excludelist;
 
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/genabel -r 1768


More information about the Genabel-commits mailing list