[Genabel-commits] r1768 - in pkg/OmicABELnoMM: . doc src tests
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Jul 25 12:48:04 CEST 2014
Author: afrank
Date: 2014-07-25 12:48:03 +0200 (Fri, 25 Jul 2014)
New Revision: 1768
Modified:
pkg/OmicABELnoMM/configure.ac
pkg/OmicABELnoMM/doc/howtocompile.txt
pkg/OmicABELnoMM/src/AIOwrapper.cpp
pkg/OmicABELnoMM/src/AIOwrapper.h
pkg/OmicABELnoMM/src/Algorithm.cpp
pkg/OmicABELnoMM/src/Definitions.h
pkg/OmicABELnoMM/src/Utility.cpp
pkg/OmicABELnoMM/src/main.cpp
pkg/OmicABELnoMM/test-driver
pkg/OmicABELnoMM/tests/test.cpp
Log:
Added AMD ACML 6 support for heterogeneous systems (CPU + GPU). ACML fixes the unusable performance on older Opteron (non-AVX) systems. Several bug fixes for IO. Added some not-yet-complete functionality.
Modified: pkg/OmicABELnoMM/configure.ac
===================================================================
--- pkg/OmicABELnoMM/configure.ac 2014-07-23 13:50:12 UTC (rev 1767)
+++ pkg/OmicABELnoMM/configure.ac 2014-07-25 10:48:03 UTC (rev 1768)
@@ -3,8 +3,9 @@
AC_PREREQ([2.67])
AC_INIT([OmicABELnoMM], [0.1.0], [genabel-devel at r-forge.wu-wien.ac.at])
-AM_INIT_AUTOMAKE([silent-rules subdir-objects])
-AM_SILENT_RULES([yes])
+AM_INIT_AUTOMAKE([])
+#AM_INIT_AUTOMAKE([silent-rules subdir-objects])
+#AM_SILENT_RULES([yes])
AC_CONFIG_SRCDIR([src/Utility.h])
AC_CONFIG_HEADERS([src/config.h])
@@ -17,12 +18,12 @@
# Set some default compile flags
if test -z "$CXXFLAGS"; then
# User did not set CXXFLAGS, so we can put in our own defaults
- CXXFLAGS="-O3"
+ CXXFLAGS=""
fi
if test -z "$CPPFLAGS"; then
# User did not set CPPFLAGS, so we can put in our own defaults
- CPPFLAGS="-Wall -g -pedantic -Wunused-result -Wmaybe-uninitialized -Wformat"
- #CPPFLAGS="-Wall"
+ #CPPFLAGS="-Wall -pedantic -Wunused-result -Wmaybe-uninitialized -Wformat"
+ CPPFLAGS="-O0"
fi
# If CXXFLAGS/CPPFLAGS are already set AC_PROG_CXX will not overwrite them
# with its own defaults
@@ -31,33 +32,49 @@
AC_PROG_CC
AC_PROG_CXX
+# Check for openMP. If found the OPENMP_CXXFLAGS is set automatically
+AC_OPENMP
+AC_SUBST(AM_CXXFLAGS, "$OPENMP_CFLAGS")
+AM_CXXFLAGS="-static -O3 -I../libs/include/ -I./libs/include/ $AM_CXXFLAGS"
+
# Checks for libraries.
# pthread library
AC_SEARCH_LIBS([pthread_mutex_init], [pthread], [], [
- AC_MSG_ERROR([Unable to find the pthread_mutex_init() function])
+ AC_MSG_ERROR([Make sure pthread is available on the system])
])
-# Openblas
-AC_SEARCH_LIBS([cblas_sgemm], [openblas], [], [
- AC_MSG_ERROR([Unable to find the openblas library])
-])
-# Lapack
-AC_SEARCH_LIBS([LAPACKE_sgeqrf], [lapack, lapacke], [], [
- AC_MSG_ERROR([Unable to find a Lapack library])
-])
-#Boost
+if test -z "$LDFLAGS"; then
+ LDFLAGS="-L./libs/lib/ -L../libs/lib/"
+fi
+found_blas=0
+AC_SEARCH_LIBS([__iso_c_binding_c_f_pointer_l4],[gfortran])
-# Check for openMP. If found the OPENMP_CXXFLAGS is set automatically
-AC_OPENMP
-AC_SUBST(AM_CXXFLAGS, "$OPENMP_CFLAGS")
+# ACML
+AC_SEARCH_LIBS(dgemm, acml_mp,[found_blas=1 AM_CXXFLAGS="-D_acml_ $AM_CXXFLAGS"], [
+ AC_MSG_NOTICE([NOT using AMD the ACML library],[-lgfortran])
+])
+if test "$found_blas" -eq 0
+ then
+# Openblas
+AC_SEARCH_LIBS([cblas_sgemm], [openblas], [AM_CXXFLAGS="$AM_CXXFLAGS -D_openblas_"], [
+ AC_MSG_ERROR([OpenBLAS library NOT found])
+])
+ #Lapack
+AC_SEARCH_LIBS([LAPACKE_sgeqrf], [lapack, lapacke], [], [
+ AC_MSG_ERROR([Unable to find a Lapack library])
+])
+AC_MSG_NOTICE([Using OpenBLAS library])
+fi
+
+
# Checks for header files.
AC_CHECK_HEADERS([limits.h stdlib.h string.h sys/time.h unistd.h])
@@ -73,6 +90,7 @@
# Files to be generated by autotools
AC_CONFIG_FILES([
Makefile
+
])
AC_OUTPUT
Modified: pkg/OmicABELnoMM/doc/howtocompile.txt
===================================================================
--- pkg/OmicABELnoMM/doc/howtocompile.txt 2014-07-23 13:50:12 UTC (rev 1767)
+++ pkg/OmicABELnoMM/doc/howtocompile.txt 2014-07-25 10:48:03 UTC (rev 1768)
@@ -1,4 +1,7 @@
-
+
+-------------------------Install BLAS----------------------------
+#STEP 1:
+#a) Install OPENBLAS:
mkdir GWAS_PROJECT
cd GWAS_PROJECT
@@ -10,21 +13,52 @@
#make sure g++ its 4.8 or above!on rwth cluster module load gcc/4.8, choose 32 or 64 also
make all HOSTCC=g++ FC=gfortran USE_OPENMP=1
-sudo make install PREFIX="/usr"
+make install PREFIX="path_to_/OmicABELnoMM/libs/"
-sudo ldconfig
+ldconfig
-cd ..
-#BLAS END
+cd ..
+
+-------------------------AMD ACML Alternative Version-------------------------
+#STEP 1:
+# b)
+mkdir OmicABELnoMM/libs/
+
+cd OmicABELnoMM/libs/
+
+You can use AMD ACML. Download from:
+http://developer.amd.com/tools-and-sdks/cpu-development/amd-core-math-library-acml/acml-downloads-resources/
+
+ and copy the supplied binary
+ libraries to "OmicABELnoMM/libs/"
+
+IF both libraries are present (Openblas + ACML), the system will use ACML.
+
+cd ../../
+
+---------------------------------#BLAS END----------------------------------------------------
+
+#STEP 2:
+
+
+ON the folder GWAS_PROJECT
+
svn checkout svn+ssh://developername@svn.r-forge.r-project.org/svnroot/genabel/OmicABELnoMM
cd OmicABELnoMM
+
+autoreconf -fi
-./configure LDFLAGS="-L/usr/lib/"
+./configure
-make
+make
+make check
+
+#DONE
+
+------------------------------Example--------------------------------
#test it
./omicabelnomm -c examples/XL --geno examples/XR -p examples/Y -o examples/B -n 2 -t 2
@@ -42,12 +76,19 @@
--------------------------Alternative Version-------------------------
+-------------------------Alternative Version of BLAS-------------------------
-Install all required libraries for your system:
+#Make sure autoconf is installed:
--Iinux ubuntu:
+sudo apt-get install autoconf
+autoreconf -fi
+autoconf
+#Install all required libraries for your system,
+#but they will not work, 99% of the time, due to not having openmp support:
+
+#Iinux ubuntu:
+
sudo apt-get install libopenblas-dev
sudo apt-get install libopenblas-base
sudo apt-get install liblapack3gf
@@ -56,11 +97,7 @@
sudo apt-get install liblapacke
sudo apt-get install liblapacke-dev
--Make sure autoconf is installed:
-sudo apt-get install autoconf
-autoreconf -fi
-autoconf
Modified: pkg/OmicABELnoMM/src/AIOwrapper.cpp
===================================================================
--- pkg/OmicABELnoMM/src/AIOwrapper.cpp 2014-07-23 13:50:12 UTC (rev 1767)
+++ pkg/OmicABELnoMM/src/AIOwrapper.cpp 2014-07-25 10:48:03 UTC (rev 1768)
@@ -40,7 +40,7 @@
Fhandler->fnameAL = params.fnameAL;
Fhandler->fnameAR = params.fnameAR;
Fhandler->fnameY = params.fnameY;
- Fhandler->fnameOutB = params.fnameOutB;
+ Fhandler->fnameOutFiles = params.fnameOutFiles;
Yfvi = load_databel_fvi( (Fhandler->fnameY+".fvi").c_str() );
@@ -91,7 +91,7 @@
prepare_AL(params.l,params.n);
prepare_AR( params.mb, params.n, params.m, params.r);
- prepare_B(params.tb, params.l+params.r);
+ prepare_OutFiles(params.mb, params.l+params.r);
prepare_Y(params.tb, params.n, params.t);
@@ -118,7 +118,7 @@
finalize_Y();
finalize_AR();
finalize_AL();
- finalize_B();
+ finalize_OutFiles();
pthread_attr_destroy(&(Fhandler->attr));
@@ -135,7 +135,7 @@
-void AIOwrapper::finalize_B()
+void AIOwrapper::finalize_OutFiles()
{
}
@@ -149,7 +149,10 @@
struct timespec timeToWait;
FILE* fp_Y;
- FILE* fp_B;
+ FILE* fp_B;
+ FILE* fp_R;
+ FILE* fp_SD2;
+ FILE* fp_P;
FILE* fp_Ar;
if(!Fhandler->fakefiles)
{
@@ -167,11 +170,29 @@
exit(1);
}
- fp_B = fopen((Fhandler->fnameOutB+".fvd").c_str(), "w+b");
+ fp_B = fopen((Fhandler->fnameOutFiles+"_B.fvd").c_str(), "w+b");
if(fp_B == 0)
{
- cout << "Error Opening File B " << Fhandler->fnameOutB << endl;
+ cout << "Error Opening File B " << Fhandler->fnameOutFiles << "_B" << endl;
exit(1);
+ }
+ fp_R = fopen((Fhandler->fnameOutFiles+"_R.fvd").c_str(), "w+b");
+ if(fp_R == 0)
+ {
+ cout << "Error Opening File R " << Fhandler->fnameOutFiles << "_R" << endl;
+ exit(1);
+ }
+ fp_SD2 = fopen((Fhandler->fnameOutFiles+"_SD2.fvd").c_str(), "w+b");
+ if(fp_SD2 == 0)
+ {
+ cout << "Error Opening File SD2 " << Fhandler->fnameOutFiles << "_SD2" << endl;
+ exit(1);
+ }
+ fp_P = fopen((Fhandler->fnameOutFiles+"_P.fvd").c_str(), "w+b");
+ if(fp_P == 0)
+ {
+ cout << "Error Opening File P " << Fhandler->fnameOutFiles << "_P" << endl;
+ exit(1);
}
}
else
@@ -205,6 +226,24 @@
{
cout << "Error setting up temp File B " << endl;
exit(1);
+ }
+ fp_R = fopen("tempR.bin", "w+b");
+ if(fp_R == 0)
+ {
+ cout << "Error setting up temp File R " << endl;
+ exit(1);
+ }
+ fp_SD2 = fopen("tempSD2.bin", "w+b");
+ if(fp_SD2 == 0)
+ {
+ cout << "Error setting up temp File SD2 " << endl;
+ exit(1);
+ }
+ fp_P = fopen("tempP.bin", "w+b");
+ if(fp_P == 0)
+ {
+ cout << "Error setting up temp File P " << endl;
+ exit(1);
}
//cout << "\nEnd preping files\n" << flush;
@@ -229,14 +268,16 @@
Fhandler->y_to_readSize -= tmp_y_blockSize;
size_buff = Fhandler->n * tmp_y_blockSize;
- //cout << Fhandler->y_to_readSize << endl;
+
+
pthread_mutex_lock(&(Fhandler->m_buff_upd));
- //cout << " pre;" << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
+
+
type_buffElement* tobeFilled = Fhandler->empty_buffers.front();
Fhandler->empty_buffers.pop();
- //pthread_mutex_unlock(&(Fhandler->m_buff_upd));
+
tobeFilled->size = tmp_y_blockSize;
if(Fhandler->fakefiles)
@@ -273,8 +314,8 @@
}
}
-// size_t result = fread (tobeFilled->buff,sizeof(type_precision),size_buff,fp_Y);
-// result++;
+
+
if(Fhandler->y_to_readSize <= 0)
{
fseek ( fp_Y , 0 , SEEK_SET );
@@ -282,10 +323,9 @@
}
- //pthread_mutex_lock(&(Fhandler->m_buff_upd));
+
Fhandler->full_buffers.push(tobeFilled);
- // cout << "\nStoring " << tobeFilled << endl;
- //cout << " post;" << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
+
pthread_mutex_unlock(&(Fhandler->m_buff_upd));
pthread_mutex_lock(&(Fhandler->m_read));
@@ -294,8 +334,12 @@
}
- while(!Fhandler->ar_empty_buffers.empty() && Fhandler->Ar_to_readSize)
- {
+ while(!Fhandler->ar_empty_buffers.empty() && Fhandler->Ar_to_readSize )
+ {
+
+
+
+
tmp_ar_blockSize = Fhandler->Ar_blockSize;
if(Fhandler->Ar_to_readSize < Fhandler->Ar_blockSize)
tmp_ar_blockSize = Fhandler->Ar_to_readSize;
@@ -343,12 +387,13 @@
}
}
-// size_t result = fread(tobeFilled->buff,sizeof(type_precision),size_buff,fp_Ar);
-// result++;
- if (Fhandler->Ar_to_readSize <= 0)
- {
- fseek ( fp_Ar , 0 , SEEK_SET );
- }
+
+ }
+
+ if(Fhandler->Ar_to_readSize <= 0)
+ {
+ Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
+ fseek ( fp_Ar , 0 , SEEK_SET );
}
Fhandler->ar_full_buffers.push(tobeFilled);
@@ -362,23 +407,29 @@
}
//B write
- while(!Fhandler->b_full_buffers.empty())
+ while(!Fhandler->write_full_buffers.empty())
{
pthread_mutex_lock(&(Fhandler->m_buff_upd));
- type_buffElement* tobeWritten = Fhandler->b_full_buffers.front();
- Fhandler->b_full_buffers.pop();
+ type_buffElement* tobeWritten = Fhandler->write_full_buffers.front();
+ Fhandler->write_full_buffers.pop();
int size = Fhandler->p*Fhandler->b_blockSize;
if(Fhandler->fakefiles)
{
- fseek ( fp_B , 0 , SEEK_SET );
+ fseek ( fp_B , 0 , SEEK_SET );
+ fseek ( fp_R , 0 , SEEK_SET );
+ fseek ( fp_SD2 , 0 , SEEK_SET );
+ fseek ( fp_P , 0 , SEEK_SET );
}
- fwrite (tobeWritten->buff,sizeof(type_precision),size,fp_B);
+ fwrite (&(tobeWritten->buff[0]),sizeof(type_precision),size,fp_B);
+ fwrite (&(tobeWritten->buff[Fhandler->max_b_blockSize*Fhandler->p]),sizeof(type_precision),Fhandler->b_blockSize,fp_R);
+ fwrite (&(tobeWritten->buff[Fhandler->max_b_blockSize*(Fhandler->p+1)]),sizeof(type_precision),Fhandler->b_blockSize,fp_SD2);
+ fwrite (&(tobeWritten->buff[Fhandler->max_b_blockSize*(Fhandler->p+2)]),sizeof(type_precision),size,fp_P);
- Fhandler->b_empty_buffers.push(tobeWritten);
+ Fhandler->write_empty_buffers.push(tobeWritten);
// cout << "\nStoring " << tobeWritten << endl;
pthread_mutex_unlock(&(Fhandler->m_buff_upd));
@@ -411,12 +462,31 @@
pthread_cond_signal( &(Fhandler->condition_read ));
pthread_mutex_unlock(&(Fhandler->m_read));
- if(Fhandler->reset_wait)
- {
- pthread_barrier_wait(&(Fhandler->finalize_barrier));
- //wait for main thread to reset everything
- pthread_barrier_wait(&(Fhandler->finalize_barrier));
- }
+// if(Fhandler->reset_wait)
+// {
+// pthread_barrier_wait(&(Fhandler->finalize_barrier));
+// //wait for main thread to reset everything
+//
+// pthread_mutex_lock(&(Fhandler->m_buff_upd));
+// Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
+//
+// if(Fhandler->Ar_currentReadBuff)
+// {
+// Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
+// Fhandler->Ar_currentReadBuff=0;
+// }
+// while(!Fhandler->ar_full_buffers.empty())
+// {
+// Fhandler->ar_empty_buffers.push(Fhandler->ar_full_buffers.front());
+// Fhandler->ar_full_buffers.pop();
+// }
+// pthread_mutex_unlock(&(Fhandler->m_buff_upd));
+//
+// Fhandler->reset_wait = false;
+//
+//
+// pthread_barrier_wait(&(Fhandler->finalize_barrier));
+// }
}
@@ -425,7 +495,14 @@
pthread_barrier_wait(&(Fhandler->finalize_barrier));
{
- type_buffElement* tmp;
+ type_buffElement* tmp;
+
+ if(Fhandler->currentReadBuff)
+ {
+ Fhandler->full_buffers.push(Fhandler->currentReadBuff);
+ Fhandler->currentReadBuff=0;
+ }
+
while(!Fhandler->full_buffers.empty())
{
tmp= Fhandler->full_buffers.front();
@@ -438,8 +515,15 @@
{
tmp= Fhandler->empty_buffers.front();
Fhandler->empty_buffers.pop();
- delete []tmp->buff;
- delete tmp;
+ delete []tmp->buff;
+ delete tmp;
+
+ }
+
+ if(Fhandler->Ar_currentReadBuff)
+ {
+ Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
+ Fhandler->Ar_currentReadBuff=0;
}
while(!Fhandler->ar_full_buffers.empty())
@@ -458,18 +542,18 @@
delete tmp;
}
- while(!Fhandler->b_full_buffers.empty())
+ while(!Fhandler->write_full_buffers.empty())
{
- tmp= Fhandler->b_full_buffers.front();
- Fhandler->b_full_buffers.pop();
+ tmp= Fhandler->write_full_buffers.front();
+ Fhandler->write_full_buffers.pop();
delete []tmp->buff;
delete tmp;
}
- while(!Fhandler->b_empty_buffers.empty())
+ while(!Fhandler->write_empty_buffers.empty())
{
- tmp= Fhandler->b_empty_buffers.front();
- Fhandler->b_empty_buffers.pop();
+ tmp= Fhandler->write_empty_buffers.front();
+ Fhandler->write_empty_buffers.pop();
delete []tmp->buff;
delete tmp;
}
@@ -482,6 +566,9 @@
fclose(fp_Y);
fclose(fp_Ar);
fclose(fp_B);
+ fclose(fp_R);
+ fclose(fp_SD2);
+ fclose(fp_P);
//cout << "\nexited io\n";
@@ -514,15 +601,16 @@
//!read new rdy buffer
- pthread_mutex_lock(&(Fhandler->m_buff_upd));
- if(Fhandler->Ar_currentReadBuff)
- {
- Fhandler->ar_empty_buffers.push(Fhandler->Ar_currentReadBuff);
- }
+ pthread_mutex_lock(&(Fhandler->m_buff_upd));
- Fhandler->Ar_currentReadBuff = Fhandler->ar_full_buffers.front();
- Fhandler->ar_full_buffers.pop();
+ if(Fhandler->Ar_currentReadBuff)
+ {
+ Fhandler->ar_empty_buffers.push(Fhandler->Ar_currentReadBuff);
+ }
+ Fhandler->Ar_currentReadBuff = Fhandler->ar_full_buffers.front();
+ Fhandler->ar_full_buffers.pop();
+
//cout << "\nReading " << Fhandler->Ar_currentReadBuff << endl;
Fhandler->Ar = Fhandler->Ar_currentReadBuff->buff;
Ar_blockSize = Fhandler->Ar_currentReadBuff->size;
@@ -580,24 +668,23 @@
//!read new rdy buffer
pthread_mutex_lock(&(Fhandler->m_buff_upd));
- //cout << " pre," << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
+
if(Fhandler->currentReadBuff)
{
- //memset(Fhandler->currentReadBuff->buff,0,y_blockSize);
Fhandler->empty_buffers.push(Fhandler->currentReadBuff);
}
Fhandler->currentReadBuff = Fhandler->full_buffers.front();
Fhandler->full_buffers.pop();
- //cout << "\nReading " << Fhandler->currentReadBuff << endl;
+
Fhandler->Yb = Fhandler->currentReadBuff->buff;
y_blockSize = Fhandler->currentReadBuff->size;
(*Y) = Fhandler->Yb;
- //cout << " post," << Fhandler->full_buffers.size() << ";" << Fhandler->empty_buffers.size() << endl;
+
pthread_mutex_unlock(&(Fhandler->m_buff_upd));
@@ -611,50 +698,7 @@
}
-void AIOwrapper::write_B(type_precision* B, int p, int blockSize)
-{
-
- while(Fhandler->b_empty_buffers.empty())
- {
- pthread_mutex_lock(&(Fhandler->m_more));
- pthread_cond_signal( &(Fhandler->condition_more ));
- pthread_mutex_unlock(&(Fhandler->m_more));
-
- io_overhead = "b";
-
- pthread_mutex_lock(&(Fhandler->m_read));
- pthread_cond_wait( &(Fhandler->condition_read), &(Fhandler->m_read ));
- pthread_mutex_unlock(&(Fhandler->m_read));
-
- }
-
-
- pthread_mutex_lock(&(Fhandler->m_buff_upd));
-
-
-
- //cout << Fhandler->b_empty_buffers.size() << flush;
- Fhandler->currentWriteBuff = Fhandler->b_empty_buffers.front();
- Fhandler->b_empty_buffers.pop();
-
-
-
- Fhandler->B = Fhandler->currentWriteBuff->buff;
- Fhandler->b_blockSize = blockSize;
- copy_vec(B,Fhandler->B,p*blockSize);
-
- Fhandler->b_full_buffers.push(Fhandler->currentWriteBuff);
-
-
-
- pthread_mutex_unlock(&(Fhandler->m_buff_upd));
-
-
- pthread_mutex_lock(&(Fhandler->m_more));
- pthread_cond_signal( &(Fhandler->condition_more ));
- pthread_mutex_unlock(&(Fhandler->m_more));
-}
-
+
void AIOwrapper::prepare_Y(int y_blockSize, int n, int totalY)
{
//for fake files
@@ -679,10 +723,6 @@
tmp = new type_buffElement();
tmp->buff = new type_precision[Fhandler->n*Fhandler->y_blockSize];
tmp->size = y_blockSize;
-// for( int i = 0; i < Fhandler->n*Fhandler->y_blockSize; i++)
-// {
-// (tmp->buff)[i] = 0;
-// }
Fhandler->empty_buffers.push(tmp);
Fhandler->Yb = tmp->buff;
}
@@ -701,52 +741,98 @@
-}
-
-void AIOwrapper::prepare_B(int b_blockSize, int p)
+}
+
+void AIOwrapper::getCurrentWriteBuffers(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P)
+{
+ B = &(Fhandler->currentWriteBuff->buff[0]);
+ R = &(Fhandler->currentWriteBuff->buff[Fhandler->max_b_blockSize*Fhandler->p]);
+ SD2 = &(Fhandler->currentWriteBuff->buff[Fhandler->max_b_blockSize*(Fhandler->p+1)]);
+ P = &(Fhandler->currentWriteBuff->buff[Fhandler->max_b_blockSize*(Fhandler->p+2)]);
+}
+
+void AIOwrapper::write_OutFiles(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P, int blockSize)
{
- //for fake files
+ while(Fhandler->write_empty_buffers.empty())
+ {
+ pthread_mutex_lock(&(Fhandler->m_more));
+ pthread_cond_signal( &(Fhandler->condition_more ));
+ pthread_mutex_unlock(&(Fhandler->m_more));
- Fhandler->b_blockSize = b_blockSize;
+ io_overhead = "W";
- Fhandler->p=p;
+ pthread_mutex_lock(&(Fhandler->m_read));
+ pthread_cond_wait( &(Fhandler->condition_read), &(Fhandler->m_read ));
+ pthread_mutex_unlock(&(Fhandler->m_read));
+ }
- int buff_count = 4;
+ pthread_mutex_lock(&(Fhandler->m_buff_upd));
- Fhandler->currentWriteBuff = 0;
- type_buffElement* tmp;
+ Fhandler->write_full_buffers.push(Fhandler->currentWriteBuff);
+ Fhandler->b_blockSize = blockSize;
+
+
+ Fhandler->currentWriteBuff = Fhandler->write_empty_buffers.front();
+ Fhandler->write_empty_buffers.pop();
+
+ B = &(Fhandler->currentWriteBuff->buff[0]);
+ R = &(Fhandler->currentWriteBuff->buff[Fhandler->b_blockSize*Fhandler->p]);
+ SD2 = &(Fhandler->currentWriteBuff->buff[Fhandler->b_blockSize*(Fhandler->p+1)]);
+ P = &(Fhandler->currentWriteBuff->buff[Fhandler->b_blockSize*(Fhandler->p+2)]);
+ pthread_mutex_unlock(&(Fhandler->m_buff_upd));
+
+
+ pthread_mutex_lock(&(Fhandler->m_more));
+ pthread_cond_signal( &(Fhandler->condition_more ));
+ pthread_mutex_unlock(&(Fhandler->m_more));
+}
+
+
+
+
+
+void AIOwrapper::prepare_OutFiles(int max_b_blockSize, int p)
+{
+
+ Fhandler->max_b_blockSize = max_b_blockSize;
+ Fhandler->p=p;
+ int buff_count = 4;
+
+ type_buffElement* tmp;
+
+
for(int i = 0; i< buff_count ; i++)
{
-
tmp = new type_buffElement();
- tmp->buff = new type_precision[Fhandler->p*Fhandler->b_blockSize];
- tmp->size = b_blockSize;
-// for( int i = 0; i < Fhandler->n*Fhandler->b_blockSize; i++)
-// {
-// (tmp->buff)[i] = 0;
-// }
- Fhandler->b_empty_buffers.push(tmp);
-
-// Fhandler->currentWriteBuff = Fhandler->b_empty_buffers.front();
-// Fhandler->b_empty_buffers.pop();
+ tmp->buff = new type_precision[Fhandler->max_b_blockSize*(2*Fhandler->p+2)];
+ tmp->size = max_b_blockSize;
+ Fhandler->write_empty_buffers.push(tmp);
+ }
+ Fhandler->currentWriteBuff = Fhandler->write_empty_buffers.front();
+ Fhandler->write_empty_buffers.pop();
- }
-
}
+
+ void AIOwrapper::write_significantValues(int Y, int X_R, float R, float SD2, float P)
+ {
+
+ }
+
+
void AIOwrapper::reset_Y()
{
//void *status;
Fhandler->seed = 1337;
- //cout << "ry" << flush;
+ cout << "ry" << flush;
Fhandler->reset_wait = true;
pthread_barrier_wait(&(Fhandler->finalize_barrier));
@@ -789,38 +875,38 @@
//cout << "ra" << flush;
- Fhandler->reset_wait = true;
- pthread_barrier_wait(&(Fhandler->finalize_barrier));
+// Fhandler->reset_wait = true;
+// pthread_barrier_wait(&(Fhandler->finalize_barrier));
+//
+//// pthread_mutex_lock(&(Fhandler->m_buff_upd));
+//// Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
+////
+//// if(Fhandler->Ar_currentReadBuff)
+//// {
+//// Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
+//// Fhandler->Ar_currentReadBuff=0;
+//// }
+////
+//// while(!Fhandler->ar_full_buffers.empty())
+//// {
+//// Fhandler->ar_empty_buffers.push(Fhandler->ar_full_buffers.front());
+////// for( int i = 0; i < Fhandler->n*Fhandler->r*Fhandler->Ar_blockSize; i++)
+////// {
+////// ((Fhandler->ar_full_buffers.front())->buff)[i] = 0;
+////// }
+//// Fhandler->ar_full_buffers.pop();
+//// }
+//// pthread_mutex_unlock(&(Fhandler->m_buff_upd));
+////
+//// Fhandler->reset_wait = false;
+//
+// pthread_barrier_wait(&(Fhandler->finalize_barrier));
+//
+// pthread_mutex_lock(&(Fhandler->m_more));
+// pthread_cond_signal( &(Fhandler->condition_more ));
+// pthread_mutex_unlock(&(Fhandler->m_more));
- pthread_mutex_lock(&(Fhandler->m_buff_upd));
- Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
- if(Fhandler->Ar_currentReadBuff)
- {
- Fhandler->ar_full_buffers.push(Fhandler->Ar_currentReadBuff);
- Fhandler->Ar_currentReadBuff=0;
- }
-
- while(!Fhandler->ar_full_buffers.empty())
- {
- Fhandler->ar_empty_buffers.push(Fhandler->ar_full_buffers.front());
-// for( int i = 0; i < Fhandler->n*Fhandler->r*Fhandler->Ar_blockSize; i++)
-// {
-// ((Fhandler->ar_full_buffers.front())->buff)[i] = 0;
-// }
- Fhandler->ar_full_buffers.pop();
- }
- pthread_mutex_unlock(&(Fhandler->m_buff_upd));
-
- Fhandler->reset_wait = false;
-
- pthread_barrier_wait(&(Fhandler->finalize_barrier));
-
- pthread_mutex_lock(&(Fhandler->m_more));
- pthread_cond_signal( &(Fhandler->condition_more ));
- pthread_mutex_unlock(&(Fhandler->m_more));
-
-
}
void AIOwrapper::finalize_Y()
@@ -838,7 +924,7 @@
Fhandler->Ar_Amount = totalR;
Fhandler->Ar_to_readSize = Fhandler->Ar_Amount;
- int buff_count = min(3,(totalR+ desired_blockSize - 1)/desired_blockSize);
+ int buff_count = 4;
Fhandler->Ar_currentReadBuff = 0;
type_buffElement* tmp;
Modified: pkg/OmicABELnoMM/src/AIOwrapper.h
===================================================================
--- pkg/OmicABELnoMM/src/AIOwrapper.h 2014-07-23 13:50:12 UTC (rev 1767)
+++ pkg/OmicABELnoMM/src/AIOwrapper.h 2014-07-25 10:48:03 UTC (rev 1768)
@@ -25,7 +25,7 @@
string fnameY;
- string fnameOutB;
+ string fnameOutFiles;
list< pair<int,int> >* excl_List;
@@ -46,8 +46,8 @@
queue<type_buffElement*> empty_buffers;
queue<type_buffElement*> full_buffers;
- queue<type_buffElement*> b_empty_buffers;
- queue<type_buffElement*> b_full_buffers;
+ queue<type_buffElement*> write_empty_buffers;
+ queue<type_buffElement*> write_full_buffers;
queue<type_buffElement*> ar_empty_buffers;
queue<type_buffElement*> ar_full_buffers;
@@ -67,7 +67,8 @@
int y_blockSize;
int y_to_readSize;
- int b_blockSize;
+ int b_blockSize;
+ int max_b_blockSize;
bool not_done;
bool reset_wait;
@@ -139,7 +140,11 @@
void reset_Y();
void reset_AR();
- void write_B(type_precision* B, int p, int blockSize);
+ void getCurrentWriteBuffers(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P);
+
+ void write_OutFiles(type_precision* &B,type_precision* &R,type_precision* &SD2,type_precision* &P, int blockSize);
+
+ void write_significantValues(int Y, int X_R, float R, float SD2, float P);
string io_overhead;
@@ -161,8 +166,8 @@
void prepare_AL( int columns, int n);
void finalize_AL();
- void prepare_B(int b_blockSize, int p);
- void finalize_B();
+ void prepare_OutFiles(int max_b_blockSize, int p);
+ void finalize_OutFiles();
static void* async_io(void *ptr );
Modified: pkg/OmicABELnoMM/src/Algorithm.cpp
===================================================================
--- pkg/OmicABELnoMM/src/Algorithm.cpp 2014-07-23 13:50:12 UTC (rev 1767)
+++ pkg/OmicABELnoMM/src/Algorithm.cpp 2014-07-25 10:48:03 UTC (rev 1768)
@@ -621,7 +621,7 @@
get_ticks(start_tick2);
- AIOfile.write_B(B, p, a_block_size);
+ //AIOfile.write_B(B, p, a_block_size);
get_ticks(end_tick);
out.acc_storeb += ticks2sec(end_tick,start_tick2);
Modified: pkg/OmicABELnoMM/src/Definitions.h
===================================================================
--- pkg/OmicABELnoMM/src/Definitions.h 2014-07-23 13:50:12 UTC (rev 1767)
+++ pkg/OmicABELnoMM/src/Definitions.h 2014-07-25 10:48:03 UTC (rev 1768)
@@ -5,41 +5,110 @@
#define LINUX
#else
#define WINDOWS
-#endif
+#endif
+#include <unistd.h>
+#include <limits.h>
+#include <queue>
+#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <time.h> /* time */
#include <cstring>
#include <math.h>
-#include <omp.h>
+#include <omp.h>
+#include <pthread.h>
-
-
+
#ifdef WINDOWS
#include <windows.h>
- #include <cblas.h>
#else
- //#include "mpi.h"
- //#define cpu_freq 3.0
- #define cpu_freq 3.2
- #include "cblas.h"
+
+#endif
+
+//!For intel use propetary MKL, it will be preferred over others
+#ifdef __INTEL_MKL__
+ #pragma message("MKL will Probably NOT compile")
+ #include "mkl.h"
+ #include "cblas.h"
+ #include <lapacke.h>
+ #define blas_set_num_threads(n) mkl_set_num_threads(n)
+ #define STORAGE_TYPE LAPACK_COL_MAJOR
+#else
+
+ //!For AMD systems use the proper ACML library, preferred over openblas ON AMD
+ #ifdef _acml_
+ #pragma message("Compiled with AMD ACML")
+ #define blas_set_num_threads(n) omp_set_num_threads(n)
+
+ #include <acml.h>
+
+ #define lapack_int int
+
+ #define CblasTrans 'T'
+ #define CblasNoTrans 'N'
+ #define CblasUpper 'U'
+ #define CblasColMajor 1
+
+
+ #define STORAGE_TYPE CblasColMajor
+
+
+ #define cblas_snrm2 snrm2
+ #define cblas_saxpy saxpy
+
+ #ifndef BLASdefs_H_INCLUDED
+ #define BLASdefs_H_INCLUDED
+
+ inline void cblas_sgemm(int storage, char transa, char transb, int m, int n, int k, float alpha, float *a, int lda, float *b, int ldb, float beta, float *c, int ldc)
+ {
+ sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+ }
+
+ inline lapack_int LAPACKE_sposv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb )
+ {
+ int info;
+ sposv( uplo, n, nrhs, a, lda, b, ldb, &info);
+ return info;
+ }
+
+ inline void cblas_ssyrk(int Order, char uplo, char Trans,
+ int N, int K, float alpha, float *A, int lda, float beta, float *C, int ldc)
+ {
+ ssyrk(uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
+ }
+
+ inline lapack_int LAPACKE_sgels( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb )
+ {
+ int info;
+ sgels(trans, m, n, nrhs, a, lda, b, ldb,&info);
+ return info;
+ }
+
+ #endif
+
+
+
+
+
+ #else
+
+ //!IF MKL is not present on INTEL, use openblas
+ #ifdef _openblas_
+ #pragma message("Compiled with OPENBLAS")
+ #define STORAGE_TYPE LAPACK_COL_MAJOR
+ #include "cblas.h"
+ #include <lapacke.h>
+ extern "C" void openblas_set_num_threads(int num_threads);
+ #define blas_set_num_threads(n) openblas_set_num_threads(n)
+ #endif
+
+ #endif
+
#endif
-#ifdef __INTEL_MKL__
- #include "mkl.h"
- #define blas_set_num_threads(n) mkl_set_num_threads(n)
-#else
- extern "C" void openblas_set_num_threads(int num_threads);
- #define blas_set_num_threads(n) openblas_set_num_threads(n)
-#endif
-#include <unistd.h>
-#include <pthread.h>
-#include <limits.h>
-#include <queue>
-#include <iostream>
-#include <lapacke.h>
+
//!SETTINGS
#define EXTENDEDTEST 0
@@ -47,7 +116,7 @@
#define OUTPUT 0
-#define STORAGE_TYPE LAPACK_COL_MAJOR
+
#define type_precision float
#define MIN(a,b) (((a)<(b))?(a):(b))
@@ -56,86 +125,8 @@
#define _10MB 10*_1MB
#define _1GB 1024*1024*1024
-//!for CPU speed!
-//#ifdef WIN32
-//#define WIN32_LEAN_AND_MEAN
-//#include <windows.h>
-//typedef unsigned __int64 usCount;
-//static usCount GetUsCount()
-//{
-// static LARGE_INTEGER ticksPerSec;
-// static double scalefactor;
-// LARGE_INTEGER val;
-// if (!scalefactor)
-// {
-// if (QueryPerformanceFrequency(&ticksPerSec))
-// scalefactor=ticksPerSec.QuadPart/1000000000000.0;
-// else
-// scalefactor=1;
-// }
-// if (!QueryPerformanceCounter(&val))
-// return (usCount) GetTickCount() * 1000000000;
-// return (usCount) (val.QuadPart/scalefactor);
-//}
-//#else
-//#include <sys/time.h>
-//#include <time.h>
-//#include <sched.h>
-//typedef unsigned long long usCount;
-//static usCount GetUsCount()
-//{
-//#ifdef CLOCK_MONOTONIC
-// struct timespec ts;
-// clock_gettime(CLOCK_MONOTONIC, &ts);
-// return ((usCount) ts.tv_sec*1000000000000LL)+ts.tv_nsec*1000LL;
-//#else
-// struct timeval tv;
-// gettimeofday(&tv, 0);
-// return ((usCount) tv.tv_sec*1000000000000LL)+tv.tv_usec*1000000LL;
-//#endif
-//}
-//#endif
-//static usCount usCountOverhead;
-//#ifdef __GNUC__
-//#include "x86intrin.h"
-//#define __rdtsc() __builtin_ia32_rdtsc()
-//#endif
-//static usCount GetClockSpeed()
-//{
-// int n;
-// usCount start, end, start_tsc, end_tsc;
-// if (!usCountOverhead)
-// {
-// usCount foo = 0;
-// start=GetUsCount();
-// for (n = 0; n < 1000000; n++)
-// {
-// foo += GetUsCount();
-// }
-// end = GetUsCount();
-// usCountOverhead = (end - start)/n;
-// }
-//
-// start = GetUsCount();
-// start_tsc = __rdtsc();
-// for (n = 0; n <1000; n++)
-// {
-//#ifdef WIN32
-// Sleep(0);
-//#else
-// sched_yield();
-//#endif
-// }
-//
-// end_tsc = __rdtsc();
-// end = GetUsCount();
-// return(usCount)((1000000000000.0 * (end_tsc - start_tsc)) /
-// (end - start - usCountOverhead));
-//}
-
-
using namespace std;
@@ -149,7 +140,9 @@
int p;
int tb;
int mb;
- int id;
+ int id;
+
+ float sig_threshold;
int threads;
@@ -158,7 +151,7 @@
string fnameAL;
string fnameAR;
string fnameY;
- string fnameOutB;
+ string fnameOutFiles;
string fname_excludelist;
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/genabel -r 1768
More information about the Genabel-commits
mailing list