[Genabel-commits] r1058 - pkg/OmicABEL/src

Tue Dec 18 16:31:37 CET 2012

Author: dfabregat
Date: 2012-12-18 16:31:36 +0100 (Tue, 18 Dec 2012)
New Revision: 1058

Modified:
   pkg/OmicABEL/src/REML.c
   pkg/OmicABEL/src/fgls_eigen.c
   pkg/OmicABEL/src/utils.c
   pkg/OmicABEL/src/utils.h
Log:
[METHODOLOGY] Phi should be SPD. The negatives eigenvalues are set to
  eps * max( positive eigenvalue )
- REML adjusted to the method above
- fgle_eigen adjusted to above and cleaned up a bit
- Added a function to calculate the machine epsilon
- Removed the function to load problem dimensions from a config file


Modified: pkg/OmicABEL/src/REML.c
===================================================================

--- pkg/OmicABEL/src/REML.c	2012-12-15 16:11:08 UTC (rev 1057)
+++ pkg/OmicABEL/src/REML.c	2012-12-18 15:31:36 UTC (rev 1058)
@@ -261,21 +261,31 @@
 void estimates_eig( FGLS_config_t *cf )
 {
 	estimates_eig_t data;
+	double eps; // machine epsilon
 	int j, k;
-#if DEBUG
-	FILE *f_ests;
-#endif
 	
 	// Dimensions
 	data.n   = cf->n;
 	data.wXL = cf->wXL;
-	// Load data
+	// Load Phi
 	data.Phi = cf->Phi;
 	// Compute eigenvectors and eigenvalues in data.Z and data.W
 	cf->Z = data.Z = fgls_malloc( data.n * data.n * sizeof(double) );
 	cf->W = data.W = fgls_malloc( data.n * sizeof(double) );
-	set_multi_threaded_BLAS(cf->num_threads); // Probably can be refined
+	set_multi_threaded_BLAS(cf->num_threads); // Probably can be refined for the iterations below
 	eigen_mr3(data.n, data.Phi, data.Z, data.W);
+	// Phi should be SPD. We set the negative eigenvalues to 
+	//   "epsilon * max( positive eigvalue )"
+	// Eigenvalues are sorted in ascendent order:
+	//   - min eigenvalue in W[0]
+	//   - max eigenvalue in W[n-1]
+	eps = get_epsilon();
+	j = 0;
+	while ( cf->W[j] < 0.0 && j < cf->n )
+	{
+		cf->W[j] = eps * cf->W[cf->n-1];
+		j++;
+	}
 	// Phi is not used anymore
 	free( cf->Phi ); cf->Phi = NULL;
 	// Load XL
@@ -301,12 +311,11 @@
 		sync_read( data.ZtY, cf->ZtY, cf->n, cf->n * j );
 		data.sigma = variance(data.Y, data.n);
 
-		// [a,b] = [0,0.98], tol = 0.01
+		// range [a,b] = [0,0.99], tolerance = 10^{-8}
 		minimize( 0.0, 0.99, 1e-8, &polygenic_REML_logLik_eig_wrapper, (void*)&data );
 #if 0 //DEBUG
 		printf("h: %.15e - sigma: %.15e - res_sigma: %.15e\n", data.h, data.sigma, data.res_sigma);
 #endif
-
 		cf->h2[j] = data.h;
 		cf->sigma2[j] = data.sigma;
 		cf->res_sigma2[j] = data.res_sigma;
@@ -314,17 +323,10 @@
 			cf->beta_ests[cf->wXL*j+k] = data.beta[k];
 	}
 
-#if DEBUG
-	// Store data - For testing purposes
-/*	f_ests = fgls_fopen( cf->ests_path, "wb" );
-	sync_write( cf->ests, f_ests, 3 * cf->t, 0 );
-	fclose( f_ests );*/
-#endif
 	// Encapsulate (maybe reuse)
 	free( data.Y );
 	free( data.beta );
 	free( data.ZtY );
-	// Free Phi? Not used anymore, plus wrong data (from the eigensolver overwritting it)
 }
 
 double polygenic_REML_logLik_eig (
@@ -336,7 +338,6 @@
 )
 {
     double alpha, gamma, // to scale W
-//           *beta,
            *ZtY_upd, *ZtX_upd, *YmXB, *V, *D, det, // temporary
            ZERO = 0.0,
            ONE = 1.0,
@@ -344,138 +345,66 @@
 
     int    wXL = widthXL,
            iONE = 1, info,
-           i, j; //, k;
+           i, j;
 
-	/*printf("n: %d\n", n);*/
-	/*printf("widthXL: %d\n", widthXL);*/
-	/*checkNoNans(widthXL*n, X, "X");*/
-	/*checkNoNans(n, Y, "Y");*/
-	/*checkNoNans(n*n, Z, "Z");*/
-	/*checkNoNans(n, W, "W");*/
-	/*checkNoNans(n, ZtY, "ZtY");*/
-	/*checkNoNans(widthXL * n, ZtX, "ZtX");*/
-	/*printf("sigma: %f\n", sigma);*/
-	/*printf("h2: %f\n", h2);*/
-	/*beta    = (double *) fgls_malloc ( wXL * sizeof(double) );*/
     V       = (double *) fgls_malloc ( wXL * wXL * sizeof(double) );
 	ZtY_upd = (double *) fgls_malloc ( n * sizeof(double) );
 	ZtX_upd = (double *) fgls_malloc ( n * wXL * sizeof(double) );
 	D = (double *) fgls_malloc ( n * sizeof(double) );
 	YmXB = (double *) fgls_malloc ( n * sizeof(double) );
 
-	/* W = sqrt( inv ( W ) ) */
+	/* 
+	 * W = sqrt( inv ( W ) ) 
+	 * det(M) = sum ( log( eigvalues(M) ) ) 
+	 */
     alpha = h2; // * sigma2;
     gamma = (1 - h2); // * sigma2;
 	det = 0.0;
-	/*checkNoNans(n, W, "\n[ERROR] W\n");*/
 	for ( i = 0; i < n; i++ )
 	{
 		D[i] = alpha * W[i] + gamma;
-		det += log(fabs(D[i]));
-		D[i] = 1.0 / D[i];
+		det += log( D[i] );
+		D[i] = sqrt( 1.0 / D[i] );
 	}
 
 	/* W * ZtX */
 	/* W * ZtY */
-#if 0
-	/*printf("ZtX: %x\n", ZtX);*/
-	for (i = 0; i < wXL; i++)
-	{
-		for (j = 0; j < wXL; j++)
-			printf("%.16e ", ZtX[j*n + i]);
-		printf("\n");
-	}
-	printf("\n");
-	for (i = 0; i < wXL; i++)
-		printf("%.16e ", D[i]);
-	printf("\n\n");
-#endif
-	/*checkNoNans(n, D, "\n[ERROR] D\n");*/
-	// inv( X' inv(M) X) X' inv(M) y
-	// inv(M) -> inv( h Phi + (1-h) I )
-	// inv(M) -> inv( h Z W Z' + (1-h) I )
-	// inv(M) -> Z inv( h W + (1-h) I ) Z'
-	// inv( X' Z D Z' X) X' Z D Z' y
-	// inv( X' Z D Z' X) X' Z D Z' y
-	// inv(V) y
 	for (i = 0; i < n; i++ )
 	{
 		for (j = 0; j < wXL; j++ )
 			ZtX_upd[j*n + i] = ZtX[j*n + i] * D[i];
-		/*ZtY_upd[i] = ZtY[i] * D[i];*/
-		ZtY_upd[i] = ZtY[i];
+		ZtY_upd[i] = ZtY[i] * D[i];
 	}
 
     /* 5) beta := X' * y */
-	/*checkNoNans(n, ZtY_upd, "\n[ERROR] ZtY_upd\n");*/
     dgemv_(TRANS, &n, &widthXL, &ONE, ZtX_upd, &n, ZtY_upd, &iONE, &ZERO, beta, &iONE);
 
     /* 6) V := X' * X */
-#if 0
-	for (i = 0; i < wXL; i++)
-	{
-		for (j = 0; j < wXL; j++)
-			printf("%.16e ", ZtX_upd[j*n + i]);
-		printf("\n");
-	}
-	printf("\n");
-#endif
-	/*checkNoNans(wXL*n, ZtX_upd, "\n[ERROR] ZtX_upd\n");*/
-    dgemm_(TRANS, NO_TRANS, &wXL, &wXL, &n, &ONE, ZtX, &n, ZtX_upd, &n, &ZERO, V, &wXL);
-	/*dsyrk_(LOWER, TRANS, &wXL, &n, &ONE, ZtX_upd, &n, &ZERO, V, &wXL);*/
+	dsyrk_(LOWER, TRANS, &wXL, &n, &ONE, ZtX_upd, &n, &ZERO, V, &wXL);
 
-#if 0
-	printf("wXL: %d\n", wXL);
-	for (i = 0; i < wXL; i++)
-	{
-		for (j = 0; j < wXL; j++)
-			printf("%.16e ", V[j*wXL + i]);
-		printf("\n");
-	}
-	printf("\n");
-#endif
-	/*checkNoNans(wXL*wXL, V, "V");*/
-	int *ipiv = (int *) fgls_malloc (wXL * sizeof(int));
-	int lwork = wXL * 192; // nb
-	double *work = (double *) fgls_malloc ( lwork * sizeof(double));
-    dsysv_(LOWER, &wXL, &iONE, V, &wXL, ipiv, beta, &wXL, work, &lwork, &info);
-	/*dpotrf_(LOWER, &wXL, V, &wXL, &info);*/
+	/*dposv_(LOWER, &wXL, &iONE, V, &wXL, beta, &wXL, &info);*/
+	dpotrf_(LOWER, &wXL,  V, &wXL, &info);
     if (info != 0)
     {
         fprintf(stderr, __FILE__ ": [ERROR] inv(V) y failed - info: %d\n", info);
         exit(-1);
     }
     /* beta := inv( V ) beta */
-	/*dtrsv_(LOWER, NO_TRANS, NON_UNIT, &wXL, V, &wXL, beta, &iONE);*/
-	/*dtrsv_(LOWER,    TRANS, NON_UNIT, &wXL, V, &wXL, beta, &iONE);*/
+	dtrsv_(LOWER, NO_TRANS, NON_UNIT, &wXL, V, &wXL, beta, &iONE);
+	dtrsv_(LOWER,    TRANS, NON_UNIT, &wXL, V, &wXL, beta, &iONE);
 
     // y - X beta
 	memcpy( YmXB, Y, n * sizeof(double) );
-    dgemv_(NO_TRANS, 
+    dgemv_( NO_TRANS, 
             &n, &wXL, 
             &MINUS_ONE, X, &n, beta, &iONE,
-            &ONE, YmXB, &iONE);
+            &ONE, YmXB, &iONE );
 
-/*	memcpy( YmXB, ZtY, n * sizeof(double) );
-    dgemv_(NO_TRANS, 
-            &n, &wXL, 
-            &MINUS_ONE, ZtX, &n, beta, &iONE,
-            &ONE, YmXB, &iONE); // YmXB == y_0
-*/
     // residual sigma and loglik
 	*res_sigma = variance( YmXB, n );
-
 	// loglik = a + b
 	//  a -> log(det(M))
-	//  b -> qt' inv(M) qt
-	//  qt = YmXB
-
-	// Z' * YmXB
-	// 
-	
-	//  qt = YmXB
-	//  b -> qt' 1/sigma Z D Z' qt
-	//  b -> 1/sigma (ZtY' D ZtY)
+	//  b -> YmXB' inv(M) YmXB
     dgemv_(TRANS, 
             &n, &n, 
             &ONE, Z, &n, YmXB, &iONE,
@@ -483,24 +412,19 @@
 	// YmXB' * inv( M ) * YmXB
 	*loglik = 0.0;
 	for (i = 0; i < n; i++ )
-	//	*loglik += YmXB[i] * D[i] * YmXB[i];
-		*loglik += ZtY_upd[i] * D[i] * ZtY_upd[i];
-	/**loglik += ZtY_upd[i] * D[i] * D[i] * ZtY_upd[i];*/
-	/**res_sigma = (*loglik) / (n - wXL);*/
+		*loglik += ZtY_upd[i] * D[i] * D[i] * ZtY_upd[i];
 	*loglik = (*loglik) / (*res_sigma); // <- b
-    *loglik = (*loglik) + det + n * log(*res_sigma);
+	// a + b -> det + loglik above
+	//   det incomplete, we must take into account the sigma scaling M -> "+ n * log(*res_sigma)"
+    *loglik = det + n * log(*res_sigma) + (*loglik) ;
 
     // Clean up
-	/*free( beta );*/
 	free( V );
 	free( ZtY_upd );
 	free( ZtX_upd );
 	free( YmXB );
 	free( D );
 
-	free( ipiv );
-	free( work );
-	
 	return *loglik;
 }
 
@@ -561,21 +485,16 @@
         exit(-1);
     }
 
+#if 0
 	int i;
 	int negs = 0;
 	for (i = 0; i < n; i++)
 	{
 		if (W[i] < 0)
 			negs++;
-		/*else*/
-		/*break;*/
-		if (i > 0 && W[i-1] > W[i])
-			printf("Not sorted!!!!!!!!!!!!!\n");
 	}
 	/*printf("# of negative eigenvalues: %d\n", negs);*/
-	/*printf("First eigenvalue: %.15e\n", W[0]);*/
-	/*printf("Lowest eigen values [-1]: %.15e\n", W[n-1]);*/
-	/*printf("Lowest eigen values [-2]: %.15e\n", W[n-2]);*/
+#endif
 
 	free( work );
 	free( iwork );

Modified: pkg/OmicABEL/src/fgls_eigen.c
===================================================================
--- pkg/OmicABEL/src/fgls_eigen.c	2012-12-15 16:11:08 UTC (rev 1057)
+++ pkg/OmicABEL/src/fgls_eigen.c	2012-12-18 15:31:36 UTC (rev 1058)
@@ -33,7 +33,6 @@
 #include <sys/time.h>
 #include <time.h>
 
-/*#include <pthread.h>*/
 #include <omp.h>
 #include <aio.h>
 
@@ -101,7 +100,7 @@
     return 0;
 }
 
-void ooc_loops( eigen_loops_t *loops_t) 
+void ooc_loops( eigen_loops_t *loops_t ) 
 {
 	/*eigen_loops_t *loops_t = ( eigen_loops_t* ) in;*/
     FGLS_config_t *cf = loops_t->cf;
@@ -205,32 +204,11 @@
         }
 
         /* Winv := sqrt( inv( alpha W - beta I ) ) */
-        // Best order? sqrt - inv
-        //
         // Possibly GER
         for (k = 0; k < y_inc; k++)
-        {
             for (l = 0; l < n; l++)
-			{
-                Winv[k*n + l] = sqrt(1.0 / fabs(loops_t->alpha[k] * cf->W[l] + loops_t->beta[k])); // FABS!
-				if (isnan(Winv[k*n+l]))
-				{
-					fprintf(stderr, "k: %d\n", k);
-					fprintf(stderr, "l: %d\n", l);
-					fprintf(stderr, "Eigenvalue: %.15e\n", cf->W[l]);
-					fprintf(stderr, "h: %.15e\n", cf->h2[jb + k]);
-					fprintf(stderr, "sigma: %.15e\n", cf->res_sigma2[jb + k]);
-					fprintf(stderr, "alpha: %.15e\n", loops_t->alpha[k]);
-					fprintf(stderr, "beta:  %.15e\n", loops_t->beta[k]);
-					fprintf(stderr, "alpha ev: %.15e\n", loops_t->alpha[k] * cf->W[l]);
-					fprintf(stderr, "With sigma: %.15e\n", loops_t->alpha[k] * cf->W[l] + loops_t->beta[k]);
-					fprintf(stderr, "Without: %.15e\n", (loops_t->alpha[k] * cf->W[l] + loops_t->beta[k])/cf->res_sigma2[jb + k]);
-					/*fprintf(stderr, "%.15e\n", 1.0 / (loops_t->alpha[k] * cf->W[l] + loops_t->beta[k]));*/
-				}
-			}
-        }
-		checkNoNans(n, cf->W, "Nans in W\n");
-		checkNoNans(n*y_inc, Winv, "Nans in Winv\n");
+                Winv[k*n + l] = sqrt(1.0 / (loops_t->alpha[k] * cf->W[l] + loops_t->beta[k]));
+		//checkNoNans(n*y_inc, Winv, "Nans in Winv\n");
 
         /* y := sqrt(Winv) * Z' * y */
         for (k = 0; k < y_inc; k++)
@@ -242,7 +220,7 @@
           for ( k = 0; k < wXL; k++ )
               for ( l = 0; l < n; l++ )
                   XL_copy[ ll * wXL * n + k * n + l ] *= Winv[ll * n + l];
-		checkNoNans(n*wXL, XL_copy, "Nans in XL\n");
+		//checkNoNans(n*wXL, XL_copy, "Nans in XL\n");
           
         /* B_t := XL' * y */
         for (ll = 0; ll < y_inc; ll++)
@@ -285,7 +263,7 @@
             VT_USER_END("WAIT_X");
 #endif
             XR_comp = double_buffering_get_comp_buffer( db_XR );
-			checkNoNans(n*x_inc, XR_comp, "Nans in XR\n");
+			//checkNoNans(n*x_inc, XR_comp, "Nans in XR\n");
 
 			double *Bij;
 			int jt, it, xtile = cf->x_tile, ytile = cf->y_tile;
@@ -325,7 +303,7 @@
 									   &ONE, XR_copy, &int_n, &Y_comp[j* n], &iONE, 
 									   &ZERO, &oneB[wXL], &iONE);
 
-								checkNoNans(p, oneB, "Nans in oneB\n");
+								//checkNoNans(p, oneB, "Nans in oneB\n");
 			  
 								  /* Building V */
 								  // Copy V_TL
@@ -342,14 +320,14 @@
 										 &ONE, XR_copy, &int_n, 
 										 &ZERO, &oneV[wXL * p + wXL], &int_p);
 
-								checkNoNans(p*p, oneV, "Nans in oneV\n");
+								//checkNoNans(p*p, oneV, "Nans in oneV\n");
 			  
 								  /* B := inv(V) * y 
 									int ii, jj;
 									for (ii = 0; ii < p; ii++)
 									{
 										for (jj = 0; jj < p; jj++)
-											printf("%.16e ", oneV[jj*n + ii]);
+											printf("%.16e ", oneV[jj*p + ii]);
 										printf("\n");
 									}
 									printf("\n");*/
@@ -376,11 +354,7 @@
 									for ( k = 0; k < p; k++ )
 										Bij[k] = (float) oneB[k];
 									for ( k = 0; k < p; k++ )
-									{
-										/*if (oneV[k*p+k] < 0)*/
-										/*printf("%f < 0\n", oneV[k*p+k]);*/
 										Bij[p+k] = (float)sqrt(oneV[k*p+k]);
-									}
 									int idx = 0;
 									for ( k = 0; k < p-1; k++ ) // Cols of V
 										for ( l = k+1; l < p; l++ ) // Rows of V
@@ -389,11 +363,6 @@
 											idx++;
 										}
 #if 0
-								  printf("\nh2: %.16e\n",       cf->h2[jb+j]);
-								  printf("s2: %.16e\n"        , cf->sigma2[jb+j]);
-								  printf("res_sigma2: %.16e\n", cf->res_sigma2[jb+j]);
-								  for ( k = 0; k < (p+ p*(p+1)/2); k++)
-									  printf("%.16e\n", Bij[k]);
 								  printf("Chi square: %.6f\n", ( (oneB[p-1] / Bij[p+p-1]) * (oneB[p-1] / Bij[p+p-1]) ) );
 #endif
 							}
@@ -415,9 +384,7 @@
       VT_USER_START("WAIT_B");
 #endif
       if ( jb > 0 || ib > 0 )
-      {
-                double_buffering_wait( db_B, IO_BUFF );
-      }
+		double_buffering_wait( db_B, IO_BUFF );
 #if VAMPIR
       VT_USER_END("WAIT_B");
 #endif
@@ -453,13 +420,6 @@
       VT_USER_END("WAIT_B");
 #endif
 
-	  /*long long  int flops = t * (3 + 5*n + 3*n*wXL + n*wXL*wXL) + m*n*t*wXR + */
-	  /*m * t * (p*p*p + 2*n*wXR + 2*n*wXL*wXR + n*wXR*wXR);*/
-
-/*	long long  int flops = m * t * (p*p*p + n*wXR + 2*n*wXR + 2*n*wXL*wXR + n*wXR*wXR);
-	printf("\n\nKernel efficiency: %.6f\n", (float)flops / (total/1000.) / (320*1e9));
-	printf("\n\nTotal time: %.6f\n",  total/1000.);
-*/
 	  free(XR_copy_all);
 }
 

Modified: pkg/OmicABEL/src/utils.c
===================================================================
--- pkg/OmicABEL/src/utils.c	2012-12-15 16:11:08 UTC (rev 1057)
+++ pkg/OmicABEL/src/utils.c	2012-12-18 15:31:36 UTC (rev 1058)
@@ -71,6 +71,16 @@
 #endif
 }
 
+double get_epsilon( void )
+{
+	double eps = 1.0;
+
+	while ( (1.0 + eps/2.0) > 1.0 )
+		eps = eps / 2.0;
+
+	return eps;
+}
+
 void get_main_memory_size( size_t *totalMem, size_t *availMem )
 {
     FILE *fp;
@@ -124,40 +134,6 @@
 #endif
 }
 
-void load_dimensions( FGLS_config_t *cf, char *dir )
-{
-    FILE *fp;
-    char config_file[STR_BUFFER_SIZE], buff[STR_BUFFER_SIZE];
-    char dim; int size;
-
-    snprintf( config_file, STR_BUFFER_SIZE, "%s/dims.in", dir );
-    fp = fgls_fopen( config_file, "r" );
-    fgets( buff, STR_BUFFER_SIZE, fp );
-    while ( ! feof( fp ) )
-    {
-        sscanf( buff, "%c:%d", &dim, &size );
-        switch (dim)
-        {
-            case 'n':
-                cf->n = size;
-                break;
-            case 'p':
-                cf->p = size + 2;
-                break;
-            case 'm':
-                cf->m = size;
-                break;
-            case 't':
-                cf->t = size;
-                break;
-            default:
-                fprintf(stderr, "Malformed input file. See doc for the expected format\n" );
-        }
-        fgets( buff, STR_BUFFER_SIZE, fp );
-    }
-    fclose( fp );
-}
-
 void estimate_block_sizes( FGLS_config_t *cf, char var, int estimate_inc )
 {
     size_t avail_mem = cf->availMem;

Modified: pkg/OmicABEL/src/utils.h
===================================================================
--- pkg/OmicABEL/src/utils.h	2012-12-15 16:11:08 UTC (rev 1057)
+++ pkg/OmicABEL/src/utils.h	2012-12-18 15:31:36 UTC (rev 1058)
@@ -34,8 +34,11 @@
 void set_multi_threaded_BLAS( int nths );
 void set_single_threaded_BLAS( void );
 
+// Compute an approximation of the machine epsilon
+// in double precision (within a factor of two)
+double get_epsilon( void );
+
 // Startup
-void load_dimensions( FGLS_config_t *cf, char *dir );
 void get_main_memory_size( size_t *totalMem, size_t *availMem );
 void estimate_block_sizes( FGLS_config_t *cf, char var, int estimate_inc );