[Rcpp-commits] r3056 - pkg/RcppEigen/inst/include/Eigen/src/LU/arch

Mon Jun 13 23:01:52 CEST 2011

Author: dmbates
Date: 2011-06-13 23:01:50 +0200 (Mon, 13 Jun 2011)
New Revision: 3056

Modified:
   pkg/RcppEigen/inst/include/Eigen/src/LU/arch/Inverse_SSE.h
Log:
Use a __m128d value instead of a long long int array for the sign masks.


Modified: pkg/RcppEigen/inst/include/Eigen/src/LU/arch/Inverse_SSE.h
===================================================================

--- pkg/RcppEigen/inst/include/Eigen/src/LU/arch/Inverse_SSE.h	2011-06-13 21:01:12 UTC (rev 3055)
+++ pkg/RcppEigen/inst/include/Eigen/src/LU/arch/Inverse_SSE.h	2011-06-13 21:01:50 UTC (rev 3056)
@@ -182,8 +182,11 @@
   };
   static void run(const MatrixType& matrix, ResultType& result)
   {
-    const EIGEN_ALIGN16 long long int _Sign_NP[2] = { 0x8000000000000000ll, 0x0000000000000000ll };
-    const EIGEN_ALIGN16 long long int _Sign_PN[2] = { 0x0000000000000000ll, 0x8000000000000000ll };
+//    const EIGEN_ALIGN16 long long int _Sign_NP[2] = { 0x8000000000000000ll, 0x0000000000000000ll };
+//    const EIGEN_ALIGN16 long long int _Sign_PN[2] = { 0x0000000000000000ll, 0x8000000000000000ll };
+      
+      const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+      const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
 
     // The inverse is calculated using "Divide and Conquer" technique. The
     // original matrix is divide into four 2x2 sub-matrices. Since each
@@ -316,8 +319,10 @@
     iB1 = _mm_sub_pd(_mm_mul_pd(C1, dB), iB1);
     iB2 = _mm_sub_pd(_mm_mul_pd(C2, dB), iB2);
 
-    d1 = _mm_xor_pd(rd, _mm_load_pd((double*)_Sign_PN));
-    d2 = _mm_xor_pd(rd, _mm_load_pd((double*)_Sign_NP));
+    d1 = _mm_xor_pd(rd, _Sign_PN);
+    d2 = _mm_xor_pd(rd, _Sign_NP);
+//    d1 = _mm_xor_pd(rd, _mm_load_pd((double*)_Sign_PN));
+//    d2 = _mm_xor_pd(rd, _mm_load_pd((double*)_Sign_NP));
 
     //  iC = B*|C| - A*C#*D;
     dC = _mm_shuffle_pd(dC,dC,0);